From 6c34fd20beb74e009778870a8e30811b393f745c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 11 Feb 2015 18:15:44 +0200 Subject: [PATCH] i965/vec4: Calculate register allocation q values manually. This fixes a regression in the running time of Piglit introduced by commit 78e9043475d4bed8b50f7e413963c960fa0935bb, which increased the number of register allocation classes set up by the VEC4 back-end from 2 to 16. The algorithm used by ra_set_finalize() to calculate them is unnecessarily expensive, do it manually like the FS back-end does. Reported-by: Mark Janes Reviewed-by: Matt Turner --- .../drivers/dri/i965/brw_vec4_reg_allocate.cpp | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp index 46f0bfd085a..a286f8ace77 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp @@ -129,10 +129,13 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) * between them and the base GRF registers (and also each other). */ int reg = 0; + unsigned *q_values[MAX_VGRF_SIZE]; for (int i = 0; i < class_count; i++) { int class_reg_count = base_reg_count - (class_sizes[i] - 1); screen->vec4_reg_set.classes[i] = ra_alloc_reg_class(screen->vec4_reg_set.regs); + q_values[i] = new unsigned[MAX_VGRF_SIZE]; + for (int j = 0; j < class_reg_count; j++) { ra_class_add_reg(screen->vec4_reg_set.regs, screen->vec4_reg_set.classes[i], reg); @@ -146,10 +149,23 @@ brw_vec4_alloc_reg_set(struct intel_screen *screen) reg++; } + + for (int j = 0; j < class_count; j++) { + /* Calculate the q values manually because the algorithm used by + * ra_set_finalize() to do it has higher complexity affecting the + * start-up time of some applications. q(i, j) is just the maximum + * number of registers from class i a register from class j can + * conflict with. + */ + q_values[i][j] = class_sizes[i] + class_sizes[j] - 1; + } } assert(reg == ra_reg_count); - ra_set_finalize(screen->vec4_reg_set.regs, NULL); + ra_set_finalize(screen->vec4_reg_set.regs, q_values); + + for (int i = 0; i < MAX_VGRF_SIZE; i++) + delete[] q_values[i]; } void -- 2.30.2