From 2d288cb9ea5b1b46eb4fe0061d694560bf54943f Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Wed, 18 May 2016 13:52:25 -0700 Subject: [PATCH] i965/fs: Implement SIMD32 register allocation support. Reviewed-by: Jason Ekstrand --- src/mesa/drivers/dri/i965/brw_compiler.h | 2 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 2 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 13 +++++++------ 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h index a8fb4864e6e..0844694694c 100644 --- a/src/mesa/drivers/dri/i965/brw_compiler.h +++ b/src/mesa/drivers/dri/i965/brw_compiler.h @@ -85,7 +85,7 @@ struct brw_compiler { * appear in *classes. */ int aligned_pairs_class; - } fs_reg_sets[2]; + } fs_reg_sets[3]; void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index f1afbd095ed..cfe9f023537 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5803,7 +5803,7 @@ fs_visitor::allocate_registers(bool allow_spilling) * SIMD8. There's probably actually some intermediate point where * SIMD16 with a couple of spills is still better. */ - if (dispatch_width == 16 && min_dispatch_width <= 8) { + if (dispatch_width > min_dispatch_width) { fail("Failure to register allocate. Reduce number of " "live scalar values to avoid this."); } else { diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp index cd84dc22254..d5c54e0e02b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp @@ -77,7 +77,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) { const struct brw_device_info *devinfo = compiler->devinfo; int base_reg_count = BRW_MAX_GRF; - int index = (dispatch_width / 8) - 1; + const int index = _mesa_logbase2(dispatch_width / 8); if (dispatch_width > 8 && devinfo->gen >= 7) { /* For IVB+, we don't need the PLN hacks or the even-reg alignment in @@ -115,7 +115,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) /* Compute the total number of registers across all classes. */ int ra_reg_count = 0; for (int i = 0; i < class_count; i++) { - if (devinfo->gen <= 5 && dispatch_width == 16) { + if (devinfo->gen <= 5 && dispatch_width >= 16) { /* From the G45 PRM: * * In order to reduce the hardware complexity, the following @@ -162,7 +162,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) int pairs_reg_count = 0; for (int i = 0; i < class_count; i++) { int class_reg_count; - if (devinfo->gen <= 5 && dispatch_width == 16) { + if (devinfo->gen <= 5 && dispatch_width >= 16) { class_reg_count = (base_reg_count - (class_sizes[i] - 1)) / 2; /* See comment below. The only difference here is that we are @@ -208,7 +208,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width) pairs_reg_count = class_reg_count; } - if (devinfo->gen <= 5 && dispatch_width == 16) { + if (devinfo->gen <= 5 && dispatch_width >= 16) { for (int j = 0; j < class_reg_count; j++) { ra_class_add_reg(regs, classes[i], reg); @@ -289,6 +289,7 @@ brw_fs_alloc_reg_sets(struct brw_compiler *compiler) { brw_alloc_reg_set(compiler, 8); brw_alloc_reg_set(compiler, 16); + brw_alloc_reg_set(compiler, 32); } static int @@ -443,7 +444,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g, * The alternative would be to have per-physical-register classes, which * would just be silly. */ - if (devinfo->gen <= 5 && dispatch_width == 16) { + if (devinfo->gen <= 5 && dispatch_width >= 16) { /* We have to divide by 2 here because we only have even numbered * registers. Some of the payload registers will be odd, but * that's ok because their physical register numbers have already @@ -538,7 +539,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all) int reg_width = dispatch_width / 8; unsigned hw_reg_mapping[this->alloc.count]; int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width); - int rsi = reg_width - 1; /* Which compiler->fs_reg_sets[] to use */ + int rsi = _mesa_logbase2(reg_width); /* Which compiler->fs_reg_sets[] to use */ calculate_live_intervals(); int node_count = this->alloc.count; -- 2.30.2