i965/fs: Implement SIMD32 register allocation support.

author Francisco Jerez <currojerez@riseup.net>

Wed, 18 May 2016 20:52:25 +0000 (13:52 -0700)

committer Francisco Jerez <currojerez@riseup.net>

Sat, 28 May 2016 06:29:06 +0000 (23:29 -0700)
author Francisco Jerez <currojerez@riseup.net>
Wed, 18 May 2016 20:52:25 +0000 (13:52 -0700)
committer Francisco Jerez <currojerez@riseup.net>
Sat, 28 May 2016 06:29:06 +0000 (23:29 -0700)
diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h b/src/mesa/drivers/dri/i965/brw_compiler.h

index a8fb4864e6e50ed0c2d2c3943caf1d36a6432748..0844694694c347053aacace79bd82aad5c7b4c79 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -85,7 +85,7 @@ struct brw_compiler {
         * appear in *classes.
         */
        int aligned_pairs_class;
-   } fs_reg_sets[2];
+   } fs_reg_sets[3];
  
     void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
     void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3);
diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp

index f1afbd095ed7403320ebdee703ccf54064dfb817..cfe9f02353747eb084c66741c48624eac527b62a 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -5803,7 +5803,7 @@ fs_visitor::allocate_registers(bool allow_spilling)
         * SIMD8.  There's probably actually some intermediate point where
         * SIMD16 with a couple of spills is still better.
         */
-      if (dispatch_width == 16 && min_dispatch_width <= 8) {
+      if (dispatch_width > min_dispatch_width) {
           fail("Failure to register allocate.  Reduce number of "
                "live scalar values to avoid this.");
        } else {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp

index cd84dc22254dffedbc53a56553e990bdfed87d2b..d5c54e0e02baf8ce4222af8d7d77992f60f1b37e 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -77,7 +77,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
  {
     const struct brw_device_info *devinfo = compiler->devinfo;
     int base_reg_count = BRW_MAX_GRF;
-   int index = (dispatch_width / 8) - 1;
+   const int index = _mesa_logbase2(dispatch_width / 8);
  
     if (dispatch_width > 8 && devinfo->gen >= 7) {
        /* For IVB+, we don't need the PLN hacks or the even-reg alignment in
@@ -115,7 +115,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
     /* Compute the total number of registers across all classes. */
     int ra_reg_count = 0;
     for (int i = 0; i < class_count; i++) {
-      if (devinfo->gen <= 5 && dispatch_width == 16) {
+      if (devinfo->gen <= 5 && dispatch_width >= 16) {
           /* From the G45 PRM:
            *
            * In order to reduce the hardware complexity, the following
@@ -162,7 +162,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
     int pairs_reg_count = 0;
     for (int i = 0; i < class_count; i++) {
        int class_reg_count;
-      if (devinfo->gen <= 5 && dispatch_width == 16) {
+      if (devinfo->gen <= 5 && dispatch_width >= 16) {
           class_reg_count = (base_reg_count - (class_sizes[i] - 1)) / 2;
  
           /* See comment below.  The only difference here is that we are
@@ -208,7 +208,7 @@ brw_alloc_reg_set(struct brw_compiler *compiler, int dispatch_width)
           pairs_reg_count = class_reg_count;
        }
  
-      if (devinfo->gen <= 5 && dispatch_width == 16) {
+      if (devinfo->gen <= 5 && dispatch_width >= 16) {
           for (int j = 0; j < class_reg_count; j++) {
              ra_class_add_reg(regs, classes[i], reg);
  
@@ -289,6 +289,7 @@ brw_fs_alloc_reg_sets(struct brw_compiler *compiler)
  {
     brw_alloc_reg_set(compiler, 8);
     brw_alloc_reg_set(compiler, 16);
+   brw_alloc_reg_set(compiler, 32);
  }
  
  static int
@@ -443,7 +444,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
         * The alternative would be to have per-physical-register classes, which
         * would just be silly.
         */
-      if (devinfo->gen <= 5 && dispatch_width == 16) {
+      if (devinfo->gen <= 5 && dispatch_width >= 16) {
           /* We have to divide by 2 here because we only have even numbered
            * registers.  Some of the payload registers will be odd, but
            * that's ok because their physical register numbers have already
@@ -538,7 +539,7 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
     int reg_width = dispatch_width / 8;
     unsigned hw_reg_mapping[this->alloc.count];
     int payload_node_count = ALIGN(this->first_non_payload_grf, reg_width);
-   int rsi = reg_width - 1; /* Which compiler->fs_reg_sets[] to use */
+   int rsi = _mesa_logbase2(reg_width); /* Which compiler->fs_reg_sets[] to use */
     calculate_live_intervals();
  
     int node_count = this->alloc.count;
author	Francisco Jerez <currojerez@riseup.net>
	Wed, 18 May 2016 20:52:25 +0000 (13:52 -0700)
committer	Francisco Jerez <currojerez@riseup.net>
	Sat, 28 May 2016 06:29:06 +0000 (23:29 -0700)
src/mesa/drivers/dri/i965/brw_compiler.h		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs.cpp		patch | blob | history
src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp		patch | blob | history