mesa: add/update comments in _mesa_copy_buffer_subdata()

[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_reg_allocate.cpp
diff --git a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp

index 42ab66df6d8bb2ab6ae50d2521b2b72b9818a99b..3f875cc63d93bbeca944184004ab1dead3db5a59 100644 (file)
--- a/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp
@@ -25,35 +25,18 @@
   *
   */
  
-extern "C" {
-
-#include <sys/types.h>
-
-#include "main/macros.h"
-#include "main/shaderobj.h"
-#include "main/uniforms.h"
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-#include "program/prog_optimize.h"
-#include "program/register_allocate.h"
-#include "program/sampler.h"
-#include "program/hash_table.h"
-#include "brw_context.h"
-#include "brw_eu.h"
-#include "brw_wm.h"
-}
  #include "brw_fs.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_optimization.h"
-#include "../glsl/ir_print_visitor.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/ir_print_visitor.h"
  
  static void
  assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
  {
     if (reg->file == GRF) {
        assert(reg->reg_offset >= 0);
-      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
-      reg->reg = 0;
+      reg->reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
+      reg->reg_offset = 0;
     }
  }
  
@@ -87,55 +70,14 @@ fs_visitor::assign_regs_trivial()
  
  }
  
-bool
-fs_visitor::assign_regs()
+static void
+brw_alloc_reg_set_for_classes(struct brw_context *brw,
+                             int *class_sizes,
+                             int class_count,
+                             int reg_width,
+                             int base_reg_count)
  {
-   /* Most of this allocation was written for a reg_width of 1
-    * (dispatch_width == 8).  In extending to 16-wide, the code was
-    * left in place and it was converted to have the hardware
-    * registers it's allocating be contiguous physical pairs of regs
-    * for reg_width == 2.
-    */
-   int reg_width = c->dispatch_width / 8;
-   int hw_reg_mapping[this->virtual_grf_next];
-   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
-   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
-   int class_sizes[base_reg_count];
-   int class_count = 0;
-   int aligned_pairs_class = -1;
-
-   calculate_live_intervals();
-
-   /* Set up the register classes.
-    *
-    * The base registers store a scalar value.  For texture samples,
-    * we get virtual GRFs composed of 4 contiguous hw register.  For
-    * structures and arrays, we store them as contiguous larger things
-    * than that, though we should be able to do better most of the
-    * time.
-    */
-   class_sizes[class_count++] = 1;
-   if (brw->has_pln && intel->gen < 6) {
-      /* Always set up the (unaligned) pairs for gen5, so we can find
-       * them for making the aligned pair class.
-       */
-      class_sizes[class_count++] = 2;
-   }
-   for (int r = 0; r < this->virtual_grf_next; r++) {
-      int i;
-
-      for (i = 0; i < class_count; i++) {
-        if (class_sizes[i] == this->virtual_grf_sizes[r])
-           break;
-      }
-      if (i == class_count) {
-        if (this->virtual_grf_sizes[r] >= base_reg_count) {
-           fail("Object too large to register allocate.\n");
-        }
-
-        class_sizes[class_count++] = this->virtual_grf_sizes[r];
-      }
-   }
+   struct intel_context *intel = &brw->intel;
  
     /* Compute the total number of registers across all classes. */
     int ra_reg_count = 0;
@@ -143,9 +85,14 @@ fs_visitor::assign_regs()
        ra_reg_count += base_reg_count - (class_sizes[i] - 1);
     }
  
-   struct ra_regs *regs = ra_alloc_reg_set(ra_reg_count);
-   uint8_t ra_reg_to_grf[ra_reg_count];
-   int classes[class_count + 1];
+   ralloc_free(brw->wm.ra_reg_to_grf);
+   brw->wm.ra_reg_to_grf = ralloc_array(brw, uint8_t, ra_reg_count);
+   ralloc_free(brw->wm.regs);
+   brw->wm.regs = ra_alloc_reg_set(ra_reg_count);
+   ralloc_free(brw->wm.classes);
+   brw->wm.classes = ralloc_array(brw, int, class_count + 1);
+
+   brw->wm.aligned_pairs_class = -1;
  
     /* Now, add the registers to their classes, and add the conflicts
      * between them and the base GRF registers (and also each other).
@@ -155,7 +102,7 @@ fs_visitor::assign_regs()
     int pairs_reg_count = 0;
     for (int i = 0; i < class_count; i++) {
        int class_reg_count = base_reg_count - (class_sizes[i] - 1);
-      classes[i] = ra_alloc_reg_class(regs);
+      brw->wm.classes[i] = ra_alloc_reg_class(brw->wm.regs);
  
        /* Save this off for the aligned pair class at the end. */
        if (class_sizes[i] == 2) {
@@ -164,14 +111,14 @@ fs_visitor::assign_regs()
        }
  
        for (int j = 0; j < class_reg_count; j++) {
-        ra_class_add_reg(regs, classes[i], reg);
+        ra_class_add_reg(brw->wm.regs, brw->wm.classes[i], reg);
  
-        ra_reg_to_grf[reg] = j;
+        brw->wm.ra_reg_to_grf[reg] = j;
  
          for (int base_reg = j;
               base_reg < j + class_sizes[i];
               base_reg++) {
-           ra_add_transitive_reg_conflict(regs, base_reg, reg);
+           ra_add_transitive_reg_conflict(brw->wm.regs, base_reg, reg);
          }
  
          reg++;
@@ -183,30 +130,92 @@ fs_visitor::assign_regs()
      * in on gen5 so that we can do PLN.
      */
     if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
-      aligned_pairs_class = ra_alloc_reg_class(regs);
+      brw->wm.aligned_pairs_class = ra_alloc_reg_class(brw->wm.regs);
  
        for (int i = 0; i < pairs_reg_count; i++) {
-        if ((ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
-           ra_class_add_reg(regs, aligned_pairs_class,
+        if ((brw->wm.ra_reg_to_grf[pairs_base_reg + i] & 1) == 0) {
+           ra_class_add_reg(brw->wm.regs, brw->wm.aligned_pairs_class,
                              pairs_base_reg + i);
          }
        }
        class_count++;
     }
  
-   ra_set_finalize(regs);
+   ra_set_finalize(brw->wm.regs);
+}
  
-   struct ra_graph *g = ra_alloc_interference_graph(regs,
+bool
+fs_visitor::assign_regs()
+{
+   /* Most of this allocation was written for a reg_width of 1
+    * (dispatch_width == 8).  In extending to 16-wide, the code was
+    * left in place and it was converted to have the hardware
+    * registers it's allocating be contiguous physical pairs of regs
+    * for reg_width == 2.
+    */
+   int reg_width = c->dispatch_width / 8;
+   int hw_reg_mapping[this->virtual_grf_next];
+   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
+   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
+   int class_sizes[base_reg_count];
+   int class_count = 0;
+
+   calculate_live_intervals();
+
+   /* Set up the register classes.
+    *
+    * The base registers store a scalar value.  For texture samples,
+    * we get virtual GRFs composed of 4 contiguous hw registers.  For
+    * structures and arrays, we store them as contiguous larger things
+    * than that, though we should be able to do better most of the
+    * time.
+    */
+   class_sizes[class_count++] = 1;
+   if (brw->has_pln && intel->gen < 6) {
+      /* Always set up the (unaligned) pairs for gen5, so we can find
+       * them for making the aligned pair class.
+       */
+      class_sizes[class_count++] = 2;
+   }
+   for (int r = 0; r < this->virtual_grf_next; r++) {
+      int i;
+
+      for (i = 0; i < class_count; i++) {
+        if (class_sizes[i] == this->virtual_grf_sizes[r])
+           break;
+      }
+      if (i == class_count) {
+        if (this->virtual_grf_sizes[r] >= base_reg_count) {
+           fail("Object too large to register allocate.\n");
+        }
+
+        class_sizes[class_count++] = this->virtual_grf_sizes[r];
+      }
+   }
+
+   brw_alloc_reg_set_for_classes(brw, class_sizes, class_count,
+                                reg_width, base_reg_count);
+
+   struct ra_graph *g = ra_alloc_interference_graph(brw->wm.regs,
                                                     this->virtual_grf_next);
  
     for (int i = 0; i < this->virtual_grf_next; i++) {
        for (int c = 0; c < class_count; c++) {
          if (class_sizes[c] == this->virtual_grf_sizes[i]) {
-           if (aligned_pairs_class >= 0 &&
-               this->delta_x.reg == i) {
-              ra_set_node_class(g, i, aligned_pairs_class);
+            /* Special case: on pre-GEN6 hardware that supports PLN, the
+             * second operand of a PLN instruction needs to be an
+             * even-numbered register, so we have a special register class,
+             * brw->wm.aligned_pairs_class, to handle this case.  Pre-GEN6
+             * always uses this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC]
+             * as the second operand of a PLN instruction (since it doesn't
+             * support any other interpolation modes).  So all we need to do
+             * is find that register and set it to the appropriate class.
+             */
+           if (brw->wm.aligned_pairs_class >= 0 &&
+               this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg == i) {
+              ra_set_node_class(g, i, brw->wm.aligned_pairs_class);
             } else {
-              ra_set_node_class(g, i, classes[c]);
+              ra_set_node_class(g, i, brw->wm.classes[c]);
             }
             break;
          }
@@ -237,7 +246,6 @@ fs_visitor::assign_regs()
  
  
        ralloc_free(g);
-      ralloc_free(regs);
  
        return false;
     }
@@ -250,7 +258,8 @@ fs_visitor::assign_regs()
     for (int i = 0; i < this->virtual_grf_next; i++) {
        int reg = ra_get_node_reg(g, i);
  
-      hw_reg_mapping[i] = first_assigned_grf + ra_reg_to_grf[reg] * reg_width;
+      hw_reg_mapping[i] = (first_assigned_grf +
+                          brw->wm.ra_reg_to_grf[reg] * reg_width);
        this->grf_used = MAX2(this->grf_used,
                             hw_reg_mapping[i] + this->virtual_grf_sizes[i] *
                             reg_width);
@@ -265,7 +274,6 @@ fs_visitor::assign_regs()
     }
  
     ralloc_free(g);
-   ralloc_free(regs);
  
     return true;
  }
@@ -343,6 +351,9 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
          if (inst->dst.file == GRF)
             no_spill[inst->dst.reg] = true;
          break;
+
+      default:
+        break;
        }
     }