i965/fs: Add support for 16-wide dispatch to the register allocator.
[mesa.git] / src / mesa / drivers / dri / i965 / brw_fs_reg_allocate.cpp
index 078a349abdf1466c7d397a72c6f89264f91b86d8..1e2cf9171163034e01663be7e431c78954676156 100644 (file)
@@ -41,7 +41,6 @@ extern "C" {
 #include "brw_context.h"
 #include "brw_eu.h"
 #include "brw_wm.h"
-#include "talloc.h"
 }
 #include "brw_fs.h"
 #include "../glsl/glsl_types.h"
@@ -49,11 +48,11 @@ extern "C" {
 #include "../glsl/ir_print_visitor.h"
 
 static void
-assign_reg(int *reg_hw_locations, fs_reg *reg)
+assign_reg(int *reg_hw_locations, fs_reg *reg, int reg_width)
 {
    if (reg->file == GRF && reg->reg != 0) {
       assert(reg->reg_offset >= 0);
-      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset;
+      reg->hw_reg = reg_hw_locations[reg->reg] + reg->reg_offset * reg_width;
       reg->reg = 0;
    }
 }
@@ -64,32 +63,48 @@ fs_visitor::assign_regs_trivial()
    int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_next];
    int i;
+   int reg_width = c->dispatch_width / 8;
 
    hw_reg_mapping[0] = 0;
-   hw_reg_mapping[1] = this->first_non_payload_grf;
+   /* Note that compressed instructions require alignment to 2 registers. */
+   hw_reg_mapping[1] = ALIGN(this->first_non_payload_grf, reg_width);
    for (i = 2; i < this->virtual_grf_next; i++) {
       hw_reg_mapping[i] = (hw_reg_mapping[i - 1] +
-                          this->virtual_grf_sizes[i - 1]);
+                          this->virtual_grf_sizes[i - 1] * reg_width);
    }
-   last_grf = hw_reg_mapping[i - 1] + this->virtual_grf_sizes[i - 1];
+   last_grf = hw_reg_mapping[i - 1] + (this->virtual_grf_sizes[i - 1] *
+                                      reg_width);
 
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       fs_inst *inst = (fs_inst *)iter.get();
 
-      assign_reg(hw_reg_mapping, &inst->dst);
-      assign_reg(hw_reg_mapping, &inst->src[0]);
-      assign_reg(hw_reg_mapping, &inst->src[1]);
+      assign_reg(hw_reg_mapping, &inst->dst, reg_width);
+      assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
+      assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
    }
 
-   this->grf_used = last_grf + 1;
+   if (last_grf >= BRW_MAX_GRF) {
+      fail("Ran out of regs on trivial allocator (%d/%d)\n",
+          last_grf, BRW_MAX_GRF);
+   }
+
+   this->grf_used = last_grf + reg_width;
 }
 
 bool
 fs_visitor::assign_regs()
 {
+   /* Most of this allocation was written for a reg_width of 1
+    * (dispatch_width == 8).  In extending to 16-wide, the code was
+    * left in place and it was converted to have the hardware
+    * registers it's allocating be contiguous physical pairs of regs
+    * for reg_width == 2.
+    */
+   int reg_width = c->dispatch_width / 8;
    int last_grf = 0;
    int hw_reg_mapping[this->virtual_grf_next + 1];
-   int base_reg_count = BRW_MAX_GRF - this->first_non_payload_grf;
+   int first_assigned_grf = ALIGN(this->first_non_payload_grf, reg_width);
+   int base_reg_count = (BRW_MAX_GRF - first_assigned_grf) / reg_width;
    int class_sizes[base_reg_count];
    int class_count = 0;
    int aligned_pair_class = -1;
@@ -120,8 +135,7 @@ fs_visitor::assign_regs()
       }
       if (i == class_count) {
         if (this->virtual_grf_sizes[r] >= base_reg_count) {
-           fprintf(stderr, "Object too large to register allocate.\n");
-           this->fail = true;
+           fail("Object too large to register allocate.\n");
         }
 
         class_sizes[class_count++] = this->virtual_grf_sizes[r];
@@ -159,8 +173,8 @@ fs_visitor::assign_regs()
 
               if (0) {
                  printf("%d/%d conflicts %d/%d\n",
-                        class_sizes[i], this->first_non_payload_grf + i_r,
-                        class_sizes[c], this->first_non_payload_grf + c_r);
+                        class_sizes[i], first_assigned_grf + i_r,
+                        class_sizes[c], first_assigned_grf + c_r);
               }
 
               ra_add_reg_conflict(regs,
@@ -174,7 +188,7 @@ fs_visitor::assign_regs()
    /* Add a special class for aligned pairs, which we'll put delta_x/y
     * in on gen5 so that we can do PLN.
     */
-   if (brw->has_pln && intel->gen < 6) {
+   if (brw->has_pln && reg_width == 1 && intel->gen < 6) {
       int reg_count = (base_reg_count - 1) / 2;
       int unaligned_pair_class = 1;
       assert(class_sizes[unaligned_pair_class] == 2);
@@ -184,7 +198,7 @@ fs_visitor::assign_regs()
       class_sizes[aligned_pair_class] = 2;
       class_base_reg[aligned_pair_class] = 0;
       class_reg_count[aligned_pair_class] = 0;
-      int start = (this->first_non_payload_grf & 1) ? 1 : 0;
+      int start = (first_assigned_grf & 1) ? 1 : 0;
 
       for (int i = 0; i < reg_count; i++) {
         ra_class_add_reg(regs, classes[aligned_pair_class],
@@ -227,15 +241,18 @@ fs_visitor::assign_regs()
        * loop back into here to try again.
        */
       int reg = choose_spill_reg(g);
-      if (reg == -1 || intel->gen >= 6) {
-        this->fail = true;
+
+      if (reg == -1) {
+        fail("no register to spill\n");
+      } else if (c->dispatch_width == 16) {
+        fail("no spilling support on 16-wide yet\n");
       } else {
         spill_reg(reg);
       }
 
 
-      talloc_free(g);
-      talloc_free(regs);
+      ralloc_free(g);
+      ralloc_free(regs);
 
       return false;
    }
@@ -258,7 +275,7 @@ fs_visitor::assign_regs()
       }
 
       assert(hw_reg >= 0);
-      hw_reg_mapping[i] = this->first_non_payload_grf + hw_reg;
+      hw_reg_mapping[i] = first_assigned_grf + hw_reg * reg_width;
       last_grf = MAX2(last_grf,
                      hw_reg_mapping[i] + this->virtual_grf_sizes[i] - 1);
    }
@@ -266,15 +283,15 @@ fs_visitor::assign_regs()
    foreach_iter(exec_list_iterator, iter, this->instructions) {
       fs_inst *inst = (fs_inst *)iter.get();
 
-      assign_reg(hw_reg_mapping, &inst->dst);
-      assign_reg(hw_reg_mapping, &inst->src[0]);
-      assign_reg(hw_reg_mapping, &inst->src[1]);
+      assign_reg(hw_reg_mapping, &inst->dst, reg_width);
+      assign_reg(hw_reg_mapping, &inst->src[0], reg_width);
+      assign_reg(hw_reg_mapping, &inst->src[1], reg_width);
    }
 
-   this->grf_used = last_grf + 1;
+   this->grf_used = last_grf + reg_width;
 
-   talloc_free(g);
-   talloc_free(regs);
+   ralloc_free(g);
+   ralloc_free(regs);
 
    return true;
 }