intel/fs/ra: Pull the guts of RA into its own class

author Jason Ekstrand <jason@jlekstrand.net>

Wed, 8 May 2019 01:09:08 +0000 (20:09 -0500)

committer Jason Ekstrand <jason@jlekstrand.net>

Tue, 14 May 2019 17:30:22 +0000 (12:30 -0500)
author Jason Ekstrand <jason@jlekstrand.net>
Wed, 8 May 2019 01:09:08 +0000 (20:09 -0500)
committer Jason Ekstrand <jason@jlekstrand.net>
Tue, 14 May 2019 17:30:22 +0000 (12:30 -0500)
diff --git a/src/intel/compiler/brw_fs.h b/src/intel/compiler/brw_fs.h

index f05a9e0625ac055114ae50fa9d7d1ebbe5e9d209..098725b80d1425a0f4abef33f27b83299c6d6127 100644 (file)
--- a/src/intel/compiler/brw_fs.h
+++ b/src/intel/compiler/brw_fs.h
@@ -117,10 +117,6 @@ public:
     void assign_regs_trivial();
     void calculate_payload_ranges(int payload_node_count,
                                   int *payload_last_use_ip);
-   void setup_payload_interference(struct ra_graph *g, int payload_reg_count,
-                                   int first_payload_node);
-   int choose_spill_reg(struct ra_graph *g);
-   void spill_reg(unsigned spill_reg);
     void split_virtual_grfs();
     bool compact_virtual_grfs();
     void assign_constant_locations();
diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp

index 0462c77675cebb5d8bfb68822736e9045e54dc17..14a110c326be3b8736adfdc3cd56cf3c6c73f8cb 100644 (file)
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -393,6 +393,44 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
     }
  }
  
+class fs_reg_alloc { /* Register-allocation state for one fs_visitor::assign_regs() call. */
+public:
+   fs_reg_alloc(fs_visitor *fs):
+      fs(fs), devinfo(fs->devinfo), compiler(fs->compiler), g(NULL)
+   {
+      mem_ctx = ralloc_context(NULL); /* parent for everything this object allocates */
+      int reg_width = fs->dispatch_width / 8; /* 1 for dispatch width 8, 2 for 16 */
+      rsi = _mesa_logbase2(reg_width);
+   }
+
+   ~fs_reg_alloc() /* NOTE(review): copies are not disabled; a copy would free mem_ctx twice */
+   {
+      ralloc_free(mem_ctx); /* also releases g once build_interference_graph() has stolen it */
+   }
+
+   bool assign_regs(bool allow_spilling, bool spill_all); /* true only after hardware registers were assigned; false after a spill or a failure */
+
+private:
+   void setup_payload_interference(int payload_node_count,
+                                   int first_payload_node);
+   void setup_mrf_hack_interference(int first_mrf_node,
+                                    int *first_used_mrf); /* out: lowest MRF index found in use */
+   void build_interference_graph(); /* allocates g; asserts that g is still NULL on entry */
+
+   int choose_spill_reg();
+   void spill_reg(unsigned spill_reg);
+
+   void *mem_ctx; /* ralloc context freed by the destructor */
+   fs_visitor *fs; /* visitor being allocated for; never freed by this class */
+   const gen_device_info *devinfo; /* copied from fs->devinfo in the constructor */
+   const brw_compiler *compiler; /* copied from fs->compiler in the constructor */
+
+   /* Which compiler->fs_reg_sets[] to use */
+   int rsi; /* _mesa_logbase2(dispatch_width / 8), computed in the constructor */
+
+   ra_graph *g; /* NULL until build_interference_graph() runs; then parented to mem_ctx */
+};
+
  
  /**
   * Sets up interference between thread payload registers and the virtual GRFs
@@ -412,12 +450,11 @@ void fs_visitor::calculate_payload_ranges(int payload_node_count,
   * (note that in SIMD16, a node is two registers).
   */
  void
-fs_visitor::setup_payload_interference(struct ra_graph *g,
-                                       int payload_node_count,
-                                       int first_payload_node)
+fs_reg_alloc::setup_payload_interference(int payload_node_count,
+                                         int first_payload_node)
  {
     int payload_last_use_ip[payload_node_count];
-   calculate_payload_ranges(payload_node_count, payload_last_use_ip);
+   fs->calculate_payload_ranges(payload_node_count, payload_last_use_ip);
  
     for (int i = 0; i < payload_node_count; i++) {
        if (payload_last_use_ip[i] == -1)
@@ -427,12 +464,12 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
         * live between the start of the program and our last use of the payload
         * node.
         */
-      for (unsigned j = 0; j < this->alloc.count; j++) {
+      for (unsigned j = 0; j < fs->alloc.count; j++) {
           /* Note that we use a <= comparison, unlike virtual_grf_interferes(),
            * in order to not have to worry about the uniform issue described in
            * calculate_live_intervals().
            */
-         if (this->virtual_grf_start[j] <= payload_last_use_ip[i]) {
+         if (fs->virtual_grf_start[j] <= payload_last_use_ip[i]) {
              ra_add_node_interference(g, first_payload_node + i, j);
           }
        }
@@ -444,7 +481,7 @@ fs_visitor::setup_payload_interference(struct ra_graph *g,
         * The alternative would be to have per-physical-register classes, which
         * would just be silly.
         */
-      if (devinfo->gen <= 5 && dispatch_width >= 16) {
+      if (devinfo->gen <= 5 && fs->dispatch_width >= 16) {
           /* We have to divide by 2 here because we only have even numbered
            * registers.  Some of the payload registers will be odd, but
            * that's ok because their physical register numbers have already
@@ -497,15 +534,15 @@ get_used_mrfs(fs_visitor *v, bool *mrf_used)
   * Sets interference between virtual GRFs and usage of the high GRFs for SEND
   * messages (treated as MRFs in code generation).
   */
-static void
-setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
-                            int first_mrf_node, int *first_used_mrf)
+void
+fs_reg_alloc::setup_mrf_hack_interference(int first_mrf_node,
+                                          int *first_used_mrf)
  {
-   bool mrf_used[BRW_MAX_MRF(v->devinfo->gen)];
-   get_used_mrfs(v, mrf_used);
+   bool mrf_used[BRW_MAX_MRF(fs->devinfo->gen)];
+   get_used_mrfs(fs, mrf_used);
  
-   *first_used_mrf = BRW_MAX_MRF(v->devinfo->gen);
-   for (int i = 0; i < BRW_MAX_MRF(v->devinfo->gen); i++) {
+   *first_used_mrf = BRW_MAX_MRF(devinfo->gen);
+   for (int i = 0; i < BRW_MAX_MRF(devinfo->gen); i++) {
        /* Mark each MRF reg node as being allocated to its physical register.
         *
         * The alternative would be to have per-physical-register classes, which
@@ -520,15 +557,15 @@ setup_mrf_hack_interference(fs_visitor *v, struct ra_graph *g,
           if (i < *first_used_mrf)
              *first_used_mrf = i;
  
-         for (unsigned j = 0; j < v->alloc.count; j++) {
+         for (unsigned j = 0; j < fs->alloc.count; j++) {
              ra_add_node_interference(g, first_mrf_node + i, j);
           }
        }
     }
  }
  
-static ra_graph *
-build_interference_graph(fs_visitor *fs)
+void
+fs_reg_alloc::build_interference_graph()
  {
     const gen_device_info *devinfo = fs->devinfo;
     const brw_compiler *compiler = fs->compiler;
@@ -541,7 +578,7 @@ build_interference_graph(fs_visitor *fs)
      */
     int reg_width = fs->dispatch_width / 8;
     int payload_node_count = ALIGN(fs->first_non_payload_grf, reg_width);
-   int rsi = _mesa_logbase2(reg_width); /* Which compiler->fs_reg_sets[] to use */
+
     fs->calculate_live_intervals();
  
     int node_count = fs->alloc.count;
@@ -553,8 +590,10 @@ build_interference_graph(fs_visitor *fs)
     int grf127_send_hack_node = node_count;
     if (devinfo->gen >= 8)
        node_count ++;
-   struct ra_graph *g =
-      ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count);
+
+   assert(g == NULL);
+   g = ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count);
+   ralloc_steal(mem_ctx, g);
  
     for (unsigned i = 0; i < fs->alloc.count; i++) {
        unsigned size = fs->alloc.sizes[i];
@@ -601,10 +640,10 @@ build_interference_graph(fs_visitor *fs)
        }
     }
  
-   fs->setup_payload_interference(g, payload_node_count, first_payload_node);
+   setup_payload_interference(payload_node_count, first_payload_node);
     if (devinfo->gen >= 7) {
        int first_used_mrf = BRW_MAX_MRF(devinfo->gen);
-      setup_mrf_hack_interference(fs, g, first_mrf_hack_node,
+      setup_mrf_hack_interference(first_mrf_hack_node,
                                    &first_used_mrf);
  
        foreach_block_and_inst(block, fs_inst, inst, fs->cfg) {
@@ -717,8 +756,6 @@ build_interference_graph(fs_visitor *fs)
                                       inst->src[3].nr);
        }
     }
-
-   return g;
  }
  
  namespace {
@@ -816,13 +853,13 @@ emit_spill(const fs_builder &bld, fs_reg src,
  }
  
  int
-fs_visitor::choose_spill_reg(struct ra_graph *g)
+fs_reg_alloc::choose_spill_reg()
  {
     float block_scale = 1.0;
-   float spill_costs[this->alloc.count];
-   bool no_spill[this->alloc.count];
+   float spill_costs[fs->alloc.count];
+   bool no_spill[fs->alloc.count];
  
-   for (unsigned i = 0; i < this->alloc.count; i++) {
+   for (unsigned i = 0; i < fs->alloc.count; i++) {
        spill_costs[i] = 0.0;
        no_spill[i] = false;
     }
@@ -831,7 +868,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
      * spill/unspill we'll have to do, and guess that the insides of
      * loops run 10 times.
      */
-   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+   foreach_block_and_inst(block, fs_inst, inst, fs->cfg) {
        for (unsigned int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF)
              spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale;
@@ -875,8 +912,8 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
        }
     }
  
-   for (unsigned i = 0; i < this->alloc.count; i++) {
-      int live_length = virtual_grf_end[i] - virtual_grf_start[i];
+   for (unsigned i = 0; i < fs->alloc.count; i++) {
+      int live_length = fs->virtual_grf_end[i] - fs->virtual_grf_start[i];
        if (live_length <= 0)
           continue;
  
@@ -896,10 +933,10 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
  }
  
  void
-fs_visitor::spill_reg(unsigned spill_reg)
+fs_reg_alloc::spill_reg(unsigned spill_reg)
  {
-   int size = alloc.sizes[spill_reg];
-   unsigned int spill_offset = last_scratch;
+   int size = fs->alloc.sizes[spill_reg];
+   unsigned int spill_offset = fs->last_scratch;
     assert(ALIGN(spill_offset, 16) == spill_offset); /* oword read/write req. */
  
     /* Spills may use MRFs 13-15 in the SIMD16 case.  Our texturing is done
@@ -909,29 +946,29 @@ fs_visitor::spill_reg(unsigned spill_reg)
      * depth), starting from m1.  In summary: We may not be able to spill in
      * SIMD16 mode, because we'd stomp the FB writes.
      */
-   if (!spilled_any_registers) {
+   if (!fs->spilled_any_registers) {
        bool mrf_used[BRW_MAX_MRF(devinfo->gen)];
-      get_used_mrfs(this, mrf_used);
+      get_used_mrfs(fs, mrf_used);
  
-      for (int i = spill_base_mrf(this); i < BRW_MAX_MRF(devinfo->gen); i++) {
+      for (int i = spill_base_mrf(fs); i < BRW_MAX_MRF(devinfo->gen); i++) {
           if (mrf_used[i]) {
-            fail("Register spilling not supported with m%d used", i);
+            fs->fail("Register spilling not supported with m%d used", i);
            return;
           }
        }
  
-      spilled_any_registers = true;
+      fs->spilled_any_registers = true;
     }
  
-   last_scratch += size * REG_SIZE;
+   fs->last_scratch += size * REG_SIZE;
  
     /* Generate spill/unspill instructions for the objects being
      * spilled.  Right now, we spill or unspill the whole thing to a
      * virtual grf of the same size.  For most instructions, though, we
      * could just spill/unspill the GRF being accessed.
      */
-   foreach_block_and_inst (block, fs_inst, inst, cfg) {
-      const fs_builder ibld = fs_builder(this, block, inst);
+   foreach_block_and_inst (block, fs_inst, inst, fs->cfg) {
+      const fs_builder ibld = fs_builder(fs, block, inst);
  
        for (unsigned int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF &&
@@ -939,7 +976,7 @@ fs_visitor::spill_reg(unsigned spill_reg)
              int count = regs_read(inst, i);
              int subset_spill_offset = spill_offset +
                 ROUND_DOWN_TO(inst->src[i].offset, REG_SIZE);
-            fs_reg unspill_dst(VGRF, alloc.allocate(count));
+            fs_reg unspill_dst(VGRF, fs->alloc.allocate(count));
  
              inst->src[i].nr = unspill_dst.nr;
              inst->src[i].offset %= REG_SIZE;
@@ -967,7 +1004,7 @@ fs_visitor::spill_reg(unsigned spill_reg)
            inst->dst.nr == spill_reg) {
           int subset_spill_offset = spill_offset +
              ROUND_DOWN_TO(inst->dst.offset, REG_SIZE);
-         fs_reg spill_src(VGRF, alloc.allocate(regs_written(inst)));
+         fs_reg spill_src(VGRF, fs->alloc.allocate(regs_written(inst)));
  
           inst->dst.nr = spill_src.nr;
           inst->dst.offset %= REG_SIZE;
@@ -989,7 +1026,7 @@ fs_visitor::spill_reg(unsigned spill_reg)
            */
           const unsigned width = 8 * MIN2(
              DIV_ROUND_UP(inst->dst.component_size(inst->exec_size), REG_SIZE),
-            spill_max_size(this));
+            spill_max_size(fs));
  
           /* Spills should only write data initialized by the instruction for
            * whichever channels are enabled in the excution mask.  If that's
@@ -1020,29 +1057,20 @@ fs_visitor::spill_reg(unsigned spill_reg)
        }
     }
  
-   invalidate_live_intervals();
+   fs->invalidate_live_intervals();
  }
  
  bool
-fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
+fs_reg_alloc::assign_regs(bool allow_spilling, bool spill_all)
  {
-   /* Most of this allocation was written for a reg_width of 1
-    * (dispatch_width == 8).  In extending to SIMD16, the code was
-    * left in place and it was converted to have the hardware
-    * registers it's allocating be contiguous physical pairs of regs
-    * for reg_width == 2.
-    */
-   int reg_width = dispatch_width / 8;
-   int rsi = _mesa_logbase2(reg_width); /* Which compiler->fs_reg_sets[] to use */
-   ra_graph *g = build_interference_graph(this);
+   build_interference_graph();
  
     /* Debug of register spilling: Go spill everything. */
     if (unlikely(spill_all)) {
-      int reg = choose_spill_reg(g);
+      int reg = choose_spill_reg();
  
        if (reg != -1) {
           spill_reg(reg);
-         ralloc_free(g);
           return false;
        }
     }
@@ -1051,17 +1079,15 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
        /* Failed to allocate registers.  Spill a reg, and the caller will
         * loop back into here to try again.
         */
-      int reg = choose_spill_reg(g);
+      int reg = choose_spill_reg();
  
        if (reg == -1) {
-         fail("no register to spill:\n");
-         dump_instructions(NULL);
+         fs->fail("no register to spill:\n");
+         fs->dump_instructions(NULL);
        } else if (allow_spilling) {
           spill_reg(reg);
        }
  
-      ralloc_free(g);
-
        return false;
     }
  
@@ -1069,26 +1095,31 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
      * regs in the register classes back down to real hardware reg
      * numbers.
      */
-   unsigned hw_reg_mapping[alloc.count];
-   this->grf_used = this->first_non_payload_grf;
-   for (unsigned i = 0; i < this->alloc.count; i++) {
+   unsigned hw_reg_mapping[fs->alloc.count];
+   fs->grf_used = fs->first_non_payload_grf;
+   for (unsigned i = 0; i < fs->alloc.count; i++) {
        int reg = ra_get_node_reg(g, i);
  
        hw_reg_mapping[i] = compiler->fs_reg_sets[rsi].ra_reg_to_grf[reg];
-      this->grf_used = MAX2(this->grf_used,
-                           hw_reg_mapping[i] + this->alloc.sizes[i]);
+      fs->grf_used = MAX2(fs->grf_used,
+                         hw_reg_mapping[i] + fs->alloc.sizes[i]);
     }
  
-   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+   foreach_block_and_inst(block, fs_inst, inst, fs->cfg) {
        assign_reg(hw_reg_mapping, &inst->dst);
        for (int i = 0; i < inst->sources; i++) {
           assign_reg(hw_reg_mapping, &inst->src[i]);
        }
     }
  
-   this->alloc.count = this->grf_used;
-
-   ralloc_free(g);
+   fs->alloc.count = fs->grf_used;
  
     return true;
  }
+
+bool
+fs_visitor::assign_regs(bool allow_spilling, bool spill_all) /* thin entry point: all the work happens in fs_reg_alloc */
+{
+   fs_reg_alloc alloc(this); /* scoped helper; note that this local hides the visitor's own alloc member here */
+   return alloc.assign_regs(allow_spilling, spill_all); /* helper's destructor frees its ralloc context on return */
+}
author	Jason Ekstrand <jason@jlekstrand.net>
	Wed, 8 May 2019 01:09:08 +0000 (20:09 -0500)
committer	Jason Ekstrand <jason@jlekstrand.net>
	Tue, 14 May 2019 17:30:22 +0000 (12:30 -0500)
src/intel/compiler/brw_fs.h		patch \| blob \| history
src/intel/compiler/brw_fs_reg_allocate.cpp		patch \| blob \| history