intel/fs: Add a generic SEND opcode

[mesa.git] / src / intel / compiler / brw_fs_reg_allocate.cpp
diff --git a/src/intel/compiler/brw_fs_reg_allocate.cpp b/src/intel/compiler/brw_fs_reg_allocate.cpp

index c981d72e4f2e664b7cecda4d7072a84c807b2e3c..5db5242452ea9d23480043723a6be3c878d3b0e4 100644 (file)
--- a/src/intel/compiler/brw_fs_reg_allocate.cpp
+++ b/src/intel/compiler/brw_fs_reg_allocate.cpp
@@ -548,6 +548,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
     int first_mrf_hack_node = node_count;
     if (devinfo->gen >= 7)
        node_count += BRW_MAX_GRF - GEN7_MRF_HACK_START;
+   int grf127_send_hack_node = node_count;
+   if (devinfo->gen >= 8)
+      node_count ++;
     struct ra_graph *g =
        ra_alloc_interference_graph(compiler->fs_reg_sets[rsi].regs, node_count);
  
@@ -614,7 +617,9 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
            * highest register that works.
            */
           if (inst->eot) {
-            int size = alloc.sizes[inst->src[0].nr];
+            const int vgrf = inst->opcode == SHADER_OPCODE_SEND ?
+                             inst->src[2].nr : inst->src[0].nr;
+            int size = alloc.sizes[vgrf];
              int reg = compiler->fs_reg_sets[rsi].class_to_ra_reg_range[size] - 1;
  
              /* If something happened to spill, we want to push the EOT send
@@ -623,32 +628,69 @@ fs_visitor::assign_regs(bool allow_spilling, bool spill_all)
               */
              reg -= BRW_MAX_MRF(devinfo->gen) - first_used_mrf;
  
-            ra_set_node_reg(g, inst->src[0].nr, reg);
+            ra_set_node_reg(g, vgrf, reg);
              break;
           }
        }
     }
  
-   if (dispatch_width > 8) {
-      /* In 16-wide dispatch we have an issue where a compressed
-       * instruction is actually two instructions executed simultaneiously.
-       * It's actually ok to have the source and destination registers be
-       * the same.  In this case, each instruction over-writes its own
-       * source and there's no problem.  The real problem here is if the
-       * source and destination registers are off by one.  Then you can end
-       * up in a scenario where the first instruction over-writes the
-       * source of the second instruction.  Since the compiler doesn't know
-       * about this level of granularity, we simply make the source and
-       * destination interfere.
+   /* In 16-wide instructions we have an issue where a compressed
+    * instruction is actually two instructions executed simultaneously.
+    * It's actually ok to have the source and destination registers be
+    * the same.  In this case, each instruction over-writes its own
+    * source and there's no problem.  The real problem here is if the
+    * source and destination registers are off by one.  Then you can end
+    * up in a scenario where the first instruction over-writes the
+    * source of the second instruction.  Since the compiler doesn't know
+    * about this level of granularity, we simply make the source and
+    * destination interfere.
+    */
+   foreach_block_and_inst(block, fs_inst, inst, cfg) {
+      if (inst->exec_size < 16 || inst->dst.file != VGRF)
+         continue;
+
+      for (int i = 0; i < inst->sources; ++i) {
+         if (inst->src[i].file == VGRF) {
+            ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
+         }
+      }
+   }
+
+   if (devinfo->gen >= 8) {
+      /* At Intel Broadwell PRM, vol 07, section "Instruction Set Reference",
+       * subsection "EUISA Instructions", Send Message (page 990):
+       *
+       * "r127 must not be used for return address when there is a src and
+       * dest overlap in send instruction."
+       *
+       * We avoid using grf127 as part of the destination of send messages
+       * by adding a node interference to the grf127_send_hack_node.
+       * This node has a fixed assignment to grf127.
+       *
+       * We don't apply it to SIMD16 because previous code avoids any register
+       * overlap between sources and destination.
         */
-      foreach_block_and_inst(block, fs_inst, inst, cfg) {
-         if (inst->dst.file != VGRF)
-            continue;
+      ra_set_node_reg(g, grf127_send_hack_node, 127);
+      if (dispatch_width == 8) {
+         foreach_block_and_inst(block, fs_inst, inst, cfg) {
+            if (inst->is_send_from_grf() && inst->dst.file == VGRF)
+               ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
+         }
+      }
  
-         for (int i = 0; i < inst->sources; ++i) {
-            if (inst->src[i].file == VGRF) {
-               ra_add_node_interference(g, inst->dst.nr, inst->src[i].nr);
-            }
+      if (spilled_any_registers) {
+         foreach_block_and_inst(block, fs_inst, inst, cfg) {
+            /* Spilling instructions are generated as SEND messages from MRF,
+             * but as Gen7+ supports sending from GRF the driver will assign
+             * these MRF registers to a GRF. The implementation reuses
+             * the dest of the send message as source. So as we will have an
+             * overlap for sure, we create an interference between destination
+             * and grf127.
+             */
+            if ((inst->opcode == SHADER_OPCODE_GEN7_SCRATCH_READ ||
+                 inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_READ) &&
+                inst->dst.file == VGRF)
+               ra_add_node_interference(g, inst->dst.nr, grf127_send_hack_node);
           }
        }
     }
@@ -822,12 +864,11 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
     foreach_block_and_inst(block, fs_inst, inst, cfg) {
        for (unsigned int i = 0; i < inst->sources; i++) {
          if (inst->src[i].file == VGRF)
-            spill_costs[inst->src[i].nr] += block_scale;
+            spill_costs[inst->src[i].nr] += regs_read(inst, i) * block_scale;
        }
  
        if (inst->dst.file == VGRF)
-         spill_costs[inst->dst.nr] += DIV_ROUND_UP(inst->size_written, REG_SIZE)
-                                      * block_scale;
+         spill_costs[inst->dst.nr] += regs_written(inst) * block_scale;
  
        switch (inst->opcode) {
  
@@ -873,7 +914,7 @@ fs_visitor::choose_spill_reg(struct ra_graph *g)
  }
  
  void
-fs_visitor::spill_reg(int spill_reg)
+fs_visitor::spill_reg(unsigned spill_reg)
  {
     int size = alloc.sizes[spill_reg];
     unsigned int spill_offset = last_scratch;