ir3: Plumb through bindless support
[mesa.git] / src / freedreno / ir3 / ir3_delay.c
index 506e29693261eb90de0334df73084f8a3287e2e7..3fc4d911f1454009e61bde8fccbfdb49677bb832 100644 (file)
@@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner,
  */
 int
 ir3_delayslots(struct ir3_instruction *assigner,
-               struct ir3_instruction *consumer, unsigned n)
+               struct ir3_instruction *consumer, unsigned n, bool soft)
 {
        if (ignore_dep(assigner, consumer, n))
                return 0;
@@ -82,9 +82,23 @@ ir3_delayslots(struct ir3_instruction *assigner,
        if (is_meta(assigner) || is_meta(consumer))
                return 0;
 
-       if (writes_addr(assigner))
+       if (writes_addr0(assigner) || writes_addr1(assigner))
                return 6;
 
+       /* On a6xx, the number of delay slots it takes to get an SFU result
+        * back (ie. using nop's instead of (ss)) is:
+        *
+        *     8 - single warp
+        *     9 - two warps
+        *    10 - four warps
+        *
+        * and so on.  Not quite sure where it tapers out (ie. how many
+        * warps share an SFU unit).  But 10 seems like a reasonable #
+        * to choose:
+        */
+       if (soft && is_sfu(assigner))
+               return 10;
+
        /* handled via sync flags: */
        if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
                return 0;
@@ -126,8 +140,8 @@ count_instruction(struct ir3_instruction *n)
  *    find the worst case (shortest) distance (only possible after
  *    individual blocks are all scheduled)
  */
-unsigned
-ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
+static unsigned
+distance(struct ir3_block *block, struct ir3_instruction *instr,
                unsigned maxd, bool pred)
 {
        unsigned d = 0;
@@ -162,7 +176,7 @@ ir3_distance(struct ir3_block *block, struct ir3_instruction *instr,
                        struct ir3_block *pred = (struct ir3_block *)entry->key;
                        unsigned n;
 
-                       n = ir3_distance(pred, instr, min, pred);
+                       n = distance(pred, instr, min, pred);
 
                        min = MIN2(min, n);
                }
@@ -195,16 +209,8 @@ delay_calc_srcn(struct ir3_block *block,
                        delay = MAX2(delay, d);
                }
        } else {
-               if (soft) {
-                       if (is_sfu(assigner)) {
-                               delay = 4;
-                       } else {
-                               delay = ir3_delayslots(assigner, consumer, srcn);
-                       }
-               } else {
-                       delay = ir3_delayslots(assigner, consumer, srcn);
-               }
-               delay -= ir3_distance(block, assigner, delay, pred);
+               delay = ir3_delayslots(assigner, consumer, srcn, soft);
+               delay -= distance(block, assigner, delay, pred);
        }
 
        return delay;
@@ -335,3 +341,24 @@ ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
 
        return delay;
 }
+
+/**
+ * Drop every nop instruction from the shader.  The scheduler may emit
+ * placeholder nop's so that ir3_delay_calc() can take into account
+ * nop's made unnecessary by nop's that an earlier instruction triggered.
+ * Those placeholders have to be gone before legalize runs; legalize can
+ * then insert nop's where needed (for example to hold sync flags), so
+ * the final remaining nop's are the ones legalize adds after this.
+ */
+void
+ir3_remove_nops(struct ir3 *ir)
+{
+       foreach_block (block, &ir->block_list) {
+               foreach_instr_safe (instr, &block->instr_list) {
+                       if (instr->opc != OPC_NOP)
+                               continue;
+
+                       list_del(&instr->node);
+               }
+       }
+}