freedreno/ir3: update SFU delay

author Rob Clark <robdclark@chromium.org>

Tue, 25 Feb 2020 18:44:26 +0000 (10:44 -0800)

committer Marge Bot <eric+marge@anholt.net>

Fri, 28 Feb 2020 16:53:41 +0000 (16:53 +0000)
author Rob Clark <robdclark@chromium.org>
Tue, 25 Feb 2020 18:44:26 +0000 (10:44 -0800)
committer Marge Bot <eric+marge@anholt.net>
Fri, 28 Feb 2020 16:53:41 +0000 (16:53 +0000)
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h

index 21fd8c602b98561839a1b46be4a323835d4b91c9..b66d8e2d6fd14897e2d05f9d0a352b49740e51ac 100644 (file)
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -1157,7 +1157,7 @@ void ir3_print_instr(struct ir3_instruction *instr);
  
  /* delay calculation: */
  int ir3_delayslots(struct ir3_instruction *assigner,
-               struct ir3_instruction *consumer, unsigned n);
+               struct ir3_instruction *consumer, unsigned n, bool soft);
  unsigned ir3_delay_calc(struct ir3_block *block, struct ir3_instruction *instr,
                 bool soft, bool pred);
  void ir3_remove_nops(struct ir3 *ir);
diff --git a/src/freedreno/ir3/ir3_delay.c b/src/freedreno/ir3/ir3_delay.c

index 0b796a4183a9b2f945a16b950d3cf676edd7ee2b..5839128a4c63ef4b838d77398a29d510fe89d589 100644 (file)
--- a/src/freedreno/ir3/ir3_delay.c
+++ b/src/freedreno/ir3/ir3_delay.c
@@ -69,7 +69,7 @@ ignore_dep(struct ir3_instruction *assigner,
   */
  int
  ir3_delayslots(struct ir3_instruction *assigner,
-               struct ir3_instruction *consumer, unsigned n)
+               struct ir3_instruction *consumer, unsigned n, bool soft)
  {
         if (ignore_dep(assigner, consumer, n))
                 return 0;
@@ -85,6 +85,20 @@ ir3_delayslots(struct ir3_instruction *assigner,
         if (writes_addr(assigner))
                 return 6;
  
+       /* On a6xx, the number of delay slots it takes to get a SFU result
+        * back (ie. using nop's instead of (ss)) is:
+        *
+        *     8 - single warp
+        *     9 - two warps
+        *    10 - four warps
+        *
+        * and so on.  Not quite sure where it tapers out (ie. how many
+        * warps share an SFU unit).  But 10 seems like a reasonable #
+        * to choose:
+        */
+       if (soft && is_sfu(assigner))
+               return 10;
+
         /* handled via sync flags: */
         if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
                 return 0;
@@ -195,15 +209,7 @@ delay_calc_srcn(struct ir3_block *block,
                         delay = MAX2(delay, d);
                 }
         } else {
-               if (soft) {
-                       if (is_sfu(assigner)) {
-                               delay = 4;
-                       } else {
-                               delay = ir3_delayslots(assigner, consumer, srcn);
-                       }
-               } else {
-                       delay = ir3_delayslots(assigner, consumer, srcn);
-               }
+               delay = ir3_delayslots(assigner, consumer, srcn, soft);
                 delay -= distance(block, assigner, delay, pred);
         }
  
diff --git a/src/freedreno/ir3/ir3_depth.c b/src/freedreno/ir3/ir3_depth.c

index 135d4365d2ea5b3123705480e65ff093cf32a675..6bb946871e5b49d47d6059f12b3ca660a4db034a 100644 (file)
--- a/src/freedreno/ir3/ir3_depth.c
+++ b/src/freedreno/ir3/ir3_depth.c
@@ -89,7 +89,7 @@ ir3_instr_depth(struct ir3_instruction *instr, unsigned boost, bool falsedep)
                 if (i == 0)
                         continue;
  
-               sd = ir3_delayslots(src, instr, i) + src->depth;
+               sd = ir3_delayslots(src, instr, i, true) + src->depth;
                 sd += boost;
  
                 instr->depth = MAX2(instr->depth, sd);
diff --git a/src/freedreno/ir3/ir3_postsched.c b/src/freedreno/ir3/ir3_postsched.c

index 4290e8822499544057408090a5d65183935a1cc2..47a8e52fdeb884a5c657a7f4621bd5360aaee24b 100644 (file)
--- a/src/freedreno/ir3/ir3_postsched.c
+++ b/src/freedreno/ir3/ir3_postsched.c
@@ -380,7 +380,7 @@ calculate_deps(struct ir3_postsched_deps_state *state,
  
                                 struct ir3_postsched_node *dep = dep_reg(state, reg->num + b);
                                 if (dep && (state->direction == F)) {
-                                       unsigned d = ir3_delayslots(dep->instr, node->instr, i);
+                                       unsigned d = ir3_delayslots(dep->instr, node->instr, i, true);
                                         node->delay = MAX2(node->delay, d);
                                 }
                         }
author	Rob Clark <robdclark@chromium.org>
	Tue, 25 Feb 2020 18:44:26 +0000 (10:44 -0800)
committer	Marge Bot <eric+marge@anholt.net>
	Fri, 28 Feb 2020 16:53:41 +0000 (16:53 +0000)
src/freedreno/ir3/ir3.h		patch \| blob \| history
src/freedreno/ir3/ir3_delay.c		patch \| blob \| history
src/freedreno/ir3/ir3_depth.c		patch \| blob \| history
src/freedreno/ir3/ir3_postsched.c		patch \| blob \| history