vc4: Try to schedule QIR instructions between writing to and reading math.
author Eric Anholt <eric@anholt.net>
Wed, 30 Nov 2016 20:19:38 +0000 (12:19 -0800)
committer Eric Anholt <eric@anholt.net>
Thu, 1 Dec 2016 03:58:09 +0000 (19:58 -0800)
This helps us get the delay slots between SFU writes and reads filled.

total instructions in shared programs: 94494 -> 93970 (-0.55%)
instructions in affected programs:     59206 -> 58682 (-0.89%)

3DMMES performance +1.89967% +/- 0.157611% (n=10,9)

src/gallium/drivers/vc4/vc4_qir_schedule.c

index ea48a8583377996a55322ba4eef680134355505f..89e6d1d0d60d59f96a1df3e491b87db28cbcc93c 100644 (file)
@@ -569,6 +569,28 @@ latency_between(struct schedule_node *before, struct schedule_node *after)
             after->inst->op == QOP_TEX_RESULT)
                 return 100;
 
+        switch (before->inst->op) {
+        case QOP_RCP:
+        case QOP_RSQ:
+        case QOP_EXP2:
+        case QOP_LOG2:
+                for (int i = 0; i < qir_get_nsrc(after->inst); i++) {
+                        if (after->inst->src[i].file ==
+                            before->inst->dst.file &&
+                            after->inst->src[i].index ==
+                            before->inst->dst.index) {
+                                /* There are two QPU delay slots before we can
+                                 * read a math result, which could be up to 4
+                                 * QIR instructions if they packed well.
+                                 */
+                                return 4;
+                        }
+                }
+                break;
+        default:
+                break;
+        }
+
         return 1;
 }