vc4: Allow unpack_8[abcd]_f's src to stay in r4.
author Eric Anholt <eric@anholt.net>
Wed, 19 Aug 2015 05:07:47 +0000 (22:07 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 06:43:04 +0000 (23:43 -0700)
I had already written the QPU emit code to handle an r4 source for these unpacks, but forgot to flag the R4 register class as allowed in the register allocator.

total instructions in shared programs: 97974 -> 97590 (-0.39%)
instructions in affected programs:     25291 -> 24907 (-1.52%)

src/gallium/drivers/vc4/vc4_register_allocate.c

index 58d812bce8a58bfedb84a90cce31d91a7daca1cc..93013147d49080f9009ff2d438020bbbbdf707b2 100644 (file)
@@ -279,7 +279,21 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
                 }
 
                 if (qir_src_needs_a_file(inst)) {
-                        class_bits[inst->src[0].index] &= CLASS_BIT_A;
+                        switch (inst->op) {
+                        case QOP_UNPACK_8A_F:
+                        case QOP_UNPACK_8B_F:
+                        case QOP_UNPACK_8C_F:
+                        case QOP_UNPACK_8D_F:
+                                /* Special case: these can be done as R4
+                                 * unpacks, as well.
+                                 */
+                                class_bits[inst->src[0].index] &= (CLASS_BIT_A |
+                                                                   CLASS_BIT_R4);
+                                break;
+                        default:
+                                class_bits[inst->src[0].index] &= CLASS_BIT_A;
+                                break;
+                        }
                 }
                 ip++;
         }