vc4: Actually allow math results to allocate into r4.
authorEric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 17:57:24 +0000 (10:57 -0700)
committerEric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 20:29:26 +0000 (13:29 -0700)
I switched us to tracking whether the results *could* go to r4, but then
didn't make a separate register class for the class bits that included r4.
Switch the "any" class to actually be "any", and name the "any but r4"
class more appropriately.

total instructions in shared programs: 96798 -> 94680 (-2.19%)
instructions in affected programs:     62736 -> 60618 (-3.38%)

src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_register_allocate.c

index 654c46f3c0df6fea40135bb4736003c3b3b1bbe9..3a63af8f2b00c503f40142fdc3036defc7ced118 100644 (file)
@@ -270,6 +270,7 @@ struct vc4_context {
 
         struct ra_regs *regs;
         unsigned int reg_class_any;
+        unsigned int reg_class_a_or_b_or_acc;
         unsigned int reg_class_r4_or_a;
         unsigned int reg_class_a;
 
index 2ea88500227696443bfc35d742404422c7764cdd..3ced50f3a4425b2f33cbb6ac506cd820afb892c8 100644 (file)
@@ -116,6 +116,7 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
         vc4->regs = ra_alloc_reg_set(vc4, ARRAY_SIZE(vc4_regs), true);
 
         vc4->reg_class_any = ra_alloc_reg_class(vc4->regs);
+        vc4->reg_class_a_or_b_or_acc = ra_alloc_reg_class(vc4->regs);
         vc4->reg_class_r4_or_a = ra_alloc_reg_class(vc4->regs);
         vc4->reg_class_a = ra_alloc_reg_class(vc4->regs);
         for (uint32_t i = 0; i < ARRAY_SIZE(vc4_regs); i++) {
@@ -130,10 +131,12 @@ vc4_alloc_reg_set(struct vc4_context *vc4)
                  */
                 if (vc4_regs[i].mux == QPU_MUX_R4) {
                         ra_class_add_reg(vc4->regs, vc4->reg_class_r4_or_a, i);
+                        ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
                         continue;
                 }
 
                 ra_class_add_reg(vc4->regs, vc4->reg_class_any, i);
+                ra_class_add_reg(vc4->regs, vc4->reg_class_a_or_b_or_acc, i);
         }
 
         for (uint32_t i = AB_INDEX; i < AB_INDEX + 64; i += 2) {
@@ -304,9 +307,11 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
 
                 switch (class_bits[i]) {
                 case CLASS_BIT_A | CLASS_BIT_B_OR_ACC | CLASS_BIT_R4:
-                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
                         ra_set_node_class(g, node, vc4->reg_class_any);
                         break;
+                case CLASS_BIT_A | CLASS_BIT_B_OR_ACC:
+                        ra_set_node_class(g, node, vc4->reg_class_a_or_b_or_acc);
+                        break;
                 case CLASS_BIT_A | CLASS_BIT_R4:
                         ra_set_node_class(g, node, vc4->reg_class_r4_or_a);
                         break;