freedreno/ir3/ra: fix half-class conflicts
author Rob Clark <robdclark@gmail.com>
Sat, 16 Mar 2019 14:03:12 +0000 (10:03 -0400)
committer Rob Clark <robdclark@gmail.com>
Thu, 21 Mar 2019 13:13:05 +0000 (09:13 -0400)
On a6xx, half-regs conflict with full-regs.  But we were only setting up
conflicts for the first class (ie. scalar, but not hvec2/hvec3/hvec4),
resulting in higher half-reg classes getting assigned to regs that
overwrite full-regs.

Noticed while trying to enable indirect-sampler (sam.s2en) which uses an
hvec2 argument to pass the sampler/tex index.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/freedreno/ir3/ir3_ra.c

index a9e1894a3504ae997411e9ffe86ecb000db631b4..46d3e7e904447e8fe65c1bd97f8c102213d657f4 100644 (file)
@@ -286,13 +286,20 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
                /* because of transitivity, we can get away with just setting up
                 * conflicts between the first class of full and half regs:
                 */
-               for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) {
-                       unsigned freg  = set->gpr_to_ra_reg[0][j];
-                       unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0];
-                       unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1];
-
-                       ra_add_transitive_reg_conflict(set->regs, freg, hreg0);
-                       ra_add_transitive_reg_conflict(set->regs, freg, hreg1);
+               for (unsigned i = 0; i < half_class_count; i++) {
+                       /* NOTE there are fewer half class sizes, but they match the
+                        * first N full class sizes.. but assert in case that ever
+                        * accidentally changes:
+                        */
+                       debug_assert(class_sizes[i] == half_class_sizes[i]);
+                       for (unsigned j = 0; j < CLASS_REGS(i) / 2; j++) {
+                               unsigned freg  = set->gpr_to_ra_reg[i][j];
+                               unsigned hreg0 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 0];
+                               unsigned hreg1 = set->gpr_to_ra_reg[i + HALF_OFFSET][(j * 2) + 1];
+
+                               ra_add_transitive_reg_conflict(set->regs, freg, hreg0);
+                               ra_add_transitive_reg_conflict(set->regs, freg, hreg1);
+                       }
                }
 
                // TODO also need to update q_values, but for now: