freedreno/ir3: add support for a6xx 'merged' register set
author Rob Clark <robdclark@gmail.com>
Sat, 11 Aug 2018 14:30:38 +0000 (10:30 -0400)
committer Rob Clark <robdclark@gmail.com>
Tue, 14 Aug 2018 21:59:02 +0000 (17:59 -0400)
Starting with a6xx, half and full precision registers conflict.  This
makes things a bit more efficient, i.e. if some parts of the shader are
heavy on half-precision and others on full precision, you don't have to
allocate the worst case for both.  But it means we need to set up some
additional conflicts.

Signed-off-by: Rob Clark <robdclark@gmail.com>
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3_ra.c

index d1a73ddc727c494532ea2027114d7a6c009de02c..3f1216bdaa73ed6c0637c7babd08da232618262c 100644 (file)
@@ -115,7 +115,12 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
                        /* ignore writes to dummy register r63.x */
                } else if (max < 48) {
                        if (reg->flags & IR3_REG_HALF) {
-                               info->max_half_reg = MAX2(info->max_half_reg, max);
+                               if (info->gpu_id >= 600) {
+                                       /* starting w/ a6xx, half regs conflict with full regs: */
+                                       info->max_reg = MAX2(info->max_reg, (max+1)/2);
+                               } else {
+                                       info->max_half_reg = MAX2(info->max_half_reg, max);
+                               }
                        } else {
                                info->max_reg = MAX2(info->max_reg, max);
                        }
index a1c048c4a15351c2690006664c765e888e70af65..f6605443e5bd08ecf4d749e60df10d930831ee24 100644 (file)
@@ -285,8 +285,25 @@ ir3_ra_alloc_reg_set(struct ir3_compiler *compiler)
                }
        }
 
+       /* starting a6xx, half precision regs conflict w/ full precision regs: */
+       if (compiler->gpu_id >= 600) {
+               /* because of transitivity, we can get away with just setting up
+                * conflicts between the first class of full and half regs:
+                */
+               for (unsigned j = 0; j < CLASS_REGS(0) / 2; j++) {
+                       unsigned freg  = set->gpr_to_ra_reg[0][j];
+                       unsigned hreg0 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 0];
+                       unsigned hreg1 = set->gpr_to_ra_reg[HALF_OFFSET][(j * 2) + 1];
+
+                       ra_add_transitive_reg_conflict(set->regs, freg, hreg0);
+                       ra_add_transitive_reg_conflict(set->regs, freg, hreg1);
+               }
 
-       ra_set_finalize(set->regs, q_values);
+               // TODO also need to update q_values, but for now:
+               ra_set_finalize(set->regs, NULL);
+       } else {
+               ra_set_finalize(set->regs, q_values);
+       }
 
        ralloc_free(q_values);