From d73b2c0517feb37a77d1b28b6cc063d699374867 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Sat, 15 Feb 2014 19:01:38 -0500 Subject: [PATCH] freedreno/a3xx/compiler: use (ss) for WAR hazards Seems texture sample instructions don't immediately consume their src(s). In fact, some shaders from blob compiler seem to indicate that it does not even count the texture sample instructions when calculating number of delay slots to fill for non-sample instructions. (Although so far it seems inconclusive as to whether this is required.) In particular, when a src register of a previous texture sample instruction is clobbered, the (ss) bit is needed to synchronize with the tex pipeline to ensure it has picked up the previous values before they are overwritten. Signed-off-by: Rob Clark --- src/gallium/drivers/freedreno/a3xx/ir3_ra.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c index 5bc1e2f1619..f4834a3778c 100644 --- a/src/gallium/drivers/freedreno/a3xx/ir3_ra.c +++ b/src/gallium/drivers/freedreno/a3xx/ir3_ra.c @@ -480,22 +480,25 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) struct ir3_instruction *end = ir3_instr_create(block, 0, OPC_END); struct ir3_instruction *last_input = NULL; + regmask_t needs_ss_war; regmask_t needs_ss; regmask_t needs_sy; + regmask_init(&needs_ss_war); regmask_init(&needs_ss); regmask_init(&needs_sy); shader->instrs_count = 0; for (n = block->head; n; n = n->next) { + struct ir3_register *reg; unsigned i; if (is_meta(n)) continue; for (i = 1; i < n->regs_count; i++) { - struct ir3_register *reg = n->regs[i]; + reg = n->regs[i]; if (reg_gpr(reg)) { @@ -515,12 +518,26 @@ static void legalize(struct ir3_ra_ctx *ctx, struct ir3_block *block) } } + if (n->regs_count > 0) { + reg = n->regs[0]; + if (regmask_get(&needs_ss_war, reg)) { + n->flags |= IR3_INSTR_SS; + regmask_init(&needs_ss_war); // ??? 
I assume? + } + } + shader->instrs[shader->instrs_count++] = n; if (is_sfu(n)) regmask_set(&needs_ss, n->regs[0]); - if (is_tex(n)) + if (is_tex(n)) { regmask_set(&needs_sy, n->regs[0]); + for (i = 1; i < n->regs_count; i++) { + reg = n->regs[i]; + if (reg_gpr(reg)) + regmask_set(&needs_ss_war, reg); + } + } if (is_input(n)) last_input = n; } -- 2.30.2