freedreno/ir3: add dummy bary.f(ei) for pre-fs-fetch
author: Rob Clark <robdclark@chromium.org>
Wed, 9 Oct 2019 22:51:01 +0000 (15:51 -0700)
committer: Rob Clark <robdclark@gmail.com>
Fri, 18 Oct 2019 21:11:54 +0000 (21:11 +0000)
If the only use of varyings is a pre-shader texture-fetch, we still need
to issue a bary.f with the end-input flag, otherwise we'll block further
VS invocations, as the hw will think varying storage is still busy.

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/ir3/ir3_legalize.c

index 9a0c83042a45c0a3440c9580e3e26264cf530949..76362c4468cd3924489d2a2b20722ed85b7e1cbf 100644 (file)
@@ -88,6 +88,7 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
        struct ir3_legalize_state prev_state = bd->state;
        struct ir3_legalize_state *state = &bd->state;
        bool last_input_needs_ss = false;
+       bool has_tex_prefetch = false;
 
        /* our input state is the OR of all predecessor blocks' state: */
        set_foreach(block->predecessors, entry) {
@@ -243,6 +244,8 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                if (is_tex(n) || (n->opc == OPC_META_TEX_PREFETCH)) {
                        regmask_set(&state->needs_sy, n->regs[0]);
                        ctx->need_pixlod = true;
+                       if (n->opc == OPC_META_TEX_PREFETCH)
+                               has_tex_prefetch = true;
                } else if (n->opc == OPC_RESINFO) {
                        regmask_set(&state->needs_ss, n->regs[0]);
                        ir3_NOP(block)->flags |= IR3_INSTR_SS;
@@ -319,6 +322,22 @@ legalize_block(struct ir3_legalize_ctx *ctx, struct ir3_block *block)
                last_input->regs[0]->flags |= IR3_REG_EI;
                if (last_input_needs_ss)
                        last_input->flags |= IR3_INSTR_SS;
+       } else if (has_tex_prefetch) {
+               /* texture prefetch, but *no* inputs.. we need to insert a
+                * dummy bary.f at the top of the shader to unblock varying
+                * storage:
+                */
+               struct ir3_instruction *baryf;
+
+               /* (ss)bary.f (ei)r63.x, 0, r0.x */
+               baryf = ir3_instr_create(block, OPC_BARY_F);
+               ir3_reg_create(baryf, regid(63, 0), 0)->flags |= IR3_REG_EI;
+               ir3_reg_create(baryf, 0, IR3_REG_IMMED)->iim_val = 0;
+               ir3_reg_create(baryf, regid(0, 0), 0);
+
+               /* insert the dummy bary.f at head: */
+               list_delinit(&baryf->node);
+               list_add(&baryf->node, &block->instr_list);
        }
 
        if (last_rel)