freedreno/ir3: handle flat bypass for a4xx
author Rob Clark <robclark@freedesktop.org>
Wed, 25 Feb 2015 18:54:25 +0000 (13:54 -0500)
committer Rob Clark <robclark@freedesktop.org>
Tue, 3 Mar 2015 15:41:00 +0000 (10:41 -0500)
We may not need this for later a4xx patchlevels, but we do at least need
this for patchlevel 0.  Bypass bary.f for fetching varyings when flat
shading is needed (rather than configuring it via cmdstream).  This requires
a special dummy bary.f w/ the (ei) flag to signal to the scheduler when all
varyings are consumed.  It also requires shader variants based on rasterizer
flatshade state to handle TGSI_INTERPOLATE_COLOR.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/a4xx/fd4_draw.c
src/gallium/drivers/freedreno/a4xx/fd4_emit.h
src/gallium/drivers/freedreno/ir3/ir3.c
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_compiler.c
src/gallium/drivers/freedreno/ir3/ir3_legalize.c
src/gallium/drivers/freedreno/ir3/ir3_shader.c
src/gallium/drivers/freedreno/ir3/ir3_shader.h

index b6bf650cfe040c195f4d10d49bff9049a90ee6be..57f257478a792910605ad8a23b8ad97df68096bc 100644 (file)
@@ -100,6 +100,9 @@ fixup_shader_state(struct fd_context *ctx, struct ir3_shader_key *key)
                if (last_key->alpha != key->alpha)
                        ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
 
+               if (last_key->rasterflat != key->rasterflat)
+                       ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
                fd4_ctx->last_key = *key;
        }
 }
@@ -118,6 +121,7 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                        .binning_pass = true,
                        .color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
                        .alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])),
+                       .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
                        // TODO set .half_precision based on render target format,
                        // ie. float16 and smaller use half, float32 use full..
                        .half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
@@ -130,7 +134,6 @@ fd4_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
                        .fsaturate_r = fd4_ctx->fsaturate_r,
                },
                .format = fd4_emit_format(pfb->cbufs[0]),
-               .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
        };
        unsigned dirty;
 
index cdfa0f4c53a154786c72b2769850a04697a1fa10..5dc3db8880e39e69ab0464ff4bbad96f65b72245 100644 (file)
@@ -55,7 +55,6 @@ struct fd4_emit {
        struct ir3_shader_key key;
        enum a4xx_color_fmt format;
        uint32_t dirty;
-       bool rasterflat;
 
        /* cached to avoid repeated lookups of same variants: */
        struct ir3_shader_variant *vp, *fp;
index a02b06f059a7baaf77486da7152c2fe5a198f996..fe0ffc98c9751d61f84f3408c1afd9a31fb8062a 100644 (file)
@@ -113,6 +113,8 @@ static uint32_t reg(struct ir3_register *reg, struct ir3_info *info,
 
                if (reg->flags & IR3_REG_CONST) {
                        info->max_const = MAX2(info->max_const, max);
+               } else if (val.num == 63) {
+                       /* ignore writes to dummy register r63.x */
                } else if ((max != REG_A0) && (max != REG_P0)) {
                        if (reg->flags & IR3_REG_HALF) {
                                info->max_half_reg = MAX2(info->max_half_reg, max);
index f90392b72972f58bceada2232c2b5701c3a4563e..18d59fa7d55c56601d9b2a83f1718a22ce573b02 100644 (file)
@@ -427,6 +427,12 @@ static inline bool is_mem(struct ir3_instruction *instr)
 
 static inline bool is_input(struct ir3_instruction *instr)
 {
+       /* in some cases, ldlv is used to fetch varying without
+        * interpolation.. fortunately inloc is the first src
+        * register in either case
+        */
+       if (is_mem(instr) && (instr->opc == OPC_LDLV))
+               return true;
        return (instr->category == 2) && (instr->opc == OPC_BARY_F);
 }
 
index 3ee9642efcc5529c311ac4ae55ccbe9f4d84aa86..2084ad31a565615dafe4e230ae6d25254024aca8 100644 (file)
@@ -105,6 +105,11 @@ struct ir3_compile_context {
        /* for calculating input/output positions/linkages: */
        unsigned next_inloc;
 
+       /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
+        * so we need to use ldlv.u32 to load the varying directly:
+        */
+       bool flat_bypass;
+
        unsigned num_internal_temps;
        struct tgsi_src_register internal_temps[8];
 
@@ -204,9 +209,13 @@ compile_init(struct ir3_compile_context *ctx, struct ir3_shader_variant *so,
        } else if (ir3_shader_gpuid(so->shader) >= 400) {
                /* a4xx seems to have *no* sam.p */
                lconfig.lower_TXP = ~0;  /* lower all txp */
+               /* need special handling for "flat" */
+               ctx->flat_bypass = true;
        } else {
                /* a3xx just needs to avoid sam.p for 3d tex */
                lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
+               /* no special handling for "flat" */
+               ctx->flat_bypass = false;
        }
 
        ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
@@ -2745,11 +2754,23 @@ decl_semantic(const struct tgsi_declaration_semantic *sem)
 
 static struct ir3_instruction *
 decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
-               unsigned j, unsigned inloc)
+               unsigned j, unsigned inloc, bool use_ldlv)
 {
        struct ir3_instruction *instr;
        struct ir3_register *src;
 
+       if (use_ldlv) {
+               /* ldlv.u32 dst, l[#inloc], 1 */
+               instr = instr_create(ctx, 6, OPC_LDLV);
+               instr->cat6.type = TYPE_U32;
+               instr->cat6.iim_val = 1;
+               ir3_reg_create(instr, regid, 0);   /* dummy dst */
+               ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
+               ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+
+               return instr;
+       }
+
        /* bary.f dst, #inloc, r0.x */
        instr = instr_create(ctx, 2, OPC_BARY_F);
        ir3_reg_create(instr, regid, 0);   /* dummy dst */
@@ -2943,9 +2964,31 @@ decl_in(struct ir3_compile_context *ctx, struct tgsi_full_declaration *decl)
                                        so->frag_face = true;
                                        instr = decl_in_frag_face(ctx, r + j, j);
                                } else {
+                                       bool use_ldlv = false;
+
+                                       /* I don't believe it is valid to not have Interp
+                                        * on a normal frag shader input, and various parts
+                                        * that handle flat/smooth shading make this
+                                        * assumption as well.
+                                        */
+                                       compile_assert(ctx, decl->Declaration.Interpolate);
+
+                                       if (ctx->flat_bypass) {
+                                               switch (decl->Interp.Interpolate) {
+                                               case TGSI_INTERPOLATE_COLOR:
+                                                       if (!ctx->so->key.rasterflat)
+                                                               break;
+                                                       /* fallthrough */
+                                               case TGSI_INTERPOLATE_CONSTANT:
+                                                       use_ldlv = true;
+                                                       break;
+                                               }
+                                       }
+
                                        so->inputs[n].bary = true;
+
                                        instr = decl_in_frag_bary(ctx, r + j, j,
-                                                       so->inputs[n].inloc + j - 8);
+                                                       so->inputs[n].inloc + j - 8, use_ldlv);
                                }
                        } else {
                                instr = create_input(ctx->block, NULL, (i * 4) + j);
index 11629f61f7348e211b04afe5da0433a0328d39b8..4e0b42b8d3abdbd250fee3cb3be7dc58f2914016 100644 (file)
@@ -187,8 +187,44 @@ static void legalize(struct ir3_legalize_ctx *ctx)
                        last_input = n;
        }
 
-       if (last_input)
+       if (last_input) {
+               /* special hack.. if using ldlv to bypass interpolation,
+                * we need to insert a dummy bary.f on which we can set
+                * the (ei) flag:
+                */
+               if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
+                       int i, cnt;
+
+                       /* note that ir3_instr_create() inserts into
+                        * shader->instrs[] and increments the count..
+                        * so we need to bump up the cnt initially (to
+                        * avoid it clobbering the last real instr) and
+                        * restore it after.
+                        */
+                       cnt = ++shader->instrs_count;
+
+                       /* inserting instructions would be a bit nicer if list.. */
+                       for (i = cnt - 2; i >= 0; i--) {
+                               if (shader->instrs[i] == last_input) {
+
+                                       /* (ss)bary.f (ei)r63.x, 0, r0.x */
+                                       last_input = ir3_instr_create(block, 2, OPC_BARY_F);
+                                       last_input->flags |= IR3_INSTR_SS;
+                                       ir3_reg_create(last_input, regid(63, 0), 0);
+                                       ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
+                                       ir3_reg_create(last_input, regid(0, 0), 0);
+
+                                       shader->instrs[i + 1] = last_input;
+
+                                       break;
+                               }
+                               shader->instrs[i + 1] = shader->instrs[i];
+                       }
+
+                       shader->instrs_count = cnt;
+               }
                last_input->regs[0]->flags |= IR3_REG_EI;
+       }
 
        if (last_rel)
                last_rel->flags |= IR3_INSTR_UL;
index 5e43e2866c95f63e19d9ed531a65780b8b464bb9..7e7ae365bb5f575d926172911428606076fcf32f 100644 (file)
@@ -246,6 +246,7 @@ ir3_shader_variant(struct ir3_shader *shader, struct ir3_shader_key key)
                key.color_two_side = false;
                key.half_precision = false;
                key.alpha = false;
+               key.rasterflat = false;
                if (key.has_per_samp) {
                        key.fsaturate_s = 0;
                        key.fsaturate_t = 0;
index e5d57af1ea64978acd50c95a0e8e12c73481d0bc..7f38067972b08f627eb7c7aa179735491df46a10 100644 (file)
@@ -77,6 +77,10 @@ struct ir3_shader_key {
                         * let's start with this and see how it goes:
                         */
                        unsigned alpha : 1;
+                       /* used when shader needs to handle flat varyings (a4xx),
+                        * for TGSI_INTERPOLATE_COLOR:
+                        */
+                       unsigned rasterflat : 1;
                };
                uint32_t global;
        };