if (last_key->alpha != key->alpha)
ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+ if (last_key->rasterflat != key->rasterflat)
+ ctx->prog.dirty |= FD_SHADER_DIRTY_FP;
+
fd4_ctx->last_key = *key;
}
}
.binning_pass = true,
.color_two_side = ctx->rasterizer ? ctx->rasterizer->light_twoside : false,
.alpha = util_format_is_alpha(pipe_surface_format(pfb->cbufs[0])),
+ .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
// TODO set .half_precision based on render target format,
// ie. float16 and smaller use half, float32 use full..
.half_precision = !!(fd_mesa_debug & FD_DBG_FRAGHALF),
.fsaturate_r = fd4_ctx->fsaturate_r,
},
.format = fd4_emit_format(pfb->cbufs[0]),
- .rasterflat = ctx->rasterizer && ctx->rasterizer->flatshade,
};
unsigned dirty;
struct ir3_shader_key key;
enum a4xx_color_fmt format;
uint32_t dirty;
- bool rasterflat;
/* cached to avoid repeated lookups of same variants: */
struct ir3_shader_variant *vp, *fp;
if (reg->flags & IR3_REG_CONST) {
info->max_const = MAX2(info->max_const, max);
+ } else if (val.num == 63) {
+ /* ignore writes to dummy register r63.x */
} else if ((max != REG_A0) && (max != REG_P0)) {
if (reg->flags & IR3_REG_HALF) {
info->max_half_reg = MAX2(info->max_half_reg, max);
static inline bool is_input(struct ir3_instruction *instr)
{
+ /* Besides bary.f, ldlv also counts as an input: in some cases
+ * it is used to fetch a varying without interpolation.
+ * Fortunately inloc is the first src register in either case.
+ */
+ if (is_mem(instr) && (instr->opc == OPC_LDLV))
+ return true;
return (instr->category == 2) && (instr->opc == OPC_BARY_F);
}
/* for calculating input/output positions/linkages: */
unsigned next_inloc;
+ /* a4xx (at least patchlevel 0) cannot seem to flat-interpolate
+ * so we need to use ldlv.u32 to load the varying directly:
+ */
+ bool flat_bypass;
+
unsigned num_internal_temps;
struct tgsi_src_register internal_temps[8];
} else if (ir3_shader_gpuid(so->shader) >= 400) {
/* a4xx seems to have *no* sam.p */
lconfig.lower_TXP = ~0; /* lower all txp */
+ /* need special handling for "flat" */
+ ctx->flat_bypass = true;
} else {
/* a3xx just needs to avoid sam.p for 3d tex */
lconfig.lower_TXP = (1 << TGSI_TEXTURE_3D);
+ /* no special handling for "flat" */
+ ctx->flat_bypass = false;
}
ctx->tokens = tgsi_transform_lowering(&lconfig, tokens, &ctx->info);
static struct ir3_instruction *
decl_in_frag_bary(struct ir3_compile_context *ctx, unsigned regid,
- unsigned j, unsigned inloc)
+ unsigned j, unsigned inloc, bool use_ldlv)
{
struct ir3_instruction *instr;
struct ir3_register *src;
+ if (use_ldlv) {
+ /* ldlv.u32 dst, l[#inloc], 1 */
+ instr = instr_create(ctx, 6, OPC_LDLV);
+ instr->cat6.type = TYPE_U32;
+ instr->cat6.iim_val = 1;
+ ir3_reg_create(instr, regid, 0); /* dummy dst */
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = inloc;
+ ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = 1;
+
+ return instr;
+ }
+
/* bary.f dst, #inloc, r0.x */
instr = instr_create(ctx, 2, OPC_BARY_F);
ir3_reg_create(instr, regid, 0); /* dummy dst */
so->frag_face = true;
instr = decl_in_frag_face(ctx, r + j, j);
} else {
+ bool use_ldlv = false;
+
+ /* I don't believe it is valid to not have Interp
+ * on a normal frag shader input, and various parts
+ * that handle flat/smooth shading make this
+ * assumption as well.
+ */
+ compile_assert(ctx, decl->Declaration.Interpolate);
+
+ if (ctx->flat_bypass) {
+ switch (decl->Interp.Interpolate) {
+ case TGSI_INTERPOLATE_COLOR:
+ if (!ctx->so->key.rasterflat)
+ break;
+ /* fallthrough */
+ case TGSI_INTERPOLATE_CONSTANT:
+ use_ldlv = true;
+ break;
+ }
+ }
+
so->inputs[n].bary = true;
+
instr = decl_in_frag_bary(ctx, r + j, j,
- so->inputs[n].inloc + j - 8);
+ so->inputs[n].inloc + j - 8, use_ldlv);
}
} else {
instr = create_input(ctx->block, NULL, (i * 4) + j);
last_input = n;
}
- if (last_input)
+ if (last_input) {
+ /* special hack.. if using ldlv to bypass interpolation,
+ * we need to insert a dummy bary.f on which we can set
+ * the (ei) flag:
+ */
+ if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
+ int i, cnt;
+
+ /* note that ir3_instr_create() inserts into
+ * shader->instrs[] and increments the count..
+ * so we need to bump up the cnt initially (to
+ * avoid it clobbering the last real instr) and
+ * restore it after.
+ */
+ cnt = ++shader->instrs_count;
+
+ /* inserting instructions would be a bit nicer if this were a list.. */
+ for (i = cnt - 2; i >= 0; i--) {
+ if (shader->instrs[i] == last_input) {
+
+ /* (ss)bary.f (ei)r63.x, 0, r0.x */
+ last_input = ir3_instr_create(block, 2, OPC_BARY_F);
+ last_input->flags |= IR3_INSTR_SS;
+ ir3_reg_create(last_input, regid(63, 0), 0);
+ ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
+ ir3_reg_create(last_input, regid(0, 0), 0);
+
+ shader->instrs[i + 1] = last_input;
+
+ break;
+ }
+ shader->instrs[i + 1] = shader->instrs[i];
+ }
+
+ shader->instrs_count = cnt;
+ }
last_input->regs[0]->flags |= IR3_REG_EI;
+ }
if (last_rel)
last_rel->flags |= IR3_INSTR_UL;
key.color_two_side = false;
key.half_precision = false;
key.alpha = false;
+ key.rasterflat = false;
if (key.has_per_samp) {
key.fsaturate_s = 0;
key.fsaturate_t = 0;
* let's start with this and see how it goes:
*/
unsigned alpha : 1;
+ /* used when shader needs to handle flat varyings (a4xx),
+ * for TGSI_INTERPOLATE_COLOR:
+ */
+ unsigned rasterflat : 1;
};
uint32_t global;
};