bool compact_branch;
bool writeout;
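+ /* For writeout branches, whether depth and/or stencil is written */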
+ bool writeout_depth;
+ bool writeout_stencil;
bool last_writeout;
/* Kind of a hack, but hint against aggressive DCE */
MIDGARD_COLOR_RT1,
MIDGARD_COLOR_RT2,
MIDGARD_COLOR_RT3,
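+ /* Pseudo render target for combined depth/stencil writeout */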
+ MIDGARD_ZS_RT,
MIDGARD_NUM_RTS,
};
}
static void
-emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt)
+emit_fragment_store(compiler_context *ctx, unsigned src, enum midgard_rt_id rt)
{
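+ /* Only one writeout branch is allowed per render target */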
+ assert(rt < ARRAY_SIZE(ctx->writeout_branch));
+
+ midgard_instruction *br = ctx->writeout_branch[rt];
+
+ assert(!br);
+
emit_explicit_constant(ctx, src, src);
struct midgard_instruction ins =
/* Add dependencies */
ins.src[0] = src;
- ins.constants.u32[0] = rt * 0x100;
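+ /* The branch constant selects the render target: colour RTs use
+ * rt * 0x100, while 0xFF tags a depth/stencil writeout */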
+ ins.constants.u32[0] = rt == MIDGARD_ZS_RT ?
+ 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100;
/* Emit the branch */
- midgard_instruction *br = emit_mir_instruction(ctx, ins);
+ br = emit_mir_instruction(ctx, ins);
schedule_barrier(ctx);
-
- assert(rt < ARRAY_SIZE(ctx->writeout_branch));
- assert(!ctx->writeout_branch[rt]);
ctx->writeout_branch[rt] = br;
/* Push our current location = current block count - 1 = where we'll
break;
}
+ case nir_intrinsic_store_zs_output_pan: {
+ assert(ctx->stage == MESA_SHADER_FRAGMENT);
+ emit_fragment_store(ctx, nir_src_index(ctx, &instr->src[0]),
+ MIDGARD_ZS_RT);
+
+ midgard_instruction *br = ctx->writeout_branch[MIDGARD_ZS_RT];
+
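+ /* Component 0 holds depth; stencil is either in component 1 or
+ * packed as the second of two components */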
+ if (!nir_intrinsic_component(instr))
+ br->writeout_depth = true;
+ if (nir_intrinsic_component(instr) ||
+ instr->num_components > 1)
+ br->writeout_stencil = true;
+ assert(br->writeout_depth | br->writeout_stencil);
+ break;
+ }
+
case nir_intrinsic_store_output:
assert(nir_src_is_const(instr->src[1]) && "no indirect outputs");
emit_fragment_epilogue(compiler_context *ctx, unsigned rt)
{
/* Loop to ourselves */
-
+ midgard_instruction *br = ctx->writeout_branch[rt];
struct midgard_instruction ins = v_branch(false, false);
ins.writeout = true;
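+ /* Inherit the depth/stencil flags and RT selector from the
+ * original writeout branch */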
+ ins.writeout_depth = br->writeout_depth;
+ ins.writeout_stencil = br->writeout_stencil;
ins.branch.target_block = ctx->block_count - 1;
- ins.constants.u32[0] = rt * 0x100;
+ ins.constants.u32[0] = br->constants.u32[0];
emit_mir_instruction(ctx, ins);
ctx->current_block->epilogue = true;
ctx->stage = nir->info.stage;
ctx->is_blend = is_blend;
ctx->alpha_ref = program->alpha_ref;
- ctx->blend_rt = blend_rt;
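+ /* blend_rt is a colour RT index; bias it so it indexes into
+ * midgard_rt_id */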
+ ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt;
ctx->quirks = midgard_get_quirks(gpu_id);
/* Start off with a safe cutoff, allowing usage of all 16 work
mir_foreach_instr_global(ctx, ins) {
if (!(ins->compact_branch && ins->writeout)) continue;
- if (ins->src[0] < ctx->temp_count)
- l->solutions[ins->src[0]] = 0;
+ if (ins->src[0] < ctx->temp_count) {
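+ /* Depth lands in r1.x and stencil in r1.y; colour writeout
+ * sources r0 */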
+ if (ins->writeout_depth)
+ l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_X * 4;
+ else if (ins->writeout_stencil)
+ l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_Y * 4;
+ else
+ l->solutions[ins->src[0]] = 0;
+ }
if (ins->src[1] < ctx->temp_count)
l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;
mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
mir_update_worklist(worklist, len, instructions, branch);
bool writeout = branch && branch->writeout;
+ bool zs_writeout = writeout && (branch->writeout_depth | branch->writeout_stencil);
if (branch && branch->branch.conditional) {
midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch);
mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
- if (!writeout)
+ if (!writeout) {
mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
-
- if (writeout) {
+ } else {
/* Propagate up */
bundle.last_writeout = branch->last_writeout;
+ }
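+
+ /* Colour writeout stages its source through a move into r0;
+ * depth/stencil writeout sources r1 and needs no staging */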
+ if (writeout && !zs_writeout) {
vadd = ralloc(ctx, midgard_instruction);
*vadd = v_mov(~0, make_compiler_temp(ctx));
/* Check if writeout reads its own register */
- if (branch && branch->writeout) {
+ if (writeout) {
midgard_instruction *stages[] = { sadd, vadd, smul };
- unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0];
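+ /* Absent an explicit source, writeout reads the fixed register:
+ * r0 for colour, r1 for depth/stencil */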
+ unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : branch->src[0];
unsigned writeout_mask = 0x0;
bool bad_writeout = false;
}
/* It's possible we'll be able to schedule something into vmul
- * to fill r0. Let's peak into the future, trying to schedule
+ * to fill r0/r1. Let's peek into the future, trying to schedule
* vmul specially that way. */
- if (!bad_writeout && writeout_mask != 0xF) {
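+ /* Colour writeout needs all of rgba, while depth/stencil only
+ * needs one component per value written */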
+ unsigned full_mask = zs_writeout ?
+ (1 << (branch->writeout_depth + branch->writeout_stencil)) - 1 :
+ 0xF;
+
+ if (!bad_writeout && writeout_mask != full_mask) {
predicate.unit = UNIT_VMUL;
predicate.dest = src;
- predicate.mask = writeout_mask ^ 0xF;
+ predicate.mask = writeout_mask ^ full_mask;
struct midgard_instruction *peaked =
mir_choose_instruction(instructions, worklist, len, &predicate);
vmul = peaked;
vmul->unit = UNIT_VMUL;
writeout_mask |= predicate.mask;
- assert(writeout_mask == 0xF);
+ assert(writeout_mask == full_mask);
}
/* Cleanup */
}
/* Finally, add a move if necessary */
- if (bad_writeout || writeout_mask != 0xF) {
- unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : make_compiler_temp(ctx);
+ if (bad_writeout || writeout_mask != full_mask) {
+ unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : make_compiler_temp(ctx);
vmul = ralloc(ctx, midgard_instruction);
*vmul = v_mov(src, temp);
vmul->unit = UNIT_VMUL;
- vmul->mask = 0xF ^ writeout_mask;
+ vmul->mask = full_mask ^ writeout_mask;
/* Rewrite to use our temp */
uint16_t
mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i)
{
- /* Branch writeout uses all components */
- if (ins->compact_branch && ins->writeout && (i == 0))
- return 0xFFFF;
+ if (ins->compact_branch && ins->writeout && (i == 0)) {
+ /* Non-ZS writeout uses all components */
+ if (!ins->writeout_depth && !ins->writeout_stencil)
+ return 0xFFFF;
+
+ /* For ZS-writeout, if both Z and S are written we need two
+ * 32-bit components (bytemask 0xFF); otherwise a single
+ * component (bytemask 0xF) suffices.
+ */
+ if (ins->writeout_depth && ins->writeout_stencil)
+ return 0xFF;
+ else
+ return 0xF;
+ }
/* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */
if (ins->compact_branch && ins->branch.conditional && (i == 0))