From e1ba0cd452cb456e5d06ee22fdecaed451a7a48b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Fri, 31 Jan 2020 10:05:16 +0100 Subject: [PATCH] pan/midgard: Add nir_intrinsic_store_zs_output_pan support ZS fragment stores are done like color fragment stores, except that they use a different RT id (0xFF) and the depth and stencil values are stored in r1.x and r1.y. Signed-off-by: Boris Brezillon [Fix the scheduling part] Signed-off-by: Alyssa Rosenzweig Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/compiler.h | 3 ++ src/panfrost/midgard/midgard_compile.c | 40 +++++++++++++++++++------ src/panfrost/midgard/midgard_ra.c | 10 +++++-- src/panfrost/midgard/midgard_schedule.c | 30 +++++++++++-------- src/panfrost/midgard/mir.c | 16 ++++++++-- 5 files changed, 73 insertions(+), 26 deletions(-) diff --git a/src/panfrost/midgard/compiler.h b/src/panfrost/midgard/compiler.h index 317124b3921..23dcba521ca 100644 --- a/src/panfrost/midgard/compiler.h +++ b/src/panfrost/midgard/compiler.h @@ -110,6 +110,8 @@ typedef struct midgard_instruction { bool compact_branch; bool writeout; + bool writeout_depth; + bool writeout_stencil; bool last_writeout; /* Kind of a hack, but hint against aggressive DCE */ @@ -227,6 +229,7 @@ enum midgard_rt_id { MIDGARD_COLOR_RT1, MIDGARD_COLOR_RT2, MIDGARD_COLOR_RT3, + MIDGARD_ZS_RT, MIDGARD_NUM_RTS, }; diff --git a/src/panfrost/midgard/midgard_compile.c b/src/panfrost/midgard/midgard_compile.c index cf11973cb6f..dca502bd425 100644 --- a/src/panfrost/midgard/midgard_compile.c +++ b/src/panfrost/midgard/midgard_compile.c @@ -1444,8 +1444,14 @@ compute_builtin_arg(nir_op op) } static void -emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt) +emit_fragment_store(compiler_context *ctx, unsigned src, enum midgard_rt_id rt) { + assert(rt < ARRAY_SIZE(ctx->writeout_branch)); + + midgard_instruction *br = ctx->writeout_branch[rt]; + + assert(!br); + emit_explicit_constant(ctx, src, src); struct midgard_instruction ins = @@ 
-1455,14 +1461,12 @@ emit_fragment_store(compiler_context *ctx, unsigned src, unsigned rt) /* Add dependencies */ ins.src[0] = src; - ins.constants.u32[0] = rt * 0x100; + ins.constants.u32[0] = rt == MIDGARD_ZS_RT ? + 0xFF : (rt - MIDGARD_COLOR_RT0) * 0x100; /* Emit the branch */ - midgard_instruction *br = emit_mir_instruction(ctx, ins); + br = emit_mir_instruction(ctx, ins); schedule_barrier(ctx); - - assert(rt < ARRAY_SIZE(ctx->writeout_branch)); - assert(!ctx->writeout_branch[rt]); ctx->writeout_branch[rt] = br; /* Push our current location = current block count - 1 = where we'll @@ -1656,6 +1660,22 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) break; } + case nir_intrinsic_store_zs_output_pan: { + assert(ctx->stage == MESA_SHADER_FRAGMENT); + emit_fragment_store(ctx, nir_src_index(ctx, &instr->src[0]), + MIDGARD_ZS_RT); + + midgard_instruction *br = ctx->writeout_branch[MIDGARD_ZS_RT]; + + if (!nir_intrinsic_component(instr)) + br->writeout_depth = true; + if (nir_intrinsic_component(instr) || + instr->num_components) + br->writeout_stencil = true; + assert(br->writeout_depth | br->writeout_stencil); + break; + } + case nir_intrinsic_store_output: assert(nir_src_is_const(instr->src[1]) && "no indirect outputs"); @@ -2449,11 +2469,13 @@ static unsigned emit_fragment_epilogue(compiler_context *ctx, unsigned rt) { /* Loop to ourselves */ - + midgard_instruction *br = ctx->writeout_branch[rt]; struct midgard_instruction ins = v_branch(false, false); ins.writeout = true; + ins.writeout_depth = br->writeout_depth; + ins.writeout_stencil = br->writeout_stencil; ins.branch.target_block = ctx->block_count - 1; - ins.constants.u32[0] = rt * 0x100; + ins.constants.u32[0] = br->constants.u32[0]; emit_mir_instruction(ctx, ins); ctx->current_block->epilogue = true; @@ -2754,7 +2776,7 @@ midgard_compile_shader_nir(nir_shader *nir, midgard_program *program, bool is_bl ctx->stage = nir->info.stage; ctx->is_blend = is_blend; ctx->alpha_ref = 
program->alpha_ref; - ctx->blend_rt = blend_rt; + ctx->blend_rt = MIDGARD_COLOR_RT0 + blend_rt; ctx->quirks = midgard_get_quirks(gpu_id); /* Start off with a safe cutoff, allowing usage of all 16 work diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index 92be82fe7b8..eec4876eefe 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -561,8 +561,14 @@ allocate_registers(compiler_context *ctx, bool *spilled) mir_foreach_instr_global(ctx, ins) { if (!(ins->compact_branch && ins->writeout)) continue; - if (ins->src[0] < ctx->temp_count) - l->solutions[ins->src[0]] = 0; + if (ins->src[0] < ctx->temp_count) { + if (ins->writeout_depth) + l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_X * 4; + else if (ins->writeout_stencil) + l->solutions[ins->src[0]] = (16 * 1) + COMPONENT_Y * 4; + else + l->solutions[ins->src[0]] = 0; + } if (ins->src[1] < ctx->temp_count) l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4; diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index e359cbb2ed1..1697f086390 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -845,6 +845,7 @@ mir_schedule_alu( mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT); mir_update_worklist(worklist, len, instructions, branch); bool writeout = branch && branch->writeout; + bool zs_writeout = writeout && (branch->writeout_depth | branch->writeout_stencil); if (branch && branch->branch.conditional) { midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch); @@ -859,13 +860,14 @@ mir_schedule_alu( mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL); - if (!writeout) + if (!writeout) { mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT); - - if (writeout) { + } else { /* Propagate up */ bundle.last_writeout = 
branch->last_writeout; + } + if (writeout && !zs_writeout) { vadd = ralloc(ctx, midgard_instruction); *vadd = v_mov(~0, make_compiler_temp(ctx)); @@ -928,9 +930,9 @@ mir_schedule_alu( /* Check if writeout reads its own register */ - if (branch && branch->writeout) { + if (writeout) { midgard_instruction *stages[] = { sadd, vadd, smul }; - unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : branch->src[0]; + unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 1 : 0) : branch->src[0]; unsigned writeout_mask = 0x0; bool bad_writeout = false; @@ -946,13 +948,17 @@ mir_schedule_alu( } /* It's possible we'll be able to schedule something into vmul - * to fill r0. Let's peak into the future, trying to schedule + * to fill r0/r1. Let's peak into the future, trying to schedule * vmul specially that way. */ - if (!bad_writeout && writeout_mask != 0xF) { + unsigned full_mask = zs_writeout ? + (1 << (branch->writeout_depth + branch->writeout_stencil)) - 1 : + 0xF; + + if (!bad_writeout && writeout_mask != full_mask) { predicate.unit = UNIT_VMUL; predicate.dest = src; - predicate.mask = writeout_mask ^ 0xF; + predicate.mask = writeout_mask ^ full_mask; struct midgard_instruction *peaked = mir_choose_instruction(instructions, worklist, len, &predicate); @@ -961,7 +967,7 @@ mir_schedule_alu( vmul = peaked; vmul->unit = UNIT_VMUL; writeout_mask |= predicate.mask; - assert(writeout_mask == 0xF); + assert(writeout_mask == full_mask); } /* Cleanup */ @@ -969,13 +975,13 @@ mir_schedule_alu( } /* Finally, add a move if necessary */ - if (bad_writeout || writeout_mask != 0xF) { - unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(0) : make_compiler_temp(ctx); + if (bad_writeout || writeout_mask != full_mask) { + unsigned temp = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(zs_writeout ? 
1 : 0) : make_compiler_temp(ctx); vmul = ralloc(ctx, midgard_instruction); *vmul = v_mov(src, temp); vmul->unit = UNIT_VMUL; - vmul->mask = 0xF ^ writeout_mask; + vmul->mask = full_mask ^ writeout_mask; /* Rewrite to use our temp */ diff --git a/src/panfrost/midgard/mir.c b/src/panfrost/midgard/mir.c index 5241c6334b2..5e9acc05dbd 100644 --- a/src/panfrost/midgard/mir.c +++ b/src/panfrost/midgard/mir.c @@ -468,9 +468,19 @@ mir_bytemask_of_read_components_single(unsigned *swizzle, unsigned inmask, midga uint16_t mir_bytemask_of_read_components_index(midgard_instruction *ins, unsigned i) { - /* Branch writeout uses all components */ - if (ins->compact_branch && ins->writeout && (i == 0)) - return 0xFFFF; + if (ins->compact_branch && ins->writeout && (i == 0)) { + /* Non-ZS writeout uses all components */ + if (!ins->writeout_depth && !ins->writeout_stencil) + return 0xFFFF; + + /* For ZS-writeout, if both Z and S are written we need two + * components, otherwise we only need one. + */ + if (ins->writeout_depth && ins->writeout_stencil) + return 0xFF; + else + return 0xF; + } /* Conditional branches read one 32-bit component = 4 bytes (TODO: multi branch??) */ if (ins->compact_branch && ins->branch.conditional && (i == 0)) -- 2.30.2