From 2f3d60c84bbad121b4507f39d50a9217ae6fc83a Mon Sep 17 00:00:00 2001 From: Icecream95 Date: Sat, 6 Jun 2020 15:21:21 +1200 Subject: [PATCH] pan/mdg: Add new depth writeout code We schedule depth writeout to smul and stencil to vlut, so scheduling to smul has to be disabled in these cases. When only writing stencil, scheduling to smul is still disabled to prevent stencil writeout from being scheduled there. Reviewed-by: Alyssa Rosenzweig Part-of: --- src/panfrost/midgard/midgard_ra.c | 9 +++- src/panfrost/midgard/midgard_schedule.c | 66 +++++++++++++++++++++++-- 2 files changed, 71 insertions(+), 4 deletions(-) diff --git a/src/panfrost/midgard/midgard_ra.c b/src/panfrost/midgard/midgard_ra.c index bebd9c7af56..88795a777f5 100644 --- a/src/panfrost/midgard/midgard_ra.c +++ b/src/panfrost/midgard/midgard_ra.c @@ -600,13 +600,20 @@ allocate_registers(compiler_context *ctx, bool *spilled) assert(check_read_class(l->class, ins->type, ins->src[2])); } - /* Mark writeout to r0, render target to r1.z, unknown to r1.w */ + /* Mark writeout to r0, depth to r1.x, stencil to r1.y, + * render target to r1.z, unknown to r1.w */ mir_foreach_instr_global(ctx, ins) { if (!(ins->compact_branch && ins->writeout)) continue; if (ins->src[0] < ctx->temp_count) l->solutions[ins->src[0]] = 0; + if (ins->src[2] < ctx->temp_count) + l->solutions[ins->src[2]] = (16 * 1) + COMPONENT_X * 4; + + if (ins->src[3] < ctx->temp_count) + l->solutions[ins->src[3]] = (16 * 1) + COMPONENT_Y * 4; + if (ins->src[1] < ctx->temp_count) l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4; diff --git a/src/panfrost/midgard/midgard_schedule.c b/src/panfrost/midgard/midgard_schedule.c index f030027d983..114a8de3211 100644 --- a/src/panfrost/midgard/midgard_schedule.c +++ b/src/panfrost/midgard/midgard_schedule.c @@ -906,7 +906,7 @@ mir_schedule_alu( mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT); mir_update_worklist(worklist, len, instructions, branch); - bool writeout = branch && branch->writeout; + unsigned writeout = branch ? branch->writeout : 0; if (branch && branch->branch.conditional) { midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch); @@ -938,7 +938,8 @@ mir_schedule_alu( predicate.no_cond = true; } - mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL); + if (writeout < PAN_WRITEOUT_Z) + mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL); if (!writeout) { mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT); @@ -975,8 +976,67 @@ mir_schedule_alu( branch->dest_type = vadd->dest_type; } + if (writeout & PAN_WRITEOUT_Z) { + /* Depth writeout */ + + unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(1) : branch->src[2]; + + predicate.unit = UNIT_SMUL; + predicate.dest = src; + predicate.mask = 0x1; + + midgard_instruction *z_store; + + z_store = mir_choose_instruction(instructions, worklist, len, &predicate); + + predicate.dest = predicate.mask = 0; + + if (!z_store) { + z_store = ralloc(ctx, midgard_instruction); + *z_store = v_mov(src, make_compiler_temp(ctx)); + + branch->src[2] = z_store->dest; + } + + smul = z_store; + smul->unit = UNIT_SMUL; + } + + if (writeout & PAN_WRITEOUT_S) { + /* Stencil writeout */ + + unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(1) : branch->src[3]; + + predicate.unit = UNIT_VLUT; + predicate.dest = src; + predicate.mask = 0x1; + + midgard_instruction *z_store; + + z_store = mir_choose_instruction(instructions, worklist, len, &predicate); + + predicate.dest = predicate.mask = 0; + + if (!z_store) { + z_store = ralloc(ctx, midgard_instruction); + *z_store = v_mov(src, make_compiler_temp(ctx)); + + branch->src[3] = z_store->dest; + + z_store->mask = 0x1; + + unsigned swizzle = (branch->src[0] == ~0) ? COMPONENT_Y : COMPONENT_X; + + for (unsigned c = 0; c < 16; ++c) + z_store->swizzle[1][c] = swizzle; + } + + vlut = z_store; + vlut->unit = UNIT_VLUT; + } + mir_choose_alu(&vadd, instructions, worklist, len, &predicate, UNIT_VADD); - + mir_update_worklist(worklist, len, instructions, vlut); mir_update_worklist(worklist, len, instructions, vadd); mir_update_worklist(worklist, len, instructions, smul); -- 2.30.2