pan/mdg: Add new depth writeout code
author: Icecream95 <ixn@keemail.me>
Sat, 6 Jun 2020 03:21:21 +0000 (15:21 +1200)
committer: Marge Bot <eric+marge@anholt.net>
Wed, 10 Jun 2020 13:54:03 +0000 (13:54 +0000)
We schedule depth writeout to smul and stencil to vlut, so scheduling
to smul has to be disabled in these cases.

When only writing stencil, scheduling to smul is still disabled to
prevent stencil writeout from being scheduled there.

Reviewed-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5065>

src/panfrost/midgard/midgard_ra.c
src/panfrost/midgard/midgard_schedule.c

index bebd9c7af563ca5876d0ffdbdb90c48bf8f98c8e..88795a777f55ca8ddd38e977e42f49afc7324c3f 100644 (file)
@@ -600,13 +600,20 @@ allocate_registers(compiler_context *ctx, bool *spilled)
                 assert(check_read_class(l->class, ins->type, ins->src[2]));
         }
 
-        /* Mark writeout to r0, render target to r1.z, unknown to r1.w */
+        /* Mark writeout to r0, depth to r1.x, stencil to r1.y,
+         * render target to r1.z, unknown to r1.w */
         mir_foreach_instr_global(ctx, ins) {
                 if (!(ins->compact_branch && ins->writeout)) continue;
 
                 if (ins->src[0] < ctx->temp_count)
                         l->solutions[ins->src[0]] = 0;
 
+                if (ins->src[2] < ctx->temp_count)
+                        l->solutions[ins->src[2]] = (16 * 1) + COMPONENT_X * 4;
+
+                if (ins->src[3] < ctx->temp_count)
+                        l->solutions[ins->src[3]] = (16 * 1) + COMPONENT_Y * 4;
+
                 if (ins->src[1] < ctx->temp_count)
                         l->solutions[ins->src[1]] = (16 * 1) + COMPONENT_Z * 4;
 
index f030027d983ce04552923cd097be9e1586d2ba98..114a8de3211b7a51410c512e272d909fb0e5e26a 100644 (file)
@@ -906,7 +906,7 @@ mir_schedule_alu(
 
         mir_choose_alu(&branch, instructions, worklist, len, &predicate, ALU_ENAB_BR_COMPACT);
         mir_update_worklist(worklist, len, instructions, branch);
-        bool writeout = branch && branch->writeout;
+        unsigned writeout = branch ? branch->writeout : 0;
 
         if (branch && branch->branch.conditional) {
                 midgard_instruction *cond = mir_schedule_condition(ctx, &predicate, worklist, len, instructions, branch);
@@ -938,7 +938,8 @@ mir_schedule_alu(
                 predicate.no_cond = true;
         }
 
-        mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
+        if (writeout < PAN_WRITEOUT_Z)
+                mir_choose_alu(&smul, instructions, worklist, len, &predicate, UNIT_SMUL);
 
         if (!writeout) {
                 mir_choose_alu(&vlut, instructions, worklist, len, &predicate, UNIT_VLUT);
@@ -975,8 +976,67 @@ mir_schedule_alu(
                 branch->dest_type = vadd->dest_type;
         }
 
+        if (writeout & PAN_WRITEOUT_Z) {
+                /* Depth writeout */
+
+                unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(1) : branch->src[2];
+
+                predicate.unit = UNIT_SMUL;
+                predicate.dest = src;
+                predicate.mask = 0x1;
+
+                midgard_instruction *z_store;
+
+                z_store = mir_choose_instruction(instructions, worklist, len, &predicate);
+
+                predicate.dest = predicate.mask = 0;
+
+                if (!z_store) {
+                        z_store = ralloc(ctx, midgard_instruction);
+                        *z_store = v_mov(src, make_compiler_temp(ctx));
+
+                        branch->src[2] = z_store->dest;
+                }
+
+                smul = z_store;
+                smul->unit = UNIT_SMUL;
+        }
+
+        if (writeout & PAN_WRITEOUT_S) {
+                /* Stencil writeout */
+
+                unsigned src = (branch->src[0] == ~0) ? SSA_FIXED_REGISTER(1) : branch->src[3];
+
+                predicate.unit = UNIT_VLUT;
+                predicate.dest = src;
+                predicate.mask = 0x1;
+
+                midgard_instruction *z_store;
+
+                z_store = mir_choose_instruction(instructions, worklist, len, &predicate);
+
+                predicate.dest = predicate.mask = 0;
+
+                if (!z_store) {
+                        z_store = ralloc(ctx, midgard_instruction);
+                        *z_store = v_mov(src, make_compiler_temp(ctx));
+
+                        branch->src[3] = z_store->dest;
+
+                        z_store->mask = 0x1;
+
+                        unsigned swizzle = (branch->src[0] == ~0) ? COMPONENT_Y : COMPONENT_X;
+
+                        for (unsigned c = 0; c < 16; ++c)
+                                z_store->swizzle[1][c] = swizzle;
+                }
+
+                vlut = z_store;
+                vlut->unit = UNIT_VLUT;
+        }
+
         mir_choose_alu(&vadd, instructions, worklist, len, &predicate, UNIT_VADD);
-        
+
         mir_update_worklist(worklist, len, instructions, vlut);
         mir_update_worklist(worklist, len, instructions, vadd);
         mir_update_worklist(worklist, len, instructions, smul);