panfrost/midgard: Move blend load/store into NIR
author Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Mon, 1 Jul 2019 22:02:40 +0000 (15:02 -0700)
committer Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
Wed, 10 Jul 2019 13:12:04 +0000 (06:12 -0700)
We have dedicated intrinsics to access the raw contents of the tile
buffer so we can use a dedicated NIR pass to lower appropriately for
blend shaders, rather than introducing a bizarre hardcoded blend
epilogue that only works for RGBA8_UNORM.

Signed-off-by: Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
src/gallium/drivers/panfrost/midgard/midgard_compile.c
src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c

index cb3b4689c8200e9cad46db1e2de087dc6b08b80d..292c6dc363d238132a2b0063100f3693493ad73e 100644 (file)
@@ -1380,18 +1380,11 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
                 break;
        }
 
-        case nir_intrinsic_load_output:
-                assert(nir_src_is_const(instr->src[0]));
+        /* Reads off the tilebuffer during blending, tasty */
+        case nir_intrinsic_load_raw_output_pan:
                 reg = nir_dest_index(ctx, &instr->dest);
-
-                if (ctx->is_blend) {
-                        /* TODO: MRT */
-                        emit_fb_read_blend_scalar(ctx, reg);
-                } else {
-                        DBG("Unknown output load\n");
-                        assert(0);
-                }
-
+                assert(ctx->is_blend);
+                emit_fb_read_blend_scalar(ctx, reg);
                 break;
 
         case nir_intrinsic_load_blend_const_color_rgba: {
@@ -1459,6 +1452,17 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr)
 
                 break;
 
+        /* Special case of store_output for lowered blend shaders */
+        case nir_intrinsic_store_raw_output_pan:
+                assert (ctx->stage == MESA_SHADER_FRAGMENT);
+                reg = nir_src_index(ctx, &instr->src[0]);
+
+                midgard_instruction move = v_mov(reg, blank_alu_src, SSA_FIXED_REGISTER(0));
+                emit_mir_instruction(ctx, move);
+                ctx->fragment_output = reg;
+
+                break;
+
         case nir_intrinsic_load_alpha_ref_float:
                 assert(instr->dest.is_ssa);
 
@@ -2364,64 +2368,6 @@ emit_fragment_epilogue(compiler_context *ctx)
         EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always);
 }
 
-/* For the blend epilogue, we need to convert the blended fragment vec4 (stored
- * in r0) to a RGBA8888 value by scaling and type converting. We then output it
- * with the int8 analogue to the fragment epilogue */
-
-static void
-emit_blend_epilogue(compiler_context *ctx)
-{
-        /* fmov hr48, [...], r0*/
-
-        midgard_instruction scale = {
-                .type = TAG_ALU_4,
-                .unit = UNIT_VMUL,
-                .ssa_args = {
-                        .src0 = SSA_FIXED_REGISTER(24),
-                        .src1 = SSA_FIXED_REGISTER(0),
-                        .dest = SSA_FIXED_REGISTER(24),
-                },
-                .alu = {
-                        .op = midgard_alu_op_fmov,
-                        .reg_mode = midgard_reg_mode_32,
-                        .dest_override = midgard_dest_override_lower,
-                        .mask = 0xFF,
-                        .src1 = vector_alu_srco_unsigned(blank_alu_src),
-                        .src2 = vector_alu_srco_unsigned(blank_alu_src),
-                }
-        };
-
-        emit_mir_instruction(ctx, scale);
-
-        /* vadd.f2u_rte qr0, hr48, #0 */
-
-        midgard_vector_alu_src alu_src = blank_alu_src;
-        alu_src.half = true;
-
-        midgard_instruction f2u_rte = {
-                .type = TAG_ALU_4,
-                .ssa_args = {
-                        .src0 = SSA_FIXED_REGISTER(24),
-                        .src1 = SSA_UNUSED_0,
-                        .dest = SSA_FIXED_REGISTER(0),
-                        .inline_constant = true
-                },
-                .alu = {
-                        .op = midgard_alu_op_f2u_rte,
-                        .reg_mode = midgard_reg_mode_16,
-                        .dest_override = midgard_dest_override_lower,
-                        .mask = 0xF,
-                        .src1 = vector_alu_srco_unsigned(alu_src),
-                        .src2 = vector_alu_srco_unsigned(blank_alu_src),
-                }
-        };
-
-        emit_mir_instruction(ctx, f2u_rte);
-
-        EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, 0, midgard_condition_always);
-        EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always);
-}
-
 static midgard_block *
 emit_block(compiler_context *ctx, nir_block *block)
 {
@@ -2458,10 +2404,7 @@ emit_block(compiler_context *ctx, nir_block *block)
         /* Append fragment shader epilogue (value writeout) */
         if (ctx->stage == MESA_SHADER_FRAGMENT) {
                 if (block == nir_impl_last_block(ctx->func->impl)) {
-                        if (ctx->is_blend)
-                                emit_blend_epilogue(ctx);
-                        else
-                                emit_fragment_epilogue(ctx);
+                        emit_fragment_epilogue(ctx);
                 }
         }
 
index 115fe5f09ddc622efcd3ab55a754791de2fa41e5..08ef290a20b2f556163625ee36efbea351463876 100644 (file)
 static nir_ssa_def *
 nir_float_to_native(nir_builder *b, nir_ssa_def *c_float)
 {
+   /* First, we scale from [0, 1] to [0, 255.0] */
    nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, c_float), 255.0);
-   return scaled;
+
+   /* Next, we type convert */
+   nir_ssa_def *converted = nir_u2u8(b, nir_f2u32(b,
+            nir_fround_even(b, scaled)));
+
+   return converted;
+}
+
+static nir_ssa_def *
+nir_native_to_float(nir_builder *b, nir_ssa_def *c_native)
+{
+   return c_native;
 }
 
 void
@@ -87,11 +99,43 @@ nir_lower_framebuffer(nir_shader *shader)
                /* Format convert */
                nir_ssa_def *converted = nir_float_to_native(&b, c_nir);
 
-               /* Write out the converted color instead of the input */
-               nir_instr_rewrite_src(instr, &intr->src[1],
-               nir_src_for_ssa(converted));
+               /* Rewrite to use a native store by creating a new intrinsic */
+               nir_intrinsic_instr *new =
+                  nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan);
+               new->src[0] = nir_src_for_ssa(converted);
+
+               /* TODO: What about non-RGBA? Is that different? */
+               new->num_components = 4;
+
+               nir_builder_instr_insert(&b, &new->instr);
+
+               /* (And finally removing the old) */
+               nir_instr_remove(instr);
             } else {
-               /* TODO loads */
+               /* For loads, add conversion after */
+               b.cursor = nir_after_instr(instr);
+
+               /* Rewrite to use a native load by creating a new intrinsic */
+
+               nir_intrinsic_instr *new =
+                  nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan);
+
+               new->num_components = 4;
+
+               unsigned bitsize = 32;
+               nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL);
+               nir_builder_instr_insert(&b, &new->instr);
+
+               /* Convert the raw value */
+               nir_ssa_def *raw = &new->dest.ssa;
+               nir_ssa_def *converted = nir_native_to_float(&b, raw);
+
+               /* Rewrite to use the converted value */
+               nir_src rewritten = nir_src_for_ssa(converted);
+               nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr);
+
+               /* Finally, remove the old load */
+               nir_instr_remove(instr);
             }
          }
       }