From 541b329bd1bcb157dd239b132f49f73ef173132d Mon Sep 17 00:00:00 2001 From: Alyssa Rosenzweig Date: Mon, 1 Jul 2019 15:02:40 -0700 Subject: [PATCH] panfrost/midgard: Move blend load/store into NIR We have dedicated intrinsics to access the raw contents of the tile buffer so we can use a dedicated NIR pass to lower appropriately for blend shaders, rather than introducing a bizarre hardcoded blend epilogue that only works for RGBA8_UNORM. Signed-off-by: Alyssa Rosenzweig --- .../panfrost/midgard/midgard_compile.c | 89 ++++--------------- .../panfrost/midgard/nir_lower_framebuffer.c | 54 +++++++++-- 2 files changed, 65 insertions(+), 78 deletions(-) diff --git a/src/gallium/drivers/panfrost/midgard/midgard_compile.c b/src/gallium/drivers/panfrost/midgard/midgard_compile.c index cb3b4689c82..292c6dc363d 100644 --- a/src/gallium/drivers/panfrost/midgard/midgard_compile.c +++ b/src/gallium/drivers/panfrost/midgard/midgard_compile.c @@ -1380,18 +1380,11 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) break; } - case nir_intrinsic_load_output: - assert(nir_src_is_const(instr->src[0])); + /* Reads off the tilebuffer during blending, tasty */ + case nir_intrinsic_load_raw_output_pan: reg = nir_dest_index(ctx, &instr->dest); - - if (ctx->is_blend) { - /* TODO: MRT */ - emit_fb_read_blend_scalar(ctx, reg); - } else { - DBG("Unknown output load\n"); - assert(0); - } - + assert(ctx->is_blend); + emit_fb_read_blend_scalar(ctx, reg); break; case nir_intrinsic_load_blend_const_color_rgba: { @@ -1459,6 +1452,17 @@ emit_intrinsic(compiler_context *ctx, nir_intrinsic_instr *instr) break; + /* Special case of store_output for lowered blend shaders */ + case nir_intrinsic_store_raw_output_pan: + assert (ctx->stage == MESA_SHADER_FRAGMENT); + reg = nir_src_index(ctx, &instr->src[0]); + + midgard_instruction move = v_mov(reg, blank_alu_src, SSA_FIXED_REGISTER(0)); + emit_mir_instruction(ctx, move); + ctx->fragment_output = reg; + + break; + case nir_intrinsic_load_alpha_ref_float: assert(instr->dest.is_ssa); @@ -2364,64 +2368,6 @@ emit_fragment_epilogue(compiler_context *ctx) EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always); } -/* For the blend epilogue, we need to convert the blended fragment vec4 (stored - * in r0) to a RGBA8888 value by scaling and type converting. We then output it - * with the int8 analogue to the fragment epilogue */ - -static void -emit_blend_epilogue(compiler_context *ctx) -{ - /* fmov hr48, [...], r0*/ - - midgard_instruction scale = { - .type = TAG_ALU_4, - .unit = UNIT_VMUL, - .ssa_args = { - .src0 = SSA_FIXED_REGISTER(24), - .src1 = SSA_FIXED_REGISTER(0), - .dest = SSA_FIXED_REGISTER(24), - }, - .alu = { - .op = midgard_alu_op_fmov, - .reg_mode = midgard_reg_mode_32, - .dest_override = midgard_dest_override_lower, - .mask = 0xFF, - .src1 = vector_alu_srco_unsigned(blank_alu_src), - .src2 = vector_alu_srco_unsigned(blank_alu_src), - } - }; - - emit_mir_instruction(ctx, scale); - - /* vadd.f2u_rte qr0, hr48, #0 */ - - midgard_vector_alu_src alu_src = blank_alu_src; - alu_src.half = true; - - midgard_instruction f2u_rte = { - .type = TAG_ALU_4, - .ssa_args = { - .src0 = SSA_FIXED_REGISTER(24), - .src1 = SSA_UNUSED_0, - .dest = SSA_FIXED_REGISTER(0), - .inline_constant = true - }, - .alu = { - .op = midgard_alu_op_f2u_rte, - .reg_mode = midgard_reg_mode_16, - .dest_override = midgard_dest_override_lower, - .mask = 0xF, - .src1 = vector_alu_srco_unsigned(alu_src), - .src2 = vector_alu_srco_unsigned(blank_alu_src), - } - }; - - emit_mir_instruction(ctx, f2u_rte); - - EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, 0, midgard_condition_always); - EMIT(alu_br_compact_cond, midgard_jmp_writeout_op_writeout, TAG_ALU_4, -1, midgard_condition_always); -} - static midgard_block * emit_block(compiler_context *ctx, nir_block *block) { @@ -2458,10 +2404,7 @@ emit_block(compiler_context *ctx, nir_block *block) /* Append fragment shader epilogue (value writeout) */ if (ctx->stage == MESA_SHADER_FRAGMENT) { if (block == nir_impl_last_block(ctx->func->impl)) { - if (ctx->is_blend) - emit_blend_epilogue(ctx); - else - emit_fragment_epilogue(ctx); + emit_fragment_epilogue(ctx); } } diff --git a/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c b/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c index 115fe5f09dd..08ef290a20b 100644 --- a/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c +++ b/src/gallium/drivers/panfrost/midgard/nir_lower_framebuffer.c @@ -44,8 +44,20 @@ static nir_ssa_def * nir_float_to_native(nir_builder *b, nir_ssa_def *c_float) { + /* First, we scale from [0, 1] to [0, 255.0] */ nir_ssa_def *scaled = nir_fmul_imm(b, nir_fsat(b, c_float), 255.0); - return scaled; + + /* Next, we type convert */ + nir_ssa_def *converted = nir_u2u8(b, nir_f2u32(b, + nir_fround_even(b, scaled))); + + return converted; +} + +static nir_ssa_def * +nir_native_to_float(nir_builder *b, nir_ssa_def *c_native) +{ + return c_native; } void @@ -87,11 +99,43 @@ nir_lower_framebuffer(nir_shader *shader) /* Format convert */ nir_ssa_def *converted = nir_float_to_native(&b, c_nir); - /* Write out the converted color instead of the input */ - nir_instr_rewrite_src(instr, &intr->src[1], - nir_src_for_ssa(converted)); + /* Rewrite to use a native store by creating a new intrinsic */ + nir_intrinsic_instr *new = + nir_intrinsic_instr_create(shader, nir_intrinsic_store_raw_output_pan); + new->src[0] = nir_src_for_ssa(converted); + + /* TODO: What about non-RGBA? Is that different? */ + new->num_components = 4; + + nir_builder_instr_insert(&b, &new->instr); + + /* (And finally removing the old) */ + nir_instr_remove(instr); } else { - /* TODO loads */ + /* For loads, add conversion after */ + b.cursor = nir_after_instr(instr); + + /* Rewrite to use a native load by creating a new intrinsic */ + + nir_intrinsic_instr *new = + nir_intrinsic_instr_create(shader, nir_intrinsic_load_raw_output_pan); + + new->num_components = 4; + + unsigned bitsize = 32; + nir_ssa_dest_init(&new->instr, &new->dest, 4, bitsize, NULL); + nir_builder_instr_insert(&b, &new->instr); + + /* Convert the raw value */ + nir_ssa_def *raw = &new->dest.ssa; + nir_ssa_def *converted = nir_native_to_float(&b, raw); + + /* Rewrite to use the converted value */ + nir_src rewritten = nir_src_for_ssa(converted); + nir_ssa_def_rewrite_uses_after(&intr->dest.ssa, rewritten, instr); + + /* Finally, remove the old load */ + nir_instr_remove(instr); } } } -- 2.30.2