From 7c65b714ed974248f09dcc0b4f020b2e2bf50227 Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Thu, 14 Aug 2014 13:27:11 -0700
Subject: [PATCH] vc4: Add support for blending.

Passes blendminmax and blendsquare.  glean's more serious blendFunc fails in
simulation due to binner memory overflow (I really need to work around that),
and fbo-blending-formats fails due to Mesa refusing one of the getter
requests, even before it could fail due to the driver not actually supporting
different formats yet.
---
 src/gallium/drivers/vc4/vc4_program.c         | 225 ++++++++++++++++--
 src/gallium/drivers/vc4/vc4_qir.c             |   1 +
 src/gallium/drivers/vc4/vc4_qir.h             |   3 +
 src/gallium/drivers/vc4/vc4_qpu_emit.c        |   7 +
 .../vc4/vc4_simulator_validate_shaders.c      |   1 +
 5 files changed, 225 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 98785c2056a..24f7620b7fd 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -72,6 +72,8 @@ struct vc4_fs_key {
         bool depth_enabled;
         bool is_points;
         bool is_lines;
+
+        struct pipe_rt_blend_state blend;
 };
 
 struct vc4_vs_key {
@@ -764,6 +764,195 @@ parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
         }
 }
 
+/**
+ * Emits QIR that scales one channel of a blend input by a blend factor.
+ *
+ * \param dst      destination color channels (the vc4_blend() caller passes
+ *                 the values unpacked from the tile buffer read)
+ * \param src      source color channels
+ * \param val      the value to scale
+ * \param factor   PIPE_BLENDFACTOR_* selecting what val is multiplied by
+ * \param channel  index of the channel being blended; 3 is alpha
+ *
+ * \return val multiplied by the factor.  Unsupported factors are reported on
+ * stderr and return val unscaled.
+ */
+static struct qreg
+vc4_blend_channel(struct tgsi_to_qir *trans,
+                  struct qreg *dst,
+                  struct qreg *src,
+                  struct qreg val,
+                  unsigned factor,
+                  int channel)
+{
+        struct qcompile *c = trans->c;
+
+        switch(factor) {
+        case PIPE_BLENDFACTOR_ONE:
+                return val;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return qir_FMUL(c, val, src[channel]);
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return qir_FMUL(c, val, src[3]);
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return qir_FMUL(c, val, dst[3]);
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return qir_FMUL(c, val, dst[channel]);
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                /* The factor is min(As, 1 - Ad) for the color channels and
+                 * 1 for alpha, and it scales val like every other factor.
+                 */
+                if (channel == 3)
+                        return val;
+                return qir_FMUL(c, val,
+                                qir_FMIN(c, src[3],
+                                         qir_FSUB(c,
+                                                  qir_uniform_f(trans, 1.0),
+                                                  dst[3])));
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return qir_FMUL(c, val,
+                                get_temp_for_uniform(trans,
+                                                     QUNIFORM_BLEND_CONST_COLOR,
+                                                     channel));
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return qir_FMUL(c, val,
+                                get_temp_for_uniform(trans,
+                                                     QUNIFORM_BLEND_CONST_COLOR,
+                                                     3));
+        case PIPE_BLENDFACTOR_ZERO:
+                return qir_uniform_f(trans, 0.0);
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 src[channel]));
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 src[3]));
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 dst[3]));
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 dst[channel]));
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return qir_FMUL(c, val,
+                                qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                         get_temp_for_uniform(trans,
+                                                              QUNIFORM_BLEND_CONST_COLOR,
+                                                              channel)));
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return qir_FMUL(c, val,
+                                qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                         get_temp_for_uniform(trans,
+                                                              QUNIFORM_BLEND_CONST_COLOR,
+                                                              3)));
+
+        default:
+        case PIPE_BLENDFACTOR_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_SRC1_ALPHA:
+        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend factor %d\n", factor);
+                return val;
+        }
+}
+
+/**
+ * Emits QIR combining the factor-scaled source and destination values of one
+ * channel according to a PIPE_BLEND_* equation.
+ *
+ * Unsupported equations are reported on stderr and return src unchanged.
+ */
+static struct qreg
+vc4_blend_func(struct tgsi_to_qir *trans,
+               struct qreg src, struct qreg dst,
+               unsigned func)
+{
+        struct qcompile *c = trans->c;
+
+        switch (func) {
+        case PIPE_BLEND_ADD:
+                return qir_FADD(c, src, dst);
+        case PIPE_BLEND_SUBTRACT:
+                return qir_FSUB(c, src, dst);
+        case PIPE_BLEND_REVERSE_SUBTRACT:
+                return qir_FSUB(c, dst, src);
+        case PIPE_BLEND_MIN:
+                return qir_FMIN(c, src, dst);
+        case PIPE_BLEND_MAX:
+                return qir_FMAX(c, src, dst);
+
+        default:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend func %d\n", func);
+                return src;
+
+        }
+}
+
+/**
+ * Implements fixed function blending in shader code.
+ *
+ * VC4 doesn't have any hardware support for blending.  Instead, you read the
+ * current contents of the destination from the tile buffer after having
+ * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
+ * math using your output color and that destination value, and update the
+ * output color appropriately.
+ */
+static void
+vc4_blend(struct tgsi_to_qir *trans, struct qreg *result,
+          struct qreg *src_color)
+{
+        struct qcompile *c = trans->c;
+        struct pipe_rt_blend_state *blend = &trans->fs_key->blend;
+
+        if (!blend->blend_enable) {
+                for (int i = 0; i < 4; i++)
+                        result[i] = src_color[i];
+                return;
+        }
+
+        qir_emit(c, qir_inst(QOP_TLB_COLOR_READ, c->undef,
+                             c->undef, c->undef));
+        struct qreg dst_color[4];
+        for (int i = 0; i < 4; i++) {
+                dst_color[i] = qir_get_temp(c);
+                qir_emit(c, qir_inst(QOP_R4_UNPACK_A + i,
+                                     dst_color[i],
+                                     c->undef, c->undef));
+                /* XXX: Swizzles? */
+        }
+
+        struct qreg src_blend[4], dst_blend[4];
+        for (int i = 0; i < 3; i++) {
+                src_blend[i] = vc4_blend_channel(trans,
+                                                 dst_color, src_color,
+                                                 src_color[i],
+                                                 blend->rgb_src_factor, i);
+                dst_blend[i] = vc4_blend_channel(trans,
+                                                 dst_color, src_color,
+                                                 dst_color[i],
+                                                 blend->rgb_dst_factor, i);
+        }
+        src_blend[3] = vc4_blend_channel(trans,
+                                         dst_color, src_color,
+                                         src_color[3],
+                                         blend->alpha_src_factor, 3);
+        dst_blend[3] = vc4_blend_channel(trans,
+                                         dst_color, src_color,
+                                         dst_color[3],
+                                         blend->alpha_dst_factor, 3);
+
+        for (int i = 0; i < 3; i++) {
+                result[i] = vc4_blend_func(trans,
+                                           src_blend[i], dst_blend[i],
+                                           blend->rgb_func);
+        }
+        result[3] = vc4_blend_func(trans,
+                                   src_blend[3], dst_blend[3],
+                                   blend->alpha_func);
+}
+
 static void
 emit_frag_end(struct tgsi_to_qir *trans)
 {
@@ -772,26 +963,30 @@ emit_frag_end(struct tgsi_to_qir *trans)
         const struct util_format_description *format_desc =
                 util_format_description(trans->fs_key->color_format);
 
+        struct qreg output_color[4] = {
+                trans->outputs[0], trans->outputs[1],
+                trans->outputs[2], trans->outputs[3],
+        };
+
+        struct qreg blend_color[4];
+        vc4_blend(trans, blend_color, output_color);
+
         /* Debug: Sometimes you're getting a black output and just want to see
          * if the FS is getting executed at all.  Spam magenta into the color
          * output.
          */
         if (0) {
-                trans->outputs[format_desc->swizzle[0]] =
-                        qir_uniform_f(trans, 1.0);
-                trans->outputs[format_desc->swizzle[1]] =
-                        qir_uniform_f(trans, 0.0);
-                trans->outputs[format_desc->swizzle[2]] =
-                        qir_uniform_f(trans, 1.0);
-                trans->outputs[format_desc->swizzle[3]] =
-                        qir_uniform_f(trans, 0.5);
+                blend_color[0] = qir_uniform_f(trans, 1.0);
+                blend_color[1] = qir_uniform_f(trans, 0.0);
+                blend_color[2] = qir_uniform_f(trans, 1.0);
+                blend_color[3] = qir_uniform_f(trans, 0.5);
         }
 
         struct qreg swizzled_outputs[4] = {
-                trans->outputs[format_desc->swizzle[0]],
-                trans->outputs[format_desc->swizzle[1]],
-                trans->outputs[format_desc->swizzle[2]],
-                trans->outputs[format_desc->swizzle[3]],
+                blend_color[format_desc->swizzle[0]],
+                blend_color[format_desc->swizzle[1]],
+                blend_color[format_desc->swizzle[2]],
+                blend_color[format_desc->swizzle[3]],
         };
 
         if (trans->fs_key->depth_enabled) {
@@ -1074,6 +1269,7 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
         key->is_points = (prim_mode == PIPE_PRIM_POINTS);
         key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                          prim_mode <= PIPE_PRIM_LINE_STRIP);
+        key->blend = vc4->blend->rt[0];
 
         if (vc4->framebuffer.cbufs[0])
                 key->color_format = vc4->framebuffer.cbufs[0]->format;
@@ -1334,6 +1530,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                                                  uinfo->contents[i],
                                                  uinfo->data[i]));
                         break;
+
+                case QUNIFORM_BLEND_CONST_COLOR:
+                        cl_f(&vc4->uniforms,
+                             vc4->blend_color.color[uinfo->data[i]]);
+                        break;
                 }
 #if 0
                 uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
diff --git a/src/gallium/drivers/vc4/vc4_qir.c b/src/gallium/drivers/vc4/vc4_qir.c
index 6509a2bb621..0911e4e326d 100644
--- a/src/gallium/drivers/vc4/vc4_qir.c
+++ b/src/gallium/drivers/vc4/vc4_qir.c
@@ -63,6 +63,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_PASSTHROUGH_Z_WRITE] = { "tlb_passthrough_z", 0, 0, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
+        [QOP_TLB_COLOR_READ] = { "tlb_color_read", 0, 0, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
 
         [QOP_FRAG_X] = { "frag_x", 1, 0 },
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index 7d9806268fe..c25a58e831e 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -72,6 +72,7 @@ enum qop {
         QOP_VPM_READ,
         QOP_TLB_PASSTHROUGH_Z_WRITE,
         QOP_TLB_COLOR_WRITE,
+        QOP_TLB_COLOR_READ,
         QOP_VARY_ADD_C,
 
         QOP_FRAG_X,
@@ -169,6 +170,8 @@ enum quniform_contents {
 
         QUNIFORM_TEXRECT_SCALE_X,
         QUNIFORM_TEXRECT_SCALE_Y,
+
+        QUNIFORM_BLEND_CONST_COLOR,
 };
 
 struct qcompile {
diff --git a/src/gallium/drivers/vc4/vc4_qpu_emit.c b/src/gallium/drivers/vc4/vc4_qpu_emit.c
index 63f37dd8fa0..4e8a6b2d8e9 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_emit.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_emit.c
@@ -494,6 +494,13 @@ vc4_generate_code(struct qcompile *c)
                                           qpu_m_NOP()));
                         break;
 
+                case QOP_TLB_COLOR_READ:
+                        queue(c, qpu_inst(qpu_a_NOP(), qpu_m_NOP()));
+                        *last_inst(c) = qpu_set_sig(*last_inst(c),
+                                                    QPU_SIG_COLOR_LOAD);
+
+                        break;
+
                 case QOP_TLB_COLOR_WRITE:
                         queue(c, qpu_inst(qpu_a_MOV(qpu_tlbc(),
                                                     src[0]),
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c b/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
index fe3d8505610..40b7f35309b 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
@@ -256,6 +256,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
                 case QPU_SIG_NONE:
                 case QPU_SIG_WAIT_FOR_SCOREBOARD:
                 case QPU_SIG_SCOREBOARD_UNLOCK:
+                case QPU_SIG_COLOR_LOAD:
                 case QPU_SIG_LOAD_TMU0:
                 case QPU_SIG_LOAD_TMU1:
                         if (!check_instruction_writes(inst, validated_shader,
-- 
2.30.2