vc4: Add support for blending.
author Eric Anholt <eric@anholt.net>
Thu, 14 Aug 2014 20:27:11 +0000 (13:27 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 15 Aug 2014 19:01:32 +0000 (12:01 -0700)
Passes blendminmax and blendsquare.  glean's more serious blendFunc fails
in simulation due to binner memory overflow (I really need to work around
that), and fbo-blending-formats fails due to Mesa refusing one of the
getter requests, even before it could fail due to the driver not actually
supporting different formats yet.

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c

index 98785c2056af9e6d90d203f79f0c728576cd7896..24f7620b7fd1b90b7c823be63f44b03d2d55f566 100644 (file)
@@ -72,6 +72,8 @@ struct vc4_fs_key {
         bool depth_enabled;
         bool is_points;
         bool is_lines;
+
+        struct pipe_rt_blend_state blend;
 };
 
 struct vc4_vs_key {
@@ -762,6 +764,169 @@ parse_tgsi_immediate(struct tgsi_to_qir *trans, struct tgsi_full_immediate *imm)
         }
 }
 
+/**
+ * Multiplies val by the given PIPE_BLENDFACTOR_* for one channel.
+ *
+ * dst and src are the 4-channel destination and fragment colors, and
+ * channel (0-2 for RGB, 3 for alpha) picks the component for per-channel
+ * factors.  Unsupported factors are logged and leave val unscaled.
+ */
+static struct qreg
+vc4_blend_channel(struct tgsi_to_qir *trans,
+                  struct qreg *dst, struct qreg *src, struct qreg val,
+                  unsigned factor, int channel)
+{
+        struct qcompile *c = trans->c;
+
+        switch(factor) {
+        case PIPE_BLENDFACTOR_ONE:
+                return val;
+        case PIPE_BLENDFACTOR_SRC_COLOR:
+                return qir_FMUL(c, val, src[channel]);
+        case PIPE_BLENDFACTOR_SRC_ALPHA:
+                return qir_FMUL(c, val, src[3]);
+        case PIPE_BLENDFACTOR_DST_ALPHA:
+                return qir_FMUL(c, val, dst[3]);
+        case PIPE_BLENDFACTOR_DST_COLOR:
+                return qir_FMUL(c, val, dst[channel]);
+        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
+                /* GL defines this factor as (f, f, f, 1) with
+                 * f = min(As, 1 - Ad), so alpha is left unscaled.
+                 */
+                if (channel == 3)
+                        return val;
+                return qir_FMUL(c, val, qir_FMIN(c, src[3], qir_FSUB(c,
+                                qir_uniform_f(trans, 1.0), dst[3])));
+        case PIPE_BLENDFACTOR_CONST_COLOR:
+                return qir_FMUL(c, val, get_temp_for_uniform(trans,
+                                QUNIFORM_BLEND_CONST_COLOR, channel));
+        case PIPE_BLENDFACTOR_CONST_ALPHA:
+                return qir_FMUL(c, val, get_temp_for_uniform(trans,
+                                QUNIFORM_BLEND_CONST_COLOR, 3));
+        case PIPE_BLENDFACTOR_ZERO:
+                return qir_uniform_f(trans, 0.0);
+        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 src[channel]));
+        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 src[3]));
+        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 dst[3]));
+        case PIPE_BLENDFACTOR_INV_DST_COLOR:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                                 dst[channel]));
+        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                get_temp_for_uniform(trans,
+                                QUNIFORM_BLEND_CONST_COLOR, channel)));
+        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
+                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(trans, 1.0),
+                                get_temp_for_uniform(trans,
+                                QUNIFORM_BLEND_CONST_COLOR, 3)));
+
+        default:
+        case PIPE_BLENDFACTOR_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_SRC1_ALPHA:
+        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
+        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
+                /* Unsupported. */
+                fprintf(stderr, "Unknown blend factor %d\n", factor);
+                return val;
+        }
+}
+
+/**
+ * Applies a PIPE_BLEND_* equation to one source/destination value pair.
+ * An unrecognized equation is reported on stderr and src is returned as is.
+ */
+static struct qreg
+vc4_blend_func(struct tgsi_to_qir *trans,
+               struct qreg src, struct qreg dst,
+               unsigned func)
+{
+        struct qcompile *compile = trans->c;
+
+        if (func == PIPE_BLEND_ADD)
+                return qir_FADD(compile, src, dst);
+        if (func == PIPE_BLEND_SUBTRACT)
+                return qir_FSUB(compile, src, dst);
+        if (func == PIPE_BLEND_REVERSE_SUBTRACT)
+                return qir_FSUB(compile, dst, src);
+        if (func == PIPE_BLEND_MIN)
+                return qir_FMIN(compile, src, dst);
+        if (func == PIPE_BLEND_MAX)
+                return qir_FMAX(compile, src, dst);
+
+        /* Unsupported. */
+        fprintf(stderr, "Unknown blend func %d\n", func);
+        return src;
+}
+
+/**
+ * Fixed-function blending, implemented in shader code.
+ *
+ * VC4 has no hardware support for blending.  The shader instead reads the
+ * destination's current contents from the tile buffer once the scoreboard
+ * wait (handled by vc4_qpu_emit.c) is done, does math on them and its own
+ * output color, and uses the outcome as the new output color.
+ *
+ * src_color holds the shader's 4 output channels and result receives the 4
+ * blended channels.  With blending disabled, result just copies src_color.
+ */
+static void
+vc4_blend(struct tgsi_to_qir *trans, struct qreg *result,
+          struct qreg *src_color)
+{
+        struct qcompile *c = trans->c;
+        struct pipe_rt_blend_state *blend = &trans->fs_key->blend;
+        struct qreg dst_color[4], src_blend[4], dst_blend[4];
+
+        if (!blend->blend_enable) {
+                for (int ch = 0; ch < 4; ch++)
+                        result[ch] = src_color[ch];
+                return;
+        }
+
+        /* Read the destination color from the tile buffer and unpack its
+         * four channels into temporaries.
+         */
+        qir_emit(c, qir_inst(QOP_TLB_COLOR_READ, c->undef,
+                             c->undef, c->undef));
+        for (int ch = 0; ch < 4; ch++) {
+                /* XXX: Swizzles? */
+                dst_color[ch] = qir_get_temp(c);
+                qir_emit(c, qir_inst(QOP_R4_UNPACK_A + ch, dst_color[ch],
+                                     c->undef, c->undef));
+        }
+
+        /* Scale both colors; RGB and alpha each have their own factors. */
+        for (int ch = 0; ch < 4; ch++) {
+                unsigned src_factor = (ch == 3) ? blend->alpha_src_factor :
+                                                  blend->rgb_src_factor;
+                unsigned dst_factor = (ch == 3) ? blend->alpha_dst_factor :
+                                                  blend->rgb_dst_factor;
+
+                src_blend[ch] = vc4_blend_channel(trans, dst_color, src_color,
+                                                  src_color[ch],
+                                                  src_factor, ch);
+                dst_blend[ch] = vc4_blend_channel(trans, dst_color, src_color,
+                                                  dst_color[ch],
+                                                  dst_factor, ch);
+        }
+
+        /* Combine them with the RGB or alpha blend equation. */
+        for (int ch = 0; ch < 4; ch++) {
+                unsigned func = (ch == 3) ? blend->alpha_func :
+                                            blend->rgb_func;
+
+                result[ch] = vc4_blend_func(trans,
+                                            src_blend[ch], dst_blend[ch],
+                                            func);
+        }
+}
+
 static void
 emit_frag_end(struct tgsi_to_qir *trans)
 {
@@ -772,26 +937,30 @@ emit_frag_end(struct tgsi_to_qir *trans)
         const struct util_format_description *format_desc =
                 util_format_description(trans->fs_key->color_format);
 
+        struct qreg output_color[4] = {
+                trans->outputs[0], trans->outputs[1],
+                trans->outputs[2], trans->outputs[3],
+        };
+
+        struct qreg blend_color[4];
+        vc4_blend(trans, blend_color, output_color);
+
         /* Debug: Sometimes you're getting a black output and just want to see
          * if the FS is getting executed at all.  Spam magenta into the color
          * output.
          */
         if (0) {
-                trans->outputs[format_desc->swizzle[0]] =
-                        qir_uniform_f(trans, 1.0);
-                trans->outputs[format_desc->swizzle[1]] =
-                        qir_uniform_f(trans, 0.0);
-                trans->outputs[format_desc->swizzle[2]] =
-                        qir_uniform_f(trans, 1.0);
-                trans->outputs[format_desc->swizzle[3]] =
-                        qir_uniform_f(trans, 0.5);
+                blend_color[0] = qir_uniform_f(trans, 1.0);
+                blend_color[1] = qir_uniform_f(trans, 0.0);
+                blend_color[2] = qir_uniform_f(trans, 1.0);
+                blend_color[3] = qir_uniform_f(trans, 0.5);
         }
 
         struct qreg swizzled_outputs[4] = {
-                trans->outputs[format_desc->swizzle[0]],
-                trans->outputs[format_desc->swizzle[1]],
-                trans->outputs[format_desc->swizzle[2]],
-                trans->outputs[format_desc->swizzle[3]],
+                blend_color[format_desc->swizzle[0]],
+                blend_color[format_desc->swizzle[1]],
+                blend_color[format_desc->swizzle[2]],
+                blend_color[format_desc->swizzle[3]],
         };
 
         if (trans->fs_key->depth_enabled) {
@@ -1074,6 +1243,7 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
         key->is_points = (prim_mode == PIPE_PRIM_POINTS);
         key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                          prim_mode <= PIPE_PRIM_LINE_STRIP);
+        key->blend = vc4->blend->rt[0];
 
         if (vc4->framebuffer.cbufs[0])
                 key->color_format = vc4->framebuffer.cbufs[0]->format;
@@ -1334,6 +1504,11 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                                                  uinfo->contents[i],
                                                  uinfo->data[i]));
                         break;
+
+                case QUNIFORM_BLEND_CONST_COLOR:
+                        cl_f(&vc4->uniforms,
+                             vc4->blend_color.color[uinfo->data[i]]);
+                        break;
                 }
 #if 0
                 uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
index 6509a2bb621e0424414e2d3d25d9fb1402a1847c..0911e4e326dd7ae3bc3e82fc339ebb86e55f3bae 100644 (file)
@@ -63,6 +63,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_PASSTHROUGH_Z_WRITE] = { "tlb_passthrough_z", 0, 0, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
+        [QOP_TLB_COLOR_READ] = { "tlb_color_read", 0, 0, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
 
         [QOP_FRAG_X] = { "frag_x", 1, 0 },
index 7d9806268fe9131c6bacdb7e9849dd875eb4d073..c25a58e831e8882fb5da359bfb6a1a7c0b812d62 100644 (file)
@@ -72,6 +72,7 @@ enum qop {
         QOP_VPM_READ,
         QOP_TLB_PASSTHROUGH_Z_WRITE,
         QOP_TLB_COLOR_WRITE,
+        QOP_TLB_COLOR_READ,
         QOP_VARY_ADD_C,
 
         QOP_FRAG_X,
@@ -169,6 +170,8 @@ enum quniform_contents {
 
         QUNIFORM_TEXRECT_SCALE_X,
         QUNIFORM_TEXRECT_SCALE_Y,
+
+        QUNIFORM_BLEND_CONST_COLOR,
 };
 
 struct qcompile {
index 63f37dd8fa0f210b4c45cda61af7f249dadf1d97..4e8a6b2d8e9506aee1d2386bd48af64071794084 100644 (file)
@@ -494,6 +494,13 @@ vc4_generate_code(struct qcompile *c)
                                           qpu_m_NOP()));
                         break;
 
+                case QOP_TLB_COLOR_READ:
+                        queue(c, qpu_inst(qpu_a_NOP(), qpu_m_NOP()));
+                        *last_inst(c) = qpu_set_sig(*last_inst(c),
+                                                    QPU_SIG_COLOR_LOAD);
+
+                        break;
+
                 case QOP_TLB_COLOR_WRITE:
                         queue(c, qpu_inst(qpu_a_MOV(qpu_tlbc(),
                                                     src[0]),
index fe3d85056107bbbe4672b91db6be0359612a0eef..40b7f35309bda12eeadc2edba641860034c9b392 100644 (file)
@@ -256,6 +256,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj,
                case QPU_SIG_NONE:
                case QPU_SIG_WAIT_FOR_SCOREBOARD:
                case QPU_SIG_SCOREBOARD_UNLOCK:
+               case QPU_SIG_COLOR_LOAD:
                case QPU_SIG_LOAD_TMU0:
                case QPU_SIG_LOAD_TMU1:
                        if (!check_instruction_writes(inst, validated_shader,