vc4: Add support for stencil operations.
authorEric Anholt <eric@anholt.net>
Thu, 18 Sep 2014 19:22:07 +0000 (12:22 -0700)
committerEric Anholt <eric@anholt.net>
Fri, 19 Sep 2014 00:46:43 +0000 (17:46 -0700)
While depth test state is passed through the fragment shader as sideband
data, the stencil test state has to be set by the fragment shader itself.

Many tests are still failing, but this gets most of hiz/ passing.

src/gallium/drivers/vc4/vc4_context.h
src/gallium/drivers/vc4/vc4_draw.c
src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_simulator_validate_shaders.c
src/gallium/drivers/vc4/vc4_state.c

index e5864333a68d026b9032e0e58296c21b771a3c8d..549becdbf66a52c22e719e875687be9e1f6a31b2 100644 (file)
@@ -203,6 +203,14 @@ struct vc4_depth_stencil_alpha_state {
 
         /* VC4_CONFIGURATION_BITS */
         uint8_t config_bits[3];
+
+        /** Uniforms for stencil state.
+         *
+         * Index 0 is either the front config, or the front-and-back config.
+         * Index 1 is the back config if doing separate back stencil.
+         * Index 2 is the writemask config if it's not a common mask value.
+         */
+        uint32_t stencil_uniforms[3];
 };
 
 static inline struct vc4_context *
index 90d98b37b0110641578b58b54929edcb09d13983..c88e43cf823f1fa4cd500e1232030524a078042d 100644 (file)
@@ -190,6 +190,8 @@ vc4_draw_vbo(struct pipe_context *pctx, const struct pipe_draw_info *info)
         if (vc4->zsa && vc4->zsa->base.depth.enabled) {
                 vc4->resolve |= PIPE_CLEAR_DEPTH;
         }
+        if (vc4->zsa && vc4->zsa->base.stencil[0].enabled)
+                vc4->resolve |= PIPE_CLEAR_STENCIL;
         vc4->resolve |= PIPE_CLEAR_COLOR0;
 
         vc4->shader_rec_count++;
index c6603767b5ec7c244bfac275b89c5bcea31de76f..1afb587754ba4d928f21bd6da3bbb16b7e023abf 100644 (file)
@@ -53,6 +53,9 @@ struct vc4_fs_key {
         struct vc4_key base;
         enum pipe_format color_format;
         bool depth_enabled;
+        bool stencil_enabled;
+        bool stencil_twoside;
+        bool stencil_full_writemasks;
         bool is_points;
         bool is_lines;
 
@@ -1253,6 +1256,16 @@ emit_frag_end(struct vc4_compile *c)
         if (c->discard.file != QFILE_NULL)
                 qir_TLB_DISCARD_SETUP(c, c->discard);
 
+        if (c->fs_key->stencil_enabled) {
+                qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 0));
+                if (c->fs_key->stencil_twoside) {
+                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 1));
+                }
+                if (c->fs_key->stencil_full_writemasks) {
+                        qir_TLB_STENCIL_SETUP(c, add_uniform(c, QUNIFORM_STENCIL, 2));
+                }
+        }
+
         if (c->fs_key->depth_enabled) {
                 struct qreg z;
                 if (c->output_position_index != -1) {
@@ -1567,7 +1580,11 @@ vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
         if (vc4->framebuffer.cbufs[0])
                 key->color_format = vc4->framebuffer.cbufs[0]->format;
 
-        key->depth_enabled = vc4->zsa->base.depth.enabled;
+        key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
+        key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
+        key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
+        key->depth_enabled = (vc4->zsa->base.depth.enabled ||
+                              key->stencil_enabled);
 
         vc4->prog.fs = util_hash_table_get(vc4->fs_cache, key);
         if (vc4->prog.fs)
@@ -1826,6 +1843,14 @@ vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                         cl_f(&vc4->uniforms,
                              vc4->blend_color.color[uinfo->data[i]]);
                         break;
+
+                case QUNIFORM_STENCIL:
+                        cl_u32(&vc4->uniforms,
+                               vc4->zsa->stencil_uniforms[uinfo->data[i]] |
+                               (uinfo->data[i] <= 1 ?
+                                (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
+                                0));
+                        break;
                 }
 #if 0
                 uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
index 69152e79dda15a1fb2c4a6d05bde3c2dfe4b4d87..6196b92e2e0992c8b243e4a36766fa1e49a9dfde 100644 (file)
@@ -77,6 +77,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_VPM_WRITE] = { "vpm_write", 0, 1, true },
         [QOP_VPM_READ] = { "vpm_read", 0, 1, true },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
+        [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
         [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0, true },
index 2ab30496aad34c396857f0d55027f895a96457ae..833795afcc5f8e611870f5c058cab59932a5c4ee 100644 (file)
@@ -97,6 +97,7 @@ enum qop {
         QOP_VPM_WRITE,
         QOP_VPM_READ,
         QOP_TLB_DISCARD_SETUP,
+        QOP_TLB_STENCIL_SETUP,
         QOP_TLB_Z_WRITE,
         QOP_TLB_COLOR_WRITE,
         QOP_TLB_COLOR_READ,
@@ -199,6 +200,7 @@ enum quniform_contents {
         QUNIFORM_TEXRECT_SCALE_Y,
 
         QUNIFORM_BLEND_CONST_COLOR,
+        QUNIFORM_STENCIL,
 };
 
 struct vc4_compile {
@@ -366,6 +368,7 @@ QIR_ALU0(TEX_RESULT)
 QIR_ALU0(TLB_COLOR_READ)
 QIR_NODST_1(TLB_Z_WRITE)
 QIR_NODST_1(TLB_DISCARD_SETUP)
+QIR_NODST_1(TLB_STENCIL_SETUP)
 
 static inline struct qreg
 qir_R4_UNPACK(struct vc4_compile *c, struct qreg r4, int i)
index 592fab907850c913168394e860522168a9a5b28a..b8524e36e2031394169296e254d6726de80003c9 100644 (file)
@@ -419,6 +419,10 @@ vc4_generate_code(struct vc4_compile *c)
                         *last_inst(c) |= QPU_SF;
                         break;
 
+                case QOP_TLB_STENCIL_SETUP:
+                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP), src[0]));
+                        break;
+
                 case QOP_TLB_Z_WRITE:
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z), src[0]));
                         if (discard) {
index 40b7f35309bda12eeadc2edba641860034c9b392..183cd4c977e68ab00ce4da002a80efd538b554bc 100644 (file)
@@ -153,7 +153,6 @@ check_register_write(struct vc4_validated_shader_info *validated_shader,
 
        case QPU_W_HOST_INT:
        case QPU_W_TMU_NOSWAP:
-       case QPU_W_TLB_STENCIL_SETUP:
        case QPU_W_TLB_ALPHA_MASK:
        case QPU_W_MUTEX_RELEASE:
                /* XXX: I haven't thought about these, so don't support them
@@ -173,6 +172,9 @@ check_register_write(struct vc4_validated_shader_info *validated_shader,
                 * triggered by QPU_W_VPM_ADDR writes.
                 */
                return true;
+
+       case QPU_W_TLB_STENCIL_SETUP:
+                return true;
        }
 
        return true;
index 58c300e6549746d90628152bfa78b6586045f421..c7757709ee605852fd3d832b070b1697735ce19b 100644 (file)
@@ -116,6 +116,50 @@ vc4_create_blend_state(struct pipe_context *pctx,
         return vc4_generic_cso_state_create(cso, sizeof(*cso));
 }
 
+/**
+ * The TLB_STENCIL_SETUP data has a little bitfield for common writemask
+ * values, so you don't have to do a separate writemask setup.
+ *
+ * Translates a stencil writemask into the code for that bitfield: the masks
+ * 0x1, 0x3, 0xf and 0xff map to 0, 1, 2 and 3 respectively.  Any other mask
+ * returns 0xff, a sentinel meaning "no compact code exists", which callers
+ * compare against to decide whether a separate writemask config is needed.
+ */
+static uint8_t
+tlb_stencil_setup_writemask(uint8_t mask)
+{
+        switch (mask) {
+        case 0x1: return 0;
+        case 0x3: return 1;
+        case 0xf: return 2;
+        case 0xff: return 3;
+        /* Not one of the common masks: report the sentinel. */
+        default: return 0xff;
+        }
+}
+
+/**
+ * Packs the stencil state for one face into a TLB_STENCIL_SETUP config word.
+ *
+ * state is the Gallium stencil state to encode.  writemask_bits is the
+ * result of tlb_stencil_setup_writemask() for that face; when it is the 0xff
+ * sentinel the compact writemask field is left as zero.
+ *
+ * Returns the packed word.  Nothing is placed at bit 30 or above (the caller
+ * ORs the face-select value in there), and the reference value is OR'd in
+ * at bit 8 later, at uniform upload time.
+ */
+static uint32_t
+tlb_stencil_setup_bits(const struct pipe_stencil_state *state,
+                       uint8_t writemask_bits)
+{
+        /* Translation from Gallium stencil ops to the values packed into
+         * the three op fields below.
+         */
+        static const uint8_t op_map[] = {
+                [PIPE_STENCIL_OP_ZERO] = 0,
+                [PIPE_STENCIL_OP_KEEP] = 1,
+                [PIPE_STENCIL_OP_REPLACE] = 2,
+                [PIPE_STENCIL_OP_INCR] = 3,
+                [PIPE_STENCIL_OP_DECR] = 4,
+                [PIPE_STENCIL_OP_INVERT] = 5,
+                [PIPE_STENCIL_OP_INCR_WRAP] = 6,
+                [PIPE_STENCIL_OP_DECR_WRAP] = 7,
+        };
+        uint32_t bits = 0;
+
+        /* Only emit the compact writemask code when one exists. */
+        if (writemask_bits != 0xff)
+                bits |= writemask_bits << 28;
+        bits |= op_map[state->zfail_op] << 25;
+        bits |= op_map[state->zpass_op] << 22;
+        bits |= op_map[state->fail_op] << 19;
+        /* The compare function is stored unmapped, i.e. this relies on the
+         * PIPE_FUNC_* values matching what the hardware expects -- TODO
+         * confirm against the hardware documentation.
+         */
+        bits |= state->func << 16;
+        /* Ref is filled in at uniform upload time */
+        bits |= state->valuemask << 0;
+
+        return bits;
+}
+
 static void *
 vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx,
                                      const struct pipe_depth_stencil_alpha_state *cso)
@@ -139,6 +183,33 @@ vc4_create_depth_stencil_alpha_state(struct pipe_context *pctx,
                                        VC4_CONFIG_BITS_DEPTH_FUNC_SHIFT);
         }
 
+        /* Build the uniforms that the fragment shader writes to
+         * TLB_STENCIL_SETUP.  Index 0 is the front (or front-and-back)
+         * config, index 1 the back config when doing separate back stencil,
+         * and index 2 the explicit writemask config, which is only needed
+         * when a writemask has no compact code.  The value OR'd in at bit 30
+         * of a config word selects which face(s) it applies to.
+         */
+        if (cso->stencil[0].enabled) {
+                const struct pipe_stencil_state *front = &cso->stencil[0];
+                const struct pipe_stencil_state *back = &cso->stencil[1];
+
+                uint8_t front_writemask_bits =
+                        tlb_stencil_setup_writemask(front->writemask);
+                /* Without separate back stencil the front state covers both
+                 * faces, so mirror the front writemask instead of reading
+                 * the (unused) back state, which could otherwise force a
+                 * spurious writemask config.
+                 */
+                uint8_t back_writemask = front->writemask;
+                uint8_t back_writemask_bits = front_writemask_bits;
+
+                so->stencil_uniforms[0] =
+                        tlb_stencil_setup_bits(front, front_writemask_bits);
+                if (back->enabled) {
+                        back_writemask = back->writemask;
+                        back_writemask_bits =
+                                tlb_stencil_setup_writemask(back_writemask);
+
+                        /* Unsigned constants throughout: 2 << 30 and 3 << 30
+                         * do not fit in a 32-bit signed int, so the signed
+                         * shift would be undefined behaviour.
+                         */
+                        so->stencil_uniforms[0] |= (1u << 30);
+                        so->stencil_uniforms[1] =
+                                tlb_stencil_setup_bits(back, back_writemask_bits);
+                        so->stencil_uniforms[1] |= (2u << 30);
+                } else {
+                        so->stencil_uniforms[0] |= (3u << 30);
+                }
+
+                /* The writemask config carries the real 8-bit masks (front
+                 * in bits 0-7, back in bits 8-15), not the 2-bit compact
+                 * codes or the 0xff "no code" sentinel.
+                 *
+                 * NOTE(review): if both masks are 0 this stores 0, which
+                 * vc4_update_compiled_fs() reads as "no writemask config"
+                 * -- confirm how an all-zero writemask should be handled.
+                 */
+                if (front_writemask_bits == 0xff ||
+                    back_writemask_bits == 0xff) {
+                        so->stencil_uniforms[2] = ((uint32_t)front->writemask |
+                                                   ((uint32_t)back_writemask << 8));
+                }
+        }
+
         return so;
 }