vc4: Add support for storing sample mask.
author: Eric Anholt <eric@anholt.net>
Sat, 21 Nov 2015 01:18:03 +0000 (17:18 -0800)
committer: Eric Anholt <eric@anholt.net>
Fri, 4 Dec 2015 17:23:55 +0000 (09:23 -0800)
From the API perspective, writing 1 bits can't turn on pixels that were
off, so we AND it with the sample mask from the payload.

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_qpu_schedule.c

index 9c6e8647256cea8df2d74f56a58d50ab9794dcb5..081adfd185c2fcafd5510049dbcec68a6a2ead36 100644 (file)
@@ -1109,6 +1109,10 @@ emit_frag_end(struct vc4_compile *c)
                 }
         }
 
+        if (c->output_sample_mask_index != -1) {
+                qir_MS_MASK(c, c->outputs[c->output_sample_mask_index]);
+        }
+
         if (c->fs_key->depth_enabled) {
                 struct qreg z;
                 if (c->output_position_index != -1) {
@@ -1359,6 +1363,9 @@ ntq_setup_outputs(struct vc4_compile *c)
                         case FRAG_RESULT_DEPTH:
                                 c->output_position_index = loc;
                                 break;
+                        case FRAG_RESULT_SAMPLE_MASK:
+                                c->output_sample_mask_index = loc;
+                                break;
                         }
                 } else {
                         switch (var->data.location) {
index f2855e159fc53d3a2fa91b20f0e3900760d4aea7..4c6667a9d9f04b7798b14d6a38ab8a484523d129 100644 (file)
@@ -87,6 +87,7 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
         [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
+        [QOP_MS_MASK] = { "ms_mask", 0, 1, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
 
         [QOP_FRAG_X] = { "frag_x", 1, 0 },
@@ -399,6 +400,7 @@ qir_compile_init(void)
         c->output_position_index = -1;
         c->output_color_index = -1;
         c->output_point_size_index = -1;
+        c->output_sample_mask_index = -1;
 
         c->def_ht = _mesa_hash_table_create(c, _mesa_hash_pointer,
                                             _mesa_key_pointer_equal);
index ad243ec111390d5e9971090b29c33e46aea197d1..97a23df10c6336a3bb4fafdce4f8e9e9caa73ffd 100644 (file)
@@ -122,6 +122,7 @@ enum qop {
         QOP_TLB_Z_WRITE,
         QOP_TLB_COLOR_WRITE,
         QOP_TLB_COLOR_READ,
+        QOP_MS_MASK,
         QOP_VARY_ADD_C,
 
         QOP_FRAG_X,
@@ -397,6 +398,7 @@ struct vc4_compile {
         uint32_t output_position_index;
         uint32_t output_color_index;
         uint32_t output_point_size_index;
+        uint32_t output_sample_mask_index;
 
         struct qreg undef;
         enum qstage stage;
@@ -620,6 +622,7 @@ QIR_NODST_1(TLB_COLOR_WRITE)
 QIR_NODST_1(TLB_Z_WRITE)
 QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)
+QIR_NODST_1(MS_MASK)
 
 static inline struct qreg
 qir_UNPACK_8_F(struct vc4_compile *c, struct qreg src, int i)
index e0d3633da4268b22233ae2527143a7684dae4d79..a3d1627156fecf4c8720802daa3d31fee61cbfe0 100644 (file)
@@ -387,6 +387,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                                             qpu_rb(QPU_R_MS_REV_FLAGS)));
                         break;
 
+                case QOP_MS_MASK:
+                        src[1] = qpu_ra(QPU_R_MS_REV_FLAGS);
+                        fixup_raddr_conflict(c, dst, &src[0], &src[1],
+                                             qinst, &unpack);
+                        queue(c, qpu_a_AND(qpu_ra(QPU_W_MS_FLAGS),
+                                           src[0], src[1]) | unpack);
+                        break;
+
                 case QOP_FRAG_Z:
                 case QOP_FRAG_W:
                         /* QOP_FRAG_Z/W don't emit instructions, just allocate
index 19cbf7bb98c35e2d58b51653eba1c983a9d1b73f..94303d942ec1b9808f481a43510f10e1c4b0ad2d 100644 (file)
@@ -295,6 +295,10 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
                         add_write_dep(state, &state->last_tlb, n);
                         break;
 
+                case QPU_W_MS_FLAGS:
+                        add_write_dep(state, &state->last_tlb, n);
+                        break;
+
                 case QPU_W_NOP:
                         break;