vc4: Move discard handling to the condition flag.
authorEric Anholt <eric@anholt.net>
Wed, 16 Mar 2016 01:00:22 +0000 (18:00 -0700)
committerEric Anholt <eric@anholt.net>
Wed, 16 Mar 2016 18:28:47 +0000 (11:28 -0700)
Now that the field exists in the instruction, we can make discards less
special.  As a bonus, that means that we should be able to merge some more
.sf instructions together when we get around to that.

This causes some scheduling changes, as it allows tlb_color_reads to be
delayed past the discard condition setup.  Since the tlb_color_read ends
up later, this may mean performance improvements, but I haven't tested.

total instructions in shared programs: 78114 -> 78035 (-0.10%)
instructions in affected programs:     1922 -> 1843 (-4.11%)
total estimated cycles in shared programs: 234318 -> 234329 (0.00%)
estimated cycles in affected programs:     8200 -> 8211 (0.13%)

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qir_schedule.c
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 81e8e9150d6f2ab4b187cc47dc0b5ec9f88e1c3e..f5826d851744611db90d5bb791cbf639cc959456 100644 (file)
@@ -1184,8 +1184,11 @@ emit_frag_end(struct vc4_compile *c)
                 color = qir_uniform_ui(c, 0);
         }
 
-        if (c->discard.file != QFILE_NULL)
-                qir_TLB_DISCARD_SETUP(c, c->discard);
+        uint32_t discard_cond = QPU_COND_ALWAYS;
+        if (c->discard.file != QFILE_NULL) {
+                qir_SF(c, c->discard);
+                discard_cond = QPU_COND_ZS;
+        }
 
         if (c->fs_key->stencil_enabled) {
                 qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
@@ -1209,14 +1212,18 @@ emit_frag_end(struct vc4_compile *c)
                 } else {
                         z = qir_FRAG_Z(c);
                 }
-                qir_TLB_Z_WRITE(c, z);
+                struct qinst *inst = qir_TLB_Z_WRITE(c, z);
+                inst->cond = discard_cond;
         }
 
         if (!c->msaa_per_sample_output) {
-                qir_TLB_COLOR_WRITE(c, color);
+                struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
+                inst->cond = discard_cond;
         } else {
-                for (int i = 0; i < VC4_MAX_SAMPLES; i++)
-                        qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
+                        struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
+                        inst->cond = discard_cond;
+                }
         }
 }
 
index 125a9525e1b35f6c60c24db691e1dcf8370a204e..e73e3899410463c16796fdda3d2e7c9bc26a5765 100644 (file)
@@ -69,7 +69,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
         [QOP_EXP2] = { "exp2", 1, 2, false, true },
         [QOP_LOG2] = { "log2", 1, 2, false, true },
-        [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
         [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
index 4f39d72f552281dfef6506c9ff728edb27f7a6e7..3fbf5d749e771e62bd9df0928be4723eeb5cbd77 100644 (file)
@@ -101,7 +101,6 @@ enum qop {
         QOP_LOG2,
         QOP_VW_SETUP,
         QOP_VR_SETUP,
-        QOP_TLB_DISCARD_SETUP,
         QOP_TLB_STENCIL_SETUP,
         QOP_TLB_Z_WRITE,
         QOP_TLB_COLOR_WRITE,
@@ -551,17 +550,23 @@ qir_##name##_dest(struct vc4_compile *c, struct qreg dest,               \
 }
 
 #define QIR_NODST_1(name)                                               \
-static inline void                                                      \
+static inline struct qinst *                                            \
 qir_##name(struct vc4_compile *c, struct qreg a)                        \
 {                                                                       \
-        qir_emit(c, qir_inst(QOP_##name, c->undef, a, c->undef));       \
+        struct qinst *inst = qir_inst(QOP_##name, c->undef,             \
+                                      a, c->undef);                     \
+        qir_emit(c, inst);                                              \
+        return inst;                                                    \
 }
 
 #define QIR_NODST_2(name)                                               \
-static inline void                                                      \
+static inline struct qinst *                                            \
 qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
 {                                                                       \
-        qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));       \
+        struct qinst *inst = qir_inst(QOP_##name, c->undef,             \
+                                      a, b);                            \
+        qir_emit(c, inst);                                              \
+        return inst;                                                    \
 }
 
 #define QIR_PACK(name)                                                   \
@@ -623,7 +628,6 @@ QIR_ALU0(TLB_COLOR_READ)
 QIR_NODST_1(TLB_COLOR_WRITE)
 QIR_NODST_1(TLB_COLOR_WRITE_MS)
 QIR_NODST_1(TLB_Z_WRITE)
-QIR_NODST_1(TLB_DISCARD_SETUP)
 QIR_NODST_1(TLB_STENCIL_SETUP)
 QIR_NODST_1(MS_MASK)
 
index ee1e9aafbb91cf4c4f459d03a7512fa87589b94b..186e81be750e5051c81ef81d17c8e8ea7722d46a 100644 (file)
@@ -236,11 +236,6 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 add_write_dep(dir, &state->last_tlb, n);
                 break;
 
-        case QOP_TLB_DISCARD_SETUP:
-                add_write_dep(dir, &state->last_sf, n);
-                add_write_dep(dir, &state->last_tlb, n);
-                break;
-
         default:
                 break;
         }
index 450b97fc014f63d03f6a1c8a13835ce1ebaacb99..b507e370683a63d498b622423f90fb5caaa65a02 100644 (file)
@@ -171,7 +171,6 @@ void
 vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 {
         struct qpu_reg *temp_registers = vc4_register_allocate(vc4, c);
-        bool discard = false;
         uint32_t inputs_remaining = c->num_inputs;
         uint32_t vpm_read_fifo_count = 0;
         uint32_t vpm_read_offset = 0;
@@ -375,12 +374,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                          */
                         break;
 
-                case QOP_TLB_DISCARD_SETUP:
-                        discard = true;
-                        queue(c, qpu_a_MOV(src[0], src[0]) | unpack);
-                        *last_inst(c) |= QPU_SF;
-                        break;
-
                 case QOP_TLB_STENCIL_SETUP:
                         assert(!unpack);
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
@@ -390,9 +383,8 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_TLB_Z_WRITE:
                         queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
                                            src[0]) | unpack);
-                        if (discard) {
-                                set_last_cond_add(c, QPU_COND_ZS);
-                        }
+                        set_last_cond_add(c, qinst->cond);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_TLB_COLOR_READ:
@@ -406,16 +398,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
 
                 case QOP_TLB_COLOR_WRITE:
                         queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
-                        if (discard) {
-                                set_last_cond_add(c, QPU_COND_ZS);
-                        }
+                        set_last_cond_add(c, qinst->cond);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_TLB_COLOR_WRITE_MS:
                         queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
-                        if (discard) {
-                                set_last_cond_add(c, QPU_COND_ZS);
-                        }
+                        set_last_cond_add(c, qinst->cond);
+                        handled_qinst_cond = true;
                         break;
 
                 case QOP_VARY_ADD_C: