vc4: Allow TLB Z/color/stencil writes from any ALU operation in QIR.
authorEric Anholt <eric@anholt.net>
Mon, 21 Mar 2016 20:12:41 +0000 (13:12 -0700)
committerEric Anholt <eric@anholt.net>
Sat, 9 Apr 2016 01:41:46 +0000 (18:41 -0700)
This lets us write the Z directly from the FTOI for computed Z, and may
let us coalesce color writes in the future.

No change in my shader-db, but clearly drops an instruction in piglit's
early-z test.

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qir_schedule.c
src/gallium/drivers/vc4/vc4_qpu_emit.c

index d1e893a76a97b20c570cebc5507a3a5b00fe6f37..35bad7e9296a1f51c5b6bfd6341fce6178775288 100644 (file)
@@ -1192,12 +1192,15 @@ emit_frag_end(struct vc4_compile *c)
         }
 
         if (c->fs_key->stencil_enabled) {
-                qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
+                qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+                             qir_uniform(c, QUNIFORM_STENCIL, 0));
                 if (c->fs_key->stencil_twoside) {
-                        qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1));
+                        qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+                                     qir_uniform(c, QUNIFORM_STENCIL, 1));
                 }
                 if (c->fs_key->stencil_full_writemasks) {
-                        qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2));
+                        qir_MOV_dest(c, qir_reg(QFILE_TLB_STENCIL_SETUP, 0),
+                                     qir_uniform(c, QUNIFORM_STENCIL, 2));
                 }
         }
 
@@ -1206,24 +1209,24 @@ emit_frag_end(struct vc4_compile *c)
         }
 
         if (c->fs_key->depth_enabled) {
-                struct qreg z;
                 if (c->output_position_index != -1) {
-                        z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
-                                                 qir_uniform_f(c, 0xffffff)));
+                        qir_FTOI_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
+                                      qir_FMUL(c,
+                                               c->outputs[c->output_position_index + 2],
+                                               qir_uniform_f(c, 0xffffff)))->cond = discard_cond;
                 } else {
-                        z = qir_FRAG_Z(c);
+                        qir_MOV_dest(c, qir_reg(QFILE_TLB_Z_WRITE, 0),
+                                     qir_FRAG_Z(c))->cond = discard_cond;
                 }
-                struct qinst *inst = qir_TLB_Z_WRITE(c, z);
-                inst->cond = discard_cond;
         }
 
         if (!c->msaa_per_sample_output) {
-                struct qinst *inst = qir_TLB_COLOR_WRITE(c, color);
-                inst->cond = discard_cond;
+                qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE, 0),
+                             color)->cond = discard_cond;
         } else {
                 for (int i = 0; i < VC4_MAX_SAMPLES; i++) {
-                        struct qinst *inst = qir_TLB_COLOR_WRITE_MS(c, c->sample_colors[i]);
-                        inst->cond = discard_cond;
+                        qir_MOV_dest(c, qir_reg(QFILE_TLB_COLOR_WRITE_MS, 0),
+                                     c->sample_colors[i])->cond = discard_cond;
                 }
         }
 }
index c6d5a79eae165fd1380868155c85b7ce447959b5..10b82ffc16c17f550130767f6fdc47ba208ea482 100644 (file)
@@ -68,10 +68,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_RSQ] = { "rsq", 1, 1 },
         [QOP_EXP2] = { "exp2", 1, 2 },
         [QOP_LOG2] = { "log2", 1, 2 },
-        [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
-        [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
-        [QOP_TLB_COLOR_WRITE] = { "tlb_color", 0, 1, true },
-        [QOP_TLB_COLOR_WRITE_MS] = { "tlb_color_ms", 0, 1, true },
         [QOP_TLB_COLOR_READ] = { "tlb_color_read", 1, 0 },
         [QOP_MS_MASK] = { "ms_mask", 0, 1, true },
         [QOP_VARY_ADD_C] = { "vary_add_c", 1, 1 },
@@ -115,6 +111,16 @@ qir_get_op_nsrc(enum qop qop)
 bool
 qir_has_side_effects(struct vc4_compile *c, struct qinst *inst)
 {
+        switch (inst->dst.file) {
+        case QFILE_TLB_Z_WRITE:
+        case QFILE_TLB_COLOR_WRITE:
+        case QFILE_TLB_COLOR_WRITE_MS:
+        case QFILE_TLB_STENCIL_SETUP:
+                return true;
+        default:
+                break;
+        }
+
         return qir_op_info[inst->op].has_side_effects;
 }
 
@@ -226,24 +232,44 @@ qir_print_reg(struct vc4_compile *c, struct qreg reg, bool write)
                 [QFILE_TEMP] = "t",
                 [QFILE_VARY] = "v",
                 [QFILE_UNIF] = "u",
+                [QFILE_TLB_COLOR_WRITE] = "tlb_c",
+                [QFILE_TLB_COLOR_WRITE_MS] = "tlb_c_ms",
+                [QFILE_TLB_Z_WRITE] = "tlb_z",
+                [QFILE_TLB_STENCIL_SETUP] = "tlb_stencil",
         };
 
-        if (reg.file == QFILE_NULL) {
+        switch (reg.file) {
+
+        case QFILE_NULL:
                 fprintf(stderr, "null");
-        } else if (reg.file == QFILE_SMALL_IMM) {
+                break;
+
+        case QFILE_SMALL_IMM:
                 if ((int)reg.index >= -16 && (int)reg.index <= 15)
                         fprintf(stderr, "%d", reg.index);
                 else
                         fprintf(stderr, "%f", uif(reg.index));
-        } else if (reg.file == QFILE_VPM) {
+                break;
+
+        case QFILE_VPM:
                 if (write) {
                         fprintf(stderr, "vpm");
                 } else {
                         fprintf(stderr, "vpm%d.%d",
                                 reg.index / 4, reg.index % 4);
                 }
-        } else {
+                break;
+
+        case QFILE_TLB_COLOR_WRITE:
+        case QFILE_TLB_COLOR_WRITE_MS:
+        case QFILE_TLB_Z_WRITE:
+        case QFILE_TLB_STENCIL_SETUP:
+                fprintf(stderr, "%s", files[reg.file]);
+                break;
+
+        default:
                 fprintf(stderr, "%s%d", files[reg.file], reg.index);
+                break;
         }
 
         if (reg.file == QFILE_UNIF &&
index 4aec313831f95585018fd2cc1088f9d256b9689b..970eafd542bc5915bff5b2fe7d9dacfbe33e5be5 100644 (file)
@@ -49,6 +49,10 @@ enum qfile {
         QFILE_VARY,
         QFILE_UNIF,
         QFILE_VPM,
+        QFILE_TLB_COLOR_WRITE,
+        QFILE_TLB_COLOR_WRITE_MS,
+        QFILE_TLB_Z_WRITE,
+        QFILE_TLB_STENCIL_SETUP,
 
         /**
          * Stores an immediate value in the index field that can be turned
@@ -106,10 +110,6 @@ enum qop {
         QOP_LOG2,
         QOP_VW_SETUP,
         QOP_VR_SETUP,
-        QOP_TLB_STENCIL_SETUP,
-        QOP_TLB_Z_WRITE,
-        QOP_TLB_COLOR_WRITE,
-        QOP_TLB_COLOR_WRITE_MS,
         QOP_TLB_COLOR_READ,
         QOP_MS_MASK,
         QOP_VARY_ADD_C,
@@ -629,10 +629,6 @@ QIR_ALU0(FRAG_W)
 QIR_ALU0(FRAG_REV_FLAG)
 QIR_ALU0(TEX_RESULT)
 QIR_ALU0(TLB_COLOR_READ)
-QIR_NODST_1(TLB_COLOR_WRITE)
-QIR_NODST_1(TLB_COLOR_WRITE_MS)
-QIR_NODST_1(TLB_Z_WRITE)
-QIR_NODST_1(TLB_STENCIL_SETUP)
 QIR_NODST_1(MS_MASK)
 
 static inline struct qreg
index 4585918bc7d3684f4df7327a1258baf11c7fbbde..8b843a3a15800f7033a254bbd0212dbac2b0bd3c 100644 (file)
@@ -228,11 +228,7 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 add_write_dep(dir, &state->last_tex_result, n);
                 break;
 
-        case QOP_TLB_COLOR_WRITE:
-        case QOP_TLB_COLOR_WRITE_MS:
         case QOP_TLB_COLOR_READ:
-        case QOP_TLB_Z_WRITE:
-        case QOP_TLB_STENCIL_SETUP:
         case QOP_MS_MASK:
                 add_write_dep(dir, &state->last_tlb, n);
                 break;
@@ -241,10 +237,25 @@ calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
                 break;
         }
 
-        if (inst->dst.file == QFILE_VPM)
+        switch (inst->dst.file) {
+        case QFILE_VPM:
                 add_write_dep(dir, &state->last_vpm_write, n);
-        else if (inst->dst.file == QFILE_TEMP)
+                break;
+
+        case QFILE_TEMP:
                 add_write_dep(dir, &state->last_temp_write[inst->dst.index], n);
+                break;
+
+        case QFILE_TLB_COLOR_WRITE:
+        case QFILE_TLB_COLOR_WRITE_MS:
+        case QFILE_TLB_Z_WRITE:
+        case QFILE_TLB_STENCIL_SETUP:
+                add_write_dep(dir, &state->last_tlb, n);
+                break;
+
+        default:
+                break;
+        }
 
         if (qir_depends_on_flags(inst))
                 add_dep(dir, state->last_sf, n);
@@ -358,11 +369,13 @@ get_register_pressure_cost(struct schedule_state *state, struct qinst *inst)
 static bool
 locks_scoreboard(struct qinst *inst)
 {
-        switch (inst->op) {
-        case QOP_TLB_Z_WRITE:
-        case QOP_TLB_COLOR_WRITE:
-        case QOP_TLB_COLOR_WRITE_MS:
-        case QOP_TLB_COLOR_READ:
+        if (inst->op == QOP_TLB_COLOR_READ)
+                return true;
+
+        switch (inst->dst.file) {
+        case QFILE_TLB_Z_WRITE:
+        case QFILE_TLB_COLOR_WRITE:
+        case QFILE_TLB_COLOR_WRITE_MS:
                 return true;
         default:
                 return false;
index 63e1ee543053af32013d42b18672afc1f784479b..5c655495c2bd256100a9738360afbb940b9975a7 100644 (file)
@@ -300,6 +300,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                                 last_vpm_read_index = qinst->src[i].index;
                                 src[i] = qpu_ra(QPU_R_VPM);
                                 break;
+                        case QFILE_TLB_COLOR_WRITE:
+                        case QFILE_TLB_COLOR_WRITE_MS:
+                        case QFILE_TLB_Z_WRITE:
+                        case QFILE_TLB_STENCIL_SETUP:
+                                unreachable("bad qir src file");
                         }
                 }
 
@@ -314,6 +319,23 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QFILE_VPM:
                         dst = qpu_ra(QPU_W_VPM);
                         break;
+
+                case QFILE_TLB_COLOR_WRITE:
+                        dst = qpu_tlbc();
+                        break;
+
+                case QFILE_TLB_COLOR_WRITE_MS:
+                        dst = qpu_tlbc_ms();
+                        break;
+
+                case QFILE_TLB_Z_WRITE:
+                        dst = qpu_ra(QPU_W_TLB_Z);
+                        break;
+
+                case QFILE_TLB_STENCIL_SETUP:
+                        dst = qpu_ra(QPU_W_TLB_STENCIL_SETUP);
+                        break;
+
                 case QFILE_VARY:
                 case QFILE_UNIF:
                 case QFILE_SMALL_IMM:
@@ -383,19 +405,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                          */
                         break;
 
-                case QOP_TLB_STENCIL_SETUP:
-                        assert(!unpack);
-                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_STENCIL_SETUP),
-                                           src[0]) | unpack);
-                        break;
-
-                case QOP_TLB_Z_WRITE:
-                        queue(c, qpu_a_MOV(qpu_ra(QPU_W_TLB_Z),
-                                           src[0]) | unpack);
-                        set_last_cond_add(c, qinst->cond);
-                        handled_qinst_cond = true;
-                        break;
-
                 case QOP_TLB_COLOR_READ:
                         queue(c, qpu_NOP());
                         *last_inst(c) = qpu_set_sig(*last_inst(c),
@@ -403,18 +412,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         handle_r4_qpu_write(c, qinst, dst);
                         break;
 
-                case QOP_TLB_COLOR_WRITE:
-                        queue(c, qpu_a_MOV(qpu_tlbc(), src[0]) | unpack);
-                        set_last_cond_add(c, qinst->cond);
-                        handled_qinst_cond = true;
-                        break;
-
-                case QOP_TLB_COLOR_WRITE_MS:
-                        queue(c, qpu_a_MOV(qpu_tlbc_ms(), src[0]));
-                        set_last_cond_add(c, qinst->cond);
-                        handled_qinst_cond = true;
-                        break;
-
                 case QOP_VARY_ADD_C:
                         queue(c, qpu_a_FADD(dst, src[0], qpu_r5()) | unpack);
                         break;