vc4: Make the pack-to-unorm instructions be non-SSA.
authorEric Anholt <eric@anholt.net>
Thu, 6 Aug 2015 03:31:21 +0000 (20:31 -0700)
committerEric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 06:42:17 +0000 (23:42 -0700)
This helps ensure that the register allocator doesn't force the later pack
operations to insert extra MOVs.

total instructions in shared programs: 98170 -> 98159 (-0.01%)
instructions in affected programs:     2134 -> 2123 (-0.52%)

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c

index 13c472152d8f8caa80d115f50f121212825a1af8..303132f3a3b7d6a62a1d02fb881fe3da89e073e3 100644 (file)
@@ -839,14 +839,13 @@ ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
         }
 
         if (instr->op == nir_op_pack_unorm_4x8) {
-                struct qreg result;
+                struct qreg result = qir_get_temp(c);
+
                 for (int i = 0; i < 4; i++) {
-                        struct qreg src = ntq_get_src(c, instr->src[0].src,
-                                                      instr->src[0].swizzle[i]);
-                        if (i == 0)
-                                result = qir_PACK_8888_F(c, src);
-                        else
-                                result = qir_PACK_8_F(c, result, src, i);
+                        qir_PACK_8_F(c, result,
+                                     ntq_get_src(c, instr->src[0].src,
+                                                 instr->src[0].swizzle[i]),
+                                     i);
                 }
                 struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
                 *dest = result;
index 90d1c1ff69b4d6ab216fd039e22c3ed99117a24c..3a37451a3ca252ae08800d83f519bbdec1176244 100644 (file)
@@ -71,11 +71,11 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_RSQ] = { "rsq", 1, 1, false, true },
         [QOP_EXP2] = { "exp2", 1, 2, false, true },
         [QOP_LOG2] = { "log2", 1, 2, false, true },
-        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1, false, true },
-        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 2, false, true },
-        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 2, false, true },
-        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 2, false, true },
-        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 2, false, true },
+        [QOP_PACK_8888_F] = { "pack_8888_f", 1, 1 },
+        [QOP_PACK_8A_F] = { "pack_8a_f", 1, 1 },
+        [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
+        [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
+        [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
         [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
index cade795c12afcadfdbd5ac9736b12a4d594b911a..ca93ab8641fc82deeb3377f53827cc252622b9d1 100644 (file)
@@ -534,6 +534,16 @@ qir_##name(struct vc4_compile *c, struct qreg a, struct qreg b)         \
         qir_emit(c, qir_inst(QOP_##name, c->undef, a, b));       \
 }
 
+#define QIR_PACK(name)                                                   \
+static inline struct qreg                                                \
+qir_##name(struct vc4_compile *c, struct qreg dest, struct qreg a)       \
+{                                                                        \
+        qir_emit(c, qir_inst(QOP_##name, dest, a, c->undef));            \
+        if (dest.file == QFILE_TEMP)                                     \
+                c->defs[dest.index] = NULL;                              \
+        return dest;                                                     \
+}
+
 QIR_ALU1(MOV)
 QIR_ALU2(FADD)
 QIR_ALU2(FSUB)
@@ -572,10 +582,10 @@ QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
 QIR_ALU2(PACK_SCALED)
 QIR_ALU1(PACK_8888_F)
-QIR_ALU2(PACK_8A_F)
-QIR_ALU2(PACK_8B_F)
-QIR_ALU2(PACK_8C_F)
-QIR_ALU2(PACK_8D_F)
+QIR_PACK(PACK_8A_F)
+QIR_PACK(PACK_8B_F)
+QIR_PACK(PACK_8C_F)
+QIR_PACK(PACK_8D_F)
 QIR_ALU1(VARY_ADD_C)
 QIR_NODST_2(TEX_S)
 QIR_NODST_2(TEX_T)
@@ -627,11 +637,12 @@ qir_UNPACK_16_I(struct vc4_compile *c, struct qreg src, int i)
 }
 
 static inline struct qreg
-qir_PACK_8_F(struct vc4_compile *c, struct qreg rest, struct qreg val, int chan)
+qir_PACK_8_F(struct vc4_compile *c, struct qreg dest, struct qreg val, int chan)
 {
-        struct qreg t = qir_get_temp(c);
-        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, t, rest, val));
-        return t;
+        qir_emit(c, qir_inst(QOP_PACK_8A_F + chan, dest, val, c->undef));
+        if (dest.file == QFILE_TEMP)
+                c->defs[dest.index] = NULL;
+        return dest;
 }
 
 static inline struct qreg
index f324056258cef2082d5f69e1728b23d04857d6f0..e89db3e4f05346254c3d756254903e3921b09f0b 100644 (file)
@@ -336,28 +336,12 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                 case QOP_PACK_8B_F:
                 case QOP_PACK_8C_F:
                 case QOP_PACK_8D_F:
-                        /* If dst doesn't happen to already contain src[0],
-                         * then we have to move it in.
-                         */
-                        if (qinst->src[0].file != QFILE_NULL &&
-                            (src[0].mux != dst.mux || src[0].addr != dst.addr)) {
-                                /* Don't overwrite src1 while setting up
-                                 * the dst!
-                                 */
-                                if (dst.mux == src[1].mux &&
-                                    dst.addr == src[1].addr) {
-                                        queue(c, qpu_m_MOV(qpu_rb(31), src[1]));
-                                        src[1] = qpu_rb(31);
-                                }
-
-                                queue(c, qpu_m_MOV(dst, src[0]));
-                        }
-
-                        queue(c, qpu_m_MOV(dst, src[1]));
-                        *last_inst(c) |= QPU_PM;
-                        *last_inst(c) |= QPU_SET_FIELD(QPU_PACK_MUL_8A +
-                                                       qinst->op - QOP_PACK_8A_F,
-                                                       QPU_PACK);
+                        queue(c,
+                              qpu_m_MOV(dst, src[0]) |
+                              QPU_PM |
+                              QPU_SET_FIELD(QPU_PACK_MUL_8A +
+                                            qinst->op - QOP_PACK_8A_F,
+                                            QPU_PACK));
                         break;
 
                 case QOP_FRAG_X: