vc4: Switch QOP_PACK_SCALED to be two non-SSA instructions.
author: Eric Anholt <eric@anholt.net>
Thu, 6 Aug 2015 03:54:02 +0000 (20:54 -0700)
committer: Eric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 06:42:45 +0000 (23:42 -0700)
total instructions in shared programs: 98159 -> 98136 (-0.02%)
instructions in affected programs:     12279 -> 12256 (-0.19%)

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_register_allocate.c

index 303132f3a3b7d6a62a1d02fb881fe3da89e073e3..ff41779e6c172391f560dec8e9bc0d9b2ea85582 100644 (file)
@@ -1142,7 +1142,10 @@ emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
                                               rcp_w));
         }
 
-        qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1]));
+        struct qreg packed = qir_get_temp(c);
+        qir_PACK_16A_I(c, packed, xyi[0]);
+        qir_PACK_16B_I(c, packed, xyi[1]);
+        qir_VPM_WRITE(c, packed);
 }
 
 static void
index 3a37451a3ca252ae08800d83f519bbdec1176244..a7b4bd637066a528cb3e862d1ebedf2c6b369850 100644 (file)
@@ -76,7 +76,8 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
         [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
         [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
-        [QOP_PACK_SCALED] = { "pack_scaled", 1, 2, false, true },
+        [QOP_PACK_16A_I] = { "pack_16a_i", 1, 1 },
+        [QOP_PACK_16B_I] = { "pack_16b_i", 1, 1 },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
index ca93ab8641fc82deeb3377f53827cc252622b9d1..5e23420f8e502ddc46f2509d0c06d6910733f559 100644 (file)
@@ -104,12 +104,13 @@ enum qop {
         QOP_LOG2,
         QOP_VW_SETUP,
         QOP_VR_SETUP,
-        QOP_PACK_SCALED,
         QOP_PACK_8888_F,
         QOP_PACK_8A_F,
         QOP_PACK_8B_F,
         QOP_PACK_8C_F,
         QOP_PACK_8D_F,
+        QOP_PACK_16A_I,
+        QOP_PACK_16B_I,
         QOP_TLB_DISCARD_SETUP,
         QOP_TLB_STENCIL_SETUP,
         QOP_TLB_Z_WRITE,
@@ -580,12 +581,13 @@ QIR_ALU1(RCP)
 QIR_ALU1(RSQ)
 QIR_ALU1(EXP2)
 QIR_ALU1(LOG2)
-QIR_ALU2(PACK_SCALED)
 QIR_ALU1(PACK_8888_F)
 QIR_PACK(PACK_8A_F)
 QIR_PACK(PACK_8B_F)
 QIR_PACK(PACK_8C_F)
 QIR_PACK(PACK_8D_F)
+QIR_PACK(PACK_16A_I)
+QIR_PACK(PACK_16B_I)
 QIR_ALU1(VARY_ADD_C)
 QIR_NODST_2(TEX_S)
 QIR_NODST_2(TEX_T)
index e89db3e4f05346254c3d756254903e3921b09f0b..573a557f63e5d67e4dfe617fe5db943edd3cbfda 100644 (file)
@@ -403,23 +403,14 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
                         break;
 
-                case QOP_PACK_SCALED: {
-                        uint64_t a = (qpu_a_MOV(dst, src[0]) |
-                                      QPU_SET_FIELD(QPU_PACK_A_16A,
-                                                    QPU_PACK));
-                        uint64_t b = (qpu_a_MOV(dst, src[1]) |
-                                      QPU_SET_FIELD(QPU_PACK_A_16B,
-                                                    QPU_PACK));
-
-                        if (dst.mux == src[1].mux && dst.addr == src[1].addr) {
-                                queue(c, b);
-                                queue(c, a);
-                        } else {
-                                queue(c, a);
-                                queue(c, b);
-                        }
+                case QOP_PACK_16A_I:
+                case QOP_PACK_16B_I:
+                        queue(c,
+                              qpu_a_MOV(dst, src[0]) |
+                              QPU_SET_FIELD(qinst->op == QOP_PACK_16A_I ?
+                                            QPU_PACK_A_16A : QPU_PACK_A_16B,
+                                            QPU_PACK));
                         break;
-                }
 
                 case QOP_TEX_S:
                 case QOP_TEX_T:
index fa67a3a5911e1e82daeaaf6cbcaf80f5e157fc27..58d812bce8a58bfedb84a90cce31d91a7daca1cc 100644 (file)
@@ -268,7 +268,8 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
                                         AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
                         break;
 
-                case QOP_PACK_SCALED:
+                case QOP_PACK_16A_I:
+                case QOP_PACK_16B_I:
                         /* The pack flags require an A-file dst register. */
                         class_bits[inst->dst.index] &= CLASS_BIT_A;
                         break;