vc4: Fold the 16-bit integer pack into the instructions generating it.
author Eric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 07:08:13 +0000 (00:08 -0700)
committer Eric Anholt <eric@anholt.net>
Fri, 21 Aug 2015 20:29:26 +0000 (13:29 -0700)
total instructions in shared programs: 97580 -> 96798 (-0.80%)
instructions in affected programs:     52826 -> 52044 (-1.48%)

src/gallium/drivers/vc4/vc4_program.c
src/gallium/drivers/vc4/vc4_qir.c
src/gallium/drivers/vc4/vc4_qir.h
src/gallium/drivers/vc4/vc4_qpu_emit.c
src/gallium/drivers/vc4/vc4_register_allocate.c

index 6bf4c9eab9bca82f2bdff002c5654345b6868716..e002983fdbb4446922b3143007f9e5762d51c16b 100644 (file)
@@ -1186,22 +1186,23 @@ emit_frag_end(struct vc4_compile *c)
 static void
 emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w)
 {
-        struct qreg xyi[2];
+        struct qreg packed = qir_get_temp(c);
 
         for (int i = 0; i < 2; i++) {
                 struct qreg scale =
                         qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0);
 
-                xyi[i] = qir_FTOI(c, qir_FMUL(c,
-                                              qir_FMUL(c,
-                                                       c->outputs[c->output_position_index + i],
-                                                       scale),
-                                              rcp_w));
+                struct qreg packed_chan = packed;
+                packed_chan.pack = QPU_PACK_A_16A + i;
+
+                qir_FTOI_dest(c, packed_chan,
+                              qir_FMUL(c,
+                                       qir_FMUL(c,
+                                                c->outputs[c->output_position_index + i],
+                                                scale),
+                                       rcp_w));
         }
 
-        struct qreg packed = qir_get_temp(c);
-        qir_PACK_16A_I(c, packed, xyi[0]);
-        qir_PACK_16B_I(c, packed, xyi[1]);
         qir_VPM_WRITE(c, packed);
 }
 
index e5efb7cba7e9013bdc6e6b50696437e4007f2934..9d930715f9bff0fe5b20ca08151e052918bb4ba5 100644 (file)
@@ -76,8 +76,6 @@ static const struct qir_op_info qir_op_info[] = {
         [QOP_PACK_8B_F] = { "pack_8b_f", 1, 1 },
         [QOP_PACK_8C_F] = { "pack_8c_f", 1, 1 },
         [QOP_PACK_8D_F] = { "pack_8d_f", 1, 1 },
-        [QOP_PACK_16A_I] = { "pack_16a_i", 1, 1 },
-        [QOP_PACK_16B_I] = { "pack_16b_i", 1, 1 },
         [QOP_TLB_DISCARD_SETUP] = { "discard", 0, 1, true },
         [QOP_TLB_STENCIL_SETUP] = { "tlb_stencil_setup", 0, 1, true },
         [QOP_TLB_Z_WRITE] = { "tlb_z", 0, 1, true },
index cbeff43d5be89285817eac736c7ae9210d9b6b53..a2b21fa17bb055fb8a8d6490d52f0344818a118b 100644 (file)
@@ -110,8 +110,6 @@ enum qop {
         QOP_PACK_8B_F,
         QOP_PACK_8C_F,
         QOP_PACK_8D_F,
-        QOP_PACK_16A_I,
-        QOP_PACK_16B_I,
         QOP_TLB_DISCARD_SETUP,
         QOP_TLB_STENCIL_SETUP,
         QOP_TLB_Z_WRITE,
@@ -604,8 +602,6 @@ QIR_PACK(PACK_8A_F)
 QIR_PACK(PACK_8B_F)
 QIR_PACK(PACK_8C_F)
 QIR_PACK(PACK_8D_F)
-QIR_PACK(PACK_16A_I)
-QIR_PACK(PACK_16B_I)
 QIR_ALU1(VARY_ADD_C)
 QIR_NODST_2(TEX_S)
 QIR_NODST_2(TEX_T)
index bf614a2c1fd776c2c98982bb7eedf3c981e6de2a..adf3a8b3658afcc71c14e10f91f6f49216f740a0 100644 (file)
@@ -402,15 +402,6 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                         queue(c, qpu_a_FADD(dst, src[0], qpu_r5()));
                         break;
 
-                case QOP_PACK_16A_I:
-                case QOP_PACK_16B_I:
-                        queue(c,
-                              qpu_a_MOV(dst, src[0]) |
-                              QPU_SET_FIELD(qinst->op == QOP_PACK_16A_I ?
-                                            QPU_PACK_A_16A : QPU_PACK_A_16B,
-                                            QPU_PACK));
-                        break;
-
                 case QOP_TEX_S:
                 case QOP_TEX_T:
                 case QOP_TEX_R:
@@ -516,6 +507,11 @@ vc4_generate_code(struct vc4_context *vc4, struct vc4_compile *c)
                                 queue(c, qpu_a_alu2(translate[qinst->op].op,
                                                     dst,
                                                     src[0], src[1]));
+                                if (qinst->dst.pack) {
+                                        assert(dst.mux == QPU_MUX_A);
+                                        *last_inst(c) |= QPU_SET_FIELD(qinst->dst.pack,
+                                                                       QPU_PACK);
+                                }
                         }
 
                         break;
index 93013147d49080f9009ff2d438020bbbbdf707b2..2ea88500227696443bfc35d742404422c7764cdd 100644 (file)
@@ -268,16 +268,17 @@ vc4_register_allocate(struct vc4_context *vc4, struct vc4_compile *c)
                                         AB_INDEX + QPU_R_FRAG_PAYLOAD_ZW * 2);
                         break;
 
-                case QOP_PACK_16A_I:
-                case QOP_PACK_16B_I:
-                        /* The pack flags require an A-file dst register. */
-                        class_bits[inst->dst.index] &= CLASS_BIT_A;
-                        break;
-
                 default:
                         break;
                 }
 
+                if (inst->dst.pack && !qir_is_mul(inst)) {
+                        /* The non-MUL pack flags require an A-file dst
+                         * register.
+                         */
+                        class_bits[inst->dst.index] &= CLASS_BIT_A;
+                }
+
                 if (qir_src_needs_a_file(inst)) {
                         switch (inst->op) {
                         case QOP_UNPACK_8A_F: