vc4: Take advantage of the 8888 pack function in pack_unorm_4x8.
author: Eric Anholt <eric@anholt.net>
Sun, 25 Oct 2015 00:04:49 +0000 (17:04 -0700)
committer: Eric Anholt <eric@anholt.net>
Sun, 25 Oct 2015 00:55:22 +0000 (17:55 -0700)
One instruction instead of four, and it turns out you do this a lot for
the Over operator.

total uniforms in shared programs: 32168 -> 32087 (-0.25%)
uniforms in affected programs:     318 -> 237 (-25.47%)
total instructions in shared programs: 89830 -> 89472 (-0.40%)
instructions in affected programs:     6434 -> 6076 (-5.56%)

src/gallium/drivers/vc4/vc4_program.c

index 5d7564b46d58091f86e326ae4dcb3d92a4cc6aa9..a48dad804e27477b88301e4d65176301cdd6b134 100644 (file)
@@ -738,6 +738,20 @@ ntq_emit_pack_unorm_4x8(struct vc4_compile *c, nir_alu_instr *instr)
                 vec4 = nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
         }
 
+        /* If the pack is replicating the same channel 4 times, use the 8888
+         * pack flag.  This is common for blending using the alpha
+         * channel.
+         */
+        if (instr->src[0].swizzle[0] == instr->src[0].swizzle[1] &&
+            instr->src[0].swizzle[0] == instr->src[0].swizzle[2] &&
+            instr->src[0].swizzle[0] == instr->src[0].swizzle[3]) {
+                struct qreg *dest = ntq_get_dest(c, &instr->dest.dest);
+                *dest = qir_PACK_8888_F(c,
+                                        ntq_get_src(c, instr->src[0].src,
+                                                    instr->src[0].swizzle[0]));
+                return;
+        }
+
         for (int i = 0; i < 4; i++) {
                 int swiz = instr->src[0].swizzle[i];
                 struct qreg src;