vc4: Don't bother masking to the low 24 bits for integer multiplies
author Eric Anholt <eric@anholt.net>
Mon, 30 Mar 2015 04:26:16 +0000 (21:26 -0700)
committer Eric Anholt <eric@anholt.net>
Mon, 30 Mar 2015 16:23:39 +0000 (09:23 -0700)
The hardware multiplier only reads the low 24 bits of each operand, so we
can skip the AND that was dropping the high bits.

total uniforms in shared programs: 13433 -> 13423 (-0.07%)
uniforms in affected programs:     356 -> 346 (-2.81%)
total instructions in shared programs: 40003 -> 39989 (-0.03%)
instructions in affected programs:     910 -> 896 (-1.54%)

src/gallium/drivers/vc4/vc4_program.c

index 49b94666b886885dda1f7776fd9319d619f4d36f..9e145e54ccd8ac8bfc117332a20f11f62c849f9f 100644 (file)
@@ -366,18 +366,14 @@ tgsi_to_qir_umul(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
 {
-        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 24));
-        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 0xffffff));
-        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 24));
-        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 0xffffff));
-
-        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
-        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
-        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
+        struct qreg src0 = src[0 * 4 + i];
+        struct qreg src0_hi = qir_SHR(c, src0, qir_uniform_ui(c, 24));
+        struct qreg src1 = src[1 * 4 + i];
+        struct qreg src1_hi = qir_SHR(c, src1, qir_uniform_ui(c, 24));
+
+        struct qreg hilo = qir_MUL24(c, src0_hi, src1);
+        struct qreg lohi = qir_MUL24(c, src0, src1_hi);
+        struct qreg lolo = qir_MUL24(c, src0, src1);
 
         return qir_ADD(c, lolo, qir_SHL(c,
                                         qir_ADD(c, hilo, lohi),