vc4: Make integer multiply use 24 bits for the low parts.
author: Eric Anholt <eric@anholt.net>
Mon, 30 Mar 2015 04:21:10 +0000 (21:21 -0700)
committer: Eric Anholt <eric@anholt.net>
Mon, 30 Mar 2015 16:23:39 +0000 (09:23 -0700)
The hardware uses the low 24 bits of each operand in integer multiplies, so
split the sources at bit 24 instead of bit 16. That leaves fewer high bits
(8 instead of 16), so we can probably drop the high parts more frequently.

src/gallium/drivers/vc4/vc4_program.c

index 56a3a96c1b5c0472c50ed36b6cfa9a698dfea840..49b94666b886885dda1f7776fd9319d619f4d36f 100644 (file)
@@ -367,13 +367,13 @@ tgsi_to_qir_umul(struct vc4_compile *c,
                  enum qop op, struct qreg *src, int i)
 {
         struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 16));
+                                      qir_uniform_ui(c, 24));
         struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 0xffff));
+                                      qir_uniform_ui(c, 0xffffff));
         struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 16));
+                                      qir_uniform_ui(c, 24));
         struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 0xffff));
+                                      qir_uniform_ui(c, 0xffffff));
 
         struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
         struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
@@ -381,7 +381,7 @@ tgsi_to_qir_umul(struct vc4_compile *c,
 
         return qir_ADD(c, lolo, qir_SHL(c,
                                         qir_ADD(c, hilo, lohi),
-                                        qir_uniform_ui(c, 16)));
+                                        qir_uniform_ui(c, 24)));
 }
 
 static struct qreg