From c519c4d85e7b4f9cad4e51dc08e8ae99bf3c810d Mon Sep 17 00:00:00 2001
From: Eric Anholt
Date: Sun, 29 Mar 2015 21:26:16 -0700
Subject: [PATCH] vc4: Don't bother masking out the low 24 bits for integer multiplies

The hardware just uses the low 24 lines, saving us an AND to drop the high
bits.

total uniforms in shared programs: 13433 -> 13423 (-0.07%)
uniforms in affected programs: 356 -> 346 (-2.81%)
total instructions in shared programs: 40003 -> 39989 (-0.03%)
instructions in affected programs: 910 -> 896 (-1.54%)
---
 src/gallium/drivers/vc4/vc4_program.c | 20 ++++++++------------
 1 file changed, 8 insertions(+), 12 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index 49b94666b88..9e145e54ccd 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -366,18 +366,14 @@ tgsi_to_qir_umul(struct vc4_compile *c,
                  struct tgsi_full_instruction *tgsi_inst,
                  enum qop op, struct qreg *src, int i)
 {
-        struct qreg src0_hi = qir_SHR(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 24));
-        struct qreg src0_lo = qir_AND(c, src[0 * 4 + i],
-                                      qir_uniform_ui(c, 0xffffff));
-        struct qreg src1_hi = qir_SHR(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 24));
-        struct qreg src1_lo = qir_AND(c, src[1 * 4 + i],
-                                      qir_uniform_ui(c, 0xffffff));
-
-        struct qreg hilo = qir_MUL24(c, src0_hi, src1_lo);
-        struct qreg lohi = qir_MUL24(c, src0_lo, src1_hi);
-        struct qreg lolo = qir_MUL24(c, src0_lo, src1_lo);
+        struct qreg src0 = src[0 * 4 + i];
+        struct qreg src0_hi = qir_SHR(c, src0, qir_uniform_ui(c, 24));
+        struct qreg src1 = src[1 * 4 + i];
+        struct qreg src1_hi = qir_SHR(c, src1, qir_uniform_ui(c, 24));
+
+        struct qreg hilo = qir_MUL24(c, src0_hi, src1);
+        struct qreg lohi = qir_MUL24(c, src0, src1_hi);
+        struct qreg lolo = qir_MUL24(c, src0, src1);
 
         return qir_ADD(c, lolo,
                        qir_SHL(c, qir_ADD(c, hilo, lohi),
-- 
2.30.2