From c1299615fbe25440f82d6b69667967db294e6e31 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Wed, 18 Jan 2017 14:23:14 +1100 Subject: [PATCH] vc4: Avoid an extra temporary and mov in ffloor/ffract/fceil. shader-db results: total instructions in shared programs: 92611 -> 91764 (-0.91%) instructions in affected programs: 27417 -> 26570 (-3.09%) The star is one shader in glmark2's terrain (drops 16% of its instructions), but there are also wins in mupen64plus and glb2.7. --- src/gallium/drivers/vc4/vc4_program.c | 31 ++++++++++++++++----------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c index c2d67277ef8..a7cc6687962 100644 --- a/src/gallium/drivers/vc4/vc4_program.c +++ b/src/gallium/drivers/vc4/vc4_program.c @@ -599,9 +599,11 @@ ntq_ffract(struct vc4_compile *c, struct qreg src) struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); struct qreg diff = qir_FSUB(c, src, trunc); qir_SF(c, diff); - return qir_MOV(c, qir_SEL(c, QPU_COND_NS, - qir_FADD(c, diff, qir_uniform_f(c, 1.0)), - diff)); + + qir_FADD_dest(c, diff, + diff, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS; + + return qir_MOV(c, diff); } /** @@ -611,16 +613,18 @@ ntq_ffract(struct vc4_compile *c, struct qreg src) static struct qreg ntq_ffloor(struct vc4_compile *c, struct qreg src) { - struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); + struct qreg result = qir_ITOF(c, qir_FTOI(c, src)); /* This will be < 0 if we truncated and the truncation was of a value * that was < 0 in the first place. */ - qir_SF(c, qir_FSUB(c, src, trunc)); + qir_SF(c, qir_FSUB(c, src, result)); - return qir_MOV(c, qir_SEL(c, QPU_COND_NS, - qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), - trunc)); + struct qinst *sub = qir_FSUB_dest(c, result, + result, qir_uniform_f(c, 1.0)); + sub->cond = QPU_COND_NS; + + return qir_MOV(c, result); } /** @@ -630,16 +634,17 @@ ntq_ffloor(struct vc4_compile *c, struct qreg src) static struct qreg ntq_fceil(struct vc4_compile *c, struct qreg src) { - struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); + struct qreg result = qir_ITOF(c, qir_FTOI(c, src)); /* This will be < 0 if we truncated and the truncation was of a value * that was > 0 in the first place. */ - qir_SF(c, qir_FSUB(c, trunc, src)); + qir_SF(c, qir_FSUB(c, result, src)); + + qir_FADD_dest(c, result, + result, qir_uniform_f(c, 1.0))->cond = QPU_COND_NS; - return qir_MOV(c, qir_SEL(c, QPU_COND_NS, - qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), - trunc)); + return qir_MOV(c, result); } static struct qreg -- 2.30.2