From f0e9a632a12798bd727799e396cde665bd960665 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Mon, 6 Apr 2015 10:48:11 -0400 Subject: [PATCH] freedreno/ir3/cp: support to swap mad src's For a normal MAD (ie. not MADSH), if first source is gpr and second source is const, we can swap the first two sources to avoid needing a mov instruction. This gives back the biggest advantage TGSI f/e had over NIR f/e for common shaders, since TGSI f/e had this logic in the f/e. Note that doing this in copy-prop step has the advantage that it will also work for cases like: MOV TEMP[b], CONST[x] MAD TEMP[d], TEMP[a], TEMP[b], TEMP[c] Signed-off-by: Rob Clark --- .../drivers/freedreno/ir3/instr-a3xx.h | 13 ++++++-- src/gallium/drivers/freedreno/ir3/ir3.h | 4 +-- src/gallium/drivers/freedreno/ir3/ir3_cp.c | 32 ++++++++++++++++--- src/gallium/drivers/freedreno/ir3/ir3_depth.c | 3 +- 4 files changed, 43 insertions(+), 9 deletions(-) diff --git a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h index 4d75d771435..98637c7874d 100644 --- a/src/gallium/drivers/freedreno/ir3/instr-a3xx.h +++ b/src/gallium/drivers/freedreno/ir3/instr-a3xx.h @@ -676,9 +676,7 @@ static inline bool is_mad(opc_t opc) { switch (opc) { case OPC_MAD_U16: - case OPC_MADSH_U16: case OPC_MAD_S16: - case OPC_MADSH_M16: case OPC_MAD_U24: case OPC_MAD_S24: case OPC_MAD_F16: @@ -689,4 +687,15 @@ static inline bool is_mad(opc_t opc) } } +static inline bool is_madsh(opc_t opc) +{ + switch (opc) { + case OPC_MADSH_U16: + case OPC_MADSH_M16: + return true; + default: + return false; + } +} + #endif /* INSTR_A3XX_H_ */ diff --git a/src/gallium/drivers/freedreno/ir3/ir3.h b/src/gallium/drivers/freedreno/ir3/ir3.h index f424f73bec3..1a8beade25b 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3.h +++ b/src/gallium/drivers/freedreno/ir3/ir3.h @@ -540,10 +540,10 @@ static inline bool reg_gpr(struct ir3_register *r) return true; } -/* some cat2 instructions (ie. those which are not float can embed an +/* some cat2 instructions (ie. those which are not float) can embed an * immediate: */ -static inline bool ir3_cat2_immed(opc_t opc) +static inline bool ir3_cat2_int(opc_t opc) { switch (opc) { case OPC_ADD_U: diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index 3eb85f660e2..77bfbc53e2a 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -115,7 +115,7 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, case 2: valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST; - if (ir3_cat2_immed(instr->opc)) + if (ir3_cat2_int(instr->opc)) valid_flags |= IR3_REG_IMMED; if (flags & ~valid_flags) @@ -199,6 +199,15 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags) static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags); +/* the "plain" MAD's (ie. the ones that don't shift first src prior to + * multiply) can swap their first two srcs if src[0] is !CONST and + * src[1] is CONST: + */ +static bool is_valid_mad(struct ir3_instruction *instr) +{ + return (instr->category == 3) && is_mad(instr->opc); +} + /** * Handle cp for a given src register. This additionally handles * the cases of collapsing immedate/const (which replace the src @@ -255,8 +264,23 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) combine_flags(&new_flags, reg->flags); - if (!valid_flags(instr, n, new_flags)) - return; + if (!valid_flags(instr, n, new_flags)) { + /* special case for "normal" mad instructions, we can + * try swapping the first two args if that fits better. + */ + if ((n == 1) && is_valid_mad(instr) && + !(instr->regs[0 + 1]->flags & IR3_REG_CONST) && + valid_flags(instr, 0, new_flags)) { + /* swap src[0] and src[1]: */ + struct ir3_register *tmp; + tmp = instr->regs[0 + 1]; + instr->regs[0 + 1] = instr->regs[1 + 1]; + instr->regs[1 + 1] = tmp; + n = 0; + } else { + return; + } + } /* Here we handle the special case of mov from * CONST and/or RELATIV. These need to be handled @@ -305,7 +329,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) debug_assert((instr->category == 6) || ((instr->category == 2) && - ir3_cat2_immed(instr->opc))); + ir3_cat2_int(instr->opc))); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); diff --git a/src/gallium/drivers/freedreno/ir3/ir3_depth.c b/src/gallium/drivers/freedreno/ir3/ir3_depth.c index 0cda62bf102..9e1f45dabaf 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_depth.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_depth.c @@ -74,7 +74,8 @@ int ir3_delayslots(struct ir3_instruction *assigner, if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer)) { return 6; } else if ((consumer->category == 3) && - is_mad(consumer->opc) && (n == 2)) { + (is_mad(consumer->opc) || is_madsh(consumer->opc)) && + (n == 2)) { /* special case, 3rd src to cat3 not required on first cycle */ return 1; } else { -- 2.30.2