X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fdrivers%2Ffreedreno%2Fir3%2Fir3_cp.c;h=8c907eb5a534d9be15b7d6849d6793b1c78d1741;hb=33f5f63b8fc157fa2fd2a142783f31db987c9d55;hp=f4c825b2ab602b6a3fe93ac4e5da5fea200c6bf7;hpb=6b9f5cd5f7b25e9e03104fe279df74817f69fe87;p=mesa.git diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cp.c b/src/gallium/drivers/freedreno/ir3/ir3_cp.c index f4c825b2ab6..8c907eb5a53 100644 --- a/src/gallium/drivers/freedreno/ir3/ir3_cp.c +++ b/src/gallium/drivers/freedreno/ir3/ir3_cp.c @@ -29,11 +29,18 @@ #include "freedreno_util.h" #include "ir3.h" +#include "ir3_shader.h" /* * Copy Propagate: */ +struct ir3_cp_ctx { + struct ir3 *shader; + struct ir3_shader_variant *so; + unsigned immediate_idx; +}; + /* is it a type preserving mov, with ok flags? */ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) { @@ -41,25 +48,31 @@ static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags) struct ir3_register *dst = instr->regs[0]; struct ir3_register *src = instr->regs[1]; struct ir3_instruction *src_instr = ssa(src); + + /* only if mov src is SSA (not const/immed): */ + if (!src_instr) + return false; + + /* no indirect: */ if (dst->flags & IR3_REG_RELATIV) return false; if (src->flags & IR3_REG_RELATIV) return false; + if (!allow_flags) if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG | IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT)) return false; - if (!src_instr) - return false; + /* TODO: remove this hack: */ - if (is_meta(src_instr) && (src_instr->opc == OPC_META_FO)) + if (src_instr->opc == OPC_META_FO) return false; /* TODO: we currently don't handle left/right neighbors * very well when inserting parallel-copies into phi.. * to avoid problems don't eliminate a mov coming out * of phi.. */ - if (is_meta(src_instr) && (src_instr->opc == OPC_META_PHI)) + if (src_instr->opc == OPC_META_PHI) return false; return true; } @@ -82,20 +95,25 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, unsigned valid_flags; flags = cp_flags(flags); - /* clear flags that are 'ok' */ - switch (instr->category) { + /* If destination is indirect, then source cannot be.. at least + * I don't think so.. + */ + if ((instr->regs[0]->flags & IR3_REG_RELATIV) && + (flags & IR3_REG_RELATIV)) + return false; + + /* TODO it seems to *mostly* work to cp RELATIV, except we get some + * intermittent piglit variable-indexing fails. Newer blob driver + * doesn't seem to cp these. Possibly this is hw workaround? Not + * sure, but until that is understood better, lets just switch off + * cp for indirect src's: + */ + if (flags & IR3_REG_RELATIV) + return false; + + switch (opc_cat(instr->opc)) { case 1: - valid_flags = IR3_REG_IMMED | IR3_REG_RELATIV; - if (flags & ~valid_flags) - return false; - break; - case 5: - /* no flags allowed */ - if (flags) - return false; - break; - case 6: - valid_flags = IR3_REG_IMMED; + valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV; if (flags & ~valid_flags) return false; break; @@ -150,11 +168,40 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, case 4: /* seems like blob compiler avoids const as src.. */ /* TODO double check if this is still the case on a4xx */ - if (flags & IR3_REG_CONST) + if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) return false; if (flags & (IR3_REG_SABS | IR3_REG_SNEG)) return false; break; + case 5: + /* no flags allowed */ + if (flags) + return false; + break; + case 6: + valid_flags = IR3_REG_IMMED; + if (flags & ~valid_flags) + return false; + + if (flags & IR3_REG_IMMED) { + /* doesn't seem like we can have immediate src for store + * instructions: + * + * TODO this restriction could also apply to load instructions, + * but for load instructions this arg is the address (and not + * really sure any good way to test a hard-coded immed addr src) + */ + if (is_store(instr) && (n == 1)) + return false; + + /* disallow CP into anything but the SSBO slot argument for + * atomics: + */ + if (is_atomic(instr->opc) && (n != 0)) + return false; + } + + break; } return true; @@ -163,8 +210,10 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n, /* propagate register flags from src to dst.. negates need special * handling to cancel each other out. */ -static void combine_flags(unsigned *dstflags, unsigned srcflags) +static void combine_flags(unsigned *dstflags, struct ir3_instruction *src) { + unsigned srcflags = src->regs[1]->flags; + /* if what we are combining into already has (abs) flags, * we can drop (neg) from src: */ @@ -183,17 +232,78 @@ static void combine_flags(unsigned *dstflags, unsigned srcflags) *dstflags ^= IR3_REG_SNEG; if (srcflags & IR3_REG_BNOT) *dstflags ^= IR3_REG_BNOT; -} -static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags); + *dstflags &= ~IR3_REG_SSA; + *dstflags |= srcflags & IR3_REG_SSA; + *dstflags |= srcflags & IR3_REG_CONST; + *dstflags |= srcflags & IR3_REG_IMMED; + *dstflags |= srcflags & IR3_REG_RELATIV; + *dstflags |= srcflags & IR3_REG_ARRAY; + + /* if src of the src is boolean we can drop the (abs) since we know + * the source value is already a postitive integer. This cleans + * up the absnegs that get inserted when converting between nir and + * native boolean (see ir3_b2n/n2b) + */ + struct ir3_instruction *srcsrc = ssa(src->regs[1]); + if (srcsrc && is_bool(srcsrc)) + *dstflags &= ~IR3_REG_SABS; +} -/* the "plain" MAD's (ie. the ones that don't shift first src prior to - * multiply) can swap their first two srcs if src[0] is !CONST and - * src[1] is CONST: - */ -static bool is_valid_mad(struct ir3_instruction *instr) +static struct ir3_register * +lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags) { - return (instr->category == 3) && is_mad(instr->opc); + unsigned swiz, idx, i; + + reg = ir3_reg_clone(ctx->shader, reg); + + /* in some cases, there are restrictions on (abs)/(neg) plus const.. + * so just evaluate those and clear the flags: + */ + if (new_flags & IR3_REG_SABS) { + reg->iim_val = abs(reg->iim_val); + new_flags &= ~IR3_REG_SABS; + } + + if (new_flags & IR3_REG_FABS) { + reg->fim_val = fabs(reg->fim_val); + new_flags &= ~IR3_REG_FABS; + } + + if (new_flags & IR3_REG_SNEG) { + reg->iim_val = -reg->iim_val; + new_flags &= ~IR3_REG_SNEG; + } + + if (new_flags & IR3_REG_FNEG) { + reg->fim_val = -reg->fim_val; + new_flags &= ~IR3_REG_FNEG; + } + + for (i = 0; i < ctx->immediate_idx; i++) { + swiz = i % 4; + idx = i / 4; + + if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) { + break; + } + } + + if (i == ctx->immediate_idx) { + /* need to generate a new immediate: */ + swiz = i % 4; + idx = i / 4; + ctx->so->immediates[idx].val[swiz] = reg->uim_val; + ctx->so->immediates_count = idx + 1; + ctx->immediate_idx++; + } + + new_flags &= ~IR3_REG_IMMED; + new_flags |= IR3_REG_CONST; + reg->flags = new_flags; + reg->num = i + (4 * ctx->so->constbase.immediate); + + return reg; } /** @@ -204,60 +314,61 @@ static bool is_valid_mad(struct ir3_instruction *instr) * instruction). */ static void -reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) +reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, + struct ir3_register *reg, unsigned n) { - unsigned src_flags = 0, new_flags; - struct ir3_instruction *src_instr; + struct ir3_instruction *src = ssa(reg); - if (is_meta(instr)) { - /* meta instructions cannot fold up register - * flags.. they are usually src for texture - * fetch, etc, where we cannot specify abs/neg - */ - reg->instr = instr_cp(reg->instr, NULL); + /* don't propagate copies into a PHI, since we don't know if the + * src block executed: + */ + if (instr->opc == OPC_META_PHI) return; - } - src_instr = instr_cp(reg->instr, &src_flags); + if (is_eligible_mov(src, true)) { + /* simple case, no immed/const/relativ, only mov's w/ ssa src: */ + struct ir3_register *src_reg = src->regs[1]; + unsigned new_flags = reg->flags; - new_flags = reg->flags; - combine_flags(&new_flags, src_flags); + combine_flags(&new_flags, src); - reg->flags = new_flags; - reg->instr = src_instr; - - if (!valid_flags(instr, n, reg->flags)) { - /* insert an absneg.f */ - if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT)) { - debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS))); - reg->instr = ir3_ABSNEG_S(instr->block, - reg->instr, cp_flags(src_flags)); - } else { - debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS | IR3_REG_BNOT))); - reg->instr = ir3_ABSNEG_F(instr->block, - reg->instr, cp_flags(src_flags)); + if (valid_flags(instr, n, new_flags)) { + if (new_flags & IR3_REG_ARRAY) { + debug_assert(!(reg->flags & IR3_REG_ARRAY)); + reg->array = src_reg->array; + } + reg->flags = new_flags; + reg->instr = ssa(src_reg); } - reg->flags &= ~cp_flags(src_flags); - debug_assert(valid_flags(instr, n, reg->flags)); - /* send it through instr_cp() again since - * the absneg src might be a mov from const - * that could be cleaned up: - */ - reg->instr = instr_cp(reg->instr, NULL); - return; - } - if (is_same_type_mov(reg->instr)) { - struct ir3_register *src_reg = reg->instr->regs[1]; - unsigned new_flags = src_reg->flags; + src = ssa(reg); /* could be null for IR3_REG_ARRAY case */ + if (!src) + return; + } else if (is_same_type_mov(src) && + /* cannot collapse const/immed/etc into meta instrs: */ + !is_meta(instr)) { + /* immed/const/etc cases, which require some special handling: */ + struct ir3_register *src_reg = src->regs[1]; + unsigned new_flags = reg->flags; - combine_flags(&new_flags, reg->flags); + combine_flags(&new_flags, src); if (!valid_flags(instr, n, new_flags)) { + /* See if lowering an immediate to const would help. */ + if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + debug_assert(new_flags & IR3_REG_IMMED); + instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags); + return; + } + /* special case for "normal" mad instructions, we can * try swapping the first two args if that fits better. + * + * the "plain" MAD's (ie. the ones that don't shift first + * src prior to multiply) can swap their first two srcs if + * src[0] is !CONST and src[1] is CONST: */ - if ((n == 1) && is_valid_mad(instr) && + if ((n == 1) && is_mad(instr->opc) && !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) && valid_flags(instr, 0, new_flags)) { /* swap src[0] and src[1]: */ @@ -287,6 +398,16 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) conflicts(instr->address, reg->instr->address)) return; + /* This seems to be a hw bug, or something where the timings + * just somehow don't work out. This restriction may only + * apply if the first src is also CONST. + */ + if ((opc_cat(instr->opc) == 3) && (n == 2) && + (src_reg->flags & IR3_REG_RELATIV) && + (src_reg->array.offset == 0)) + return; + + src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; instr->regs[n+1] = src_reg; @@ -298,6 +419,7 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if ((src_reg->flags & IR3_REG_RELATIV) && !conflicts(instr->address, reg->instr->address)) { + src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; instr->regs[n+1] = src_reg; ir3_instr_set_address(instr, reg->instr->address); @@ -316,10 +438,9 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if (src_reg->flags & IR3_REG_IMMED) { int32_t iim_val = src_reg->iim_val; - debug_assert((instr->category == 1) || - (instr->category == 6) || - ((instr->category == 2) && - ir3_cat2_int(instr->opc))); + debug_assert((opc_cat(instr->opc) == 1) || + (opc_cat(instr->opc) == 6) || + ir3_cat2_int(instr->opc)); if (new_flags & IR3_REG_SABS) iim_val = abs(iim_val); @@ -330,11 +451,17 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) if (new_flags & IR3_REG_BNOT) iim_val = ~iim_val; - if (!(iim_val & ~0x3ff)) { + /* other than category 1 (mov) we can only encode up to 10 bits: */ + if ((instr->opc == OPC_MOV) || + !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) { new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT); + src_reg = ir3_reg_clone(instr->block->shader, src_reg); src_reg->flags = new_flags; src_reg->iim_val = iim_val; instr->regs[n+1] = src_reg; + } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) { + /* See if lowering an immediate to const would help. */ + instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags); } return; @@ -342,74 +469,127 @@ reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n) } } -/** - * Given an SSA src (instruction), return the one with extraneous - * mov's removed, ie, for (to copy NIR syntax): - * - * vec1 ssa1 = fadd , - * vec1 ssa2 = fabs ssa1 - * vec1 ssa3 = fneg ssa1 - * - * then calling instr_cp(ssa3, &flags) would return ssa1 with - * (IR3_REG_ABS | IR3_REG_NEGATE) in flags. If flags is NULL, - * then disallow eliminating copies which would require flag - * propagation (for example, we cannot propagate abs/neg into - * an output). +/* Handle special case of eliminating output mov, and similar cases where + * there isn't a normal "consuming" instruction. In this case we cannot + * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot + * be eliminated) */ static struct ir3_instruction * -instr_cp(struct ir3_instruction *instr, unsigned *flags) +eliminate_output_mov(struct ir3_instruction *instr) { - struct ir3_register *reg; - - if (is_eligible_mov(instr, !!flags)) { + if (is_eligible_mov(instr, false)) { struct ir3_register *reg = instr->regs[1]; - struct ir3_instruction *src_instr = ssa(reg); - if (flags) - combine_flags(flags, reg->flags); - return instr_cp(src_instr, flags); + if (!(reg->flags & IR3_REG_ARRAY)) { + struct ir3_instruction *src_instr = ssa(reg); + debug_assert(src_instr); + return src_instr; + } } + return instr; +} + +/** + * Find instruction src's which are mov's that can be collapsed, replacing + * the mov dst with the mov src + */ +static void +instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr) +{ + struct ir3_register *reg; + + if (instr->regs_count == 0) + return; - /* Check termination condition before walking children (rather - * than before checking eligible-mov). A mov instruction may - * appear as ssa-src for multiple other instructions, and we - * want to consider it for removal for each, rather than just - * the first one. (But regardless of how many places it shows - * up as a src, we only need to recursively walk the children - * once.) - */ if (ir3_instr_check_mark(instr)) - return instr; + return; /* walk down the graph from each src: */ foreach_src_n(reg, n, instr) { - if (!(reg->flags & IR3_REG_SSA)) + struct ir3_instruction *src = ssa(reg); + + if (!src) + continue; + + instr_cp(ctx, src); + + /* TODO non-indirect access we could figure out which register + * we actually want and allow cp.. + */ + if (reg->flags & IR3_REG_ARRAY) continue; - reg_cp(instr, reg, n); + reg_cp(ctx, instr, reg, n); } - if (instr->address) - ir3_instr_set_address(instr, instr_cp(instr->address, NULL)); + if (instr->regs[0]->flags & IR3_REG_ARRAY) { + struct ir3_instruction *src = ssa(instr->regs[0]); + if (src) + instr_cp(ctx, src); + } - return instr; + if (instr->address) { + instr_cp(ctx, instr->address); + ir3_instr_set_address(instr, eliminate_output_mov(instr->address)); + } + + /* we can end up with extra cmps.s from frontend, which uses a + * + * cmps.s p0.x, cond, 0 + * + * as a way to mov into the predicate register. But frequently 'cond' + * is itself a cmps.s/cmps.f/cmps.u. So detect this special case and + * just re-write the instruction writing predicate register to get rid + * of the double cmps. + */ + if ((instr->opc == OPC_CMPS_S) && + (instr->regs[0]->num == regid(REG_P0, 0)) && + ssa(instr->regs[1]) && + (instr->regs[2]->flags & IR3_REG_IMMED) && + (instr->regs[2]->iim_val == 0)) { + struct ir3_instruction *cond = ssa(instr->regs[1]); + switch (cond->opc) { + case OPC_CMPS_S: + case OPC_CMPS_F: + case OPC_CMPS_U: + instr->opc = cond->opc; + instr->flags = cond->flags; + instr->cat2 = cond->cat2; + instr->address = cond->address; + instr->regs[1] = cond->regs[1]; + instr->regs[2] = cond->regs[2]; + break; + default: + break; + } + } } void -ir3_cp(struct ir3 *ir) +ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so) { + struct ir3_cp_ctx ctx = { + .shader = ir, + .so = so, + }; + ir3_clear_mark(ir); for (unsigned i = 0; i < ir->noutputs; i++) { if (ir->outputs[i]) { - struct ir3_instruction *out = - instr_cp(ir->outputs[i], NULL); - - ir->outputs[i] = out; + instr_cp(&ctx, ir->outputs[i]); + ir->outputs[i] = eliminate_output_mov(ir->outputs[i]); } } list_for_each_entry (struct ir3_block, block, &ir->block_list, node) { - if (block->condition) - block->condition = instr_cp(block->condition, NULL); + if (block->condition) { + instr_cp(&ctx, block->condition); + block->condition = eliminate_output_mov(block->condition); + } + + for (unsigned i = 0; i < block->keeps_count; i++) { + instr_cp(&ctx, block->keeps[i]); + block->keeps[i] = eliminate_output_mov(block->keeps[i]); + } } }