From a60d48a863d8e9d459562b874c463cd798f47a87 Mon Sep 17 00:00:00 2001
From: Rob Clark
Date: Thu, 14 May 2020 09:31:32 -0700
Subject: [PATCH] freedreno/ir3/cp: extract valid_flags

We'll also need this in the postsched-cp pass.

Signed-off-by: Rob Clark
Part-of:
---
 src/freedreno/ir3/ir3.c    | 170 +++++++++++++++++++++++++++++++++++
 src/freedreno/ir3/ir3.h    |   2 +
 src/freedreno/ir3/ir3_cp.c | 180 ++-----------------------------------
 3 files changed, 178 insertions(+), 174 deletions(-)

diff --git a/src/freedreno/ir3/ir3.c b/src/freedreno/ir3/ir3.c
index 5f4b86d610d..463c7664332 100644
--- a/src/freedreno/ir3/ir3.c
+++ b/src/freedreno/ir3/ir3.c
@@ -1285,3 +1285,173 @@ ir3_fixup_src_type(struct ir3_instruction *instr)
 		break;
 	}
 }
+
+static unsigned
+cp_flags(unsigned flags)
+{
+	/* only considering these flags (at least for now): */
+	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
+			IR3_REG_FNEG | IR3_REG_FABS |
+			IR3_REG_SNEG | IR3_REG_SABS |
+			IR3_REG_BNOT | IR3_REG_RELATIV);
+	return flags;
+}
+
+bool
+ir3_valid_flags(struct ir3_instruction *instr, unsigned n,
+		unsigned flags)
+{
+	struct ir3_compiler *compiler = instr->block->shader->compiler;
+	unsigned valid_flags;
+
+	if ((flags & IR3_REG_HIGH) &&
+			(opc_cat(instr->opc) > 1) &&
+			(compiler->gpu_id >= 600))
+		return false;
+
+	flags = cp_flags(flags);
+
+	/* If destination is indirect, then source cannot be.. at least
+	 * I don't think so..
+	 */
+	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
+			(flags & IR3_REG_RELATIV))
+		return false;
+
+	if (flags & IR3_REG_RELATIV) {
+		/* TODO need to test on earlier gens.. pretty sure the earlier
+		 * problem was just that we didn't check that the src was from
+		 * same block (since we can't propagate address register values
+		 * across blocks currently)
+		 */
+		if (compiler->gpu_id < 600)
+			return false;
+
+		/* NOTE in the special try_swap_mad_two_srcs() case we can be
+		 * called on a src that has already had an indirect load folded
+		 * in, in which case ssa() returns NULL
+		 */
+		if (instr->regs[n+1]->flags & IR3_REG_SSA) {
+			struct ir3_instruction *src = ssa(instr->regs[n+1]);
+			if (src->address->block != instr->block)
+				return false;
+		}
+	}
+
+	switch (opc_cat(instr->opc)) {
+	case 1:
+		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
+		if (flags & ~valid_flags)
+			return false;
+		break;
+	case 2:
+		valid_flags = ir3_cat2_absneg(instr->opc) |
+				IR3_REG_CONST | IR3_REG_RELATIV;
+
+		if (ir3_cat2_int(instr->opc))
+			valid_flags |= IR3_REG_IMMED;
+
+		if (flags & ~valid_flags)
+			return false;
+
+		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
+			unsigned m = (n ^ 1) + 1;
+			/* cannot deal w/ const in both srcs:
+			 * (note that some cat2 actually only have a single src)
+			 */
+			if (m < instr->regs_count) {
+				struct ir3_register *reg = instr->regs[m];
+				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
+					return false;
+				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
+					return false;
+			}
+		}
+		break;
+	case 3:
+		valid_flags = ir3_cat3_absneg(instr->opc) |
+				IR3_REG_CONST | IR3_REG_RELATIV;
+
+		if (flags & ~valid_flags)
+			return false;
+
+		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
+			/* cannot deal w/ const/relativ in 2nd src: */
+			if (n == 1)
+				return false;
+		}
+
+		break;
+	case 4:
+		/* seems like blob compiler avoids const as src.. */
+		/* TODO double check if this is still the case on a4xx */
+		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
+			return false;
+		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
+			return false;
+		break;
+	case 5:
+		/* no flags allowed */
+		if (flags)
+			return false;
+		break;
+	case 6:
+		valid_flags = IR3_REG_IMMED;
+		if (flags & ~valid_flags)
+			return false;
+
+		if (flags & IR3_REG_IMMED) {
+			/* doesn't seem like we can have immediate src for store
+			 * instructions:
+			 *
+			 * TODO this restriction could also apply to load instructions,
+			 * but for load instructions this arg is the address (and not
+			 * really sure any good way to test a hard-coded immed addr src)
+			 */
+			if (is_store(instr) && (n == 1))
+				return false;
+
+			if ((instr->opc == OPC_LDL) && (n == 0))
+				return false;
+
+			if ((instr->opc == OPC_STL) && (n != 2))
+				return false;
+
+			if (instr->opc == OPC_STLW && n == 0)
+				return false;
+
+			if (instr->opc == OPC_LDLW && n == 0)
+				return false;
+
+			/* disallow immediates in anything but the SSBO slot argument for
+			 * cat6 instructions:
+			 */
+			if (is_atomic(instr->opc) && (n != 0))
+				return false;
+
+			if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
+				return false;
+
+			if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
+				return false;
+
+			/* as with atomics, these cat6 instrs can only have an immediate
+			 * for SSBO/IBO slot argument
+			 */
+			switch (instr->opc) {
+			case OPC_LDIB:
+			case OPC_LDC:
+			case OPC_RESINFO:
+				if (n != 0)
+					return false;
+				break;
+			default:
+				break;
+			}
+		}
+
+		break;
+	}
+
+	return true;
+}
diff --git a/src/freedreno/ir3/ir3.h b/src/freedreno/ir3/ir3.h
index 90086294de8..1d37afe0ad6 100644
--- a/src/freedreno/ir3/ir3.h
+++ b/src/freedreno/ir3/ir3.h
@@ -620,6 +620,8 @@ void ir3_find_ssa_uses(struct ir3 *ir, void *mem_ctx, bool falsedeps);
 void ir3_set_dst_type(struct ir3_instruction *instr, bool half);
 void ir3_fixup_src_type(struct ir3_instruction *instr);
 
+bool ir3_valid_flags(struct ir3_instruction *instr, unsigned n, unsigned flags);
+
 #include "util/set.h"
 #define foreach_ssa_use(__use, __instr) \
 	for (struct ir3_instruction *__use = (void *)~0; \
diff --git a/src/freedreno/ir3/ir3_cp.c b/src/freedreno/ir3/ir3_cp.c
index b95827e97e9..ed116a9f3b7 100644
--- a/src/freedreno/ir3/ir3_cp.c
+++ b/src/freedreno/ir3/ir3_cp.c
@@ -100,174 +100,6 @@ static bool is_eligible_mov(struct ir3_instruction *instr,
 	return false;
 }
 
-static unsigned cp_flags(unsigned flags)
-{
-	/* only considering these flags (at least for now): */
-	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
-			IR3_REG_FNEG | IR3_REG_FABS |
-			IR3_REG_SNEG | IR3_REG_SABS |
-			IR3_REG_BNOT | IR3_REG_RELATIV);
-	return flags;
-}
-
-static bool valid_flags(struct ir3_instruction *instr, unsigned n,
-		unsigned flags)
-{
-	struct ir3_compiler *compiler = instr->block->shader->compiler;
-	unsigned valid_flags;
-
-	if ((flags & IR3_REG_HIGH) &&
-			(opc_cat(instr->opc) > 1) &&
-			(compiler->gpu_id >= 600))
-		return false;
-
-	flags = cp_flags(flags);
-
-	/* If destination is indirect, then source cannot be.. at least
-	 * I don't think so..
-	 */
-	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
-			(flags & IR3_REG_RELATIV))
-		return false;
-
-	if (flags & IR3_REG_RELATIV) {
-		/* TODO need to test on earlier gens.. pretty sure the earlier
-		 * problem was just that we didn't check that the src was from
-		 * same block (since we can't propagate address register values
-		 * across blocks currently)
-		 */
-		if (compiler->gpu_id < 600)
-			return false;
-
-		/* NOTE in the special try_swap_mad_two_srcs() case we can be
-		 * called on a src that has already had an indirect load folded
-		 * in, in which case ssa() returns NULL
-		 */
-		if (instr->regs[n+1]->flags & IR3_REG_SSA) {
-			struct ir3_instruction *src = ssa(instr->regs[n+1]);
-			if (src->address->block != instr->block)
-				return false;
-		}
-	}
-
-	switch (opc_cat(instr->opc)) {
-	case 1:
-		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
-		if (flags & ~valid_flags)
-			return false;
-		break;
-	case 2:
-		valid_flags = ir3_cat2_absneg(instr->opc) |
-				IR3_REG_CONST | IR3_REG_RELATIV;
-
-		if (ir3_cat2_int(instr->opc))
-			valid_flags |= IR3_REG_IMMED;
-
-		if (flags & ~valid_flags)
-			return false;
-
-		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
-			unsigned m = (n ^ 1) + 1;
-			/* cannot deal w/ const in both srcs:
-			 * (note that some cat2 actually only have a single src)
-			 */
-			if (m < instr->regs_count) {
-				struct ir3_register *reg = instr->regs[m];
-				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
-					return false;
-				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
-					return false;
-			}
-		}
-		break;
-	case 3:
-		valid_flags = ir3_cat3_absneg(instr->opc) |
-				IR3_REG_CONST | IR3_REG_RELATIV;
-
-		if (flags & ~valid_flags)
-			return false;
-
-		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
-			/* cannot deal w/ const/relativ in 2nd src: */
-			if (n == 1)
-				return false;
-		}
-
-		break;
-	case 4:
-		/* seems like blob compiler avoids const as src.. */
-		/* TODO double check if this is still the case on a4xx */
-		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
-			return false;
-		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
-			return false;
-		break;
-	case 5:
-		/* no flags allowed */
-		if (flags)
-			return false;
-		break;
-	case 6:
-		valid_flags = IR3_REG_IMMED;
-		if (flags & ~valid_flags)
-			return false;
-
-		if (flags & IR3_REG_IMMED) {
-			/* doesn't seem like we can have immediate src for store
-			 * instructions:
-			 *
-			 * TODO this restriction could also apply to load instructions,
-			 * but for load instructions this arg is the address (and not
-			 * really sure any good way to test a hard-coded immed addr src)
-			 */
-			if (is_store(instr) && (n == 1))
-				return false;
-
-			if ((instr->opc == OPC_LDL) && (n == 0))
-				return false;
-
-			if ((instr->opc == OPC_STL) && (n != 2))
-				return false;
-
-			if (instr->opc == OPC_STLW && n == 0)
-				return false;
-
-			if (instr->opc == OPC_LDLW && n == 0)
-				return false;
-
-			/* disallow immediates in anything but the SSBO slot argument for
-			 * cat6 instructions:
-			 */
-			if (is_atomic(instr->opc) && (n != 0))
-				return false;
-
-			if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
-				return false;
-
-			if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
-				return false;
-
-			/* as with atomics, these cat6 instrs can only have an immediate
-			 * for SSBO/IBO slot argument
-			 */
-			switch (instr->opc) {
-			case OPC_LDIB:
-			case OPC_LDC:
-			case OPC_RESINFO:
-				if (n != 0)
-					return false;
-				break;
-			default:
-				break;
-			}
-		}
-
-		break;
-	}
-
-	return true;
-}
-
 /* propagate register flags from src to dst.. negates need special
  * handling to cancel each other out.
  */
@@ -326,7 +158,7 @@ lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
 	new_flags &= ~IR3_REG_IMMED;
 	new_flags |= IR3_REG_CONST;
 
-	if (!valid_flags(instr, n, new_flags))
+	if (!ir3_valid_flags(instr, n, new_flags))
 		return false;
 
 	unsigned swiz, idx, i;
@@ -455,9 +287,9 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
 
 	bool valid_swap =
 		/* can we propagate mov if we move 2nd src to first? */
-		valid_flags(instr, 0, new_flags) &&
+		ir3_valid_flags(instr, 0, new_flags) &&
 		/* and does first src fit in second slot? */
-		valid_flags(instr, 1, instr->regs[1 + 1]->flags);
+		ir3_valid_flags(instr, 1, instr->regs[1 + 1]->flags);
 
 	if (!valid_swap) {
 		/* put things back the way they were: */
@@ -487,7 +319,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
 
 		combine_flags(&new_flags, src);
 
-		if (valid_flags(instr, n, new_flags)) {
+		if (ir3_valid_flags(instr, n, new_flags)) {
 			if (new_flags & IR3_REG_ARRAY) {
 				debug_assert(!(reg->flags & IR3_REG_ARRAY));
 				reg->array = src_reg->array;
@@ -512,7 +344,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
 
 		combine_flags(&new_flags, src);
 
-		if (!valid_flags(instr, n, new_flags)) {
+		if (!ir3_valid_flags(instr, n, new_flags)) {
 			/* See if lowering an immediate to const would help. */
 			if (lower_immed(ctx, instr, n, src_reg, new_flags))
 				return true;
@@ -613,7 +445,7 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
 				iim_val = ~iim_val;
 
 			/* other than category 1 (mov) we can only encode up to 10 bits: */
-			if (valid_flags(instr, n, new_flags) &&
+			if (ir3_valid_flags(instr, n, new_flags) &&
 					((instr->opc == OPC_MOV) ||
 					 !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)))) {
 				new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
-- 
2.30.2