return false;
}
-static unsigned cp_flags(unsigned flags)
-{
- /* only considering these flags (at least for now): */
- flags &= (IR3_REG_CONST | IR3_REG_IMMED |
- IR3_REG_FNEG | IR3_REG_FABS |
- IR3_REG_SNEG | IR3_REG_SABS |
- IR3_REG_BNOT | IR3_REG_RELATIV);
- return flags;
-}
-
-static bool valid_flags(struct ir3_instruction *instr, unsigned n,
- unsigned flags)
-{
- struct ir3_compiler *compiler = instr->block->shader->compiler;
- unsigned valid_flags;
-
- if ((flags & IR3_REG_HIGH) &&
- (opc_cat(instr->opc) > 1) &&
- (compiler->gpu_id >= 600))
- return false;
-
- flags = cp_flags(flags);
-
- /* If destination is indirect, then source cannot be.. at least
- * I don't think so..
- */
- if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
- (flags & IR3_REG_RELATIV))
- return false;
-
- if (flags & IR3_REG_RELATIV) {
- /* TODO need to test on earlier gens.. pretty sure the earlier
- * problem was just that we didn't check that the src was from
- * same block (since we can't propagate address register values
- * across blocks currently)
- */
- if (compiler->gpu_id < 600)
- return false;
-
- /* NOTE in the special try_swap_mad_two_srcs() case we can be
- * called on a src that has already had an indirect load folded
- * in, in which case ssa() returns NULL
- */
- struct ir3_instruction *src = ssa(instr->regs[n+1]);
- if (src && src->address->block != instr->block)
- return false;
- }
-
- switch (opc_cat(instr->opc)) {
- case 1:
- valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
- if (flags & ~valid_flags)
- return false;
- break;
- case 2:
- valid_flags = ir3_cat2_absneg(instr->opc) |
- IR3_REG_CONST | IR3_REG_RELATIV;
-
- if (ir3_cat2_int(instr->opc))
- valid_flags |= IR3_REG_IMMED;
-
- if (flags & ~valid_flags)
- return false;
-
- if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
- unsigned m = (n ^ 1) + 1;
- /* cannot deal w/ const in both srcs:
- * (note that some cat2 actually only have a single src)
- */
- if (m < instr->regs_count) {
- struct ir3_register *reg = instr->regs[m];
- if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
- return false;
- if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
- return false;
- }
- }
- break;
- case 3:
- valid_flags = ir3_cat3_absneg(instr->opc) |
- IR3_REG_CONST | IR3_REG_RELATIV;
-
- if (flags & ~valid_flags)
- return false;
-
- if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
- /* cannot deal w/ const/relativ in 2nd src: */
- if (n == 1)
- return false;
- }
-
- break;
- case 4:
- /* seems like blob compiler avoids const as src.. */
- /* TODO double check if this is still the case on a4xx */
- if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
- return false;
- if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
- return false;
- break;
- case 5:
- /* no flags allowed */
- if (flags)
- return false;
- break;
- case 6:
- valid_flags = IR3_REG_IMMED;
- if (flags & ~valid_flags)
- return false;
-
- if (flags & IR3_REG_IMMED) {
- /* doesn't seem like we can have immediate src for store
- * instructions:
- *
- * TODO this restriction could also apply to load instructions,
- * but for load instructions this arg is the address (and not
- * really sure any good way to test a hard-coded immed addr src)
- */
- if (is_store(instr) && (n == 1))
- return false;
-
- if ((instr->opc == OPC_LDL) && (n == 0))
- return false;
-
- if ((instr->opc == OPC_STL) && (n != 2))
- return false;
-
- if (instr->opc == OPC_STLW && n == 0)
- return false;
-
- if (instr->opc == OPC_LDLW && n == 0)
- return false;
-
- /* disallow immediates in anything but the SSBO slot argument for
- * cat6 instructions:
- */
- if (is_atomic(instr->opc) && (n != 0))
- return false;
-
- if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
- return false;
-
- if (instr->opc == OPC_STG && (instr->flags & IR3_INSTR_G) && (n != 2))
- return false;
-
- /* as with atomics, these cat6 instrs can only have an immediate
- * for SSBO/IBO slot argument
- */
- switch (instr->opc) {
- case OPC_LDIB:
- case OPC_LDC:
- case OPC_RESINFO:
- if (n != 0)
- return false;
- break;
- default:
- break;
- }
- }
-
- break;
- }
-
- return true;
-}
-
/* propagate register flags from src to dst.. negates need special
* handling to cancel each other out.
*/
*dstflags &= ~IR3_REG_SABS;
}
-static struct ir3_register *
-lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags, bool f_opcode)
+/* Tries lowering an immediate register argument to a const buffer access by
+ * adding its value to the list of immediates to be pushed to the const buffer
+ * when switching to this shader.  Returns true if the source was rewritten.
+ */
+static bool
+lower_immed(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr, unsigned n,
+ struct ir3_register *reg, unsigned new_flags)
{
- unsigned swiz, idx, i;
+ if (!(new_flags & IR3_REG_IMMED))
+ return false;
+
+ new_flags &= ~IR3_REG_IMMED;
+ new_flags |= IR3_REG_CONST;
+
+ if (!ir3_valid_flags(instr, n, new_flags))
+ return false;
reg = ir3_reg_clone(ctx->shader, reg);
/* Half constant registers seems to handle only 32-bit values
* within floating-point opcodes. So convert back to 32-bit values.
*/
+ bool f_opcode = (is_cat2_float(instr->opc) ||
+ is_cat3_float(instr->opc)) ? true : false;
if (f_opcode && (new_flags & IR3_REG_HALF))
reg->uim_val = fui(_mesa_half_to_float(reg->uim_val));
new_flags &= ~IR3_REG_FNEG;
}
- /* Reallocate for 4 more elements whenever it's necessary */
- struct ir3_const_state *const_state = &ctx->so->shader->const_state;
- if (const_state->immediate_idx == const_state->immediates_size * 4) {
+ /* Reallocate for 4 more elements whenever it's necessary. Note that ir3
+ * printing relies on having groups of 4 dwords, so we fill the unused
+ * slots with a dummy value.
+ */
+ struct ir3_const_state *const_state = ir3_const_state(ctx->so);
+ if (const_state->immediates_count == const_state->immediates_size) {
+ const_state->immediates = rerzalloc(const_state,
+ const_state->immediates,
+ __typeof__(const_state->immediates[0]),
+ const_state->immediates_size,
+ const_state->immediates_size + 4);
const_state->immediates_size += 4;
- const_state->immediates = realloc (const_state->immediates,
- const_state->immediates_size * sizeof(const_state->immediates[0]));
- for (int i = const_state->immediate_idx; i < const_state->immediates_size * 4; i++)
- const_state->immediates[i / 4].val[i % 4] = 0xd0d0d0d0;
+ for (int i = const_state->immediates_count; i < const_state->immediates_size; i++)
+ const_state->immediates[i] = 0xd0d0d0d0;
}
- for (i = 0; i < const_state->immediate_idx; i++) {
- swiz = i % 4;
- idx = i / 4;
-
- if (const_state->immediates[idx].val[swiz] == reg->uim_val) {
+ int i;
+ for (i = 0; i < const_state->immediates_count; i++) {
+ if (const_state->immediates[i] == reg->uim_val)
break;
- }
}
- if (i == const_state->immediate_idx) {
- /* need to generate a new immediate: */
- swiz = i % 4;
- idx = i / 4;
+ if (i == const_state->immediates_count) {
+ /* Add on a new immediate to be pushed, if we have space left in the
+ * constbuf.
+ */
+ if (const_state->offsets.immediate + const_state->immediates_count / 4 >=
+ ir3_max_const(ctx->so))
+ return false;
- const_state->immediates[idx].val[swiz] = reg->uim_val;
- const_state->immediates_count = idx + 1;
- const_state->immediate_idx++;
+ const_state->immediates[i] = reg->uim_val;
+ const_state->immediates_count++;
}
- new_flags &= ~IR3_REG_IMMED;
- new_flags |= IR3_REG_CONST;
reg->flags = new_flags;
reg->num = i + (4 * const_state->offsets.immediate);
- return reg;
+ instr->regs[n + 1] = reg;
+
+ return true;
}
static void
bool valid_swap =
/* can we propagate mov if we move 2nd src to first? */
- valid_flags(instr, 0, new_flags) &&
+ ir3_valid_flags(instr, 0, new_flags) &&
/* and does first src fit in second slot? */
- valid_flags(instr, 1, instr->regs[1 + 1]->flags);
+ ir3_valid_flags(instr, 1, instr->regs[1 + 1]->flags);
if (!valid_swap) {
/* put things back the way they were: */
combine_flags(&new_flags, src);
- if (valid_flags(instr, n, new_flags)) {
+ if (ir3_valid_flags(instr, n, new_flags)) {
if (new_flags & IR3_REG_ARRAY) {
debug_assert(!(reg->flags & IR3_REG_ARRAY));
reg->array = src_reg->array;
combine_flags(&new_flags, src);
- if (!valid_flags(instr, n, new_flags)) {
+ if (!ir3_valid_flags(instr, n, new_flags)) {
/* See if lowering an immediate to const would help. */
- if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
- bool f_opcode = (is_cat2_float(instr->opc) ||
- is_cat3_float(instr->opc)) ? true : false;
-
- debug_assert(new_flags & IR3_REG_IMMED);
-
- instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
+ if (lower_immed(ctx, instr, n, src_reg, new_flags))
return true;
- }
/* special case for "normal" mad instructions, we can
* try swapping the first two args if that fits better.
iim_val = ~iim_val;
/* other than category 1 (mov) we can only encode up to 10 bits: */
- if (valid_flags(instr, n, new_flags) &&
+ if (ir3_valid_flags(instr, n, new_flags) &&
((instr->opc == OPC_MOV) ||
!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)))) {
new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
instr->regs[n+1] = src_reg;
return true;
- } else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
- bool f_opcode = (is_cat2_float(instr->opc) ||
- is_cat3_float(instr->opc)) ? true : false;
-
- /* See if lowering an immediate to const would help. */
- instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags, f_opcode);
-
+ } else if (lower_immed(ctx, instr, n, src_reg, new_flags)) {
+ /* Fell back to loading the immediate as a const */
return true;
}
}