#include "ir3_compiler.h"
#include "ir3_shader.h"
/*
 * Exchange the values of two lvalues of the same type.  Each argument is
 * expanded more than once, so neither may have side effects.  Relies on
 * the __typeof compiler extension to declare the temporary.
 */
+#define swap(a, b) \
+ do { __typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
/*
* Copy Propagate:
*/
/*
 * Checks visible in this excerpt for whether src n of instr may take on
 * the given register flags (context is elided here, so this list is
 * partial):
 *  - IR3_REG_HIGH is rejected for opc_cat() > 1 when gpu_id >= 600.
 *  - flags is then reduced through cp_flags() before further checks.
 *  - IR3_REG_RELATIV is rejected when gpu_id < 600, or when the SSA src
 *    of regs[n+1] has an address instruction in a different block than
 *    instr.
 *  - per-opcode limits on which src index may be propagated into:
 *    stores reject n == 1, OPC_LDL and OPC_STLW reject n == 0, OPC_STL
 *    rejects n != 2.
 */
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
unsigned flags)
{
+ struct ir3_compiler *compiler = instr->block->shader->compiler;
unsigned valid_flags;
if ((flags & IR3_REG_HIGH) &&
(opc_cat(instr->opc) > 1) &&
- (instr->block->shader->compiler->gpu_id >= 600))
+ (compiler->gpu_id >= 600))
return false;
flags = cp_flags(flags);
(flags & IR3_REG_RELATIV))
return false;
- /* TODO it seems to *mostly* work to cp RELATIV, except we get some
- * intermittent piglit variable-indexing fails. Newer blob driver
- * doesn't seem to cp these. Possibly this is hw workaround? Not
- * sure, but until that is understood better, lets just switch off
- * cp for indirect src's:
- */
- if (flags & IR3_REG_RELATIV)
- return false;
+ if (flags & IR3_REG_RELATIV) {
+ /* TODO need to test on earlier gens.. pretty sure the earlier
+ * problem was just that we didn't check that the src was from
+ * same block (since we can't propagate address register values
+ * across blocks currently)
+ */
+ if (compiler->gpu_id < 600)
+ return false;
+
+ /* NOTE in the special try_swap_mad_two_srcs() case we can be
+ * called on a src that has already had an indirect load folded
+ * in, in which case ssa() returns NULL
+ */
+ struct ir3_instruction *src = ssa(instr->regs[n+1]);
+ if (src && src->address->block != instr->block)
+ return false;
+ }
switch (opc_cat(instr->opc)) {
case 1:
if (is_store(instr) && (n == 1))
return false;
- if ((instr->opc == OPC_LDL) && (n != 1))
+ if ((instr->opc == OPC_LDL) && (n == 0))
return false;
if ((instr->opc == OPC_STL) && (n != 2))
return false;
+ if (instr->opc == OPC_STLW && n == 0)
+ return false;
+
/* disallow CP into anything but the SSBO slot argument for
* atomics:
*/
}
}
+/**
+ * Handles the special case of the 2nd src (n == 1) to "normal" mad
+ * instructions, which cannot reference a constant. See if it is
+ * possible to swap the 1st and 2nd sources.
+ *
+ * The first two srcs (regs[1] and regs[2]) are exchanged up front and
+ * are only left that way when valid_flags() accepts both new_flags in
+ * slot 0 and the displaced register's own flags in slot 1; otherwise
+ * the original order is restored before returning.
+ *
+ * Returns true if the srcs were left swapped, false if instr is not a
+ * mad or the swap was undone.
+ *
+ * NOTE(review): the open-coded check this helper replaces passed
+ * (new_flags & ~IR3_REG_IMMED) to valid_flags(); here new_flags is
+ * passed unmasked -- confirm that dropping the mask is intended.
+ */
+static bool
+try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
+{
+ if (!is_mad(instr->opc))
+ return false;
+
+ /* NOTE: pre-swap first two src's before valid_flags(),
+ * which might try to dereference the n'th src:
+ */
+ swap(instr->regs[0 + 1], instr->regs[1 + 1]);
+
+ bool valid_swap =
+ /* can we propagate mov if we move 2nd src to first? */
+ valid_flags(instr, 0, new_flags) &&
+ /* and does first src fit in second slot? (after the swap above,
+ * regs[1 + 1] holds what used to be the first src)
+ */
+ valid_flags(instr, 1, instr->regs[1 + 1]->flags);
+
+ if (!valid_swap) {
+ /* put things back the way they were: */
+ swap(instr->regs[0 + 1], instr->regs[1 + 1]);
+ } /* otherwise leave things swapped */
+
+ return valid_swap;
+}
+
/**
* Handle cp for a given src register. This additionally handles
* the cases of collapsing immediate/const (which replace the src
* src prior to multiply) can swap their first two srcs if
* src[0] is !CONST and src[1] is CONST:
*/
- if ((n == 1) && is_mad(instr->opc) &&
- !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
- valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) {
- /* swap src[0] and src[1]: */
- struct ir3_register *tmp;
- tmp = instr->regs[0 + 1];
- instr->regs[0 + 1] = instr->regs[1 + 1];
- instr->regs[1 + 1] = tmp;
-
+ if ((n == 1) && try_swap_mad_two_srcs(instr, new_flags)) {
+ /* we swapped, so now we are dealing with 1st src: */
n = 0;
} else {
return;
instr->opc = cond->opc;
instr->flags = cond->flags;
instr->cat2 = cond->cat2;
- instr->address = cond->address;
+ ir3_instr_set_address(instr, cond->address);
instr->regs[1] = cond->regs[1];
instr->regs[2] = cond->regs[2];
instr->barrier_class |= cond->barrier_class;