freedreno/ir3: Add new LDLW/STLW instructions
[mesa.git] / src / freedreno / ir3 / ir3_cp.c
index 9bd97b690e34fa095441f17009370ec0b7024e12..d60c1aae49cbae9bd844e11667cd8591c838b523 100644 (file)
@@ -32,6 +32,9 @@
 #include "ir3_compiler.h"
 #include "ir3_shader.h"
 
+#define swap(a, b) /* exchange lvalues a and b; each is evaluated twice */ \
+       do { __typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
 /*
  * Copy Propagate:
  */
@@ -89,11 +92,12 @@ static unsigned cp_flags(unsigned flags)
 static bool valid_flags(struct ir3_instruction *instr, unsigned n,
                unsigned flags)
 {
+       struct ir3_compiler *compiler = instr->block->shader->compiler;
        unsigned valid_flags;
 
        if ((flags & IR3_REG_HIGH) &&
                        (opc_cat(instr->opc) > 1) &&
-                       (instr->block->shader->compiler->gpu_id >= 600))
+                       (compiler->gpu_id >= 600))
                return false;
 
        flags = cp_flags(flags);
@@ -105,14 +109,23 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
                        (flags & IR3_REG_RELATIV))
                return false;
 
-       /* TODO it seems to *mostly* work to cp RELATIV, except we get some
-        * intermittent piglit variable-indexing fails.  Newer blob driver
-        * doesn't seem to cp these.  Possibly this is hw workaround?  Not
-        * sure, but until that is understood better, lets just switch off
-        * cp for indirect src's:
-        */
-       if (flags & IR3_REG_RELATIV)
-               return false;
+       if (flags & IR3_REG_RELATIV) {
+               /* TODO need to test on earlier gens.. pretty sure the earlier
+                * problem was just that we didn't check that the src was from
+                * same block (since we can't propagate address register values
+                * across blocks currently)
+                */
+               if (compiler->gpu_id < 600)
+                       return false;
+
+               /* NOTE in the special try_swap_mad_two_srcs() case we can be
+                * called on a src that has already had an indirect load folded
+                * in, in which case ssa() returns NULL
+                */
+               struct ir3_instruction *src = ssa(instr->regs[n+1]);
+               if (src && src->address->block != instr->block)
+                       return false;
+       }
 
        switch (opc_cat(instr->opc)) {
        case 1:
@@ -197,12 +210,15 @@ static bool valid_flags(struct ir3_instruction *instr, unsigned n,
                        if (is_store(instr) && (n == 1))
                                return false;
 
-                       if ((instr->opc == OPC_LDL) && (n != 1))
+                       if ((instr->opc == OPC_LDL) && (n == 0))
                                return false;
 
                        if ((instr->opc == OPC_STL) && (n != 2))
                                return false;
 
+                       if (instr->opc == OPC_STLW && n == 0)
+                               return false;
+
                        /* disallow CP into anything but the SSBO slot argument for
                         * atomics:
                         */
@@ -360,6 +376,36 @@ unuse(struct ir3_instruction *instr)
        }
 }
 
+/**
+ * Special case for the 2nd src (n == 1) of "normal" mad instructions,
+ * which cannot reference a constant: try swapping the 1st and 2nd srcs.
+ * Returns true with the srcs left swapped, else false with the order kept.
+ */
+static bool
+try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
+{
+       if (!is_mad(instr->opc))
+               return false;
+
+       /* NOTE: swap the first two src's *before* calling valid_flags(),
+        * which might try to dereference the n'th src it is asked about:
+        */
+       swap(instr->regs[0 + 1], instr->regs[1 + 1]);
+
+       bool valid_swap =
+               /* are new_flags valid for the (former 2nd) src now in slot 0? */
+               valid_flags(instr, 0, new_flags) &&
+               /* and are the former 1st src's flags valid in slot 1? */
+               valid_flags(instr, 1, instr->regs[1 + 1]->flags);
+
+       if (!valid_swap) {
+               /* undo the swap so the srcs are back in their original order: */
+               swap(instr->regs[0 + 1], instr->regs[1 + 1]);
+       }   /* otherwise leave things swapped */
+
+       return valid_swap;
+}
+
 /**
  * Handle cp for a given src register.  This additionally handles
  * the cases of collapsing immedate/const (which replace the src
@@ -423,15 +469,8 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                         * src prior to multiply) can swap their first two srcs if
                         * src[0] is !CONST and src[1] is CONST:
                         */
-                       if ((n == 1) && is_mad(instr->opc) &&
-                                       !(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
-                                       valid_flags(instr, 0, new_flags & ~IR3_REG_IMMED)) {
-                               /* swap src[0] and src[1]: */
-                               struct ir3_register *tmp;
-                               tmp = instr->regs[0 + 1];
-                               instr->regs[0 + 1] = instr->regs[1 + 1];
-                               instr->regs[1 + 1] = tmp;
-
+                       if ((n == 1) && try_swap_mad_two_srcs(instr, new_flags)) {
+                               /* we swapped, so now we are dealing with 1st src: */
                                n = 0;
                        } else {
                                return;
@@ -617,7 +656,7 @@ instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
                        instr->opc   = cond->opc;
                        instr->flags = cond->flags;
                        instr->cat2  = cond->cat2;
-                       instr->address = cond->address;
+                       ir3_instr_set_address(instr, cond->address);
                        instr->regs[1] = cond->regs[1];
                        instr->regs[2] = cond->regs[2];
                        instr->barrier_class |= cond->barrier_class;