freedreno/ir3: handle const/immed/abs/neg in cp
authorRob Clark <robclark@freedesktop.org>
Sun, 29 Mar 2015 15:24:57 +0000 (11:24 -0400)
committerRob Clark <robclark@freedesktop.org>
Sun, 5 Apr 2015 20:36:34 +0000 (16:36 -0400)
Be smarter about propagating copies from const or immed, or with abs/neg
modifiers.  Also, realize that absneg.s and absneg.f are really "fancy"
mov instructions.

This opens up the possibility to remove more copies.  It helps the TGSI
frontend a bit, but will be really needed for the NIR f/e which builds
everything up in SSA form (ie. will *always* insert a mov from const or
immediate).

Signed-off-by: Rob Clark <robclark@freedesktop.org>
src/gallium/drivers/freedreno/ir3/ir3.h
src/gallium/drivers/freedreno/ir3/ir3_cp.c
src/gallium/drivers/freedreno/ir3/ir3_group.c

index 8310286102e4a9feca4c2aeda1d8236b3f690724..f424f73bec3c05d096cf02f702d847c63575f010 100644 (file)
@@ -525,6 +525,12 @@ static inline struct ir3_instruction *ssa(struct ir3_register *reg)
        return NULL;
 }
 
+static inline bool conflicts(struct ir3_instruction *a,
+               struct ir3_instruction *b)
+{
+       return (a && b) && (a != b);
+}
+
 static inline bool reg_gpr(struct ir3_register *r)
 {
        if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
index ceebf803ee1ac8d3ad453e2eecfd29e78442034b..3eb85f660e28b8628254b06093e8beece4859309 100644 (file)
 
 /*
  * Copy Propagate:
- *
- * We could eventually drop this, if the front-end did not insert any
- * mov's..  For now keeping it as a separate pass since that is less
- * painful than updating the existing frontend.  It is expected that
- * with an eventual new NIR based frontend that we won't need this.
  */
 
-static void block_cp(struct ir3_block *block);
-static struct ir3_instruction * instr_cp(struct ir3_instruction *instr);
 
-static bool is_eligible_mov(struct ir3_instruction *instr)
+/* Is it a non-transformative (ie. not type changing) mov?  This can
+ * also include absneg.s/absneg.f, which for the most part can be
+ * treated as a mov (single src argument).
+ */
+static bool is_same_type_mov(struct ir3_instruction *instr)
 {
+       struct ir3_register *dst = instr->regs[0];
+
+       /* mov's that write to a0.x or p0.x are special: */
+       if (dst->num == regid(REG_P0, 0))
+               return false;
+       if (dst->num == regid(REG_A0, 0))
+               return false;
+
        if ((instr->category == 1) &&
-                       (instr->cat1.src_type == instr->cat1.dst_type)) {
+                       (instr->cat1.src_type == instr->cat1.dst_type))
+               return true;
+       if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
+                       (instr->opc == OPC_ABSNEG_S)))
+               return true;
+       return false;
+}
+
+/* is it a type preserving mov, with ok flags? */
+static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
+{
+       if (is_same_type_mov(instr)) {
                struct ir3_register *dst = instr->regs[0];
                struct ir3_register *src = instr->regs[1];
                struct ir3_instruction *src_instr = ssa(src);
                if (dst->flags & (IR3_REG_ADDR | IR3_REG_RELATIV))
                        return false;
-               /* TODO: propagate abs/neg modifiers if possible */
-               if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
-                               IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_RELATIV))
+               if (src->flags & IR3_REG_RELATIV)
                        return false;
+               if (!allow_flags)
+                       if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
+                                       IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
+                               return false;
                if (!src_instr)
                        return false;
                /* TODO: remove this hack: */
@@ -65,10 +83,270 @@ static bool is_eligible_mov(struct ir3_instruction *instr)
        return false;
 }
 
+static unsigned cp_flags(unsigned flags)
+{
+       /* only considering these flags (at least for now): */
+       flags &= (IR3_REG_CONST | IR3_REG_IMMED |
+                       IR3_REG_FNEG | IR3_REG_FABS |
+                       IR3_REG_SNEG | IR3_REG_SABS |
+                       IR3_REG_BNOT);
+       return flags;
+}
+
+static bool valid_flags(struct ir3_instruction *instr, unsigned n,
+               unsigned flags)
+{
+       unsigned valid_flags;
+       flags = cp_flags(flags);
+
+       /* clear flags that are 'ok' */
+       switch (instr->category) {
+       case 1:
+       case 5:
+               /* no flags allowed */
+               if (flags)
+                       return false;
+               break;
+       case 6:
+               valid_flags = IR3_REG_IMMED;
+               if (flags & ~valid_flags)
+                       return false;
+               break;
+       case 2:
+               valid_flags = ir3_cat2_absneg(instr->opc) | IR3_REG_CONST;
+
+               if (ir3_cat2_immed(instr->opc))
+                       valid_flags |= IR3_REG_IMMED;
+
+               if (flags & ~valid_flags)
+                       return false;
+
+               if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
+                       unsigned m = (n ^ 1) + 1;
+                       /* cannot deal w/ const in both srcs:
+                        * (note that some cat2 actually only have a single src)
+                        */
+                       if (m < instr->regs_count) {
+                               struct ir3_register *reg = instr->regs[m];
+                               if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
+                                       return false;
+                               if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
+                                       return false;
+                       }
+                       /* cannot be const + ABS|NEG: */
+                       if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
+                                       IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
+                               return false;
+               }
+               break;
+       case 3:
+               valid_flags = ir3_cat3_absneg(instr->opc) | IR3_REG_CONST;
+
+               if (flags & ~valid_flags)
+                       return false;
+
+               if (flags & IR3_REG_CONST) {
+                       /* cannot deal w/ const in 2nd src: */
+                       /* TODO in some common cases, like mad, we can swap
+                        * first two args.. possibly we should allow that here
+                        * and fixup in legalize?
+                        */
+                       if (n == 1)
+                               return false;
+                       /* cannot be const + ABS|NEG: */
+                       if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
+                                       IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
+                               return false;
+               }
+               break;
+       case 4:
+               /* seems like blob compiler avoids const as src.. */
+               /* TODO double check if this is still the case on a4xx */
+               if (flags & IR3_REG_CONST)
+                       return false;
+               if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
+                       return false;
+               break;
+       }
+
+       return true;
+}
+
+/* propagate register flags from src to dst.. negates need special
+ * handling to cancel each other out.
+ */
+static void combine_flags(unsigned *dstflags, unsigned srcflags)
+{
+       /* if what we are combining into already has (abs) flags,
+        * we can drop (neg) from src:
+        */
+       if (*dstflags & IR3_REG_FABS)
+               srcflags &= ~IR3_REG_FNEG;
+       if (*dstflags & IR3_REG_SABS)
+               srcflags &= ~IR3_REG_SNEG;
+
+       if (srcflags & IR3_REG_FABS)
+               *dstflags |= IR3_REG_FABS;
+       if (srcflags & IR3_REG_SABS)
+               *dstflags |= IR3_REG_SABS;
+       if (srcflags & IR3_REG_FNEG)
+               *dstflags ^= IR3_REG_FNEG;
+       if (srcflags & IR3_REG_SNEG)
+               *dstflags ^= IR3_REG_SNEG;
+       if (srcflags & IR3_REG_BNOT)
+               *dstflags ^= IR3_REG_BNOT;
+}
+
+static struct ir3_instruction * instr_cp(struct ir3_instruction *instr, unsigned *flags);
 
+/**
+ * Handle cp for a given src register.  This additionally handles
+ * the cases of collapsing immedate/const (which replace the src
+ * register with a non-ssa src) or collapsing mov's from relative
+ * src (which needs to also fixup the address src reference by the
+ * instruction).
+ */
+static void
+reg_cp(struct ir3_instruction *instr, struct ir3_register *reg, unsigned n)
+{
+       unsigned src_flags = 0, new_flags;
+       struct ir3_instruction *src_instr;
+
+       if (is_meta(instr)) {
+               /* meta instructions cannot fold up register
+                * flags.. they are usually src for texture
+                * fetch, etc, where we cannot specify abs/neg
+                */
+               reg->instr = instr_cp(reg->instr, NULL);
+               return;
+       }
+
+       src_instr = instr_cp(reg->instr, &src_flags);
+
+       new_flags = reg->flags;
+       combine_flags(&new_flags, src_flags);
+
+       reg->flags = new_flags;
+       reg->instr = src_instr;
+
+       if (!valid_flags(instr, n, reg->flags)) {
+               /* insert an absneg.f */
+               if (reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)) {
+                       debug_assert(!(reg->flags & (IR3_REG_FNEG | IR3_REG_FABS)));
+                       reg->instr = ir3_ABSNEG_S(instr->block,
+                                       reg->instr, cp_flags(src_flags));
+               } else {
+                       debug_assert(!(reg->flags & (IR3_REG_SNEG | IR3_REG_SABS)));
+                       reg->instr = ir3_ABSNEG_F(instr->block,
+                                       reg->instr, cp_flags(src_flags));
+               }
+               reg->flags &= ~cp_flags(src_flags);
+               /* send it through instr_cp() again since
+                * the absneg src might be a mov from const
+                * that could be cleaned up:
+                */
+               reg->instr = instr_cp(reg->instr, NULL);
+               return;
+       }
+
+       if (is_same_type_mov(reg->instr)) {
+               struct ir3_register *src_reg = reg->instr->regs[1];
+               unsigned new_flags = src_reg->flags;
+
+               combine_flags(&new_flags, reg->flags);
+
+               if (!valid_flags(instr, n, new_flags))
+                       return;
+
+               /* Here we handle the special case of mov from
+                * CONST and/or RELATIV.  These need to be handled
+                * specially, because in the case of move from CONST
+                * there is no src ir3_instruction so we need to
+                * replace the ir3_register.  And in the case of
+                * RELATIV we need to handle the address register
+                * dependency.
+                */
+               if (src_reg->flags & IR3_REG_CONST) {
+                       /* an instruction cannot reference two different
+                        * address registers:
+                        */
+                       if ((src_reg->flags & IR3_REG_RELATIV) &&
+                                       conflicts(instr->address, reg->instr->address))
+                               return;
+
+                       src_reg->flags = new_flags;
+                       instr->regs[n+1] = src_reg;
+
+                       if (src_reg->flags & IR3_REG_RELATIV)
+                               instr->address = reg->instr->address;
+
+                       return;
+               }
+
+               if ((src_reg->flags & IR3_REG_RELATIV) &&
+                               !conflicts(instr->address, reg->instr->address)) {
+                       src_reg->flags = new_flags;
+                       instr->regs[n+1] = src_reg;
+                       instr->address = reg->instr->address;
+
+                       return;
+               }
+
+               /* NOTE: seems we can only do immed integers, so don't
+                * need to care about float.  But we do need to handle
+                * abs/neg *before* checking that the immediate requires
+                * few enough bits to encode:
+                *
+                * TODO: do we need to do something to avoid accidentally
+                * catching a float immed?
+                */
+               if (src_reg->flags & IR3_REG_IMMED) {
+                       int32_t iim_val = src_reg->iim_val;
+
+                       debug_assert((instr->category == 6) ||
+                                       ((instr->category == 2) &&
+                                               ir3_cat2_immed(instr->opc)));
+
+                       if (new_flags & IR3_REG_SABS)
+                               iim_val = abs(iim_val);
+
+                       if (new_flags & IR3_REG_SNEG)
+                               iim_val = -iim_val;
+
+                       if (new_flags & IR3_REG_BNOT)
+                               iim_val = ~iim_val;
+
+                       if (!(iim_val & ~0x3ff)) {
+                               new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
+                               src_reg->flags = new_flags;
+                               src_reg->iim_val = iim_val;
+                               instr->regs[n+1] = src_reg;
+                       }
+
+                       return;
+               }
+       }
+}
+
+/**
+ * Given an SSA src (instruction), return the one with extraneous
+ * mov's removed, ie, for (to copy NIR syntax):
+ *
+ *   vec1 ssa1 = fadd <something>, <somethingelse>
+ *   vec1 ssa2 = fabs ssa1
+ *   vec1 ssa3 = fneg ssa1
+ *
+ * then calling instr_cp(ssa3, &flags) would return ssa1 with
+ * (IR3_REG_ABS | IR3_REG_NEGATE) in flags.  If flags is NULL,
+ * then disallow eliminating copies which would require flag
+ * propagation (for example, we cannot propagate abs/neg into
+ * an output).
+ */
 static struct ir3_instruction *
-instr_cp(struct ir3_instruction *instr)
+instr_cp(struct ir3_instruction *instr, unsigned *flags)
 {
+       struct ir3_register *reg;
+
        /* stay within the block.. don't try to operate across
         * basic block boundaries or we'll have problems when
         * dealing with multiple basic blocks:
@@ -76,9 +354,12 @@ instr_cp(struct ir3_instruction *instr)
        if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
                return instr;
 
-       if (is_eligible_mov(instr)) {
-               struct ir3_instruction *src_instr = ssa(instr->regs[1]);
-               return instr_cp(src_instr);
+       if (is_eligible_mov(instr, !!flags)) {
+               struct ir3_register *reg = instr->regs[1];
+               struct ir3_instruction *src_instr = ssa(reg);
+               if (flags)
+                       combine_flags(flags, reg->flags);
+               return instr_cp(src_instr, flags);
        }
 
        /* Check termination condition before walking children (rather
@@ -89,18 +370,20 @@ instr_cp(struct ir3_instruction *instr)
         * up as a src, we only need to recursively walk the children
         * once.)
         */
-       if (!ir3_instr_check_mark(instr)) {
-               struct ir3_register *reg;
+       if (ir3_instr_check_mark(instr))
+               return instr;
 
-               /* walk down the graph from each src: */
-               foreach_src(reg, instr)
-                       if (reg->flags & IR3_REG_SSA)
-                               reg->instr = instr_cp(reg->instr);
+       /* walk down the graph from each src: */
+       foreach_src_n(reg, n, instr) {
+               if (!(reg->flags & IR3_REG_SSA))
+                       continue;
 
-               if (instr->address)
-                       instr->address = instr_cp(instr->address);
+               reg_cp(instr, reg, n);
        }
 
+       if (instr->address)
+               instr->address = instr_cp(instr->address, NULL);
+
        return instr;
 }
 
@@ -111,7 +394,7 @@ static void block_cp(struct ir3_block *block)
        for (i = 0; i < block->noutputs; i++) {
                if (block->outputs[i]) {
                        struct ir3_instruction *out =
-                                       instr_cp(block->outputs[i]);
+                                       instr_cp(block->outputs[i], NULL);
 
                        block->outputs[i] = out;
                }
index d48ecc307aa8ff751706f3f559a9f06accea9042..782f6e87e5651762c026e4c63ab3507e2373a783 100644 (file)
@@ -118,12 +118,6 @@ static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
 static struct group_ops instr_ops = { instr_get, instr_insert_mov };
 
 
-
-static bool conflicts(struct ir3_instruction *a, struct ir3_instruction *b)
-{
-       return (a && b) && (a != b);
-}
-
 static void group_n(struct group_ops *ops, void *arr, unsigned n)
 {
        unsigned i, j;