freedreno/ir3: optimize immed 2nd src to mad
author: Rob Clark <robdclark@chromium.org>
Mon, 30 Sep 2019 18:44:16 +0000 (11:44 -0700)
committer: Rob Clark <robdclark@chromium.org>
Fri, 18 Oct 2019 22:08:54 +0000 (15:08 -0700)
We can't encode immediate sources for cat3 (mad) instructions, but we can
use a const in the first or third src.  We already handled that case by
swapping the first two srcs, but we weren't considering that an immediate
could be lowered to a const.

For manhattan:

  total instructions in shared programs: 35202 -> 34718 (-1.37%)
  instructions in affected programs: 14931 -> 14447 (-3.24%)
  helped: 90
  HURT: 0
  total full in shared programs: 2451 -> 2359 (-3.75%)
  full in affected programs: 653 -> 561 (-14.09%)
  helped: 69
  HURT: 2

Signed-off-by: Rob Clark <robdclark@chromium.org>
Reviewed-by: Kristian H. Kristensen <hoegsberg@google.com>
src/freedreno/ir3/ir3_cp.c

index d60c1aae49cbae9bd844e11667cd8591c838b523..a79560ec5798442637b1725cbccf0c0238cc7b3d 100644 (file)
@@ -392,6 +392,14 @@ try_swap_mad_two_srcs(struct ir3_instruction *instr, unsigned new_flags)
         */
        swap(instr->regs[0 + 1], instr->regs[1 + 1]);
 
+       /* cat3 doesn't encode immediate, but we can lower immediate
+        * to const if that helps:
+        */
+       if (new_flags & IR3_REG_IMMED) {
+               new_flags &= ~IR3_REG_IMMED;
+               new_flags |=  IR3_REG_CONST;
+       }
+
        bool valid_swap =
                /* can we propagate mov if we move 2nd src to first? */
                valid_flags(instr, 0, new_flags) &&
@@ -548,8 +556,9 @@ reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
                                iim_val = ~iim_val;
 
                        /* other than category 1 (mov) we can only encode up to 10 bits: */
-                       if ((instr->opc == OPC_MOV) ||
-                                       !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) {
+                       if (valid_flags(instr, n, new_flags) &&
+                                       ((instr->opc == OPC_MOV) ||
+                                        !((iim_val & ~0x3ff) && (-iim_val & ~0x3ff)))) {
                                new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
                                src_reg = ir3_reg_clone(instr->block->shader, src_reg);
                                src_reg->flags = new_flags;