src/mesa/drivers/dri/i965/brw_vec4_copy_propagation.cpp

   1 /*
   2  * Copyright © 2011 Intel Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  */
  23
  24 /**
  25  * @file brw_vec4_copy_propagation.cpp
  26  *
  27  * Implements tracking of values copied between registers, and
  28  * optimizations based on that: copy propagation and constant
  29  * propagation.
  30  */
  31
  32 #include "brw_vec4.h"
  33 #include "brw_cfg.h"
  34 #include "brw_eu.h"
  35
  36 namespace brw {
  37
  38 struct copy_entry {
  39    src_reg *value[4];
  40    int saturatemask;
  41 };
  42
  43 static bool
  44 is_direct_copy(vec4_instruction *inst)
  45 {
  46    return (inst->opcode == BRW_OPCODE_MOV &&
  47            !inst->predicate &&
  48            inst->dst.file == VGRF &&
  49            inst->dst.offset % REG_SIZE == 0 &&
  50            !inst->dst.reladdr &&
  51            !inst->src[0].reladdr &&
  52            (inst->dst.type == inst->src[0].type ||
  53             (inst->dst.type == BRW_REGISTER_TYPE_F &&
  54              inst->src[0].type == BRW_REGISTER_TYPE_VF)));
  55 }
  56
  57 static bool
  58 is_dominated_by_previous_instruction(vec4_instruction *inst)
  59 {
  60    return (inst->opcode != BRW_OPCODE_DO &&
  61            inst->opcode != BRW_OPCODE_WHILE &&
  62            inst->opcode != BRW_OPCODE_ELSE &&
  63            inst->opcode != BRW_OPCODE_ENDIF);
  64 }
  65
  66 static bool
  67 is_channel_updated(vec4_instruction *inst, src_reg *values[4], int ch)
  68 {
  69    const src_reg *src = values[ch];
  70
  71    /* consider GRF only */
  72    assert(inst->dst.file == VGRF);
  73    if (!src || src->file != VGRF)
  74       return false;
  75
  76    return regions_overlap(*src, REG_SIZE, inst->dst, inst->size_written) &&
  77           (inst->dst.offset != src->offset ||
  78            inst->dst.writemask & (1 << BRW_GET_SWZ(src->swizzle, ch)));
  79 }
  80
  81 static bool
  82 is_logic_op(enum opcode opcode)
  83 {
  84    return (opcode == BRW_OPCODE_AND ||
  85            opcode == BRW_OPCODE_OR  ||
  86            opcode == BRW_OPCODE_XOR ||
  87            opcode == BRW_OPCODE_NOT);
  88 }
  89
  90 /**
  91  * Get the origin of a copy as a single register if all components present in
  92  * the given readmask originate from the same register and have compatible
  93  * regions, otherwise return a BAD_FILE register.
  94  */
  95 static src_reg
  96 get_copy_value(const copy_entry &entry, unsigned readmask)
  97 {
  98    unsigned swz[4] = {};
  99    src_reg value;
 100
 101    for (unsigned i = 0; i < 4; i++) {
 102       if (readmask & (1 << i)) {
 103          if (entry.value[i]) {
 104             src_reg src = *entry.value[i];
 105
 106             if (src.file == IMM) {
 107                swz[i] = i;
 108             } else {
 109                swz[i] = BRW_GET_SWZ(src.swizzle, i);
 110                /* Overwrite the original swizzle so the src_reg::equals call
 111                 * below doesn't care about it, the correct swizzle will be
 112                 * calculated once the swizzles of all components are known.
 113                 */
 114                src.swizzle = BRW_SWIZZLE_XYZW;
 115             }
 116
 117             if (value.file == BAD_FILE) {
 118                value = src;
 119             } else if (!value.equals(src)) {
 120                return src_reg();
 121             }
 122          } else {
 123             return src_reg();
 124          }
 125       }
 126    }
 127
 128    return swizzle(value,
 129                   brw_compose_swizzle(brw_swizzle_for_mask(readmask),
 130                                       BRW_SWIZZLE4(swz[0], swz[1],
 131                                                    swz[2], swz[3])));
 132 }
 133
 134 static bool
 135 try_constant_propagate(const struct gen_device_info *devinfo,
 136                        vec4_instruction *inst,
 137                        int arg, const copy_entry *entry)
 138 {
 139    /* For constant propagation, we only handle the same constant
 140     * across all 4 channels.  Some day, we should handle the 8-bit
 141     * float vector format, which would let us constant propagate
 142     * vectors better.
 143     * We could be more aggressive here -- some channels might not get used
 144     * based on the destination writemask.
 145     */
 146    src_reg value =
 147       get_copy_value(*entry,
 148                      brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
 149                                                    WRITEMASK_XYZW));
 150
 151    if (value.file != IMM)
 152       return false;
 153
 154    if (value.type == BRW_REGISTER_TYPE_VF) {
 155       /* The result of bit-casting the component values of a vector float
 156        * cannot in general be represented as an immediate.
 157        */
 158       if (inst->src[arg].type != BRW_REGISTER_TYPE_F)
 159          return false;
 160    } else {
 161       value.type = inst->src[arg].type;
 162    }
 163
 164    if (inst->src[arg].abs) {
 165       if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
 166           !brw_abs_immediate(value.type, &value.as_brw_reg())) {
 167          return false;
 168       }
 169    }
 170
 171    if (inst->src[arg].negate) {
 172       if ((devinfo->gen >= 8 && is_logic_op(inst->opcode)) ||
 173           !brw_negate_immediate(value.type, &value.as_brw_reg())) {
 174          return false;
 175       }
 176    }
 177
 178    value = swizzle(value, inst->src[arg].swizzle);
 179
 180    switch (inst->opcode) {
 181    case BRW_OPCODE_MOV:
 182    case SHADER_OPCODE_BROADCAST:
 183       inst->src[arg] = value;
 184       return true;
 185
 186    case SHADER_OPCODE_POW:
 187    case SHADER_OPCODE_INT_QUOTIENT:
 188    case SHADER_OPCODE_INT_REMAINDER:
 189       if (devinfo->gen < 8)
 190          break;
 191       /* fallthrough */
 192    case BRW_OPCODE_DP2:
 193    case BRW_OPCODE_DP3:
 194    case BRW_OPCODE_DP4:
 195    case BRW_OPCODE_DPH:
 196    case BRW_OPCODE_BFI1:
 197    case BRW_OPCODE_ASR:
 198    case BRW_OPCODE_SHL:
 199    case BRW_OPCODE_SHR:
 200    case BRW_OPCODE_SUBB:
 201       if (arg == 1) {
 202          inst->src[arg] = value;
 203          return true;
 204       }
 205       break;
 206
 207    case BRW_OPCODE_MACH:
 208    case BRW_OPCODE_MUL:
 209    case SHADER_OPCODE_MULH:
 210    case BRW_OPCODE_ADD:
 211    case BRW_OPCODE_OR:
 212    case BRW_OPCODE_AND:
 213    case BRW_OPCODE_XOR:
 214    case BRW_OPCODE_ADDC:
 215       if (arg == 1) {
 216          inst->src[arg] = value;
 217          return true;
 218       } else if (arg == 0 && inst->src[1].file != IMM) {
 219          /* Fit this constant in by commuting the operands.  Exception: we
 220           * can't do this for 32-bit integer MUL/MACH because it's asymmetric.
 221           */
 222          if ((inst->opcode == BRW_OPCODE_MUL ||
 223               inst->opcode == BRW_OPCODE_MACH) &&
 224              (inst->src[1].type == BRW_REGISTER_TYPE_D ||
 225               inst->src[1].type == BRW_REGISTER_TYPE_UD))
 226             break;
 227          inst->src[0] = inst->src[1];
 228          inst->src[1] = value;
 229          return true;
 230       }
 231       break;
 232    case GS_OPCODE_SET_WRITE_OFFSET:
 233       /* This is just a multiply by a constant with special strides.
 234        * The generator will handle immediates in both arguments (generating
 235        * a single MOV of the product).  So feel free to propagate in src0.
 236        */
 237       inst->src[arg] = value;
 238       return true;
 239
 240    case BRW_OPCODE_CMP:
 241       if (arg == 1) {
 242          inst->src[arg] = value;
 243          return true;
 244       } else if (arg == 0 && inst->src[1].file != IMM) {
 245          enum brw_conditional_mod new_cmod;
 246
 247          new_cmod = brw_swap_cmod(inst->conditional_mod);
 248          if (new_cmod != BRW_CONDITIONAL_NONE) {
 249             /* Fit this constant in by swapping the operands and
 250              * flipping the test.
 251              */
 252             inst->src[0] = inst->src[1];
 253             inst->src[1] = value;
 254             inst->conditional_mod = new_cmod;
 255             return true;
 256          }
 257       }
 258       break;
 259
 260    case BRW_OPCODE_SEL:
 261       if (arg == 1) {
 262          inst->src[arg] = value;
 263          return true;
 264       } else if (arg == 0 && inst->src[1].file != IMM) {
 265          inst->src[0] = inst->src[1];
 266          inst->src[1] = value;
 267
 268          /* If this was predicated, flipping operands means
 269           * we also need to flip the predicate.
 270           */
 271          if (inst->conditional_mod == BRW_CONDITIONAL_NONE) {
 272             inst->predicate_inverse = !inst->predicate_inverse;
 273          }
 274          return true;
 275       }
 276       break;
 277
 278    default:
 279       break;
 280    }
 281
 282    return false;
 283 }
 284
 285 static bool
 286 try_copy_propagate(const struct gen_device_info *devinfo,
 287                    vec4_instruction *inst, int arg,
 288                    const copy_entry *entry, int attributes_per_reg)
 289 {
 290    /* Build up the value we are propagating as if it were the source of a
 291     * single MOV
 292     */
 293    src_reg value =
 294       get_copy_value(*entry,
 295                      brw_apply_inv_swizzle_to_mask(inst->src[arg].swizzle,
 296                                                    WRITEMASK_XYZW));
 297
 298    /* Check that we can propagate that value */
 299    if (value.file != UNIFORM &&
 300        value.file != VGRF &&
 301        value.file != ATTR)
 302       return false;
 303
 304    if (devinfo->gen >= 8 && (value.negate || value.abs) &&
 305        is_logic_op(inst->opcode)) {
 306       return false;
 307    }
 308
 309    bool has_source_modifiers = value.negate || value.abs;
 310
 311    /* gen6 math and gen7+ SENDs from GRFs ignore source modifiers on
 312     * instructions.
 313     */
 314    if ((has_source_modifiers || value.file == UNIFORM ||
 315         value.swizzle != BRW_SWIZZLE_XYZW) && !inst->can_do_source_mods(devinfo))
 316       return false;
 317
 318    if (has_source_modifiers &&
 319        value.type != inst->src[arg].type &&
 320        !inst->can_change_types())
 321       return false;
 322
 323    if (has_source_modifiers &&
 324        inst->opcode == SHADER_OPCODE_GEN4_SCRATCH_WRITE)
 325       return false;
 326
 327    unsigned composed_swizzle = brw_compose_swizzle(inst->src[arg].swizzle,
 328                                                    value.swizzle);
 329    if (inst->is_3src(devinfo) &&
 330        (value.file == UNIFORM ||
 331         (value.file == ATTR && attributes_per_reg != 1)) &&
 332        !brw_is_single_value_swizzle(composed_swizzle))
 333       return false;
 334
 335    if (inst->is_send_from_grf())
 336       return false;
 337
 338    /* we can't generally copy-propagate UD negations becuse we
 339     * end up accessing the resulting values as signed integers
 340     * instead. See also resolve_ud_negate().
 341     */
 342    if (value.negate &&
 343        value.type == BRW_REGISTER_TYPE_UD)
 344       return false;
 345
 346    /* Don't report progress if this is a noop. */
 347    if (value.equals(inst->src[arg]))
 348       return false;
 349
 350    const unsigned dst_saturate_mask = inst->dst.writemask &
 351       brw_apply_swizzle_to_mask(inst->src[arg].swizzle, entry->saturatemask);
 352
 353    if (dst_saturate_mask) {
 354       /* We either saturate all or nothing. */
 355       if (dst_saturate_mask != inst->dst.writemask)
 356          return false;
 357
 358       /* Limit saturate propagation only to SEL with src1 bounded within 0.0
 359        * and 1.0, otherwise skip copy propagate altogether.
 360        */
 361       switch(inst->opcode) {
 362       case BRW_OPCODE_SEL:
 363          if (arg != 0 ||
 364              inst->src[0].type != BRW_REGISTER_TYPE_F ||
 365              inst->src[1].file != IMM ||
 366              inst->src[1].type != BRW_REGISTER_TYPE_F ||
 367              inst->src[1].f < 0.0 ||
 368              inst->src[1].f > 1.0) {
 369             return false;
 370          }
 371          if (!inst->saturate)
 372             inst->saturate = true;
 373          break;
 374       default:
 375          return false;
 376       }
 377    }
 378
 379    /* Build the final value */
 380    if (inst->src[arg].abs) {
 381       value.negate = false;
 382       value.abs = true;
 383    }
 384    if (inst->src[arg].negate)
 385       value.negate = !value.negate;
 386
 387    value.swizzle = composed_swizzle;
 388    if (has_source_modifiers &&
 389        value.type != inst->src[arg].type) {
 390       assert(inst->can_change_types());
 391       for (int i = 0; i < 3; i++) {
 392          inst->src[i].type = value.type;
 393       }
 394       inst->dst.type = value.type;
 395    } else {
 396       value.type = inst->src[arg].type;
 397    }
 398
 399    inst->src[arg] = value;
 400    return true;
 401 }
 402
 403 bool
 404 vec4_visitor::opt_copy_propagation(bool do_constant_prop)
 405 {
 406    /* If we are in dual instanced or single mode, then attributes are going
 407     * to be interleaved, so one register contains two attribute slots.
 408     */
 409    const int attributes_per_reg =
 410       prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT ? 1 : 2;
 411    bool progress = false;
 412    struct copy_entry entries[alloc.total_size];
 413
 414    memset(&entries, 0, sizeof(entries));
 415
 416    foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
 417       /* This pass only works on basic blocks.  If there's flow
 418        * control, throw out all our information and start from
 419        * scratch.
 420        *
 421        * This should really be fixed by using a structure like in
 422        * src/glsl/opt_copy_propagation.cpp to track available copies.
 423        */
 424       if (!is_dominated_by_previous_instruction(inst)) {
 425          memset(&entries, 0, sizeof(entries));
 426          continue;
 427       }
 428
 429       /* For each source arg, see if each component comes from a copy
 430        * from the same type file (IMM, VGRF, UNIFORM), and try
 431        * optimizing out access to the copy result
 432        */
 433       for (int i = 2; i >= 0; i--) {
 434          /* Copied values end up in GRFs, and we don't track reladdr
 435           * accesses.
 436           */
 437          if (inst->src[i].file != VGRF ||
 438              inst->src[i].reladdr)
 439             continue;
 440
 441          /* We only handle register-aligned single GRF copies. */
 442          if (inst->size_read(i) != REG_SIZE ||
 443              inst->src[i].offset % REG_SIZE)
 444             continue;
 445
 446          const unsigned reg = (alloc.offsets[inst->src[i].nr] +
 447                                inst->src[i].offset / REG_SIZE);
 448          const copy_entry &entry = entries[reg];
 449
 450          if (do_constant_prop && try_constant_propagate(devinfo, inst, i, &entry))
 451             progress = true;
 452          else if (try_copy_propagate(devinfo, inst, i, &entry, attributes_per_reg))
 453             progress = true;
 454       }
 455
 456       /* Track available source registers. */
 457       if (inst->dst.file == VGRF) {
 458          const int reg =
 459             alloc.offsets[inst->dst.nr] + inst->dst.offset / REG_SIZE;
 460
 461          /* Update our destination's current channel values.  For a direct copy,
 462           * the value is the newly propagated source.  Otherwise, we don't know
 463           * the new value, so clear it.
 464           */
 465          bool direct_copy = is_direct_copy(inst);
 466          entries[reg].saturatemask &= ~inst->dst.writemask;
 467          for (int i = 0; i < 4; i++) {
 468             if (inst->dst.writemask & (1 << i)) {
 469                entries[reg].value[i] = direct_copy ? &inst->src[0] : NULL;
 470                entries[reg].saturatemask |=
 471                   inst->saturate && direct_copy ? 1 << i : 0;
 472             }
 473          }
 474
 475          /* Clear the records for any registers whose current value came from
 476           * our destination's updated channels, as the two are no longer equal.
 477           */
 478          if (inst->dst.reladdr)
 479             memset(&entries, 0, sizeof(entries));
 480          else {
 481             for (unsigned i = 0; i < alloc.total_size; i++) {
 482                for (int j = 0; j < 4; j++) {
 483                   if (is_channel_updated(inst, entries[i].value, j)) {
 484                      entries[i].value[j] = NULL;
 485                      entries[i].saturatemask &= ~(1 << j);
 486                   }
 487                }
 488             }
 489          }
 490       }
 491    }
 492
 493    if (progress)
 494       invalidate_live_intervals();
 495
 496    return progress;
 497 }
 498
 499 } /* namespace brw */