3 * Copyright © 2010-2015 Intel Corporation
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
28 #include "brw_shader.h"
/* NOTE(review): this extraction is garbled -- statements are split across
 * physical lines, stray original line numbers are fused into the text and
 * several declarations (default constructor, access specifiers, data
 * members such as offset/stride) are missing.  Code tokens are left
 * byte-identical; comments only.
 */
/* Scalar-backend register: extends backend_reg with FS-specific helpers. */
32 class fs_reg
: public backend_reg
{
/* ralloc-based new/delete so instances live on a ralloc memory context. */
34 DECLARE_RALLOC_CXX_OPERATORS(fs_reg
)
/* Construct from a fixed hardware register description. */
39 fs_reg(struct ::brw_reg reg
);
/* Construct a register of the given file and number; the three-argument
 * overload additionally sets the register type explicitly. */
40 fs_reg(enum brw_reg_file file
, int nr
);
41 fs_reg(enum brw_reg_file file
, int nr
, enum brw_reg_type type
);
/* Exact comparison of two registers. */
43 bool equals(const fs_reg
&r
) const;
/* Comparison up to negation -- presumably equality modulo the negate
 * source modifier; TODO confirm against the definition. */
44 bool negative_equals(const fs_reg
&r
) const;
45 bool is_contiguous() const;
48 * Return the size in bytes of a single logical component of the
49 * register assuming the given execution width.
51 unsigned component_size(unsigned width
) const;
53 /** Register region horizontal stride */
/* Body of a negate(fs_reg) helper (its signature and trailing "return reg;"
 * are missing from this extraction): toggles the negate source-modifier
 * flag.  Immediates are rejected by the assert -- presumably because an
 * immediate would need its payload negated rather than a modifier flipped;
 * TODO confirm.
 */
60 assert(reg
.file
!= IMM
);
61 reg
.negate
= !reg
.negate
;
/* Signature of retype(): return a copy of \p reg reinterpreted with the
 * given type.  The body is missing from this extraction. */
66 retype(fs_reg reg
, enum brw_reg_type type
)
/* byte_offset(): advance a register region by \p delta bytes.  Only two
 * branches of the underlying switch over reg.file survive in this
 * extraction (the switch labels, braces and return are missing).  Both
 * split the byte distance into whole REG_SIZE registers carried into
 * reg.nr, plus a sub-register remainder.
 */
73 byte_offset(fs_reg reg
, unsigned delta
)
/* First visible case: the remainder is kept in reg.offset. */
84 const unsigned suboffset
= reg
.offset
+ delta
;
85 reg
.nr
+= suboffset
/ REG_SIZE
;
86 reg
.offset
= suboffset
% REG_SIZE
;
/* Second visible case: the remainder lives in the hardware sub-register
 * number reg.subnr instead. */
91 const unsigned suboffset
= reg
.subnr
+ delta
;
92 reg
.nr
+= suboffset
/ REG_SIZE
;
93 reg
.subnr
= suboffset
% REG_SIZE
;
/* horiz_offset(): offset a region by \p delta logical channels.  Several
 * lines (return type, switch labels, braces, the "®"-damaged "&reg"
 * parameter) are garbled or missing in this extraction.
 */
104 horiz_offset(const fs_reg
®
, unsigned delta
)
110 /* These only have a single component that is implicitly splatted. A
111 * horizontal offset should be a harmless no-op.
112 * XXX - Handle vector immediates correctly.
/* General case: a channel step is stride components of type_sz bytes. */
118 return byte_offset(reg
, delta
* reg
.stride
* type_sz(reg
.type
));
/* ARF/FIXED_GRF case (presumably -- labels missing): hstride is
 * log2-encoded, with 0 meaning a stride of zero. */
124 const unsigned stride
= reg
.hstride
? 1 << (reg
.hstride
- 1) : 0;
125 return byte_offset(reg
, delta
* stride
* type_sz(reg
.type
));
128 unreachable("Invalid register file");
/* offset(): advance a region by \p delta logical components assuming the
 * given execution width.  Only the general case survives here: the byte
 * distance is delta times the per-component size.  Return type, braces and
 * any other cases are missing from this extraction.
 */
132 offset(fs_reg reg
, unsigned width
, unsigned delta
)
143 return byte_offset(reg
, delta
* reg
.component_size(width
));
151 * Get the scalar channel of \p reg given by \p idx and replicate it to all
152 * channels of the result.
155 component(fs_reg reg
, unsigned idx
)
/* Step to the requested channel; the stride-0 splat and "return reg;"
 * that presumably follow are missing from this extraction. */
157 reg
= horiz_offset(reg
, idx
);
163 * Return an integer identifying the discrete address space a register is
164 * contained in. A register is by definition fully contained in the single
165 * reg_space it belongs to, so two registers with different reg_space ids are
166 * guaranteed not to overlap. Most register files are a single reg_space of
167 * its own, only the VGRF file is composed of multiple discrete address
168 * spaces, one for each VGRF allocation.
170 static inline uint32_t
171 reg_space(const fs_reg
&r
)
173 return r
.file
<< 16 | (r
.file
== VGRF
? r
.nr
: 0);
177 * Return the base offset in bytes of a register relative to the start of its
180 static inline unsigned
181 reg_offset(const fs_reg
&r
)
183 return (r
.file
== VGRF
|| r
.file
== IMM
? 0 : r
.nr
) *
184 (r
.file
== UNIFORM
? 4 : REG_SIZE
) + r
.offset
+
185 (r
.file
== ARF
|| r
.file
== FIXED_GRF
? r
.subnr
: 0);
189 * Return the amount of padding in bytes left unused between individual
190 * components of register \p r due to a (horizontal) stride value greater than
191 * one, or zero if components are tightly packed in the register file.
193 static inline unsigned
194 reg_padding(const fs_reg
&r
)
/* ARF/FIXED_GRF encode hstride as log2; a middle branch of this ternary
 * (presumably the hstride == 0 case) is missing from this extraction. */
196 const unsigned stride
= ((r
.file
!= ARF
&& r
.file
!= FIXED_GRF
) ? r
.stride
:
198 1 << (r
.hstride
- 1));
/* (stride - 1) skipped components, each type_sz(r.type) bytes wide. */
199 return (MAX2(1, stride
) - 1) * type_sz(r
.type
);
203 * Return whether the register region starting at \p r and spanning \p dr
204 * bytes could potentially overlap the register region starting at \p s and
205 * spanning \p ds bytes.
208 regions_overlap(const fs_reg
&r
, unsigned dr
, const fs_reg
&s
, unsigned ds
)
/* Compressed COMPR4 MRF region on the first operand: strip the flag from a
 * working copy "t" (its declaration, presumably "fs_reg t = r;", is
 * missing from this extraction) and test both decompressed halves. */
210 if (r
.file
== MRF
&& (r
.nr
& BRW_MRF_COMPR4
)) {
212 t
.nr
&= ~BRW_MRF_COMPR4
;
213 /* COMPR4 regions are translated by the hardware during decompression
214 * into two separate half-regions 4 MRFs apart from each other.
216 return regions_overlap(t
, dr
/ 2, s
, ds
) ||
217 regions_overlap(byte_offset(t
, 4 * REG_SIZE
), dr
/ 2, s
, ds
);
/* COMPR4 on the second operand: recurse with the operands swapped so the
 * branch above handles it. */
219 } else if (s
.file
== MRF
&& (s
.nr
& BRW_MRF_COMPR4
)) {
220 return regions_overlap(s
, ds
, r
, dr
);
/* General case: same address space and the two half-open byte intervals
 * are not disjoint. */
223 return reg_space(r
) == reg_space(s
) &&
224 !(reg_offset(r
) + dr
<= reg_offset(s
) ||
225 reg_offset(s
) + ds
<= reg_offset(r
));
230 * Check that the register region given by r [r.offset, r.offset + dr[
231 * is fully contained inside the register region given by s
232 * [s.offset, s.offset + ds[.
235 region_contained_in(const fs_reg
&r
, unsigned dr
, const fs_reg
&s
, unsigned ds
)
237 return reg_space(r
) == reg_space(s
) &&
238 reg_offset(r
) >= reg_offset(s
) &&
239 reg_offset(r
) + dr
<= reg_offset(s
) + ds
;
243 * Return whether the given register region is n-periodic, i.e. whether the
244 * original region remains invariant after shifting it by \p n scalar
248 is_periodic(const fs_reg
®
, unsigned n
)
/* BAD_FILE and null registers: the return for this branch is missing from
 * this extraction. */
250 if (reg
.file
== BAD_FILE
|| reg
.is_null()) {
/* Vector immediates repeat after 8 (V/UV) or 4 (VF) elements; the final
 * "1" fallback of this ternary appears to be missing. */
253 } else if (reg
.file
== IMM
) {
254 const unsigned period
= (reg
.type
== BRW_REGISTER_TYPE_UV
||
255 reg
.type
== BRW_REGISTER_TYPE_V
? 8 :
256 reg
.type
== BRW_REGISTER_TYPE_VF
? 4 :
258 return n
% period
== 0;
/* Hardware-described regions: period derived from the vstride/hstride/
 * width encoding (a trailing branch of this ternary is also missing). */
260 } else if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
261 const unsigned period
= (reg
.hstride
== 0 && reg
.vstride
== 0 ? 1 :
262 reg
.vstride
== 0 ? 1 << reg
.width
:
264 return n
% period
== 0;
/* Remaining files: only a stride-0 (splatted) region is periodic. */
267 return reg
.stride
== 0;
272 is_uniform(const fs_reg
®
)
274 return is_periodic(reg
, 1);
278 * Get the specified 8-component quarter of a register.
/* NOTE(review): the return type line and presumably an assertion bounding
 * \p idx are missing from this extraction. */
281 quarter(const fs_reg
®
, unsigned idx
)
/* Each quarter is 8 channels wide. */
284 return horiz_offset(reg
, 8 * idx
);
288 * Reinterpret each channel of register \p reg as a vector of values of the
289 * given smaller type and take the i-th subcomponent from each.
292 subscript(fs_reg reg
, brw_reg_type type
, unsigned i
)
/* The requested subcomponent must fit inside one original channel. */
294 assert((i
+ 1) * type_sz(type
) <= type_sz(reg
.type
));
/* ARF/FIXED_GRF: strides are log2-encoded, so widen them by the log2
 * ratio of the old and new type sizes (zero stays zero). */
296 if (reg
.file
== ARF
|| reg
.file
== FIXED_GRF
) {
297 /* The stride is encoded inconsistently for fixed GRF and ARF registers
298 * as the log2 of the actual vertical and horizontal strides.
300 const int delta
= util_logbase2(type_sz(reg
.type
)) -
301 util_logbase2(type_sz(type
));
302 reg
.hstride
+= (reg
.hstride
? delta
: 0);
303 reg
.vstride
+= (reg
.vstride
? delta
: 0);
/* Immediates cannot be subdivided -- only the identity subscript works. */
305 } else if (reg
.file
== IMM
) {
306 assert(reg
.type
== type
);
/* Remaining files: linear stride scales by the type-size ratio (an
 * intervening branch appears to be missing from this extraction). */
309 reg
.stride
*= type_sz(reg
.type
) / type_sz(type
);
/* Retype and step to the i-th subcomponent within the channel. */
312 return byte_offset(retype(reg
, type
), i
* type_sz(type
));
/* horiz_stride(): signature only -- the body that presumably overrides the
 * region's horizontal stride is missing from this extraction. */
316 horiz_stride(fs_reg reg
, unsigned s
)
/* Canonical default-constructed register used as a "no register" value. */
322 static const fs_reg reg_undef
;
/* NOTE(review): garbled extraction -- several members (data fields, a
 * constructor body, access specifiers) are missing; code tokens are left
 * byte-identical, comments only. */
/* Scalar-backend IR instruction. */
324 class fs_inst
: public backend_instruction
{
/* Copy assignment deliberately unimplemented/private. */
325 fs_inst
&operator=(const fs_inst
&);
/* Shared constructor helper: opcode, execution width, destination and a
 * source array of the given length. */
327 void init(enum opcode opcode
, uint8_t exec_width
, const fs_reg
&dst
,
328 const fs_reg
*src
, unsigned sources
);
/* ralloc-based new/delete. */
331 DECLARE_RALLOC_CXX_OPERATORS(fs_inst
)
/* Constructors for 0..3 sources, an explicit source array, and copy. */
334 fs_inst(enum opcode opcode
, uint8_t exec_size
);
335 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
);
336 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
338 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
339 const fs_reg
&src0
, const fs_reg
&src1
);
340 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
341 const fs_reg
&src0
, const fs_reg
&src1
, const fs_reg
&src2
);
342 fs_inst(enum opcode opcode
, uint8_t exec_size
, const fs_reg
&dst
,
343 const fs_reg src
[], unsigned sources
);
344 fs_inst(const fs_inst
&that
);
/* Grow or shrink the source array in place. */
347 void resize_sources(uint8_t num_sources
);
349 bool is_send_from_grf() const;
350 bool is_payload(unsigned arg
) const;
351 bool is_partial_write() const;
352 unsigned components_read(unsigned i
) const;
/* Number of bytes read from source \p arg. */
353 unsigned size_read(int arg
) const;
354 bool can_do_source_mods(const struct gen_device_info
*devinfo
) const;
356 bool can_change_types() const;
357 bool has_source_and_destination_hazard() const;
358 unsigned implied_mrf_writes() const;
361 * Return whether \p arg is a control source of a virtual instruction which
362 * shouldn't contribute to the execution type and usual regioning
363 * restriction calculations of arithmetic instructions.
365 bool is_control_source(unsigned arg
) const;
368 * Return the subset of flag registers read by the instruction as a bitset
369 * with byte granularity.
371 unsigned flags_read(const gen_device_info
*devinfo
) const;
374 * Return the subset of flag registers updated by the instruction (either
375 * partially or fully) as a bitset with byte granularity.
377 unsigned flags_written() const;
382 uint8_t sources
; /**< Number of fs_reg sources. */
385 bool pi_noperspective
:1; /**< Pixel interpolator noperspective flag */
387 tgl_swsb sched
; /**< Scheduling info. */
391 * Make the execution of \p inst dependent on the evaluation of a possibly
392 * inverted predicate.
394 static inline fs_inst
*
/* NOTE(review): the final parameter line (presumably "fs_inst *inst)")
 * and the trailing "return inst;" are missing from this extraction. */
395 set_predicate_inv(enum brw_predicate pred
, bool inverse
,
398 inst
->predicate
= pred
;
399 inst
->predicate_inverse
= inverse
;
404 * Make the execution of \p inst dependent on the evaluation of a predicate.
406 static inline fs_inst
*
407 set_predicate(enum brw_predicate pred
, fs_inst
*inst
)
409 return set_predicate_inv(pred
, false, inst
);
413 * Write the result of evaluating the condition given by \p mod to a flag
416 static inline fs_inst
*
417 set_condmod(enum brw_conditional_mod mod
, fs_inst
*inst
)
/* Stores the conditional mod; the trailing "return inst;" that makes this
 * chainable is missing from this extraction. */
419 inst
->conditional_mod
= mod
;
424 * Clamp the result of \p inst to the saturation range of its destination
427 static inline fs_inst
*
428 set_saturate(bool saturate
, fs_inst
*inst
)
/* Sets the saturate flag; the trailing "return inst;" is missing from
 * this extraction. */
430 inst
->saturate
= saturate
;
435 * Return the number of dataflow registers written by the instruction (either
436 * fully or partially) counted from 'floor(reg_offset(inst->dst) /
437 * register_size)'. The somewhat arbitrary register size unit is 4B for the
438 * UNIFORM and IMM files and 32B for all other files.
441 regs_written(const fs_inst
*inst
)
/* Destinations in the 4B-unit files are not expected here. */
443 assert(inst
->dst
.file
!= UNIFORM
&& inst
->dst
.file
!= IMM
);
/* Round the written byte span (misalignment + size, less trailing stride
 * padding) up to whole registers.  NOTE(review): the middle term and the
 * final REG_SIZE divisor argument are missing from this extraction. */
444 return DIV_ROUND_UP(reg_offset(inst
->dst
) % REG_SIZE
+
446 MIN2(inst
->size_written
, reg_padding(inst
->dst
)),
451 * Return the number of dataflow registers read by the instruction (either
452 * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
453 * register_size)'. The somewhat arbitrary register size unit is 4B for the
454 * UNIFORM and IMM files and 32B for all other files.
457 regs_read(const fs_inst
*inst
, unsigned i
)
/* Unit is 4B for UNIFORM/IMM sources, a full 32B register otherwise. */
459 const unsigned reg_size
=
460 inst
->src
[i
].file
== UNIFORM
|| inst
->src
[i
].file
== IMM
? 4 : REG_SIZE
;
/* Round the read byte span up to whole reg_size units.  NOTE(review): the
 * middle term and the final reg_size divisor argument are missing from
 * this extraction. */
461 return DIV_ROUND_UP(reg_offset(inst
->src
[i
]) % reg_size
+
463 MIN2(inst
->size_read(i
), reg_padding(inst
->src
[i
])),
/* Compute the execution data type of the instruction: the widest non-control
 * source type, preferring floating point on ties, falling back to the
 * destination type.  NOTE(review): several lines (the bodies of the two
 * ties/wins branches, braces, final return) are missing from this
 * extraction. */
467 static inline enum brw_reg_type
468 get_exec_type(const fs_inst
*inst
)
/* Start below any real type so any source can replace it. */
470 brw_reg_type exec_type
= BRW_REGISTER_TYPE_B
;
472 for (int i
= 0; i
< inst
->sources
; i
++) {
/* Only real data sources participate, not control sources. */
473 if (inst
->src
[i
].file
!= BAD_FILE
&&
474 !inst
->is_control_source(i
)) {
475 const brw_reg_type t
= get_exec_type(inst
->src
[i
].type
);
/* Wider type wins; on equal width a floating-point type wins
 * (assignment statements missing from this extraction). */
476 if (type_sz(t
) > type_sz(exec_type
))
478 else if (type_sz(t
) == type_sz(exec_type
) &&
479 brw_reg_type_is_floating_point(t
))
/* No data sources at all: fall back to the destination type. */
484 if (exec_type
== BRW_REGISTER_TYPE_B
)
485 exec_type
= inst
->dst
.type
;
487 assert(exec_type
!= BRW_REGISTER_TYPE_B
);
489 /* Promotion of the execution type to 32-bit for conversions from or to
490 * half-float seems to be consistent with the following text from the
491 * Cherryview PRM Vol. 7, "Execution Data Type":
493 * "When single precision and half precision floats are mixed between
494 * source operands or between source and destination operand [..] single
495 * precision float is the execution datatype."
497 * and from "Register Region Restrictions":
499 * "Conversion between Integer and HF (Half Float) must be DWord aligned
500 * and strided by a DWord on the destination."
502 if (type_sz(exec_type
) == 2 &&
503 inst
->dst
.type
!= exec_type
) {
504 if (exec_type
== BRW_REGISTER_TYPE_HF
)
505 exec_type
= BRW_REGISTER_TYPE_F
;
506 else if (inst
->dst
.type
== BRW_REGISTER_TYPE_HF
)
507 exec_type
= BRW_REGISTER_TYPE_D
;
513 static inline unsigned
514 get_exec_type_size(const fs_inst
*inst
)
516 return type_sz(get_exec_type(inst
));
520 is_send(const fs_inst
*inst
)
522 return inst
->mlen
|| inst
->is_send_from_grf();
526 * Return whether the instruction isn't an ALU instruction and cannot be
527 * assumed to complete in-order.
530 is_unordered(const fs_inst
*inst
)
532 return is_send(inst
) || inst
->is_math();
536 * Return whether the following regioning restriction applies to the specified
537 * instruction. From the Cherryview PRM Vol 7. "Register Region
540 * "When source or destination datatype is 64b or operation is integer DWord
541 * multiply, regioning in Align1 must follow these rules:
543 * 1. Source and Destination horizontal stride must be aligned to the same qword.
544 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
545 * 3. Source and Destination offset must be the same, except the case of
/* NOTE(review): the second parameter line (the fs_inst pointer), braces
 * and the final "return false" branch are missing from this extraction. */
549 has_dst_aligned_region_restriction(const gen_device_info
*devinfo
,
552 const brw_reg_type exec_type
= get_exec_type(inst
);
553 /* Even though the hardware spec claims that "integer DWord multiply"
554 * operations are restricted, empirical evidence and the behavior of the
555 * simulator suggest that only 32x32-bit integer multiplication is
/* Integer MUL with both factors >= 4 bytes, or MAD with both multiplied
 * operands >= 4 bytes. */
558 const bool is_dword_multiply
= !brw_reg_type_is_floating_point(exec_type
) &&
559 ((inst
->opcode
== BRW_OPCODE_MUL
&&
560 MIN2(type_sz(inst
->src
[0].type
), type_sz(inst
->src
[1].type
)) >= 4) ||
561 (inst
->opcode
== BRW_OPCODE_MAD
&&
562 MIN2(type_sz(inst
->src
[1].type
), type_sz(inst
->src
[2].type
)) >= 4));
/* Restriction applies on CHV and gen9 LP parts for 64-bit data or DWord
 * integer multiplies. */
564 if (type_sz(inst
->dst
.type
) > 4 || type_sz(exec_type
) > 4 ||
565 (type_sz(exec_type
) == 4 && is_dword_multiply
))
566 return devinfo
->is_cherryview
|| gen_device_info_is_9lp(devinfo
);
572 * Return whether the LOAD_PAYLOAD instruction is a plain copy of bits from
573 * the specified register file into a VGRF.
575 * This implies identity register regions without any source-destination
576 * overlap, but otherwise has no implications on the location of sources and
577 * destination in the register file: Gathering any number of portions from
578 * multiple virtual registers in any order is allowed.
581 is_copy_payload(brw_reg_file file
, const fs_inst
*inst
)
/* Must be a full, unsaturated LOAD_PAYLOAD into a VGRF (the "return
 * false" bodies of these guards are missing from this extraction). */
583 if (inst
->opcode
!= SHADER_OPCODE_LOAD_PAYLOAD
||
584 inst
->is_partial_write() || inst
->saturate
||
585 inst
->dst
.file
!= VGRF
)
/* Every source must come from the requested file with no modifiers... */
588 for (unsigned i
= 0; i
< inst
->sources
; i
++) {
589 if (inst
->src
[i
].file
!= file
||
590 inst
->src
[i
].abs
|| inst
->src
[i
].negate
)
/* ...be a contiguous region... */
593 if (!inst
->src
[i
].is_contiguous())
/* ...and not overlap the bytes written to the destination. */
596 if (regions_overlap(inst
->dst
, inst
->size_written
,
597 inst
->src
[i
], inst
->size_read(i
)))
605 * Like is_copy_payload(), but the instruction is required to copy a single
606 * contiguous block of registers from the given register file into the
607 * destination without any reordering.
610 is_identity_payload(brw_reg_file file
, const fs_inst
*inst
) {
611 if (is_copy_payload(file
, inst
)) {
/* Walk the sources checking that each one continues exactly where the
 * previous one ended (the early-exit and final returns are missing from
 * this extraction). */
612 fs_reg reg
= inst
->src
[0];
614 for (unsigned i
= 0; i
< inst
->sources
; i
++) {
/* Types may differ per source; only the location must match. */
615 reg
.type
= inst
->src
[i
].type
;
616 if (!inst
->src
[i
].equals(reg
))
/* Advance the expected position by the bytes this source reads. */
619 reg
= byte_offset(reg
, inst
->size_read(i
));
629 * Like is_copy_payload(), but the instruction is required to source data from
630 * at least two disjoint VGRFs.
632 * This doesn't necessarily rule out the elimination of this instruction
633 * through register coalescing, but due to limitations of the register
634 * coalesce pass it might be impossible to do so directly until a later stage,
635 * when the LOAD_PAYLOAD instruction is unrolled into a sequence of MOV
639 is_multi_copy_payload(const fs_inst
*inst
) {
640 if (is_copy_payload(VGRF
, inst
)) {
/* Look for any source drawn from a different VGRF than source 0 (the
 * branch body and returns are missing from this extraction). */
641 for (unsigned i
= 0; i
< inst
->sources
; i
++) {
642 if (inst
->src
[i
].nr
!= inst
->src
[0].nr
)
651 * Like is_identity_payload(), but the instruction is required to copy the
652 * whole contents of a single VGRF into the destination.
654 * This means that there is a good chance that the instruction will be
655 * eliminated through register coalescing, but it's neither a necessary nor a
656 * sufficient condition for that to happen -- E.g. consider the case where
657 * source and destination registers diverge due to other instructions in the
658 * program overwriting part of their contents, which isn't something we can
659 * predict up front based on a cheap strictly local test of the copy
663 is_coalescing_payload(const brw::simple_allocator
&alloc
, const fs_inst
*inst
)
665 return is_identity_payload(VGRF
, inst
) &&
666 inst
->src
[0].offset
== 0 &&
667 alloc
.sizes
[inst
->src
[0].nr
] * REG_SIZE
== inst
->size_written
;
/* Declaration only (defined elsewhere): return whether the instruction
 * suffers a GRF bank conflict on the given hardware. */
671 has_bank_conflict(const gen_device_info
*devinfo
, const fs_inst
*inst
);