aco: reorder VMEM operands in ACO IR
[mesa.git] src/amd/compiler/aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
45 extern uint64_t debug_flags;
46
47 enum {
48 DEBUG_VALIDATE = 0x1,
49 DEBUG_VALIDATE_RA = 0x2,
50 DEBUG_PERFWARN = 0x4,
51 };
52
53 /**
54 * Representation of the instruction's microcode encoding format
55 * Note: Some Vector ALU Formats can be combined, such that:
56 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
57 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
58 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
59 *
60 * (*) The same is applicable for VOP1 and VOPC instructions.
61 */
62 enum class Format : std::uint16_t {
63 /* Pseudo Instruction Format */
64 PSEUDO = 0,
65 /* Scalar ALU & Control Formats */
66 SOP1 = 1,
67 SOP2 = 2,
68 SOPK = 3,
69 SOPP = 4,
70 SOPC = 5,
71 /* Scalar Memory Format */
72 SMEM = 6,
73 /* LDS/GDS Format */
74 DS = 8,
75 /* Vector Memory Buffer Formats */
76 MTBUF = 9,
77 MUBUF = 10,
78 /* Vector Memory Image Format */
79 MIMG = 11,
80 /* Export Format */
81 EXP = 12,
82 /* Flat Formats */
83 FLAT = 13,
84 GLOBAL = 14,
85 SCRATCH = 15,
86
87 PSEUDO_BRANCH = 16,
88 PSEUDO_BARRIER = 17,
89 PSEUDO_REDUCTION = 18,
90
91 /* Vector ALU Formats */
92 VOP1 = 1 << 8,
93 VOP2 = 1 << 9,
94 VOPC = 1 << 10,
95 VOP3 = 1 << 11,
96 VOP3A = 1 << 11,
97 VOP3B = 1 << 11,
98 VOP3P = 1 << 12,
99 /* Vector Parameter Interpolation Format */
100 VINTRP = 1 << 13,
101 DPP = 1 << 14,
102 SDWA = 1 << 15,
103 };
104
105 enum barrier_interaction : uint8_t {
106 barrier_none = 0,
107 barrier_buffer = 0x1,
108 barrier_image = 0x2,
109 barrier_atomic = 0x4,
110 barrier_shared = 0x8,
111 /* used for geometry shaders to ensure that vertex data writes happen before
112 * the GS_DONE s_sendmsg. */
113 barrier_gs_data = 0x10,
114 /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
115 barrier_gs_sendmsg = 0x20,
116 barrier_count = 6,
117 };
118
119 enum fp_round {
120 fp_round_ne = 0,
121 fp_round_pi = 1,
122 fp_round_ni = 2,
123 fp_round_tz = 3,
124 };
125
126 enum fp_denorm {
127 /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
128 * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
129 fp_denorm_flush = 0x0,
130 fp_denorm_keep = 0x3,
131 };
132
133 struct float_mode {
134 /* matches encoding of the MODE register */
135 union {
136 struct {
137 fp_round round32:2;
138 fp_round round16_64:2;
139 unsigned denorm32:2;
140 unsigned denorm16_64:2;
141 };
142 uint8_t val = 0;
143 };
144 /* if false, optimizations which may remove infs/nan/-0.0 can be done */
145 bool preserve_signed_zero_inf_nan32:1;
146 bool preserve_signed_zero_inf_nan16_64:1;
147 /* if false, optimizations which may remove denormal flushing can be done */
148 bool must_flush_denorms32:1;
149 bool must_flush_denorms16_64:1;
150 bool care_about_round32:1;
151 bool care_about_round16_64:1;
152
153 /* Returns true if instructions using the mode "other" can safely use the
154 * current one instead. */
155 bool canReplace(float_mode other) const noexcept {
156 return val == other.val &&
157 (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
158 (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
159 (must_flush_denorms32 || !other.must_flush_denorms32) &&
160 (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
161 (care_about_round32 || !other.care_about_round32) &&
162 (care_about_round16_64 || !other.care_about_round16_64);
163 }
164 };
165
166 constexpr Format asVOP3(Format format) {
167 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
168 };
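/* Illustrative sketch of how the combinable VALU format bits interact with
 * asVOP3(): promoting a VOP2 instruction to the VOP3A encoding keeps the
 * VOP2 bit set, so both bits can be tested afterwards.
 *
 *    Format f = asVOP3(Format::VOP2);
 *    bool has_vop2_bit = (uint16_t)f & (uint16_t)Format::VOP2;   // true
 *    bool has_vop3_bit = (uint16_t)f & (uint16_t)Format::VOP3A;  // true
 */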
169
170 enum class RegType {
171 none = 0,
172 sgpr,
173 vgpr,
174 linear_vgpr,
175 };
176
177 struct RegClass {
178
179 enum RC : uint8_t {
180 s1 = 1,
181 s2 = 2,
182 s3 = 3,
183 s4 = 4,
184 s6 = 6,
185 s8 = 8,
186 s16 = 16,
187 v1 = s1 | (1 << 5),
188 v2 = s2 | (1 << 5),
189 v3 = s3 | (1 << 5),
190 v4 = s4 | (1 << 5),
191 v5 = 5 | (1 << 5),
192 v6 = 6 | (1 << 5),
193 v7 = 7 | (1 << 5),
194 v8 = 8 | (1 << 5),
195 /* these are used for WWM and spills to vgpr */
196 v1_linear = v1 | (1 << 6),
197 v2_linear = v2 | (1 << 6),
198 };
199
200 RegClass() = default;
201 constexpr RegClass(RC rc)
202 : rc(rc) {}
203 constexpr RegClass(RegType type, unsigned size)
204 : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
205
206 constexpr operator RC() const { return rc; }
207 explicit operator bool() = delete;
208
209 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
210 constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
211 constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
212 constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
213
214 private:
215 RC rc;
216 };
217
218 /* transitional helper expressions */
219 static constexpr RegClass s1{RegClass::s1};
220 static constexpr RegClass s2{RegClass::s2};
221 static constexpr RegClass s3{RegClass::s3};
222 static constexpr RegClass s4{RegClass::s4};
223 static constexpr RegClass s8{RegClass::s8};
224 static constexpr RegClass s16{RegClass::s16};
225 static constexpr RegClass v1{RegClass::v1};
226 static constexpr RegClass v2{RegClass::v2};
227 static constexpr RegClass v3{RegClass::v3};
228 static constexpr RegClass v4{RegClass::v4};
229 static constexpr RegClass v5{RegClass::v5};
230 static constexpr RegClass v6{RegClass::v6};
231 static constexpr RegClass v7{RegClass::v7};
232 static constexpr RegClass v8{RegClass::v8};
233
234 /**
235 * Temp Class
236 * Each temporary virtual register has a
237 * register class (i.e. size and type)
238 * and SSA id.
239 */
240 struct Temp {
241 Temp() = default;
242 constexpr Temp(uint32_t id, RegClass cls) noexcept
243 : id_(id), reg_class(cls) {}
244
245 constexpr uint32_t id() const noexcept { return id_; }
246 constexpr RegClass regClass() const noexcept { return reg_class; }
247
248 constexpr unsigned size() const noexcept { return reg_class.size(); }
249 constexpr RegType type() const noexcept { return reg_class.type(); }
250 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
251
252 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
253 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
254 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
255
256 private:
257 uint32_t id_:24;
258 RegClass reg_class;
259 };
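/* Illustrative sketch of RegClass/Temp usage (the SSA id 42 is arbitrary):
 *
 *    RegClass rc = RegClass(RegType::vgpr, 2);   // equivalent to v2
 *    Temp tmp = Temp(42, rc);
 *    assert(tmp.size() == 2 && tmp.type() == RegType::vgpr);
 *    RegClass lin = rc.as_linear();              // v2_linear, for WWM/spilling
 */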
260
261 /**
262 * PhysReg
263 * Represents the physical register for each
264 * Operand and Definition.
265 */
266 struct PhysReg {
267 constexpr PhysReg() = default;
268 explicit constexpr PhysReg(unsigned r) : reg(r) {}
269 constexpr operator unsigned() const { return reg; }
270
271 uint16_t reg = 0;
272 };
273
274 /* helper expressions for special registers */
275 static constexpr PhysReg m0{124};
276 static constexpr PhysReg vcc{106};
277 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
278 static constexpr PhysReg exec{126};
279 static constexpr PhysReg exec_lo{126};
280 static constexpr PhysReg exec_hi{127};
281 static constexpr PhysReg scc{253};
282
283 /**
284 * Operand Class
285 * Initially, each Operand refers to either
286 * a temporary virtual register
287 * or to a constant value.
288 * Temporary registers get mapped to physical registers during RA;
289 * constant values are inlined into the instruction sequence.
290 */
291 class Operand final
292 {
293 public:
294 constexpr Operand()
295 : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
296 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
297
298 explicit Operand(Temp r) noexcept
299 {
300 data_.temp = r;
301 if (r.id()) {
302 isTemp_ = true;
303 } else {
304 isUndef_ = true;
305 setFixed(PhysReg{128});
306 }
307 };
308 explicit Operand(uint32_t v, bool is64bit = false) noexcept
309 {
310 data_.i = v;
311 isConstant_ = true;
312 is64BitConst_ = is64bit;
313 if (v <= 64)
314 setFixed(PhysReg{128 + v});
315 else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
316 setFixed(PhysReg{192 - v});
317 else if (v == 0x3f000000) /* 0.5 */
318 setFixed(PhysReg{240});
319 else if (v == 0xbf000000) /* -0.5 */
320 setFixed(PhysReg{241});
321 else if (v == 0x3f800000) /* 1.0 */
322 setFixed(PhysReg{242});
323 else if (v == 0xbf800000) /* -1.0 */
324 setFixed(PhysReg{243});
325 else if (v == 0x40000000) /* 2.0 */
326 setFixed(PhysReg{244});
327 else if (v == 0xc0000000) /* -2.0 */
328 setFixed(PhysReg{245});
329 else if (v == 0x40800000) /* 4.0 */
330 setFixed(PhysReg{246});
331 else if (v == 0xc0800000) /* -4.0 */
332 setFixed(PhysReg{247});
333 else { /* Literal Constant */
334 assert(!is64bit && "attempt to create a 64-bit literal constant");
335 setFixed(PhysReg{255});
336 }
337 };
338 explicit Operand(uint64_t v) noexcept
339 {
340 isConstant_ = true;
341 is64BitConst_ = true;
342 if (v <= 64) {
343 data_.i = (uint32_t) v;
344 setFixed(PhysReg{128 + (uint32_t) v});
345 } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
346 data_.i = (uint32_t) v;
347 setFixed(PhysReg{192 - (uint32_t) v});
348 } else if (v == 0x3FE0000000000000) { /* 0.5 */
349 data_.i = 0x3f000000;
350 setFixed(PhysReg{240});
351 } else if (v == 0xBFE0000000000000) { /* -0.5 */
352 data_.i = 0xbf000000;
353 setFixed(PhysReg{241});
354 } else if (v == 0x3FF0000000000000) { /* 1.0 */
355 data_.i = 0x3f800000;
356 setFixed(PhysReg{242});
357 } else if (v == 0xBFF0000000000000) { /* -1.0 */
358 data_.i = 0xbf800000;
359 setFixed(PhysReg{243});
360 } else if (v == 0x4000000000000000) { /* 2.0 */
361 data_.i = 0x40000000;
362 setFixed(PhysReg{244});
363 } else if (v == 0xC000000000000000) { /* -2.0 */
364 data_.i = 0xc0000000;
365 setFixed(PhysReg{245});
366 } else if (v == 0x4010000000000000) { /* 4.0 */
367 data_.i = 0x40800000;
368 setFixed(PhysReg{246});
369 } else if (v == 0xC010000000000000) { /* -4.0 */
370 data_.i = 0xc0800000;
371 setFixed(PhysReg{247});
372 } else { /* Literal Constant: we don't know if it is a long or double.*/
373 isConstant_ = 0;
374 assert(false && "attempt to create a 64-bit literal constant");
375 }
376 };
377 explicit Operand(RegClass type) noexcept
378 {
379 isUndef_ = true;
380 data_.temp = Temp(0, type);
381 setFixed(PhysReg{128});
382 };
383 explicit Operand(PhysReg reg, RegClass type) noexcept
384 {
385 data_.temp = Temp(0, type);
386 setFixed(reg);
387 }
388
389 constexpr bool isTemp() const noexcept
390 {
391 return isTemp_;
392 }
393
394 constexpr void setTemp(Temp t) noexcept {
395 assert(!isConstant_);
396 isTemp_ = true;
397 data_.temp = t;
398 }
399
400 constexpr Temp getTemp() const noexcept
401 {
402 return data_.temp;
403 }
404
405 constexpr uint32_t tempId() const noexcept
406 {
407 return data_.temp.id();
408 }
409
410 constexpr bool hasRegClass() const noexcept
411 {
412 return isTemp() || isUndefined();
413 }
414
415 constexpr RegClass regClass() const noexcept
416 {
417 return data_.temp.regClass();
418 }
419
420 constexpr unsigned size() const noexcept
421 {
422 if (isConstant())
423 return is64BitConst_ ? 2 : 1;
424 else
425 return data_.temp.size();
426 }
427
428 constexpr bool isFixed() const noexcept
429 {
430 return isFixed_;
431 }
432
433 constexpr PhysReg physReg() const noexcept
434 {
435 return reg_;
436 }
437
438 constexpr void setFixed(PhysReg reg) noexcept
439 {
440 isFixed_ = reg != unsigned(-1);
441 reg_ = reg;
442 }
443
444 constexpr bool isConstant() const noexcept
445 {
446 return isConstant_;
447 }
448
449 constexpr bool isLiteral() const noexcept
450 {
451 return isConstant() && reg_ == 255;
452 }
453
454 constexpr bool isUndefined() const noexcept
455 {
456 return isUndef_;
457 }
458
459 constexpr uint32_t constantValue() const noexcept
460 {
461 return data_.i;
462 }
463
464 constexpr bool constantEquals(uint32_t cmp) const noexcept
465 {
466 return isConstant() && constantValue() == cmp;
467 }
468
469 constexpr void setKill(bool flag) noexcept
470 {
471 isKill_ = flag;
472 if (!flag)
473 setFirstKill(false);
474 }
475
476 constexpr bool isKill() const noexcept
477 {
478 return isKill_ || isFirstKill();
479 }
480
481 constexpr void setFirstKill(bool flag) noexcept
482 {
483 isFirstKill_ = flag;
484 if (flag)
485 setKill(flag);
486 }
487
488 /* When there are multiple operands killing the same temporary,
489 * isFirstKill() only returns true for the first one. */
490 constexpr bool isFirstKill() const noexcept
491 {
492 return isFirstKill_;
493 }
494
495 private:
496 union {
497 uint32_t i;
498 float f;
499 Temp temp = Temp(0, s1);
500 } data_;
501 PhysReg reg_;
502 union {
503 struct {
504 uint8_t isTemp_:1;
505 uint8_t isFixed_:1;
506 uint8_t isConstant_:1;
507 uint8_t isKill_:1;
508 uint8_t isUndef_:1;
509 uint8_t isFirstKill_:1;
510 uint8_t is64BitConst_:1;
511 };
512 /* can't initialize bit-fields in c++11, so work around using a union */
513 uint8_t control_ = 0;
514 };
515 };
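/* Illustrative sketch of the three kinds of Operand and how constants are
 * fixed to the inline-constant/literal encoding space (assumes a hypothetical
 * SSA temporary with id 1):
 *
 *    Operand tmp_op(Temp(1, v1));   // temporary operand, isTemp()
 *    Operand one(0x3f800000u);      // inline constant 1.0 -> PhysReg{242}
 *    Operand big(0x12345678u);      // not encodable inline -> literal, PhysReg{255}
 *    Operand undef(v1);             // undefined operand of class v1
 *    assert(!one.isLiteral() && big.isLiteral() && undef.isUndefined());
 */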
516
517 /**
518 * Definition Class
519 * Definitions are the results of Instructions
520 * and refer to temporary virtual registers
521 * which are later mapped to physical registers
522 */
523 class Definition final
524 {
525 public:
526 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
527 Definition(uint32_t index, RegClass type) noexcept
528 : temp(index, type) {}
529 explicit Definition(Temp tmp) noexcept
530 : temp(tmp) {}
531 Definition(PhysReg reg, RegClass type) noexcept
532 : temp(Temp(0, type))
533 {
534 setFixed(reg);
535 }
536 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
537 : temp(Temp(tmpId, type))
538 {
539 setFixed(reg);
540 }
541
542 constexpr bool isTemp() const noexcept
543 {
544 return tempId() > 0;
545 }
546
547 constexpr Temp getTemp() const noexcept
548 {
549 return temp;
550 }
551
552 constexpr uint32_t tempId() const noexcept
553 {
554 return temp.id();
555 }
556
557 constexpr void setTemp(Temp t) noexcept {
558 temp = t;
559 }
560
561 constexpr RegClass regClass() const noexcept
562 {
563 return temp.regClass();
564 }
565
566 constexpr unsigned size() const noexcept
567 {
568 return temp.size();
569 }
570
571 constexpr bool isFixed() const noexcept
572 {
573 return isFixed_;
574 }
575
576 constexpr PhysReg physReg() const noexcept
577 {
578 return reg_;
579 }
580
581 constexpr void setFixed(PhysReg reg) noexcept
582 {
583 isFixed_ = 1;
584 reg_ = reg;
585 }
586
587 constexpr void setHint(PhysReg reg) noexcept
588 {
589 hasHint_ = 1;
590 reg_ = reg;
591 }
592
593 constexpr bool hasHint() const noexcept
594 {
595 return hasHint_;
596 }
597
598 constexpr void setKill(bool flag) noexcept
599 {
600 isKill_ = flag;
601 }
602
603 constexpr bool isKill() const noexcept
604 {
605 return isKill_;
606 }
607
608 private:
609 Temp temp = Temp(0, s1);
610 PhysReg reg_;
611 union {
612 struct {
613 uint8_t isFixed_:1;
614 uint8_t hasHint_:1;
615 uint8_t isKill_:1;
616 };
617 /* can't initialize bit-fields in c++11, so work around using a union */
618 uint8_t control_ = 0;
619 };
620 };
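/* Illustrative sketch: a Definition usually wraps a fresh temporary, but it
 * can also be fixed to a physical register, e.g. for clobbered special
 * registers (the SSA id 7 is arbitrary):
 *
 *    Definition dst(Temp(7, s1));       // result written to an SSA temporary
 *    Definition scc_clobber(scc, s1);   // fixed to the scc register
 *    assert(dst.isTemp() && scc_clobber.isFixed());
 */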
621
622 class Block;
623
624 struct Instruction {
625 aco_opcode opcode;
626 Format format;
627 uint32_t pass_flags;
628
629 aco::span<Operand> operands;
630 aco::span<Definition> definitions;
631
632 constexpr bool isVALU() const noexcept
633 {
634 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
635 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
636 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
637 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
638 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
639 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
640 }
641
642 constexpr bool isSALU() const noexcept
643 {
644 return format == Format::SOP1 ||
645 format == Format::SOP2 ||
646 format == Format::SOPC ||
647 format == Format::SOPK ||
648 format == Format::SOPP;
649 }
650
651 constexpr bool isVMEM() const noexcept
652 {
653 return format == Format::MTBUF ||
654 format == Format::MUBUF ||
655 format == Format::MIMG;
656 }
657
658 constexpr bool isDPP() const noexcept
659 {
660 return (uint16_t) format & (uint16_t) Format::DPP;
661 }
662
663 constexpr bool isVOP3() const noexcept
664 {
665 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
666 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
667 format == Format::VOP3P;
668 }
669
670 constexpr bool isSDWA() const noexcept
671 {
672 return (uint16_t) format & (uint16_t) Format::SDWA;
673 }
674
675 constexpr bool isFlatOrGlobal() const noexcept
676 {
677 return format == Format::FLAT || format == Format::GLOBAL;
678 }
679
680 constexpr bool usesModifiers() const noexcept;
681
682 constexpr bool reads_exec() const noexcept
683 {
684 for (const Operand& op : operands) {
685 if (op.isFixed() && op.physReg() == exec)
686 return true;
687 }
688 return false;
689 }
690 };
691
692 struct SOPK_instruction : public Instruction {
693 uint16_t imm;
694 };
695
696 struct SOPP_instruction : public Instruction {
697 uint32_t imm;
698 int block;
699 };
700
701 struct SOPC_instruction : public Instruction {
702 };
703
704 struct SOP1_instruction : public Instruction {
705 };
706
707 struct SOP2_instruction : public Instruction {
708 };
709
710 /**
711 * Scalar Memory Format:
712 * For s_(buffer_)load_dword*:
713 * Operand(0): SBASE - SGPR-pair which provides base address
714 * Operand(1): Offset - immediate (un)signed offset or SGPR
715 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
716 * Operand(n-1): SOffset - SGPR offset (Vega only)
717 *
718 * Having no operands is also valid for instructions such as s_dcache_inv.
719 *
720 */
721 struct SMEM_instruction : public Instruction {
722 bool glc : 1; /* VI+: globally coherent */
723 bool dlc : 1; /* NAVI: device level coherent */
724 bool nv : 1; /* VEGA only: Non-volatile */
725 bool can_reorder : 1;
726 bool disable_wqm : 1;
727 barrier_interaction barrier;
728 };
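/* Illustrative sketch of building an SMEM load with the operand layout
 * documented above, using the create_instruction() helper defined further
 * below. The opcode name is assumed to follow the hardware mnemonic; base (s2)
 * and dst (s2) are hypothetical temporaries.
 *
 *    aco_ptr<SMEM_instruction> load{create_instruction<SMEM_instruction>(
 *          aco_opcode::s_load_dwordx2, Format::SMEM, 2, 1)};
 *    load->operands[0] = Operand(base);       // SBASE: SGPR-pair base address
 *    load->operands[1] = Operand(0u);         // immediate offset of 0
 *    load->definitions[0] = Definition(dst);  // SDATA
 *    load->can_reorder = true;
 */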
729
730 struct VOP1_instruction : public Instruction {
731 };
732
733 struct VOP2_instruction : public Instruction {
734 };
735
736 struct VOPC_instruction : public Instruction {
737 };
738
739 struct VOP3A_instruction : public Instruction {
740 bool abs[3];
741 bool neg[3];
742 uint8_t opsel : 4;
743 uint8_t omod : 2;
744 bool clamp : 1;
745 };
746
747 /**
748 * Data Parallel Primitives Format:
749 * This format can be used for VOP1, VOP2 or VOPC instructions.
750 * The swizzle applies to the src0 operand.
751 *
752 */
753 struct DPP_instruction : public Instruction {
754 bool abs[2];
755 bool neg[2];
756 uint16_t dpp_ctrl;
757 uint8_t row_mask : 4;
758 uint8_t bank_mask : 4;
759 bool bound_ctrl : 1;
760 };
761
762 struct Interp_instruction : public Instruction {
763 uint8_t attribute;
764 uint8_t component;
765 };
766
767 /**
768 * Local and Global Data Sharing instructions
769 * Operand(0): ADDR - VGPR which supplies the address.
770 * Operand(1): DATA0 - First data VGPR.
771 * Operand(2): DATA1 - Second data VGPR.
772 * Operand(n-1): M0 - LDS size.
773 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
774 *
775 */
776 struct DS_instruction : public Instruction {
777 int16_t offset0;
778 int8_t offset1;
779 bool gds;
780 };
781
782 /**
783 * Vector Memory Untyped-buffer Instructions
784 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
785 * Operand(1): VADDR - Address source. Can carry an index and/or offset
786 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
787 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
788 *
789 */
790 struct MUBUF_instruction : public Instruction {
791 uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
792 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
793 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
794 bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
795 bool glc : 1; /* globally coherent */
796 bool dlc : 1; /* NAVI: device level coherent */
797 bool slc : 1; /* system level coherent */
798 bool tfe : 1; /* texture fail enable */
799 bool lds : 1; /* Return read-data to LDS instead of VGPRs */
800 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
801 bool can_reorder : 1;
802 barrier_interaction barrier;
803 };
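/* Illustrative sketch of a MUBUF load with the reordered operand layout
 * documented above (SRSRC first, then VADDR, then SOFFSET). The opcode name
 * is assumed to follow the hardware mnemonic; rsrc (s4), vaddr (v1) and
 * dst (v1) are hypothetical temporaries.
 *
 *    aco_ptr<MUBUF_instruction> load{create_instruction<MUBUF_instruction>(
 *          aco_opcode::buffer_load_dword, Format::MUBUF, 3, 1)};
 *    load->operands[0] = Operand(rsrc);        // SRSRC: resource constant
 *    load->operands[1] = Operand(vaddr);       // VADDR: index and/or offset
 *    load->operands[2] = Operand(0u);          // SOFFSET
 *    load->definitions[0] = Definition(dst);   // VDATA
 *    load->offen = true;                       // offset is taken from VADDR
 */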
804
805 /**
806 * Vector Memory Typed-buffer Instructions
807 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
808 * Operand(1): VADDR - Address source. Can carry an index and/or offset
809 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
810 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
811 *
812 */
813 struct MTBUF_instruction : public Instruction {
814 uint16_t offset; /* Unsigned byte offset - 12 bit */
815 uint8_t dfmt : 4; /* Data Format of data in memory buffer */
816 uint8_t nfmt : 3; /* Numeric format of data in memory */
817 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
818 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
819 bool glc : 1; /* globally coherent */
820 bool dlc : 1; /* NAVI: device level coherent */
821 bool slc : 1; /* system level coherent */
822 bool tfe : 1; /* texture fail enable */
823 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
824 bool can_reorder : 1;
825 barrier_interaction barrier;
826 };
827
828 /**
829 * Vector Memory Image Instructions
830 * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
831 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
832 * or VDATA - Vector GPR for write data.
833 * Operand(2): VADDR - Address source. Can carry an offset or an index.
834 * Definition(0): VDATA - Vector GPR for read result.
835 *
836 */
837 struct MIMG_instruction : public Instruction {
838 uint8_t dmask; /* Data VGPR enable mask */
839 uint8_t dim : 3; /* NAVI: dimensionality */
840 bool unrm : 1; /* Force address to be un-normalized */
841 bool dlc : 1; /* NAVI: device level coherent */
842 bool glc : 1; /* globally coherent */
843 bool slc : 1; /* system level coherent */
844 bool tfe : 1; /* texture fail enable */
845 bool da : 1; /* declare an array */
846 bool lwe : 1; /* LOD warning enable */
847 bool r128 : 1; /* NAVI: Texture resource size */
848 bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
849 bool d16 : 1; /* Convert 32-bit data to 16-bit data */
850 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
851 bool can_reorder : 1;
852 barrier_interaction barrier;
853 };
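/* Illustrative sketch of an MIMG sample with the reordered operand layout
 * documented above (SRSRC, then SSAMP, then VADDR). The opcode name is
 * assumed to follow the hardware mnemonic; rsrc (s8), samp (s4), coords (v2)
 * and dst (v4) are hypothetical temporaries.
 *
 *    aco_ptr<MIMG_instruction> tex{create_instruction<MIMG_instruction>(
 *          aco_opcode::image_sample, Format::MIMG, 3, 1)};
 *    tex->operands[0] = Operand(rsrc);        // SRSRC: image descriptor
 *    tex->operands[1] = Operand(samp);        // SSAMP: sampler descriptor
 *    tex->operands[2] = Operand(coords);      // VADDR: texture coordinates
 *    tex->definitions[0] = Definition(dst);   // VDATA
 *    tex->dmask = 0xf;                        // return all four components
 */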
854
855 /**
856 * Flat/Scratch/Global Instructions
857 * Operand(0): ADDR
858 * Operand(1): SADDR
859 * Operand(2) / Definition(0): DATA/VDST
860 *
861 */
862 struct FLAT_instruction : public Instruction {
863 uint16_t offset; /* Vega/Navi only */
864 bool slc : 1; /* system level coherent */
865 bool glc : 1; /* globally coherent */
866 bool dlc : 1; /* NAVI: device level coherent */
867 bool lds : 1;
868 bool nv : 1;
869 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
870 bool can_reorder : 1;
871 barrier_interaction barrier;
872 };
873
874 struct Export_instruction : public Instruction {
875 uint8_t enabled_mask;
876 uint8_t dest;
877 bool compressed : 1;
878 bool done : 1;
879 bool valid_mask : 1;
880 };
881
882 struct Pseudo_instruction : public Instruction {
883 bool tmp_in_scc;
884 PhysReg scratch_sgpr; /* might not be valid if it's not needed */
885 };
886
887 struct Pseudo_branch_instruction : public Instruction {
888 /* target[0] is the block index of the branch target.
889 * For conditional branches, target[1] contains the fall-through alternative.
890 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
891 */
892 uint32_t target[2];
893 };
894
895 struct Pseudo_barrier_instruction : public Instruction {
896 };
897
898 enum ReduceOp {
899 iadd32, iadd64,
900 imul32, imul64,
901 fadd32, fadd64,
902 fmul32, fmul64,
903 imin32, imin64,
904 imax32, imax64,
905 umin32, umin64,
906 umax32, umax64,
907 fmin32, fmin64,
908 fmax32, fmax64,
909 iand32, iand64,
910 ior32, ior64,
911 ixor32, ixor64,
912 gfx10_wave64_bpermute
913 };
914
915 /**
916 * Subgroup Reduction Instructions. All operands and definitions other than
917 * the data to be reduced and the result are inserted by setup_reduce_temp().
918 * Operand(0): data to be reduced
919 * Operand(1): reduce temporary
920 * Operand(2): vector temporary
921 * Definition(0): result
922 * Definition(1): scalar temporary
923 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
924 * Definition(3): scc clobber
925 * Definition(4): vcc clobber
926 *
927 */
928 struct Pseudo_reduction_instruction : public Instruction {
929 ReduceOp reduce_op;
930 unsigned cluster_size; // must be 0 for scans
931 };
932
933 struct instr_deleter_functor {
934 void operator()(void* p) {
935 free(p);
936 }
937 };
938
939 template<typename T>
940 using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
941
942 template<typename T>
943 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
944 {
945 std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
946 char *data = (char*) calloc(1, size);
947 T* inst = (T*) data;
948
949 inst->opcode = opcode;
950 inst->format = format;
951
952 uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
953 inst->operands = aco::span<Operand>(operands_offset, num_operands);
954 uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
955 inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
956
957 return inst;
958 }
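/* Illustrative sketch of create_instruction(): the instruction and its
 * operand/definition spans live in a single calloc'ed block, so the aco_ptr
 * deleter can simply free() it. The opcode name is assumed to follow the
 * hardware mnemonic; dst (s1) and block (a Block*) are hypothetical.
 *
 *    aco_ptr<SOP1_instruction> mov{create_instruction<SOP1_instruction>(
 *          aco_opcode::s_mov_b32, Format::SOP1, 1, 1)};
 *    mov->operands[0] = Operand(0u);          // inline constant 0
 *    mov->definitions[0] = Definition(dst);
 *    block->instructions.emplace_back(std::move(mov));
 */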
959
960 constexpr bool Instruction::usesModifiers() const noexcept
961 {
962 if (isDPP() || isSDWA())
963 return true;
964 if (!isVOP3())
965 return false;
966 const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
967 for (unsigned i = 0; i < operands.size(); i++) {
968 if (vop3->abs[i] || vop3->neg[i])
969 return true;
970 }
971 return vop3->opsel || vop3->clamp || vop3->omod;
972 }
973
974 constexpr bool is_phi(Instruction* instr)
975 {
976 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
977 }
978
979 static inline bool is_phi(aco_ptr<Instruction>& instr)
980 {
981 return is_phi(instr.get());
982 }
983
984 barrier_interaction get_barrier_interaction(Instruction* instr);
985
986 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
987
988 enum block_kind {
989 /* uniform indicates that when leaving this block,
990 * all active lanes stay active */
991 block_kind_uniform = 1 << 0,
992 block_kind_top_level = 1 << 1,
993 block_kind_loop_preheader = 1 << 2,
994 block_kind_loop_header = 1 << 3,
995 block_kind_loop_exit = 1 << 4,
996 block_kind_continue = 1 << 5,
997 block_kind_break = 1 << 6,
998 block_kind_continue_or_break = 1 << 7,
999 block_kind_discard = 1 << 8,
1000 block_kind_branch = 1 << 9,
1001 block_kind_merge = 1 << 10,
1002 block_kind_invert = 1 << 11,
1003 block_kind_uses_discard_if = 1 << 12,
1004 block_kind_needs_lowering = 1 << 13,
1005 block_kind_uses_demote = 1 << 14,
1006 block_kind_export_end = 1 << 15,
1007 };
1008
1009
1010 struct RegisterDemand {
1011 constexpr RegisterDemand() = default;
1012 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1013 : vgpr{v}, sgpr{s} {}
1014 int16_t vgpr = 0;
1015 int16_t sgpr = 0;
1016
1017 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1018 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1019 }
1020
1021 constexpr bool exceeds(const RegisterDemand other) const noexcept {
1022 return vgpr > other.vgpr || sgpr > other.sgpr;
1023 }
1024
1025 constexpr RegisterDemand operator+(const Temp t) const noexcept {
1026 if (t.type() == RegType::sgpr)
1027 return RegisterDemand( vgpr, sgpr + t.size() );
1028 else
1029 return RegisterDemand( vgpr + t.size(), sgpr );
1030 }
1031
1032 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1033 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1034 }
1035
1036 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1037 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1038 }
1039
1040 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1041 vgpr += other.vgpr;
1042 sgpr += other.sgpr;
1043 return *this;
1044 }
1045
1046 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1047 vgpr -= other.vgpr;
1048 sgpr -= other.sgpr;
1049 return *this;
1050 }
1051
1052 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1053 if (t.type() == RegType::sgpr)
1054 sgpr += t.size();
1055 else
1056 vgpr += t.size();
1057 return *this;
1058 }
1059
1060 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1061 if (t.type() == RegType::sgpr)
1062 sgpr -= t.size();
1063 else
1064 vgpr -= t.size();
1065 return *this;
1066 }
1067
1068 constexpr void update(const RegisterDemand other) noexcept {
1069 vgpr = std::max(vgpr, other.vgpr);
1070 sgpr = std::max(sgpr, other.sgpr);
1071 }
1072
1073 };
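/* Illustrative sketch of RegisterDemand arithmetic as used for liveness
 * tracking (the limit values below are arbitrary numbers):
 *
 *    RegisterDemand demand(4, 2);      // 4 VGPRs, 2 SGPRs
 *    demand += Temp(1, v2);            // now 6 VGPRs, 2 SGPRs
 *    RegisterDemand limit(24, 102);
 *    bool needs_spilling = demand.exceeds(limit);   // false
 *    RegisterDemand max;
 *    max.update(demand);               // component-wise maximum
 */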
1074
1075 /* CFG */
1076 struct Block {
1077 float_mode fp_mode;
1078 unsigned index;
1079 unsigned offset = 0;
1080 std::vector<aco_ptr<Instruction>> instructions;
1081 std::vector<unsigned> logical_preds;
1082 std::vector<unsigned> linear_preds;
1083 std::vector<unsigned> logical_succs;
1084 std::vector<unsigned> linear_succs;
1085 RegisterDemand register_demand = RegisterDemand();
1086 uint16_t loop_nest_depth = 0;
1087 uint16_t kind = 0;
1088 int logical_idom = -1;
1089 int linear_idom = -1;
1090 Temp live_out_exec = Temp();
1091
1092 /* this information is needed for predecessors to blocks with phis when
1093 * moving out of ssa */
1094 bool scc_live_out = false;
1095 PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
1096
1097 Block(unsigned idx) : index(idx) {}
1098 Block() : index(0) {}
1099 };
1100
1101 using Stage = uint16_t;
1102
1103 /* software stages */
1104 static constexpr Stage sw_vs = 1 << 0;
1105 static constexpr Stage sw_gs = 1 << 1;
1106 static constexpr Stage sw_tcs = 1 << 2;
1107 static constexpr Stage sw_tes = 1 << 3;
1108 static constexpr Stage sw_fs = 1 << 4;
1109 static constexpr Stage sw_cs = 1 << 5;
1110 static constexpr Stage sw_gs_copy = 1 << 6;
1111 static constexpr Stage sw_mask = 0x7f;
1112
1113 /* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
1114 static constexpr Stage hw_vs = 1 << 7;
1115 static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
1116 static constexpr Stage hw_gs = 1 << 9;
1117 static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
1118 static constexpr Stage hw_hs = 1 << 11;
1119 static constexpr Stage hw_fs = 1 << 12;
1120 static constexpr Stage hw_cs = 1 << 13;
1121 static constexpr Stage hw_mask = 0x7f << 7;
1122
1123 /* possible settings of Program::stage */
1124 static constexpr Stage vertex_vs = sw_vs | hw_vs;
1125 static constexpr Stage fragment_fs = sw_fs | hw_fs;
1126 static constexpr Stage compute_cs = sw_cs | hw_cs;
1127 static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
1128 static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
1129 /* GFX10/NGG */
1130 static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
1131 static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1132 static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1133 static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1134 /* GFX9 (and GFX10 if NGG isn't used) */
1135 static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1136 static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1137 static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1138 /* pre-GFX9 */
1139 static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
1140 static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
1141 static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
1142 static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
1143 static constexpr Stage geometry_gs = sw_gs | hw_gs;
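/* Illustrative sketch: Program::stage combines one or more software-stage
 * bits with exactly one hardware-stage bit, so both sides can be tested
 * independently (program is a hypothetical Program*):
 *
 *    bool on_hw_gs = program->stage & hw_gs;        // runs on the HW GS stage
 *    Stage sw_stages = program->stage & sw_mask;    // only the API stages
 *    bool merged_vs_gs = (sw_stages & sw_vs) && (sw_stages & sw_gs);
 */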
1144
1145 class Program final {
1146 public:
1147 float_mode next_fp_mode;
1148 std::vector<Block> blocks;
1149 RegisterDemand max_reg_demand = RegisterDemand();
1150 uint16_t num_waves = 0;
1151 uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1152 ac_shader_config* config;
1153 struct radv_shader_info *info;
1154 enum chip_class chip_class;
1155 enum radeon_family family;
1156 unsigned wave_size;
1157 RegClass lane_mask;
1158 Stage stage; /* Stage */
1159 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1160 bool needs_wqm = false; /* there exists a p_wqm instruction */
1161 bool wb_smem_l1_on_end = false;
1162
1163 std::vector<uint8_t> constant_data;
1164 Temp private_segment_buffer;
1165 Temp scratch_offset;
1166
1167 uint16_t min_waves = 0;
1168 uint16_t lds_alloc_granule;
1169 uint32_t lds_limit; /* in bytes */
1170 uint16_t vgpr_limit;
1171 uint16_t sgpr_limit;
1172 uint16_t physical_sgprs;
1173 uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1174 uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1175
1176 bool needs_vcc = false;
1177 bool needs_xnack_mask = false;
1178 bool needs_flat_scr = false;
1179
1180 uint32_t allocateId()
1181 {
1182 assert(allocationID <= 16777215);
1183 return allocationID++;
1184 }
1185
1186 uint32_t peekAllocationId()
1187 {
1188 return allocationID;
1189 }
1190
1191 void setAllocationId(uint32_t id)
1192 {
1193 allocationID = id;
1194 }
1195
1196 Block* create_and_insert_block() {
1197 blocks.emplace_back(blocks.size());
1198 blocks.back().fp_mode = next_fp_mode;
1199 return &blocks.back();
1200 }
1201
1202 Block* insert_block(Block&& block) {
1203 block.index = blocks.size();
1204 block.fp_mode = next_fp_mode;
1205 blocks.emplace_back(std::move(block));
1206 return &blocks.back();
1207 }
1208
1209 private:
1210 uint32_t allocationID = 1;
1211 };
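/* Illustrative sketch of creating blocks and wiring up the linear CFG
 * (program is a hypothetical Program*; indices are used instead of pointers
 * because create_and_insert_block() may reallocate the block vector):
 *
 *    unsigned header_idx = program->create_and_insert_block()->index;
 *    Block* merge = program->create_and_insert_block();
 *    merge->linear_preds.push_back(header_idx);
 *    program->blocks[header_idx].linear_succs.push_back(merge->index);
 *    Temp counter = Temp(program->allocateId(), s1);
 */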
1212
1213 struct live {
1214 /* live temps out per block */
1215 std::vector<std::set<Temp>> live_out;
1216 /* register demand (sgpr/vgpr) per instruction per block */
1217 std::vector<std::vector<RegisterDemand>> register_demand;
1218 };
1219
1220 void select_program(Program *program,
1221 unsigned shader_count,
1222 struct nir_shader *const *shaders,
1223 ac_shader_config* config,
1224 struct radv_shader_args *args);
1225 void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
1226 ac_shader_config* config,
1227 struct radv_shader_args *args);
1228
1229 void lower_wqm(Program* program, live& live_vars,
1230 const struct radv_nir_compiler_options *options);
1231 void lower_bool_phis(Program* program);
1232 void calc_min_waves(Program* program);
1233 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1234 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1235 std::vector<uint16_t> dead_code_analysis(Program *program);
1236 void dominator_tree(Program* program);
1237 void insert_exec_mask(Program *program);
1238 void value_numbering(Program* program);
1239 void optimize(Program* program);
1240 void setup_reduce_temp(Program* program);
1241 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1242 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1243 void ssa_elimination(Program* program);
1244 void lower_to_hw_instr(Program* program);
1245 void schedule_program(Program* program, live& live_vars);
1246 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1247 void insert_wait_states(Program* program);
1248 void insert_NOPs(Program* program);
1249 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1250 void print_asm(Program *program, std::vector<uint32_t>& binary,
1251 unsigned exec_size, std::ostream& out);
1252 void validate(Program* program, FILE *output);
1253 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1254 #ifndef NDEBUG
1255 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1256 #else
1257 #define perfwarn(program, cond, msg, ...) do {} while(0)
1258 #endif
1259
1260 void aco_print_instr(Instruction *instr, FILE *output);
1261 void aco_print_program(Program *program, FILE *output);
1262
1263 /* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
1264 uint16_t get_extra_sgprs(Program *program);
1265
1266 /* get the number of sgprs/vgprs that must be allocated to address a given number of sgprs/vgprs */
1267 uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
1268 uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);
1269
1270 /* return number of addressable sgprs/vgprs for max_waves */
1271 uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
1272 uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1273
1274 typedef struct {
1275 const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
1276 const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1277 const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1278 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1279 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1280 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
1281 const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1282 const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1283 } Info;
1284
1285 extern const Info instr_info;
1286
1287 }
1288
1289 #endif /* ACO_IR_H */
1290