src/amd/compiler/aco_ir.h

   1 /*
   2  * Copyright © 2018 Valve Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #ifndef ACO_IR_H
  26 #define ACO_IR_H
  27
  28 #include <vector>
  29 #include <set>
  30 #include <bitset>
  31 #include <memory>
  32
  33 #include "nir.h"
  34 #include "ac_binary.h"
  35 #include "amd_family.h"
  36 #include "aco_opcodes.h"
  37 #include "aco_util.h"
  38
  39 struct radv_nir_compiler_options;
  40 struct radv_shader_args;
  41 struct radv_shader_info;
  42
  43 namespace aco {
  44
  45 extern uint64_t debug_flags;
  46
  47 enum {
  48    DEBUG_VALIDATE = 0x1,
  49    DEBUG_VALIDATE_RA = 0x2,
  50    DEBUG_PERFWARN = 0x4,
  51 };
  52
  53 /**
  54  * Representation of the instruction's microcode encoding format
  55  * Note: Some Vector ALU Formats can be combined, such that:
  56  * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
  57  * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
  58  * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
  59  *
  60  * (*) The same is applicable for VOP1 and VOPC instructions.
  61  */
  62 enum class Format : std::uint16_t {
  63    /* Pseudo Instruction Format */
  64    PSEUDO = 0,
  65    /* Scalar ALU & Control Formats */
  66    SOP1 = 1,
  67    SOP2 = 2,
  68    SOPK = 3,
  69    SOPP = 4,
  70    SOPC = 5,
  71    /* Scalar Memory Format */
  72    SMEM = 6,
  73    /* LDS/GDS Format */
  74    DS = 8,
  75    /* Vector Memory Buffer Formats */
  76    MTBUF = 9,
  77    MUBUF = 10,
  78    /* Vector Memory Image Format */
  79    MIMG = 11,
  80    /* Export Format */
  81    EXP = 12,
  82    /* Flat Formats */
  83    FLAT = 13,
  84    GLOBAL = 14,
  85    SCRATCH = 15,
  86
  87    PSEUDO_BRANCH = 16,
  88    PSEUDO_BARRIER = 17,
  89    PSEUDO_REDUCTION = 18,
  90
  91    /* Vector ALU Formats */
  92    VOP1 = 1 << 8,
  93    VOP2 = 1 << 9,
  94    VOPC = 1 << 10,
  95    VOP3 = 1 << 11,
  96    VOP3A = 1 << 11,
  97    VOP3B = 1 << 11,
  98    VOP3P = 1 << 12,
  99    /* Vector Parameter Interpolation Format */
 100    VINTRP = 1 << 13,
 101    DPP = 1 << 14,
 102    SDWA = 1 << 15,
 103 };
 104
 105 enum barrier_interaction : uint8_t {
 106    barrier_none = 0,
 107    barrier_buffer = 0x1,
 108    barrier_image = 0x2,
 109    barrier_atomic = 0x4,
 110    barrier_shared = 0x8,
 111    barrier_count = 4,
 112 };
 113
 114 enum fp_round {
 115    fp_round_ne = 0,
 116    fp_round_pi = 1,
 117    fp_round_ni = 2,
 118    fp_round_tz = 3,
 119 };
 120
 121 enum fp_denorm {
 122    /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
 123     * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
 124    fp_denorm_flush = 0x0,
 125    fp_denorm_keep = 0x3,
 126 };
 127
 128 struct float_mode {
 129    /* matches encoding of the MODE register */
 130    union {
 131       struct {
 132           fp_round round32:2;
 133           fp_round round16_64:2;
 134           unsigned denorm32:2;
 135           unsigned denorm16_64:2;
 136       };
 137       uint8_t val = 0;
 138    };
 139    /* if false, optimizations which may remove infs/nan/-0.0 can be done */
 140    bool preserve_signed_zero_inf_nan32:1;
 141    bool preserve_signed_zero_inf_nan16_64:1;
 142    /* if false, optimizations which may remove denormal flushing can be done */
 143    bool must_flush_denorms32:1;
 144    bool must_flush_denorms16_64:1;
 145    bool care_about_round32:1;
 146    bool care_about_round16_64:1;
 147
 148    /* Returns true if instructions using the mode "other" can safely use the
 149     * current one instead. */
 150    bool canReplace(float_mode other) const noexcept {
 151       return val == other.val &&
 152              (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
 153              (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
 154              (must_flush_denorms32  || !other.must_flush_denorms32) &&
 155              (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
 156              (care_about_round32 || !other.care_about_round32) &&
 157              (care_about_round16_64 || !other.care_about_round16_64);
 158    }
 159 };
 160
 161 constexpr Format asVOP3(Format format) {
 162    return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
 163 };
 164
 165 enum class RegType {
 166    none = 0,
 167    sgpr,
 168    vgpr,
 169    linear_vgpr,
 170 };
 171
 172 struct RegClass {
 173
 174    enum RC : uint8_t {
 175       s1 = 1,
 176       s2 = 2,
 177       s3 = 3,
 178       s4 = 4,
 179       s6 = 6,
 180       s8 = 8,
 181       s16 = 16,
 182       v1 = s1 | (1 << 5),
 183       v2 = s2 | (1 << 5),
 184       v3 = s3 | (1 << 5),
 185       v4 = s4 | (1 << 5),
 186       v5 = 5  | (1 << 5),
 187       v6 = 6  | (1 << 5),
 188       v7 = 7  | (1 << 5),
 189       v8 = 8  | (1 << 5),
 190       /* these are used for WWM and spills to vgpr */
 191       v1_linear = v1 | (1 << 6),
 192       v2_linear = v2 | (1 << 6),
 193    };
 194
 195    RegClass() = default;
 196    constexpr RegClass(RC rc)
 197       : rc(rc) {}
 198    constexpr RegClass(RegType type, unsigned size)
 199       : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
 200
 201    constexpr operator RC() const { return rc; }
 202    explicit operator bool() = delete;
 203
 204    constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
 205    constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
 206    constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
 207    constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
 208
 209 private:
 210    RC rc;
 211 };
 212
 213 /* transitional helper expressions */
 214 static constexpr RegClass s1{RegClass::s1};
 215 static constexpr RegClass s2{RegClass::s2};
 216 static constexpr RegClass s3{RegClass::s3};
 217 static constexpr RegClass s4{RegClass::s4};
 218 static constexpr RegClass s8{RegClass::s8};
 219 static constexpr RegClass s16{RegClass::s16};
 220 static constexpr RegClass v1{RegClass::v1};
 221 static constexpr RegClass v2{RegClass::v2};
 222 static constexpr RegClass v3{RegClass::v3};
 223 static constexpr RegClass v4{RegClass::v4};
 224 static constexpr RegClass v5{RegClass::v5};
 225 static constexpr RegClass v6{RegClass::v6};
 226 static constexpr RegClass v7{RegClass::v7};
 227 static constexpr RegClass v8{RegClass::v8};
 228
 229 /**
 230  * Temp Class
 231  * Each temporary virtual register has a
 232  * register class (i.e. size and type)
 233  * and SSA id.
 234  */
 235 struct Temp {
 236    Temp() = default;
 237    constexpr Temp(uint32_t id, RegClass cls) noexcept
 238       : id_(id), reg_class(cls) {}
 239
 240    constexpr uint32_t id() const noexcept { return id_; }
 241    constexpr RegClass regClass() const noexcept { return reg_class; }
 242
 243    constexpr unsigned size() const noexcept { return reg_class.size(); }
 244    constexpr RegType type() const noexcept { return reg_class.type(); }
 245    constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
 246
 247    constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
 248    constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
 249    constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
 250
 251 private:
 252    uint32_t id_:24;
 253    RegClass reg_class;
 254 };
 255
 256 /**
 257  * PhysReg
 258  * Represents the physical register for each
 259  * Operand and Definition.
 260  */
 261 struct PhysReg {
 262    constexpr PhysReg() = default;
 263    explicit constexpr PhysReg(unsigned r) : reg(r) {}
 264    constexpr operator unsigned() const { return reg; }
 265
 266    uint16_t reg = 0;
 267 };
 268
 269 /* helper expressions for special registers */
 270 static constexpr PhysReg m0{124};
 271 static constexpr PhysReg vcc{106};
 272 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
 273 static constexpr PhysReg exec{126};
 274 static constexpr PhysReg exec_lo{126};
 275 static constexpr PhysReg exec_hi{127};
 276 static constexpr PhysReg scc{253};
 277
 278 /**
 279  * Operand Class
 280  * Initially, each Operand refers to either
 281  * a temporary virtual register
 282  * or to a constant value
 283  * Temporary registers get mapped to physical register during RA
 284  * Constant values are inlined into the instruction sequence.
 285  */
 286 class Operand final
 287 {
 288 public:
 289    constexpr Operand()
 290       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
 291         isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
 292
 293    explicit Operand(Temp r) noexcept
 294    {
 295       data_.temp = r;
 296       if (r.id()) {
 297          isTemp_ = true;
 298       } else {
 299          isUndef_ = true;
 300          setFixed(PhysReg{128});
 301       }
 302    };
 303    explicit Operand(uint32_t v, bool is64bit = false) noexcept
 304    {
 305       data_.i = v;
 306       isConstant_ = true;
 307       is64BitConst_ = is64bit;
 308       if (v <= 64)
 309          setFixed(PhysReg{128 + v});
 310       else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
 311          setFixed(PhysReg{192 - v});
 312       else if (v == 0x3f000000) /* 0.5 */
 313          setFixed(PhysReg{240});
 314       else if (v == 0xbf000000) /* -0.5 */
 315          setFixed(PhysReg{241});
 316       else if (v == 0x3f800000) /* 1.0 */
 317          setFixed(PhysReg{242});
 318       else if (v == 0xbf800000) /* -1.0 */
 319          setFixed(PhysReg{243});
 320       else if (v == 0x40000000) /* 2.0 */
 321          setFixed(PhysReg{244});
 322       else if (v == 0xc0000000) /* -2.0 */
 323          setFixed(PhysReg{245});
 324       else if (v == 0x40800000) /* 4.0 */
 325          setFixed(PhysReg{246});
 326       else if (v == 0xc0800000) /* -4.0 */
 327          setFixed(PhysReg{247});
 328       else { /* Literal Constant */
 329          assert(!is64bit && "attempt to create a 64-bit literal constant");
 330          setFixed(PhysReg{255});
 331       }
 332    };
 333    explicit Operand(uint64_t v) noexcept
 334    {
 335       isConstant_ = true;
 336       is64BitConst_ = true;
 337       if (v <= 64) {
 338          data_.i = (uint32_t) v;
 339          setFixed(PhysReg{128 + (uint32_t) v});
 340       } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
 341          data_.i = (uint32_t) v;
 342          setFixed(PhysReg{192 - (uint32_t) v});
 343       } else if (v == 0x3FE0000000000000) { /* 0.5 */
 344          data_.i = 0x3f000000;
 345          setFixed(PhysReg{240});
 346       } else if (v == 0xBFE0000000000000) { /* -0.5 */
 347          data_.i = 0xbf000000;
 348          setFixed(PhysReg{241});
 349       } else if (v == 0x3FF0000000000000) { /* 1.0 */
 350          data_.i = 0x3f800000;
 351          setFixed(PhysReg{242});
 352       } else if (v == 0xBFF0000000000000) { /* -1.0 */
 353          data_.i = 0xbf800000;
 354          setFixed(PhysReg{243});
 355       } else if (v == 0x4000000000000000) { /* 2.0 */
 356          data_.i = 0x40000000;
 357          setFixed(PhysReg{244});
 358       } else if (v == 0xC000000000000000) { /* -2.0 */
 359          data_.i = 0xc0000000;
 360          setFixed(PhysReg{245});
 361       } else if (v == 0x4010000000000000) { /* 4.0 */
 362          data_.i = 0x40800000;
 363          setFixed(PhysReg{246});
 364       } else if (v == 0xC010000000000000) { /* -4.0 */
 365          data_.i = 0xc0800000;
 366          setFixed(PhysReg{247});
 367       } else { /* Literal Constant: we don't know if it is a long or double.*/
 368          isConstant_ = 0;
 369          assert(false && "attempt to create a 64-bit literal constant");
 370       }
 371    };
 372    explicit Operand(RegClass type) noexcept
 373    {
 374       isUndef_ = true;
 375       data_.temp = Temp(0, type);
 376       setFixed(PhysReg{128});
 377    };
 378    explicit Operand(PhysReg reg, RegClass type) noexcept
 379    {
 380       data_.temp = Temp(0, type);
 381       setFixed(reg);
 382    }
 383
 384    constexpr bool isTemp() const noexcept
 385    {
 386       return isTemp_;
 387    }
 388
 389    constexpr void setTemp(Temp t) noexcept {
 390       assert(!isConstant_);
 391       isTemp_ = true;
 392       data_.temp = t;
 393    }
 394
 395    constexpr Temp getTemp() const noexcept
 396    {
 397       return data_.temp;
 398    }
 399
 400    constexpr uint32_t tempId() const noexcept
 401    {
 402       return data_.temp.id();
 403    }
 404
 405    constexpr bool hasRegClass() const noexcept
 406    {
 407       return isTemp() || isUndefined();
 408    }
 409
 410    constexpr RegClass regClass() const noexcept
 411    {
 412       return data_.temp.regClass();
 413    }
 414
 415    constexpr unsigned size() const noexcept
 416    {
 417       if (isConstant())
 418          return is64BitConst_ ? 2 : 1;
 419       else
 420          return data_.temp.size();
 421    }
 422
 423    constexpr bool isFixed() const noexcept
 424    {
 425       return isFixed_;
 426    }
 427
 428    constexpr PhysReg physReg() const noexcept
 429    {
 430       return reg_;
 431    }
 432
 433    constexpr void setFixed(PhysReg reg) noexcept
 434    {
 435       isFixed_ = reg != unsigned(-1);
 436       reg_ = reg;
 437    }
 438
 439    constexpr bool isConstant() const noexcept
 440    {
 441       return isConstant_;
 442    }
 443
 444    constexpr bool isLiteral() const noexcept
 445    {
 446       return isConstant() && reg_ == 255;
 447    }
 448
 449    constexpr bool isUndefined() const noexcept
 450    {
 451       return isUndef_;
 452    }
 453
 454    constexpr uint32_t constantValue() const noexcept
 455    {
 456       return data_.i;
 457    }
 458
 459    constexpr bool constantEquals(uint32_t cmp) const noexcept
 460    {
 461       return isConstant() && constantValue() == cmp;
 462    }
 463
 464    constexpr void setKill(bool flag) noexcept
 465    {
 466       isKill_ = flag;
 467       if (!flag)
 468          setFirstKill(false);
 469    }
 470
 471    constexpr bool isKill() const noexcept
 472    {
 473       return isKill_ || isFirstKill();
 474    }
 475
 476    constexpr void setFirstKill(bool flag) noexcept
 477    {
 478       isFirstKill_ = flag;
 479       if (flag)
 480          setKill(flag);
 481    }
 482
 483    /* When there are multiple operands killing the same temporary,
 484     * isFirstKill() is only returns true for the first one. */
 485    constexpr bool isFirstKill() const noexcept
 486    {
 487       return isFirstKill_;
 488    }
 489
 490 private:
 491    union {
 492       uint32_t i;
 493       float f;
 494       Temp temp = Temp(0, s1);
 495    } data_;
 496    PhysReg reg_;
 497    union {
 498       struct {
 499          uint8_t isTemp_:1;
 500          uint8_t isFixed_:1;
 501          uint8_t isConstant_:1;
 502          uint8_t isKill_:1;
 503          uint8_t isUndef_:1;
 504          uint8_t isFirstKill_:1;
 505          uint8_t is64BitConst_:1;
 506       };
 507       /* can't initialize bit-fields in c++11, so work around using a union */
 508       uint8_t control_ = 0;
 509    };
 510 };
 511
 512 /**
 513  * Definition Class
 514  * Definitions are the results of Instructions
 515  * and refer to temporary virtual registers
 516  * which are later mapped to physical registers
 517  */
 518 class Definition final
 519 {
 520 public:
 521    constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
 522    Definition(uint32_t index, RegClass type) noexcept
 523       : temp(index, type) {}
 524    explicit Definition(Temp tmp) noexcept
 525       : temp(tmp) {}
 526    Definition(PhysReg reg, RegClass type) noexcept
 527       : temp(Temp(0, type))
 528    {
 529       setFixed(reg);
 530    }
 531    Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
 532       : temp(Temp(tmpId, type))
 533    {
 534       setFixed(reg);
 535    }
 536
 537    constexpr bool isTemp() const noexcept
 538    {
 539       return tempId() > 0;
 540    }
 541
 542    constexpr Temp getTemp() const noexcept
 543    {
 544       return temp;
 545    }
 546
 547    constexpr uint32_t tempId() const noexcept
 548    {
 549       return temp.id();
 550    }
 551
 552    constexpr void setTemp(Temp t) noexcept {
 553       temp = t;
 554    }
 555
 556    constexpr RegClass regClass() const noexcept
 557    {
 558       return temp.regClass();
 559    }
 560
 561    constexpr unsigned size() const noexcept
 562    {
 563       return temp.size();
 564    }
 565
 566    constexpr bool isFixed() const noexcept
 567    {
 568       return isFixed_;
 569    }
 570
 571    constexpr PhysReg physReg() const noexcept
 572    {
 573       return reg_;
 574    }
 575
 576    constexpr void setFixed(PhysReg reg) noexcept
 577    {
 578       isFixed_ = 1;
 579       reg_ = reg;
 580    }
 581
 582    constexpr void setHint(PhysReg reg) noexcept
 583    {
 584       hasHint_ = 1;
 585       reg_ = reg;
 586    }
 587
 588    constexpr bool hasHint() const noexcept
 589    {
 590       return hasHint_;
 591    }
 592
 593    constexpr void setKill(bool flag) noexcept
 594    {
 595       isKill_ = flag;
 596    }
 597
 598    constexpr bool isKill() const noexcept
 599    {
 600       return isKill_;
 601    }
 602
 603 private:
 604    Temp temp = Temp(0, s1);
 605    PhysReg reg_;
 606    union {
 607       struct {
 608          uint8_t isFixed_:1;
 609          uint8_t hasHint_:1;
 610          uint8_t isKill_:1;
 611       };
 612       /* can't initialize bit-fields in c++11, so work around using a union */
 613       uint8_t control_ = 0;
 614    };
 615 };
 616
 617 class Block;
 618
 619 struct Instruction {
 620    aco_opcode opcode;
 621    Format format;
 622    uint32_t pass_flags;
 623
 624    aco::span<Operand> operands;
 625    aco::span<Definition> definitions;
 626
 627    constexpr bool isVALU() const noexcept
 628    {
 629       return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
 630           || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
 631           || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
 632           || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
 633           || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
 634           || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
 635    }
 636
 637    constexpr bool isSALU() const noexcept
 638    {
 639       return format == Format::SOP1 ||
 640              format == Format::SOP2 ||
 641              format == Format::SOPC ||
 642              format == Format::SOPK ||
 643              format == Format::SOPP;
 644    }
 645
 646    constexpr bool isVMEM() const noexcept
 647    {
 648       return format == Format::MTBUF ||
 649              format == Format::MUBUF ||
 650              format == Format::MIMG;
 651    }
 652
 653    constexpr bool isDPP() const noexcept
 654    {
 655       return (uint16_t) format & (uint16_t) Format::DPP;
 656    }
 657
 658    constexpr bool isVOP3() const noexcept
 659    {
 660       return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
 661              ((uint16_t) format & (uint16_t) Format::VOP3B) ||
 662              format == Format::VOP3P;
 663    }
 664
 665    constexpr bool isSDWA() const noexcept
 666    {
 667       return (uint16_t) format & (uint16_t) Format::SDWA;
 668    }
 669
 670    constexpr bool isFlatOrGlobal() const noexcept
 671    {
 672       return format == Format::FLAT || format == Format::GLOBAL;
 673    }
 674
 675    constexpr bool usesModifiers() const noexcept;
 676
 677    constexpr bool reads_exec() const noexcept
 678    {
 679       for (const Operand& op : operands) {
 680          if (op.isFixed() && op.physReg() == exec)
 681             return true;
 682       }
 683       return false;
 684    }
 685 };
 686
 687 struct SOPK_instruction : public Instruction {
 688    uint16_t imm;
 689 };
 690
 691 struct SOPP_instruction : public Instruction {
 692    uint32_t imm;
 693    int block;
 694 };
 695
 696 struct SOPC_instruction : public Instruction {
 697 };
 698
 699 struct SOP1_instruction : public Instruction {
 700 };
 701
 702 struct SOP2_instruction : public Instruction {
 703 };
 704
 705 /**
 706  * Scalar Memory Format:
 707  * For s_(buffer_)load_dword*:
 708  * Operand(0): SBASE - SGPR-pair which provides base address
 709  * Operand(1): Offset - immediate (un)signed offset or SGPR
 710  * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 711  * Operand(n-1): SOffset - SGPR offset (Vega only)
 712  *
 713  * Having no operands is also valid for instructions such as s_dcache_inv.
 714  *
 715  */
 716 struct SMEM_instruction : public Instruction {
 717    bool glc : 1; /* VI+: globally coherent */
 718    bool dlc : 1; /* NAVI: device level coherent */
 719    bool nv : 1; /* VEGA only: Non-volatile */
 720    bool can_reorder : 1;
 721    bool disable_wqm : 1;
 722    barrier_interaction barrier;
 723 };
 724
 725 struct VOP1_instruction : public Instruction {
 726 };
 727
 728 struct VOP2_instruction : public Instruction {
 729 };
 730
 731 struct VOPC_instruction : public Instruction {
 732 };
 733
 734 struct VOP3A_instruction : public Instruction {
 735    bool abs[3];
 736    bool neg[3];
 737    uint8_t opsel : 4;
 738    uint8_t omod : 2;
 739    bool clamp : 1;
 740 };
 741
 742 /**
 743  * Data Parallel Primitives Format:
 744  * This format can be used for VOP1, VOP2 or VOPC instructions.
 745  * The swizzle applies to the src0 operand.
 746  *
 747  */
 748 struct DPP_instruction : public Instruction {
 749    bool abs[2];
 750    bool neg[2];
 751    uint16_t dpp_ctrl;
 752    uint8_t row_mask : 4;
 753    uint8_t bank_mask : 4;
 754    bool bound_ctrl : 1;
 755 };
 756
 757 struct Interp_instruction : public Instruction {
 758    uint8_t attribute;
 759    uint8_t component;
 760 };
 761
 762 /**
 763  * Local and Global Data Sharing instructions
 764  * Operand(0): ADDR - VGPR which supplies the address.
 765  * Operand(1): DATA0 - First data VGPR.
 766  * Operand(2): DATA1 - Second data VGPR.
 767  * Operand(n-1): M0 - LDS size.
 768  * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
 769  *
 770  */
 771 struct DS_instruction : public Instruction {
 772    int16_t offset0;
 773    int8_t offset1;
 774    bool gds;
 775 };
 776
 777 /**
 778  * Vector Memory Untyped-buffer Instructions
 779  * Operand(0): VADDR - Address source. Can carry an index and/or offset
 780  * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 781  * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 782  * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 783  *
 784  */
 785 struct MUBUF_instruction : public Instruction {
 786    uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
 787    bool offen : 1; /* Supply an offset from VGPR (VADDR) */
 788    bool idxen : 1; /* Supply an index from VGPR (VADDR) */
 789    bool glc : 1; /* globally coherent */
 790    bool dlc : 1; /* NAVI: device level coherent */
 791    bool slc : 1; /* system level coherent */
 792    bool tfe : 1; /* texture fail enable */
 793    bool lds : 1; /* Return read-data to LDS instead of VGPRs */
 794    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 795    bool can_reorder : 1;
 796    barrier_interaction barrier;
 797 };
 798
 799 /**
 800  * Vector Memory Typed-buffer Instructions
 801  * Operand(0): VADDR - Address source. Can carry an index and/or offset
 802  * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 803  * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 804  * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 805  *
 806  */
 807 struct MTBUF_instruction : public Instruction {
 808    uint16_t offset; /* Unsigned byte offset - 12 bit */
 809    uint8_t dfmt : 4; /* Data Format of data in memory buffer */
 810    uint8_t nfmt : 3; /* Numeric format of data in memory */
 811    bool offen : 1; /* Supply an offset from VGPR (VADDR) */
 812    bool idxen : 1; /* Supply an index from VGPR (VADDR) */
 813    bool glc : 1; /* globally coherent */
 814    bool dlc : 1; /* NAVI: device level coherent */
 815    bool slc : 1; /* system level coherent */
 816    bool tfe : 1; /* texture fail enable */
 817    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 818    bool can_reorder : 1;
 819    barrier_interaction barrier;
 820 };
 821
 822 /**
 823  * Vector Memory Image Instructions
 824  * Operand(0): VADDR - Address source. Can carry an offset or an index.
 825  * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
 826  * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
 827  * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
 828  *
 829  */
 830 struct MIMG_instruction : public Instruction {
 831    uint8_t dmask; /* Data VGPR enable mask */
 832    uint8_t dim : 3; /* NAVI: dimensionality */
 833    bool unrm : 1; /* Force address to be un-normalized */
 834    bool dlc : 1; /* NAVI: device level coherent */
 835    bool glc : 1; /* globally coherent */
 836    bool slc : 1; /* system level coherent */
 837    bool tfe : 1; /* texture fail enable */
 838    bool da : 1; /* declare an array */
 839    bool lwe : 1; /* Force data to be un-normalized */
 840    bool r128 : 1; /* NAVI: Texture resource size */
 841    bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
 842    bool d16 : 1; /* Convert 32-bit data to 16-bit data */
 843    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 844    bool can_reorder : 1;
 845    barrier_interaction barrier;
 846 };
 847
 848 /**
 849  * Flat/Scratch/Global Instructions
 850  * Operand(0): ADDR
 851  * Operand(1): SADDR
 852  * Operand(2) / Definition(0): DATA/VDST
 853  *
 854  */
 855 struct FLAT_instruction : public Instruction {
 856    uint16_t offset; /* Vega/Navi only */
 857    bool slc : 1; /* system level coherent */
 858    bool glc : 1; /* globally coherent */
 859    bool dlc : 1; /* NAVI: device level coherent */
 860    bool lds : 1;
 861    bool nv : 1;
 862    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 863    bool can_reorder : 1;
 864    barrier_interaction barrier;
 865 };
 866
 867 struct Export_instruction : public Instruction {
 868    uint8_t enabled_mask;
 869    uint8_t dest;
 870    bool compressed : 1;
 871    bool done : 1;
 872    bool valid_mask : 1;
 873 };
 874
 875 struct Pseudo_instruction : public Instruction {
 876    bool tmp_in_scc;
 877    PhysReg scratch_sgpr; /* might not be valid if it's not needed */
 878 };
 879
 880 struct Pseudo_branch_instruction : public Instruction {
 881    /* target[0] is the block index of the branch target.
 882     * For conditional branches, target[1] contains the fall-through alternative.
 883     * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
 884     */
 885    uint32_t target[2];
 886 };
 887
 888 struct Pseudo_barrier_instruction : public Instruction {
 889 };
 890
 891 enum ReduceOp {
 892    iadd32, iadd64,
 893    imul32, imul64,
 894    fadd32, fadd64,
 895    fmul32, fmul64,
 896    imin32, imin64,
 897    imax32, imax64,
 898    umin32, umin64,
 899    umax32, umax64,
 900    fmin32, fmin64,
 901    fmax32, fmax64,
 902    iand32, iand64,
 903    ior32, ior64,
 904    ixor32, ixor64,
 905    gfx10_wave64_bpermute
 906 };
 907
 908 /**
 909  * Subgroup Reduction Instructions, everything except for the data to be
 910  * reduced and the result as inserted by setup_reduce_temp().
 911  * Operand(0): data to be reduced
 912  * Operand(1): reduce temporary
 913  * Operand(2): vector temporary
 914  * Definition(0): result
 915  * Definition(1): scalar temporary
 916  * Definition(2): scalar identity temporary (not used to store identity on GFX10)
 917  * Definition(3): scc clobber
 918  * Definition(4): vcc clobber
 919  *
 920  */
 921 struct Pseudo_reduction_instruction : public Instruction {
 922    ReduceOp reduce_op;
 923    unsigned cluster_size; // must be 0 for scans
 924 };
 925
 926 struct instr_deleter_functor {
 927    void operator()(void* p) {
 928       free(p);
 929    }
 930 };
 931
 932 template<typename T>
 933 using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
 934
 935 template<typename T>
 936 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
 937 {
 938    std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
 939    char *data = (char*) calloc(1, size);
 940    T* inst = (T*) data;
 941
 942    inst->opcode = opcode;
 943    inst->format = format;
 944
 945    uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
 946    inst->operands = aco::span<Operand>(operands_offset, num_operands);
 947    uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
 948    inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
 949
 950    return inst;
 951 }
 952
 953 constexpr bool Instruction::usesModifiers() const noexcept
 954 {
 955    if (isDPP() || isSDWA())
 956       return true;
 957    if (!isVOP3())
 958       return false;
 959    const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
 960    for (unsigned i = 0; i < operands.size(); i++) {
 961       if (vop3->abs[i] || vop3->neg[i])
 962          return true;
 963    }
 964    return vop3->opsel || vop3->clamp || vop3->omod;
 965 }
 966
 967 constexpr bool is_phi(Instruction* instr)
 968 {
 969    return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
 970 }
 971
 972 static inline bool is_phi(aco_ptr<Instruction>& instr)
 973 {
 974    return is_phi(instr.get());
 975 }
 976
 977 constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
 978 {
 979    switch (instr->format) {
 980    case Format::SMEM:
 981       return static_cast<SMEM_instruction*>(instr)->barrier;
 982    case Format::MUBUF:
 983       return static_cast<MUBUF_instruction*>(instr)->barrier;
 984    case Format::MIMG:
 985       return static_cast<MIMG_instruction*>(instr)->barrier;
 986    case Format::FLAT:
 987    case Format::GLOBAL:
 988    case Format::SCRATCH:
 989       return static_cast<FLAT_instruction*>(instr)->barrier;
 990    case Format::DS:
 991       return barrier_shared;
 992    default:
 993       return barrier_none;
 994    }
 995 }
 996
 997 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
 998
 999 enum block_kind {
1000    /* uniform indicates that leaving this block,
1001     * all actives lanes stay active */
1002    block_kind_uniform = 1 << 0,
1003    block_kind_top_level = 1 << 1,
1004    block_kind_loop_preheader = 1 << 2,
1005    block_kind_loop_header = 1 << 3,
1006    block_kind_loop_exit = 1 << 4,
1007    block_kind_continue = 1 << 5,
1008    block_kind_break = 1 << 6,
1009    block_kind_continue_or_break = 1 << 7,
1010    block_kind_discard = 1 << 8,
1011    block_kind_branch = 1 << 9,
1012    block_kind_merge = 1 << 10,
1013    block_kind_invert = 1 << 11,
1014    block_kind_uses_discard_if = 1 << 12,
1015    block_kind_needs_lowering = 1 << 13,
1016    block_kind_uses_demote = 1 << 14,
1017 };
1018
1019
1020 struct RegisterDemand {
1021    constexpr RegisterDemand() = default;
1022    constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1023       : vgpr{v}, sgpr{s} {}
1024    int16_t vgpr = 0;
1025    int16_t sgpr = 0;
1026
1027    constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1028       return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1029    }
1030
1031    constexpr bool exceeds(const RegisterDemand other) const noexcept {
1032       return vgpr > other.vgpr || sgpr > other.sgpr;
1033    }
1034
1035    constexpr RegisterDemand operator+(const Temp t) const noexcept {
1036       if (t.type() == RegType::sgpr)
1037          return RegisterDemand( vgpr, sgpr + t.size() );
1038       else
1039          return RegisterDemand( vgpr + t.size(), sgpr );
1040    }
1041
1042    constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1043       return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1044    }
1045
1046    constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1047       return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1048    }
1049
1050    constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1051       vgpr += other.vgpr;
1052       sgpr += other.sgpr;
1053       return *this;
1054    }
1055
1056    constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1057       vgpr -= other.vgpr;
1058       sgpr -= other.sgpr;
1059       return *this;
1060    }
1061
1062    constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1063       if (t.type() == RegType::sgpr)
1064          sgpr += t.size();
1065       else
1066          vgpr += t.size();
1067       return *this;
1068    }
1069
1070    constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1071       if (t.type() == RegType::sgpr)
1072          sgpr -= t.size();
1073       else
1074          vgpr -= t.size();
1075       return *this;
1076    }
1077
1078    constexpr void update(const RegisterDemand other) noexcept {
1079       vgpr = std::max(vgpr, other.vgpr);
1080       sgpr = std::max(sgpr, other.sgpr);
1081    }
1082
1083 };
1084
1085 /* CFG */
1086 struct Block {
1087    float_mode fp_mode;
1088    unsigned index;
1089    unsigned offset = 0;
1090    std::vector<aco_ptr<Instruction>> instructions;
1091    std::vector<unsigned> logical_preds;
1092    std::vector<unsigned> linear_preds;
1093    std::vector<unsigned> logical_succs;
1094    std::vector<unsigned> linear_succs;
1095    RegisterDemand register_demand = RegisterDemand();
1096    uint16_t loop_nest_depth = 0;
1097    uint16_t kind = 0;
1098    int logical_idom = -1;
1099    int linear_idom = -1;
1100    Temp live_out_exec = Temp();
1101
1102    /* this information is needed for predecessors to blocks with phis when
1103     * moving out of ssa */
1104    bool scc_live_out = false;
1105    PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
1106
1107    Block(unsigned idx) : index(idx) {}
1108    Block() : index(0) {}
1109 };
1110
1111 using Stage = uint16_t;
1112
1113 /* software stages */
1114 static constexpr Stage sw_vs = 1 << 0;
1115 static constexpr Stage sw_gs = 1 << 1;
1116 static constexpr Stage sw_tcs = 1 << 2;
1117 static constexpr Stage sw_tes = 1 << 3;
1118 static constexpr Stage sw_fs = 1 << 4;
1119 static constexpr Stage sw_cs = 1 << 5;
1120 static constexpr Stage sw_mask = 0x3f;
1121
1122 /* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
1123 static constexpr Stage hw_vs = 1 << 6;
1124 static constexpr Stage hw_es = 1 << 7; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
1125 static constexpr Stage hw_gs = 1 << 8;
1126 static constexpr Stage hw_ls = 1 << 9; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
1127 static constexpr Stage hw_hs = 1 << 10;
1128 static constexpr Stage hw_fs = 1 << 11;
1129 static constexpr Stage hw_cs = 1 << 12;
1130 static constexpr Stage hw_mask = 0x7f << 6;
1131
1132 /* possible settings of Program::stage */
1133 static constexpr Stage vertex_vs = sw_vs | hw_vs;
1134 static constexpr Stage fragment_fs = sw_fs | hw_fs;
1135 static constexpr Stage compute_cs = sw_cs | hw_cs;
1136 static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
1137 /* GFX10/NGG */
1138 static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
1139 static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1140 static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1141 static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1142 /* GFX9 (and GFX10 if NGG isn't used) */
1143 static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1144 static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1145 static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1146 /* pre-GFX9 */
1147 static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
1148 static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
1149 static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
1150 static constexpr Stage tess_eval_es = sw_tes | hw_gs; /* tesselation evaluation before geometry */
1151 static constexpr Stage geometry_gs = sw_gs | hw_gs;
1152
1153 class Program final {
1154 public:
1155    float_mode next_fp_mode;
1156    std::vector<Block> blocks;
1157    RegisterDemand max_reg_demand = RegisterDemand();
1158    uint16_t num_waves = 0;
1159    uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1160    ac_shader_config* config;
1161    struct radv_shader_info *info;
1162    enum chip_class chip_class;
1163    enum radeon_family family;
1164    unsigned wave_size;
1165    RegClass lane_mask;
1166    Stage stage; /* Stage */
1167    bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1168    bool needs_wqm = false; /* there exists a p_wqm instruction */
1169    bool wb_smem_l1_on_end = false;
1170
1171    std::vector<uint8_t> constant_data;
1172    Temp private_segment_buffer;
1173    Temp scratch_offset;
1174
1175    uint16_t min_waves = 0;
1176    uint16_t lds_alloc_granule;
1177    uint32_t lds_limit; /* in bytes */
1178    uint16_t vgpr_limit;
1179    uint16_t sgpr_limit;
1180    uint16_t physical_sgprs;
1181    uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1182    uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1183
1184    bool needs_vcc = false;
1185    bool needs_xnack_mask = false;
1186    bool needs_flat_scr = false;
1187
1188    uint32_t allocateId()
1189    {
1190       assert(allocationID <= 16777215);
1191       return allocationID++;
1192    }
1193
1194    uint32_t peekAllocationId()
1195    {
1196       return allocationID;
1197    }
1198
1199    void setAllocationId(uint32_t id)
1200    {
1201       allocationID = id;
1202    }
1203
1204    Block* create_and_insert_block() {
1205       blocks.emplace_back(blocks.size());
1206       blocks.back().fp_mode = next_fp_mode;
1207       return &blocks.back();
1208    }
1209
1210    Block* insert_block(Block&& block) {
1211       block.index = blocks.size();
1212       block.fp_mode = next_fp_mode;
1213       blocks.emplace_back(std::move(block));
1214       return &blocks.back();
1215    }
1216
1217 private:
1218    uint32_t allocationID = 1;
1219 };
1220
1221 struct live {
1222    /* live temps out per block */
1223    std::vector<std::set<Temp>> live_out;
1224    /* register demand (sgpr/vgpr) per instruction per block */
1225    std::vector<std::vector<RegisterDemand>> register_demand;
1226 };
1227
1228 void select_program(Program *program,
1229                     unsigned shader_count,
1230                     struct nir_shader *const *shaders,
1231                     ac_shader_config* config,
1232                     struct radv_shader_args *args);
1233
1234 void lower_wqm(Program* program, live& live_vars,
1235                const struct radv_nir_compiler_options *options);
1236 void lower_bool_phis(Program* program);
1237 void calc_min_waves(Program* program);
1238 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1239 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1240 std::vector<uint16_t> dead_code_analysis(Program *program);
1241 void dominator_tree(Program* program);
1242 void insert_exec_mask(Program *program);
1243 void value_numbering(Program* program);
1244 void optimize(Program* program);
1245 void setup_reduce_temp(Program* program);
1246 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1247 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1248 void ssa_elimination(Program* program);
1249 void lower_to_hw_instr(Program* program);
1250 void schedule_program(Program* program, live& live_vars);
1251 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1252 void insert_wait_states(Program* program);
1253 void insert_NOPs(Program* program);
1254 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1255 void print_asm(Program *program, std::vector<uint32_t>& binary,
1256                unsigned exec_size, std::ostream& out);
1257 void validate(Program* program, FILE *output);
1258 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1259 #ifndef NDEBUG
1260 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1261 #else
1262 #define perfwarn(program, cond, msg, ...) do {} while(0)
1263 #endif
1264
1265 void aco_print_instr(Instruction *instr, FILE *output);
1266 void aco_print_program(Program *program, FILE *output);
1267
1268 /* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
1269 uint16_t get_extra_sgprs(Program *program);
1270
1271 /* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
1272 uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
1273 uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);
1274
1275 /* return number of addressable sgprs/vgprs for max_waves */
1276 uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
1277 uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1278
1279 typedef struct {
1280    const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
1281    const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1282    const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1283    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1284    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1285    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
1286    const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1287    const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1288 } Info;
1289
1290 extern const Info instr_info;
1291
1292 }
1293
1294 #endif /* ACO_IR_H */
1295