aco: add new addr64 bit to MUBUF instructions on GFX6-GFX7
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
45 extern uint64_t debug_flags;
46
/* Bitmask values for the global debug_flags variable declared above. */
enum {
   DEBUG_VALIDATE = 0x1,
   DEBUG_VALIDATE_RA = 0x2, /* RA = register allocation */
   DEBUG_PERFWARN = 0x4,
};
52
/**
 * Representation of the instruction's microcode encoding format
 * Note: Some Vector ALU Formats can be combined, such that:
 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
 *
 * (*) The same is applicable for VOP1 and VOPC instructions.
 */
enum class Format : std::uint16_t {
   /* Pseudo Instruction Format */
   PSEUDO = 0,
   /* Scalar ALU & Control Formats */
   SOP1 = 1,
   SOP2 = 2,
   SOPK = 3,
   SOPP = 4,
   SOPC = 5,
   /* Scalar Memory Format */
   SMEM = 6,
   /* LDS/GDS Format */
   DS = 8,
   /* Vector Memory Buffer Formats */
   MTBUF = 9,
   MUBUF = 10,
   /* Vector Memory Image Format */
   MIMG = 11,
   /* Export Format */
   EXP = 12,
   /* Flat Formats */
   FLAT = 13,
   GLOBAL = 14,
   SCRATCH = 15,

   PSEUDO_BRANCH = 16,
   PSEUDO_BARRIER = 17,
   PSEUDO_REDUCTION = 18,

   /* Vector ALU Formats: single bits so they can be OR'd with the base
    * encodings above (see the note at the top of this enum). */
   VOP1 = 1 << 8,
   VOP2 = 1 << 9,
   VOPC = 1 << 10,
   /* VOP3, VOP3A and VOP3B deliberately share one bit; they are
    * distinguished by opcode, not by format. */
   VOP3 = 1 << 11,
   VOP3A = 1 << 11,
   VOP3B = 1 << 11,
   VOP3P = 1 << 12,
   /* Vector Parameter Interpolation Format */
   VINTRP = 1 << 13,
   DPP = 1 << 14,
   SDWA = 1 << 15,
};
104
/* Bitmask describing which kinds of memory a memory instruction may
 * interact with; used to limit instruction reordering across barriers. */
enum barrier_interaction : uint8_t {
   barrier_none = 0,
   barrier_buffer = 0x1,
   barrier_image = 0x2,
   barrier_atomic = 0x4,
   barrier_shared = 0x8,
   /* number of barrier bits above */
   barrier_count = 4,
};
113
/* Floating-point rounding modes; values match the hardware MODE register
 * encoding (see float_mode below). */
enum fp_round {
   fp_round_ne = 0, /* nearest even */
   fp_round_pi = 1, /* toward +infinity */
   fp_round_ni = 2, /* toward -infinity */
   fp_round_tz = 3, /* toward zero */
};
120
/* Denormal handling modes; values match the hardware MODE register encoding. */
enum fp_denorm {
   /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
    * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
   fp_denorm_flush = 0x0,
   fp_denorm_keep = 0x3,
};
127
/* Per-block floating-point state: the hardware MODE register bits plus
 * flags describing which FP optimizations are legal. */
struct float_mode {
   /* matches encoding of the MODE register */
   union {
      struct {
         fp_round round32:2;
         fp_round round16_64:2;
         unsigned denorm32:2;
         unsigned denorm16_64:2;
      };
      uint8_t val = 0;
   };
   /* if false, optimizations which may remove infs/nan/-0.0 can be done */
   bool preserve_signed_zero_inf_nan32:1;
   bool preserve_signed_zero_inf_nan16_64:1;
   /* if false, optimizations which may remove denormal flushing can be done */
   bool must_flush_denorms32:1;
   bool must_flush_denorms16_64:1;
   bool care_about_round32:1;
   bool care_about_round16_64:1;

   /* Returns true if instructions using the mode "other" can safely use the
    * current one instead. That requires an identical MODE register value and
    * that this mode is at least as strict in every flag "other" sets. */
   bool canReplace(float_mode other) const noexcept {
      return val == other.val &&
             (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
             (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
             (must_flush_denorms32 || !other.must_flush_denorms32) &&
             (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
             (care_about_round32 || !other.care_about_round32) &&
             (care_about_round16_64 || !other.care_about_round16_64);
   }
};
160
161 constexpr Format asVOP3(Format format) {
162 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
163 };
164
/* The register file a temporary lives in. */
enum class RegType {
   none = 0,
   sgpr,
   vgpr,
   linear_vgpr, /* VGPR with linear (non-divergent) lifetime, e.g. for WWM */
};
171
/* Register class: size in dwords plus register type, packed into one byte.
 * Encoding: bits 0-4 = size, bit 5 = vgpr, bit 6 = linear vgpr. */
struct RegClass {

   enum RC : uint8_t {
      s1 = 1,
      s2 = 2,
      s3 = 3,
      s4 = 4,
      s6 = 6,
      s8 = 8,
      s16 = 16,
      v1 = s1 | (1 << 5),
      v2 = s2 | (1 << 5),
      v3 = s3 | (1 << 5),
      v4 = s4 | (1 << 5),
      v5 = 5 | (1 << 5),
      v6 = 6 | (1 << 5),
      v7 = 7 | (1 << 5),
      v8 = 8 | (1 << 5),
      /* these are used for WWM and spills to vgpr */
      v1_linear = v1 | (1 << 6),
      v2_linear = v2 | (1 << 6),
   };

   RegClass() = default;
   constexpr RegClass(RC rc)
      : rc(rc) {}
   constexpr RegClass(RegType type, unsigned size)
      : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}

   constexpr operator RC() const { return rc; }
   explicit operator bool() = delete;

   /* all values <= s16 have the vgpr bit clear, so they are sgprs */
   constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
   constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
   /* sgprs are always linear; vgprs only if the linear bit is set */
   constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
   constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }

private:
   RC rc;
};
212
213 /* transitional helper expressions */
214 static constexpr RegClass s1{RegClass::s1};
215 static constexpr RegClass s2{RegClass::s2};
216 static constexpr RegClass s3{RegClass::s3};
217 static constexpr RegClass s4{RegClass::s4};
218 static constexpr RegClass s8{RegClass::s8};
219 static constexpr RegClass s16{RegClass::s16};
220 static constexpr RegClass v1{RegClass::v1};
221 static constexpr RegClass v2{RegClass::v2};
222 static constexpr RegClass v3{RegClass::v3};
223 static constexpr RegClass v4{RegClass::v4};
224 static constexpr RegClass v5{RegClass::v5};
225 static constexpr RegClass v6{RegClass::v6};
226 static constexpr RegClass v7{RegClass::v7};
227 static constexpr RegClass v8{RegClass::v8};
228
/**
 * Temp Class
 * Each temporary virtual register has a
 * register class (i.e. size and type)
 * and SSA id.
 */
struct Temp {
   Temp() = default;
   constexpr Temp(uint32_t id, RegClass cls) noexcept
      : id_(id), reg_class(cls) {}

   constexpr uint32_t id() const noexcept { return id_; }
   constexpr RegClass regClass() const noexcept { return reg_class; }

   /* convenience forwarders to the register class */
   constexpr unsigned size() const noexcept { return reg_class.size(); }
   constexpr RegType type() const noexcept { return reg_class.type(); }
   constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }

   /* comparisons are by SSA id only; the register class is ignored */
   constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
   constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
   constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }

private:
   uint32_t id_:24; /* ids are limited to 24 bits so a Temp fits in 4 bytes */
   RegClass reg_class;
};
255
/**
 * PhysReg
 * Represents the physical register for each
 * Operand and Definition.
 */
struct PhysReg {
   constexpr PhysReg() = default;
   explicit constexpr PhysReg(unsigned r) : reg(r) {}
   constexpr operator unsigned() const { return reg; }

   uint16_t reg = 0;
};
268
/* helper expressions for special registers (SGPR encoding values) */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126}; /* same encoding as exec */
static constexpr PhysReg exec_hi{127};
static constexpr PhysReg scc{253};
277
278 /**
279 * Operand Class
280 * Initially, each Operand refers to either
281 * a temporary virtual register
282 * or to a constant value
283 * Temporary registers get mapped to physical register during RA
284 * Constant values are inlined into the instruction sequence.
285 */
class Operand final
{
public:
   /* default: an undefined operand fixed to the inline-constant-zero
    * register (128 encodes the inline constant 0) */
   constexpr Operand()
      : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
        isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}

   explicit Operand(Temp r) noexcept
   {
      data_.temp = r;
      if (r.id()) {
         isTemp_ = true;
      } else {
         /* id 0 means "no temporary": this is an undefined operand */
         isUndef_ = true;
         setFixed(PhysReg{128});
      }
   };
   /* 32-bit constant operand. The fixed register encodes the hardware's
    * inline constants: 128+v for integers 0..64, 193..208 for -1..-16,
    * 240..247 for the common float values, 255 for a literal that must be
    * emitted as an extra dword. */
   explicit Operand(uint32_t v, bool is64bit = false) noexcept
   {
      data_.i = v;
      isConstant_ = true;
      is64BitConst_ = is64bit;
      if (v <= 64)
         setFixed(PhysReg{128 + v});
      else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
         /* unsigned wrap-around: 192 - v yields 193..208 */
         setFixed(PhysReg{192 - v});
      else if (v == 0x3f000000) /* 0.5 */
         setFixed(PhysReg{240});
      else if (v == 0xbf000000) /* -0.5 */
         setFixed(PhysReg{241});
      else if (v == 0x3f800000) /* 1.0 */
         setFixed(PhysReg{242});
      else if (v == 0xbf800000) /* -1.0 */
         setFixed(PhysReg{243});
      else if (v == 0x40000000) /* 2.0 */
         setFixed(PhysReg{244});
      else if (v == 0xc0000000) /* -2.0 */
         setFixed(PhysReg{245});
      else if (v == 0x40800000) /* 4.0 */
         setFixed(PhysReg{246});
      else if (v == 0xc0800000) /* -4.0 */
         setFixed(PhysReg{247});
      else { /* Literal Constant */
         assert(!is64bit && "attempt to create a 64-bit literal constant");
         setFixed(PhysReg{255});
      }
   };
   /* 64-bit constant operand: only representable if the value maps to one of
    * the hardware's 64-bit inline constants (small integers or the listed
    * double bit-patterns); 64-bit literals do not exist. */
   explicit Operand(uint64_t v) noexcept
   {
      isConstant_ = true;
      is64BitConst_ = true;
      if (v <= 64) {
         data_.i = (uint32_t) v;
         setFixed(PhysReg{128 + (uint32_t) v});
      } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
         data_.i = (uint32_t) v;
         setFixed(PhysReg{192 - (uint32_t) v});
      } else if (v == 0x3FE0000000000000) { /* 0.5 */
         data_.i = 0x3f000000;
         setFixed(PhysReg{240});
      } else if (v == 0xBFE0000000000000) { /* -0.5 */
         data_.i = 0xbf000000;
         setFixed(PhysReg{241});
      } else if (v == 0x3FF0000000000000) { /* 1.0 */
         data_.i = 0x3f800000;
         setFixed(PhysReg{242});
      } else if (v == 0xBFF0000000000000) { /* -1.0 */
         data_.i = 0xbf800000;
         setFixed(PhysReg{243});
      } else if (v == 0x4000000000000000) { /* 2.0 */
         data_.i = 0x40000000;
         setFixed(PhysReg{244});
      } else if (v == 0xC000000000000000) { /* -2.0 */
         data_.i = 0xc0000000;
         setFixed(PhysReg{245});
      } else if (v == 0x4010000000000000) { /* 4.0 */
         data_.i = 0x40800000;
         setFixed(PhysReg{246});
      } else if (v == 0xC010000000000000) { /* -4.0 */
         data_.i = 0xc0800000;
         setFixed(PhysReg{247});
      } else { /* Literal Constant: we don't know if it is a long or double.*/
         isConstant_ = 0;
         assert(false && "attempt to create a 64-bit literal constant");
      }
   };
   /* undefined operand of a given register class */
   explicit Operand(RegClass type) noexcept
   {
      isUndef_ = true;
      data_.temp = Temp(0, type);
      setFixed(PhysReg{128});
   };
   /* fixed-register operand without an SSA temporary (e.g. exec, m0) */
   explicit Operand(PhysReg reg, RegClass type) noexcept
   {
      data_.temp = Temp(0, type);
      setFixed(reg);
   }

   constexpr bool isTemp() const noexcept
   {
      return isTemp_;
   }

   constexpr void setTemp(Temp t) noexcept {
      assert(!isConstant_);
      isTemp_ = true;
      data_.temp = t;
   }

   constexpr Temp getTemp() const noexcept
   {
      return data_.temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return data_.temp.id();
   }

   constexpr bool hasRegClass() const noexcept
   {
      return isTemp() || isUndefined();
   }

   constexpr RegClass regClass() const noexcept
   {
      return data_.temp.regClass();
   }

   /* size in dwords; for constants derived from the 64-bit flag */
   constexpr unsigned size() const noexcept
   {
      if (isConstant())
         return is64BitConst_ ? 2 : 1;
      else
         return data_.temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = reg != unsigned(-1);
      reg_ = reg;
   }

   constexpr bool isConstant() const noexcept
   {
      return isConstant_;
   }

   /* a literal is a constant that doesn't fit an inline encoding (reg 255) */
   constexpr bool isLiteral() const noexcept
   {
      return isConstant() && reg_ == 255;
   }

   constexpr bool isUndefined() const noexcept
   {
      return isUndef_;
   }

   constexpr uint32_t constantValue() const noexcept
   {
      return data_.i;
   }

   constexpr bool constantEquals(uint32_t cmp) const noexcept
   {
      return isConstant() && constantValue() == cmp;
   }

   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
      if (!flag)
         setFirstKill(false);
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_ || isFirstKill();
   }

   constexpr void setFirstKill(bool flag) noexcept
   {
      isFirstKill_ = flag;
      if (flag)
         setKill(flag);
   }

   /* When there are multiple operands killing the same temporary,
    * isFirstKill() is only returns true for the first one. */
   constexpr bool isFirstKill() const noexcept
   {
      return isFirstKill_;
   }

private:
   union {
      uint32_t i;
      float f;
      Temp temp = Temp(0, s1);
   } data_;
   PhysReg reg_;
   union {
      struct {
         uint8_t isTemp_:1;
         uint8_t isFixed_:1;
         uint8_t isConstant_:1;
         uint8_t isKill_:1;
         uint8_t isUndef_:1;
         uint8_t isFirstKill_:1;
         uint8_t is64BitConst_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
511
/**
 * Definition Class
 * Definitions are the results of Instructions
 * and refer to temporary virtual registers
 * which are later mapped to physical registers
 */
class Definition final
{
public:
   constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
   Definition(uint32_t index, RegClass type) noexcept
      : temp(index, type) {}
   explicit Definition(Temp tmp) noexcept
      : temp(tmp) {}
   /* definition pre-assigned to a physical register (SSA id 0) */
   Definition(PhysReg reg, RegClass type) noexcept
      : temp(Temp(0, type))
   {
      setFixed(reg);
   }
   Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
      : temp(Temp(tmpId, type))
   {
      setFixed(reg);
   }

   /* SSA id 0 is reserved for "no temporary" */
   constexpr bool isTemp() const noexcept
   {
      return tempId() > 0;
   }

   constexpr Temp getTemp() const noexcept
   {
      return temp;
   }

   constexpr uint32_t tempId() const noexcept
   {
      return temp.id();
   }

   constexpr void setTemp(Temp t) noexcept {
      temp = t;
   }

   constexpr RegClass regClass() const noexcept
   {
      return temp.regClass();
   }

   constexpr unsigned size() const noexcept
   {
      return temp.size();
   }

   constexpr bool isFixed() const noexcept
   {
      return isFixed_;
   }

   constexpr PhysReg physReg() const noexcept
   {
      return reg_;
   }

   constexpr void setFixed(PhysReg reg) noexcept
   {
      isFixed_ = 1;
      reg_ = reg;
   }

   /* a hint is a preferred register for RA; note it shares reg_ with
    * setFixed(), so a later setFixed() overwrites the hint register */
   constexpr void setHint(PhysReg reg) noexcept
   {
      hasHint_ = 1;
      reg_ = reg;
   }

   constexpr bool hasHint() const noexcept
   {
      return hasHint_;
   }

   constexpr void setKill(bool flag) noexcept
   {
      isKill_ = flag;
   }

   constexpr bool isKill() const noexcept
   {
      return isKill_;
   }

private:
   Temp temp = Temp(0, s1);
   PhysReg reg_;
   union {
      struct {
         uint8_t isFixed_:1;
         uint8_t hasHint_:1;
         uint8_t isKill_:1;
      };
      /* can't initialize bit-fields in c++11, so work around using a union */
      uint8_t control_ = 0;
   };
};
616
617 class Block;
618
/* Base class of all instructions; the operand/definition arrays live in the
 * same allocation directly after the most-derived struct
 * (see create_instruction()). */
struct Instruction {
   aco_opcode opcode;
   Format format;
   uint32_t pass_flags; /* scratch space for individual passes */

   aco::span<Operand> operands;
   aco::span<Definition> definitions;

   /* The VALU checks test single format bits because VOP1/VOP2/VOPC can be
    * combined with VOP3/DPP/SDWA bits (see the Format enum). */
   constexpr bool isVALU() const noexcept
   {
      return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
          || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
          || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
          || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
          || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
          || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
   }

   constexpr bool isSALU() const noexcept
   {
      return format == Format::SOP1 ||
             format == Format::SOP2 ||
             format == Format::SOPC ||
             format == Format::SOPK ||
             format == Format::SOPP;
   }

   /* buffer/image memory only; FLAT/GLOBAL/SCRATCH are tested separately */
   constexpr bool isVMEM() const noexcept
   {
      return format == Format::MTBUF ||
             format == Format::MUBUF ||
             format == Format::MIMG;
   }

   constexpr bool isDPP() const noexcept
   {
      return (uint16_t) format & (uint16_t) Format::DPP;
   }

   constexpr bool isVOP3() const noexcept
   {
      return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
             ((uint16_t) format & (uint16_t) Format::VOP3B) ||
             format == Format::VOP3P;
   }

   constexpr bool isSDWA() const noexcept
   {
      return (uint16_t) format & (uint16_t) Format::SDWA;
   }

   constexpr bool isFlatOrGlobal() const noexcept
   {
      return format == Format::FLAT || format == Format::GLOBAL;
   }

   constexpr bool usesModifiers() const noexcept;

   /* only detects operands explicitly fixed to the exec register */
   constexpr bool reads_exec() const noexcept
   {
      for (const Operand& op : operands) {
         if (op.isFixed() && op.physReg() == exec)
            return true;
      }
      return false;
   }
};
686
/* SOPK: scalar op with 16-bit immediate constant */
struct SOPK_instruction : public Instruction {
   uint16_t imm;
};

/* SOPP: scalar program-control op (branches, waitcnt, ...) */
struct SOPP_instruction : public Instruction {
   uint32_t imm;
   int block; /* branch target block index, if any */
};

struct SOPC_instruction : public Instruction {
};

struct SOP1_instruction : public Instruction {
};

struct SOP2_instruction : public Instruction {
};
704
/**
 * Scalar Memory Format:
 * For s_(buffer_)load_dword*:
 * Operand(0): SBASE - SGPR-pair which provides base address
 * Operand(1): Offset - immediate (un)signed offset or SGPR
 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 * Operand(n-1): SOffset - SGPR offset (Vega only)
 *
 * Having no operands is also valid for instructions such as s_dcache_inv.
 *
 */
struct SMEM_instruction : public Instruction {
   bool glc : 1; /* VI+: globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool nv : 1; /* VEGA only: Non-volatile */
   bool can_reorder : 1; /* whether the scheduler may move this instruction */
   bool disable_wqm : 1;
   barrier_interaction barrier;
};
724
/* plain VOP1/VOP2/VOPC encodings carry no extra fields */
struct VOP1_instruction : public Instruction {
};

struct VOP2_instruction : public Instruction {
};

struct VOPC_instruction : public Instruction {
};
733
/* VOP3 encoding: adds input/output modifiers to a VALU instruction */
struct VOP3A_instruction : public Instruction {
   bool abs[3]; /* per-operand absolute-value modifier */
   bool neg[3]; /* per-operand negate modifier */
   uint8_t opsel : 4;
   uint8_t omod : 2; /* output modifier */
   bool clamp : 1;
};
741
/**
 * Data Parallel Primitives Format:
 * This format can be used for VOP1, VOP2 or VOPC instructions.
 * The swizzle applies to the src0 operand.
 *
 */
struct DPP_instruction : public Instruction {
   bool abs[2];
   bool neg[2];
   uint16_t dpp_ctrl; /* lane-swizzle control */
   uint8_t row_mask : 4;
   uint8_t bank_mask : 4;
   bool bound_ctrl : 1;
};
756
/* VINTRP: vertex parameter interpolation */
struct Interp_instruction : public Instruction {
   uint8_t attribute;
   uint8_t component;
};
761
/**
 * Local and Global Data Sharing instructions
 * Operand(0): ADDR - VGPR which supplies the address.
 * Operand(1): DATA0 - First data VGPR.
 * Operand(2): DATA1 - Second data VGPR.
 * Operand(n-1): M0 - LDS size.
 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
 *
 */
struct DS_instruction : public Instruction {
   /* NOTE(review): offsets are signed here although the hardware fields look
    * like unsigned byte offsets — confirm against the emitter before use */
   int16_t offset0;
   int8_t offset1;
   bool gds; /* use GDS instead of LDS */
};
776
/**
 * Vector Memory Untyped-buffer Instructions
 * Operand(0): VADDR - Address source. Can carry an index and/or offset
 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MUBUF_instruction : public Instruction {
   uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
   bool offen : 1; /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1; /* Supply an index from VGPR (VADDR) */
   bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool lds : 1; /* Return read-data to LDS instead of VGPRs */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1;
   barrier_interaction barrier;
};
799
/**
 * Vector Memory Typed-buffer Instructions
 * Operand(0): VADDR - Address source. Can carry an index and/or offset
 * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 *
 */
struct MTBUF_instruction : public Instruction {
   uint16_t offset; /* Unsigned byte offset - 12 bit */
   uint8_t dfmt : 4; /* Data Format of data in memory buffer */
   uint8_t nfmt : 3; /* Numeric format of data in memory */
   bool offen : 1; /* Supply an offset from VGPR (VADDR) */
   bool idxen : 1; /* Supply an index from VGPR (VADDR) */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1;
   barrier_interaction barrier;
};
822
/**
 * Vector Memory Image Instructions
 * Operand(0): VADDR - Address source. Can carry an offset or an index.
 * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
 * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
 * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
 *
 */
struct MIMG_instruction : public Instruction {
   uint8_t dmask; /* Data VGPR enable mask */
   uint8_t dim : 3; /* NAVI: dimensionality */
   bool unrm : 1; /* Force address to be un-normalized */
   bool dlc : 1; /* NAVI: device level coherent */
   bool glc : 1; /* globally coherent */
   bool slc : 1; /* system level coherent */
   bool tfe : 1; /* texture fail enable */
   bool da : 1; /* declare an array */
   bool lwe : 1; /* Force data to be un-normalized */
   bool r128 : 1; /* NAVI: Texture resource size */
   bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
   bool d16 : 1; /* Convert 32-bit data to 16-bit data */
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1;
   barrier_interaction barrier;
};
848
/**
 * Flat/Scratch/Global Instructions
 * Operand(0): ADDR
 * Operand(1): SADDR
 * Operand(2) / Definition(0): DATA/VDST
 *
 */
struct FLAT_instruction : public Instruction {
   uint16_t offset; /* Vega/Navi only */
   bool slc : 1; /* system level coherent */
   bool glc : 1; /* globally coherent */
   bool dlc : 1; /* NAVI: device level coherent */
   bool lds : 1;
   bool nv : 1;
   bool disable_wqm : 1; /* Require an exec mask without helper invocations */
   bool can_reorder : 1;
   barrier_interaction barrier;
};
867
/* EXP: export to a render target, z-buffer or parameter */
struct Export_instruction : public Instruction {
   uint8_t enabled_mask; /* which of the four data components are written */
   uint8_t dest; /* export target */
   bool compressed : 1;
   bool done : 1;
   bool valid_mask : 1;
};
875
/* generic pseudo instruction, lowered before final emission */
struct Pseudo_instruction : public Instruction {
   bool tmp_in_scc;
   PhysReg scratch_sgpr; /* might not be valid if it's not needed */
};
880
struct Pseudo_branch_instruction : public Instruction {
   /* target[0] is the block index of the branch target.
    * For conditional branches, target[1] contains the fall-through alternative.
    * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
    */
   uint32_t target[2];
};
888
/* barrier pseudo instruction; carries no extra fields */
struct Pseudo_barrier_instruction : public Instruction {
};
891
/* Reduction operations, each in a 32-bit and a 64-bit variant. */
enum ReduceOp {
   iadd32, iadd64,
   imul32, imul64,
   fadd32, fadd64,
   fmul32, fmul64,
   imin32, imin64,
   imax32, imax64,
   umin32, umin64,
   umax32, umax64,
   fmin32, fmin64,
   fmax32, fmax64,
   iand32, iand64,
   ior32, ior64,
   ixor32, ixor64,
   gfx10_wave64_bpermute
};
908
/**
 * Subgroup Reduction Instructions, everything except for the data to be
 * reduced and the result as inserted by setup_reduce_temp().
 * Operand(0): data to be reduced
 * Operand(1): reduce temporary
 * Operand(2): vector temporary
 * Definition(0): result
 * Definition(1): scalar temporary
 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
 * Definition(3): scc clobber
 * Definition(4): vcc clobber
 *
 */
struct Pseudo_reduction_instruction : public Instruction {
   ReduceOp reduce_op;
   unsigned cluster_size; // must be 0 for scans
};
926
/* Instructions are allocated with calloc() (see create_instruction() below),
 * so they must be released with free() rather than delete. */
struct instr_deleter_functor {
   void operator()(void* p) {
      free(p);
   }
};

/* owning instruction pointer with the matching free()-based deleter */
template<typename T>
using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
935
/* Allocates a zero-initialized instruction of type T with its operand and
 * definition arrays placed in the same allocation, directly after T.
 * The result must be freed with free() (see instr_deleter_functor/aco_ptr). */
template<typename T>
T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
{
   /* layout: [ T | num_operands * Operand | num_definitions * Definition ] */
   std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
   char *data = (char*) calloc(1, size);
   T* inst = (T*) data;

   inst->opcode = opcode;
   inst->format = format;

   /* aco::span takes a 16-bit offset relative to the span object itself
    * rather than a pointer — presumably to keep Instruction small; confirm
    * against aco_util.h */
   uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
   inst->operands = aco::span<Operand>(operands_offset, num_operands);
   uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
   inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);

   return inst;
}
953
/* Returns true if the instruction uses any input/output modifiers
 * (DPP/SDWA always count; VOP3 counts if abs/neg/opsel/clamp/omod is set). */
constexpr bool Instruction::usesModifiers() const noexcept
{
   if (isDPP() || isSDWA())
      return true;
   if (!isVOP3())
      return false;
   const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
   /* NOTE(review): abs[]/neg[] have 3 entries — assumes VOP3 instructions
    * never have more than 3 operands; confirm at call sites */
   for (unsigned i = 0; i < operands.size(); i++) {
      if (vop3->abs[i] || vop3->neg[i])
         return true;
   }
   return vop3->opsel || vop3->clamp || vop3->omod;
}
967
968 constexpr bool is_phi(Instruction* instr)
969 {
970 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
971 }
972
/* convenience overload for owning pointers */
static inline bool is_phi(aco_ptr<Instruction>& instr)
{
   return is_phi(instr.get());
}
977
/* Returns which memory types the instruction interacts with; formats that
 * carry no barrier field default to barrier_none (DS always counts as
 * shared memory). */
constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
{
   switch (instr->format) {
   case Format::SMEM:
      return static_cast<SMEM_instruction*>(instr)->barrier;
   case Format::MUBUF:
      return static_cast<MUBUF_instruction*>(instr)->barrier;
   case Format::MIMG:
      return static_cast<MIMG_instruction*>(instr)->barrier;
   case Format::FLAT:
   case Format::GLOBAL:
   case Format::SCRATCH:
      return static_cast<FLAT_instruction*>(instr)->barrier;
   case Format::DS:
      return barrier_shared;
   default:
      return barrier_none;
   }
}
997
998 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
999
/* Bitmask of properties of a basic block (stored in Block::kind). */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all actives lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_discard = 1 << 8,
   block_kind_branch = 1 << 9,
   block_kind_merge = 1 << 10,
   block_kind_invert = 1 << 11,
   block_kind_uses_discard_if = 1 << 12,
   block_kind_needs_lowering = 1 << 13,
   block_kind_uses_demote = 1 << 14,
};
1019
1020
/* Number of vgprs and sgprs required at some program point; supports
 * element-wise arithmetic and adding/removing single temporaries. */
struct RegisterDemand {
   constexpr RegisterDemand() = default;
   constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
      : vgpr{v}, sgpr{s} {}
   int16_t vgpr = 0;
   int16_t sgpr = 0;

   constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
      return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
   }

   /* true if either register file exceeds the other demand */
   constexpr bool exceeds(const RegisterDemand other) const noexcept {
      return vgpr > other.vgpr || sgpr > other.sgpr;
   }

   /* add the size of a temporary to the demand of its register file */
   constexpr RegisterDemand operator+(const Temp t) const noexcept {
      if (t.type() == RegType::sgpr)
         return RegisterDemand( vgpr, sgpr + t.size() );
      else
         return RegisterDemand( vgpr + t.size(), sgpr );
   }

   constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
      return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
   }

   constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
      return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
   }

   constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
      vgpr += other.vgpr;
      sgpr += other.sgpr;
      return *this;
   }

   constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
      vgpr -= other.vgpr;
      sgpr -= other.sgpr;
      return *this;
   }

   constexpr RegisterDemand& operator+=(const Temp t) noexcept {
      if (t.type() == RegType::sgpr)
         sgpr += t.size();
      else
         vgpr += t.size();
      return *this;
   }

   constexpr RegisterDemand& operator-=(const Temp t) noexcept {
      if (t.type() == RegType::sgpr)
         sgpr -= t.size();
      else
         vgpr -= t.size();
      return *this;
   }

   /* element-wise maximum */
   constexpr void update(const RegisterDemand other) noexcept {
      vgpr = std::max(vgpr, other.vgpr);
      sgpr = std::max(sgpr, other.sgpr);
   }

};
1085
/* CFG */
/* A basic block: instruction list plus predecessor/successor edges of both
 * the logical (per-lane) and the linear (whole-wave) CFG. */
struct Block {
   float_mode fp_mode;
   unsigned index;
   unsigned offset = 0;
   std::vector<aco_ptr<Instruction>> instructions;
   std::vector<unsigned> logical_preds;
   std::vector<unsigned> linear_preds;
   std::vector<unsigned> logical_succs;
   std::vector<unsigned> linear_succs;
   RegisterDemand register_demand = RegisterDemand();
   uint16_t loop_nest_depth = 0;
   uint16_t kind = 0; /* mask of block_kind bits */
   int logical_idom = -1; /* immediate dominator index; -1 = none */
   int linear_idom = -1;
   Temp live_out_exec = Temp();

   /* this information is needed for predecessors to blocks with phis when
    * moving out of ssa */
   bool scc_live_out = false;
   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */

   Block(unsigned idx) : index(idx) {}
   Block() : index(0) {}
};
1111
/* A Stage is a bitmask combining the API-level (software) shader stages merged
 * into one program with the hardware stage the program runs as. */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_mask = 0x3f; /* all six sw_* bits */

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 6;
static constexpr Stage hw_es = 1 << 7; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_gs = 1 << 8;
static constexpr Stage hw_ls = 1 << 9; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_hs = 1 << 10;
static constexpr Stage hw_fs = 1 << 11;
static constexpr Stage hw_cs = 1 << 12;
static constexpr Stage hw_mask = 0x7f << 6; /* all seven hw_* bits */

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
/* Fixed: was sw_tes | hw_gs, but a TES feeding the geometry stage runs as the
 * hardware ES (export) stage, exactly like vertex_es above. */
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1153
/* A whole shader program: the CFG plus all per-program compilation state. */
class Program final {
public:
   float_mode next_fp_mode; /* fp_mode given to blocks created by create_and_insert_block()/insert_block() */
   std::vector<Block> blocks;
   RegisterDemand max_reg_demand = RegisterDemand(); /* peak demand over the program -- TODO confirm with update_vgpr_sgpr_demand */
   uint16_t num_waves = 0;
   uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
   ac_shader_config* config;
   struct radv_shader_info *info;
   enum chip_class chip_class;
   enum radeon_family family;
   unsigned wave_size; /* lanes per wave (32 or 64 on these chips -- TODO confirm at init site) */
   RegClass lane_mask; /* register class sized to hold one bit per lane */
   Stage stage; /* combination of sw_* and hw_* bits, see the Stage constants above */
   bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
   bool needs_wqm = false; /* there exists a p_wqm instruction */
   bool wb_smem_l1_on_end = false;

   std::vector<uint8_t> constant_data;
   Temp private_segment_buffer;
   Temp scratch_offset;

   uint16_t min_waves = 0;
   uint16_t lds_alloc_granule;
   uint32_t lds_limit; /* in bytes */
   uint16_t vgpr_limit;
   uint16_t sgpr_limit;
   uint16_t physical_sgprs;
   uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
   uint16_t vgpr_alloc_granule; /* minus one. must be power of two */

   bool needs_vcc = false;
   bool needs_xnack_mask = false;
   bool needs_flat_scr = false;

   /* Hand out a fresh id; ids are capped at 2^24-1 (16777215). */
   uint32_t allocateId()
   {
      assert(allocationID <= 16777215);
      return allocationID++;
   }

   /* The id allocateId() would return next, without consuming it. */
   uint32_t peekAllocationId()
   {
      return allocationID;
   }

   /* Reset the id counter to a known value. */
   void setAllocationId(uint32_t id)
   {
      allocationID = id;
   }

   /* Append a new empty block; its index and fp_mode are filled in here. */
   Block* create_and_insert_block() {
      blocks.emplace_back(blocks.size());
      blocks.back().fp_mode = next_fp_mode;
      return &blocks.back();
   }

   /* Append an existing block (taking ownership); its index and fp_mode are
    * overwritten to match this program. */
   Block* insert_block(Block&& block) {
      block.index = blocks.size();
      block.fp_mode = next_fp_mode;
      blocks.emplace_back(std::move(block));
      return &blocks.back();
   }

private:
   uint32_t allocationID = 1; /* ids start at 1; 0 is presumably reserved -- TODO confirm */
};
1221
/* Result of liveness analysis (returned by live_var_analysis below). */
struct live {
   /* live temps out per block */
   std::vector<std::set<Temp>> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};
1228
/* instruction selection: build an ACO Program from the NIR shaders */
void select_program(Program *program,
                    unsigned shader_count,
                    struct nir_shader *const *shaders,
                    ac_shader_config* config,
                    struct radv_shader_args *args);

/* compilation passes and analyses operating on a Program */
void lower_wqm(Program* program, live& live_vars,
               const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
void dominator_tree(Program* program);
void insert_exec_mask(Program *program);
void value_numbering(Program* program);
void optimize(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);
/* encode the program into machine code; returns the size written */
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
void print_asm(Program *program, std::vector<uint32_t>& binary,
               unsigned exec_size, std::ostream& out);
void validate(Program* program, FILE *output);
bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
#ifndef NDEBUG
void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
#else
/* NOTE(review): this release-mode macro takes a leading 'program' argument that
 * the debug declaration above does not -- confirm call sites pass matching
 * arguments for both build modes */
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif

void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);

/* get number of sgprs/vgprs allocated required to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1279
/* Per-opcode property tables, indexed by aco_opcode. Presumably generated
 * alongside aco_opcodes.h -- confirm against the opcode generator. */
typedef struct {
   /* hardware encodings per generation */
   const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
   const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
   const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
   const char *name[static_cast<int>(aco_opcode::num_opcodes)]; /* human-readable mnemonic */
   const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)]; /* encoding format, see aco::Format */
} Info;

/* the single shared instance of the opcode tables */
extern const Info instr_info;
1292
1293 }
1294
1295 #endif /* ACO_IR_H */
1296