src/amd/compiler/aco_ir.h

   1 /*
   2  * Copyright © 2018 Valve Corporation
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21  * IN THE SOFTWARE.
  22  *
  23  */
  24
  25 #ifndef ACO_IR_H
  26 #define ACO_IR_H
  27
  28 #include <vector>
  29 #include <set>
  30 #include <bitset>
  31 #include <memory>
  32
  33 #include "nir.h"
  34 #include "ac_binary.h"
  35 #include "amd_family.h"
  36 #include "aco_opcodes.h"
  37 #include "aco_util.h"
  38
  39 struct radv_nir_compiler_options;
  40 struct radv_shader_args;
  41 struct radv_shader_info;
  42
  43 namespace aco {
  44
  45 extern uint64_t debug_flags;
  46
  47 enum {
  48    DEBUG_VALIDATE = 0x1,
  49    DEBUG_VALIDATE_RA = 0x2,
  50    DEBUG_PERFWARN = 0x4,
  51 };
  52
  53 /**
  54  * Representation of the instruction's microcode encoding format
  55  * Note: Some Vector ALU Formats can be combined, such that:
  56  * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
  57  * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
  58  * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
  59  *
  60  * (*) The same is applicable for VOP1 and VOPC instructions.
  61  */
  62 enum class Format : std::uint16_t {
  63    /* Pseudo Instruction Format */
  64    PSEUDO = 0,
  65    /* Scalar ALU & Control Formats */
  66    SOP1 = 1,
  67    SOP2 = 2,
  68    SOPK = 3,
  69    SOPP = 4,
  70    SOPC = 5,
  71    /* Scalar Memory Format */
  72    SMEM = 6,
  73    /* LDS/GDS Format */
  74    DS = 8,
  75    /* Vector Memory Buffer Formats */
  76    MTBUF = 9,
  77    MUBUF = 10,
  78    /* Vector Memory Image Format */
  79    MIMG = 11,
  80    /* Export Format */
  81    EXP = 12,
  82    /* Flat Formats */
  83    FLAT = 13,
  84    GLOBAL = 14,
  85    SCRATCH = 15,
  86
  87    PSEUDO_BRANCH = 16,
  88    PSEUDO_BARRIER = 17,
  89    PSEUDO_REDUCTION = 18,
  90
  91    /* Vector ALU Formats */
  92    VOP1 = 1 << 8,
  93    VOP2 = 1 << 9,
  94    VOPC = 1 << 10,
  95    VOP3 = 1 << 11,
  96    VOP3A = 1 << 11,
  97    VOP3B = 1 << 11,
  98    VOP3P = 1 << 12,
  99    /* Vector Parameter Interpolation Format */
 100    VINTRP = 1 << 13,
 101    DPP = 1 << 14,
 102    SDWA = 1 << 15,
 103 };
 104
 105 enum barrier_interaction : uint8_t {
 106    barrier_none = 0,
 107    barrier_buffer = 0x1,
 108    barrier_image = 0x2,
 109    barrier_atomic = 0x4,
 110    barrier_shared = 0x8,
 111    /* used for geometry shaders to ensure vertex data writes are before the
 112     * GS_DONE s_sendmsg. */
 113    barrier_gs_data = 0x10,
 114    /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
 115    barrier_gs_sendmsg = 0x20,
 116    barrier_count = 6,
 117 };
 118
 119 enum fp_round {
 120    fp_round_ne = 0,
 121    fp_round_pi = 1,
 122    fp_round_ni = 2,
 123    fp_round_tz = 3,
 124 };
 125
 126 enum fp_denorm {
 127    /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
 128     * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
 129    fp_denorm_flush = 0x0,
 130    fp_denorm_keep = 0x3,
 131 };
 132
 133 struct float_mode {
 134    /* matches encoding of the MODE register */
 135    union {
 136       struct {
 137           fp_round round32:2;
 138           fp_round round16_64:2;
 139           unsigned denorm32:2;
 140           unsigned denorm16_64:2;
 141       };
 142       uint8_t val = 0;
 143    };
 144    /* if false, optimizations which may remove infs/nan/-0.0 can be done */
 145    bool preserve_signed_zero_inf_nan32:1;
 146    bool preserve_signed_zero_inf_nan16_64:1;
 147    /* if false, optimizations which may remove denormal flushing can be done */
 148    bool must_flush_denorms32:1;
 149    bool must_flush_denorms16_64:1;
 150    bool care_about_round32:1;
 151    bool care_about_round16_64:1;
 152
 153    /* Returns true if instructions using the mode "other" can safely use the
 154     * current one instead. */
 155    bool canReplace(float_mode other) const noexcept {
 156       return val == other.val &&
 157              (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
 158              (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
 159              (must_flush_denorms32  || !other.must_flush_denorms32) &&
 160              (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
 161              (care_about_round32 || !other.care_about_round32) &&
 162              (care_about_round16_64 || !other.care_about_round16_64);
 163    }
 164 };
 165
 166 constexpr Format asVOP3(Format format) {
 167    return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
 168 };
 169
 170 enum class RegType {
 171    none = 0,
 172    sgpr,
 173    vgpr,
 174    linear_vgpr,
 175 };
 176
 177 struct RegClass {
 178
 179    enum RC : uint8_t {
 180       s1 = 1,
 181       s2 = 2,
 182       s3 = 3,
 183       s4 = 4,
 184       s6 = 6,
 185       s8 = 8,
 186       s16 = 16,
 187       v1 = s1 | (1 << 5),
 188       v2 = s2 | (1 << 5),
 189       v3 = s3 | (1 << 5),
 190       v4 = s4 | (1 << 5),
 191       v5 = 5  | (1 << 5),
 192       v6 = 6  | (1 << 5),
 193       v7 = 7  | (1 << 5),
 194       v8 = 8  | (1 << 5),
 195       /* these are used for WWM and spills to vgpr */
 196       v1_linear = v1 | (1 << 6),
 197       v2_linear = v2 | (1 << 6),
 198    };
 199
 200    RegClass() = default;
 201    constexpr RegClass(RC rc)
 202       : rc(rc) {}
 203    constexpr RegClass(RegType type, unsigned size)
 204       : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
 205
 206    constexpr operator RC() const { return rc; }
 207    explicit operator bool() = delete;
 208
 209    constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
 210    constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
 211    constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
 212    constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
 213
 214 private:
 215    RC rc;
 216 };
 217
 218 /* transitional helper expressions */
 219 static constexpr RegClass s1{RegClass::s1};
 220 static constexpr RegClass s2{RegClass::s2};
 221 static constexpr RegClass s3{RegClass::s3};
 222 static constexpr RegClass s4{RegClass::s4};
 223 static constexpr RegClass s8{RegClass::s8};
 224 static constexpr RegClass s16{RegClass::s16};
 225 static constexpr RegClass v1{RegClass::v1};
 226 static constexpr RegClass v2{RegClass::v2};
 227 static constexpr RegClass v3{RegClass::v3};
 228 static constexpr RegClass v4{RegClass::v4};
 229 static constexpr RegClass v5{RegClass::v5};
 230 static constexpr RegClass v6{RegClass::v6};
 231 static constexpr RegClass v7{RegClass::v7};
 232 static constexpr RegClass v8{RegClass::v8};
 233
 234 /**
 235  * Temp Class
 236  * Each temporary virtual register has a
 237  * register class (i.e. size and type)
 238  * and SSA id.
 239  */
 240 struct Temp {
 241    Temp() = default;
 242    constexpr Temp(uint32_t id, RegClass cls) noexcept
 243       : id_(id), reg_class(cls) {}
 244
 245    constexpr uint32_t id() const noexcept { return id_; }
 246    constexpr RegClass regClass() const noexcept { return reg_class; }
 247
 248    constexpr unsigned size() const noexcept { return reg_class.size(); }
 249    constexpr RegType type() const noexcept { return reg_class.type(); }
 250    constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
 251
 252    constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
 253    constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
 254    constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
 255
 256 private:
 257    uint32_t id_:24;
 258    RegClass reg_class;
 259 };
 260
 261 /**
 262  * PhysReg
 263  * Represents the physical register for each
 264  * Operand and Definition.
 265  */
 266 struct PhysReg {
 267    constexpr PhysReg() = default;
 268    explicit constexpr PhysReg(unsigned r) : reg(r) {}
 269    constexpr operator unsigned() const { return reg; }
 270
 271    uint16_t reg = 0;
 272 };
 273
 274 /* helper expressions for special registers */
 275 static constexpr PhysReg m0{124};
 276 static constexpr PhysReg vcc{106};
 277 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
 278 static constexpr PhysReg exec{126};
 279 static constexpr PhysReg exec_lo{126};
 280 static constexpr PhysReg exec_hi{127};
 281 static constexpr PhysReg scc{253};
 282
 283 /**
 284  * Operand Class
 285  * Initially, each Operand refers to either
 286  * a temporary virtual register
 287  * or to a constant value
 288  * Temporary registers get mapped to physical register during RA
 289  * Constant values are inlined into the instruction sequence.
 290  */
 291 class Operand final
 292 {
 293 public:
 294    constexpr Operand()
 295       : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
 296         isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
 297
 298    explicit Operand(Temp r) noexcept
 299    {
 300       data_.temp = r;
 301       if (r.id()) {
 302          isTemp_ = true;
 303       } else {
 304          isUndef_ = true;
 305          setFixed(PhysReg{128});
 306       }
 307    };
 308    explicit Operand(uint32_t v, bool is64bit = false) noexcept
 309    {
 310       data_.i = v;
 311       isConstant_ = true;
 312       is64BitConst_ = is64bit;
 313       if (v <= 64)
 314          setFixed(PhysReg{128 + v});
 315       else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
 316          setFixed(PhysReg{192 - v});
 317       else if (v == 0x3f000000) /* 0.5 */
 318          setFixed(PhysReg{240});
 319       else if (v == 0xbf000000) /* -0.5 */
 320          setFixed(PhysReg{241});
 321       else if (v == 0x3f800000) /* 1.0 */
 322          setFixed(PhysReg{242});
 323       else if (v == 0xbf800000) /* -1.0 */
 324          setFixed(PhysReg{243});
 325       else if (v == 0x40000000) /* 2.0 */
 326          setFixed(PhysReg{244});
 327       else if (v == 0xc0000000) /* -2.0 */
 328          setFixed(PhysReg{245});
 329       else if (v == 0x40800000) /* 4.0 */
 330          setFixed(PhysReg{246});
 331       else if (v == 0xc0800000) /* -4.0 */
 332          setFixed(PhysReg{247});
 333       else { /* Literal Constant */
 334          assert(!is64bit && "attempt to create a 64-bit literal constant");
 335          setFixed(PhysReg{255});
 336       }
 337    };
 338    explicit Operand(uint64_t v) noexcept
 339    {
 340       isConstant_ = true;
 341       is64BitConst_ = true;
 342       if (v <= 64) {
 343          data_.i = (uint32_t) v;
 344          setFixed(PhysReg{128 + (uint32_t) v});
 345       } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
 346          data_.i = (uint32_t) v;
 347          setFixed(PhysReg{192 - (uint32_t) v});
 348       } else if (v == 0x3FE0000000000000) { /* 0.5 */
 349          data_.i = 0x3f000000;
 350          setFixed(PhysReg{240});
 351       } else if (v == 0xBFE0000000000000) { /* -0.5 */
 352          data_.i = 0xbf000000;
 353          setFixed(PhysReg{241});
 354       } else if (v == 0x3FF0000000000000) { /* 1.0 */
 355          data_.i = 0x3f800000;
 356          setFixed(PhysReg{242});
 357       } else if (v == 0xBFF0000000000000) { /* -1.0 */
 358          data_.i = 0xbf800000;
 359          setFixed(PhysReg{243});
 360       } else if (v == 0x4000000000000000) { /* 2.0 */
 361          data_.i = 0x40000000;
 362          setFixed(PhysReg{244});
 363       } else if (v == 0xC000000000000000) { /* -2.0 */
 364          data_.i = 0xc0000000;
 365          setFixed(PhysReg{245});
 366       } else if (v == 0x4010000000000000) { /* 4.0 */
 367          data_.i = 0x40800000;
 368          setFixed(PhysReg{246});
 369       } else if (v == 0xC010000000000000) { /* -4.0 */
 370          data_.i = 0xc0800000;
 371          setFixed(PhysReg{247});
 372       } else { /* Literal Constant: we don't know if it is a long or double.*/
 373          isConstant_ = 0;
 374          assert(false && "attempt to create a 64-bit literal constant");
 375       }
 376    };
 377    explicit Operand(RegClass type) noexcept
 378    {
 379       isUndef_ = true;
 380       data_.temp = Temp(0, type);
 381       setFixed(PhysReg{128});
 382    };
 383    explicit Operand(PhysReg reg, RegClass type) noexcept
 384    {
 385       data_.temp = Temp(0, type);
 386       setFixed(reg);
 387    }
 388
 389    constexpr bool isTemp() const noexcept
 390    {
 391       return isTemp_;
 392    }
 393
 394    constexpr void setTemp(Temp t) noexcept {
 395       assert(!isConstant_);
 396       isTemp_ = true;
 397       data_.temp = t;
 398    }
 399
 400    constexpr Temp getTemp() const noexcept
 401    {
 402       return data_.temp;
 403    }
 404
 405    constexpr uint32_t tempId() const noexcept
 406    {
 407       return data_.temp.id();
 408    }
 409
 410    constexpr bool hasRegClass() const noexcept
 411    {
 412       return isTemp() || isUndefined();
 413    }
 414
 415    constexpr RegClass regClass() const noexcept
 416    {
 417       return data_.temp.regClass();
 418    }
 419
 420    constexpr unsigned size() const noexcept
 421    {
 422       if (isConstant())
 423          return is64BitConst_ ? 2 : 1;
 424       else
 425          return data_.temp.size();
 426    }
 427
 428    constexpr bool isFixed() const noexcept
 429    {
 430       return isFixed_;
 431    }
 432
 433    constexpr PhysReg physReg() const noexcept
 434    {
 435       return reg_;
 436    }
 437
 438    constexpr void setFixed(PhysReg reg) noexcept
 439    {
 440       isFixed_ = reg != unsigned(-1);
 441       reg_ = reg;
 442    }
 443
 444    constexpr bool isConstant() const noexcept
 445    {
 446       return isConstant_;
 447    }
 448
 449    constexpr bool isLiteral() const noexcept
 450    {
 451       return isConstant() && reg_ == 255;
 452    }
 453
 454    constexpr bool isUndefined() const noexcept
 455    {
 456       return isUndef_;
 457    }
 458
 459    constexpr uint32_t constantValue() const noexcept
 460    {
 461       return data_.i;
 462    }
 463
 464    constexpr bool constantEquals(uint32_t cmp) const noexcept
 465    {
 466       return isConstant() && constantValue() == cmp;
 467    }
 468
 469    constexpr void setKill(bool flag) noexcept
 470    {
 471       isKill_ = flag;
 472       if (!flag)
 473          setFirstKill(false);
 474    }
 475
 476    constexpr bool isKill() const noexcept
 477    {
 478       return isKill_ || isFirstKill();
 479    }
 480
 481    constexpr void setFirstKill(bool flag) noexcept
 482    {
 483       isFirstKill_ = flag;
 484       if (flag)
 485          setKill(flag);
 486    }
 487
 488    /* When there are multiple operands killing the same temporary,
 489     * isFirstKill() is only returns true for the first one. */
 490    constexpr bool isFirstKill() const noexcept
 491    {
 492       return isFirstKill_;
 493    }
 494
 495 private:
 496    union {
 497       uint32_t i;
 498       float f;
 499       Temp temp = Temp(0, s1);
 500    } data_;
 501    PhysReg reg_;
 502    union {
 503       struct {
 504          uint8_t isTemp_:1;
 505          uint8_t isFixed_:1;
 506          uint8_t isConstant_:1;
 507          uint8_t isKill_:1;
 508          uint8_t isUndef_:1;
 509          uint8_t isFirstKill_:1;
 510          uint8_t is64BitConst_:1;
 511       };
 512       /* can't initialize bit-fields in c++11, so work around using a union */
 513       uint8_t control_ = 0;
 514    };
 515 };
 516
 517 /**
 518  * Definition Class
 519  * Definitions are the results of Instructions
 520  * and refer to temporary virtual registers
 521  * which are later mapped to physical registers
 522  */
 523 class Definition final
 524 {
 525 public:
 526    constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
 527    Definition(uint32_t index, RegClass type) noexcept
 528       : temp(index, type) {}
 529    explicit Definition(Temp tmp) noexcept
 530       : temp(tmp) {}
 531    Definition(PhysReg reg, RegClass type) noexcept
 532       : temp(Temp(0, type))
 533    {
 534       setFixed(reg);
 535    }
 536    Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
 537       : temp(Temp(tmpId, type))
 538    {
 539       setFixed(reg);
 540    }
 541
 542    constexpr bool isTemp() const noexcept
 543    {
 544       return tempId() > 0;
 545    }
 546
 547    constexpr Temp getTemp() const noexcept
 548    {
 549       return temp;
 550    }
 551
 552    constexpr uint32_t tempId() const noexcept
 553    {
 554       return temp.id();
 555    }
 556
 557    constexpr void setTemp(Temp t) noexcept {
 558       temp = t;
 559    }
 560
 561    constexpr RegClass regClass() const noexcept
 562    {
 563       return temp.regClass();
 564    }
 565
 566    constexpr unsigned size() const noexcept
 567    {
 568       return temp.size();
 569    }
 570
 571    constexpr bool isFixed() const noexcept
 572    {
 573       return isFixed_;
 574    }
 575
 576    constexpr PhysReg physReg() const noexcept
 577    {
 578       return reg_;
 579    }
 580
 581    constexpr void setFixed(PhysReg reg) noexcept
 582    {
 583       isFixed_ = 1;
 584       reg_ = reg;
 585    }
 586
 587    constexpr void setHint(PhysReg reg) noexcept
 588    {
 589       hasHint_ = 1;
 590       reg_ = reg;
 591    }
 592
 593    constexpr bool hasHint() const noexcept
 594    {
 595       return hasHint_;
 596    }
 597
 598    constexpr void setKill(bool flag) noexcept
 599    {
 600       isKill_ = flag;
 601    }
 602
 603    constexpr bool isKill() const noexcept
 604    {
 605       return isKill_;
 606    }
 607
 608 private:
 609    Temp temp = Temp(0, s1);
 610    PhysReg reg_;
 611    union {
 612       struct {
 613          uint8_t isFixed_:1;
 614          uint8_t hasHint_:1;
 615          uint8_t isKill_:1;
 616       };
 617       /* can't initialize bit-fields in c++11, so work around using a union */
 618       uint8_t control_ = 0;
 619    };
 620 };
 621
 622 class Block;
 623
 624 struct Instruction {
 625    aco_opcode opcode;
 626    Format format;
 627    uint32_t pass_flags;
 628
 629    aco::span<Operand> operands;
 630    aco::span<Definition> definitions;
 631
 632    constexpr bool isVALU() const noexcept
 633    {
 634       return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
 635           || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
 636           || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
 637           || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
 638           || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
 639           || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
 640    }
 641
 642    constexpr bool isSALU() const noexcept
 643    {
 644       return format == Format::SOP1 ||
 645              format == Format::SOP2 ||
 646              format == Format::SOPC ||
 647              format == Format::SOPK ||
 648              format == Format::SOPP;
 649    }
 650
 651    constexpr bool isVMEM() const noexcept
 652    {
 653       return format == Format::MTBUF ||
 654              format == Format::MUBUF ||
 655              format == Format::MIMG;
 656    }
 657
 658    constexpr bool isDPP() const noexcept
 659    {
 660       return (uint16_t) format & (uint16_t) Format::DPP;
 661    }
 662
 663    constexpr bool isVOP3() const noexcept
 664    {
 665       return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
 666              ((uint16_t) format & (uint16_t) Format::VOP3B) ||
 667              format == Format::VOP3P;
 668    }
 669
 670    constexpr bool isSDWA() const noexcept
 671    {
 672       return (uint16_t) format & (uint16_t) Format::SDWA;
 673    }
 674
 675    constexpr bool isFlatOrGlobal() const noexcept
 676    {
 677       return format == Format::FLAT || format == Format::GLOBAL;
 678    }
 679
 680    constexpr bool usesModifiers() const noexcept;
 681
 682    constexpr bool reads_exec() const noexcept
 683    {
 684       for (const Operand& op : operands) {
 685          if (op.isFixed() && op.physReg() == exec)
 686             return true;
 687       }
 688       return false;
 689    }
 690 };
 691
 692 struct SOPK_instruction : public Instruction {
 693    uint16_t imm;
 694 };
 695
 696 struct SOPP_instruction : public Instruction {
 697    uint32_t imm;
 698    int block;
 699 };
 700
 701 struct SOPC_instruction : public Instruction {
 702 };
 703
 704 struct SOP1_instruction : public Instruction {
 705 };
 706
 707 struct SOP2_instruction : public Instruction {
 708 };
 709
 710 /**
 711  * Scalar Memory Format:
 712  * For s_(buffer_)load_dword*:
 713  * Operand(0): SBASE - SGPR-pair which provides base address
 714  * Operand(1): Offset - immediate (un)signed offset or SGPR
 715  * Operand(2) / Definition(0): SDATA - SGPR for read / write result
 716  * Operand(n-1): SOffset - SGPR offset (Vega only)
 717  *
 718  * Having no operands is also valid for instructions such as s_dcache_inv.
 719  *
 720  */
 721 struct SMEM_instruction : public Instruction {
 722    bool glc : 1; /* VI+: globally coherent */
 723    bool dlc : 1; /* NAVI: device level coherent */
 724    bool nv : 1; /* VEGA only: Non-volatile */
 725    bool can_reorder : 1;
 726    bool disable_wqm : 1;
 727    barrier_interaction barrier;
 728 };
 729
 730 struct VOP1_instruction : public Instruction {
 731 };
 732
 733 struct VOP2_instruction : public Instruction {
 734 };
 735
 736 struct VOPC_instruction : public Instruction {
 737 };
 738
 739 struct VOP3A_instruction : public Instruction {
 740    bool abs[3];
 741    bool neg[3];
 742    uint8_t opsel : 4;
 743    uint8_t omod : 2;
 744    bool clamp : 1;
 745 };
 746
 747 /**
 748  * Data Parallel Primitives Format:
 749  * This format can be used for VOP1, VOP2 or VOPC instructions.
 750  * The swizzle applies to the src0 operand.
 751  *
 752  */
 753 struct DPP_instruction : public Instruction {
 754    bool abs[2];
 755    bool neg[2];
 756    uint16_t dpp_ctrl;
 757    uint8_t row_mask : 4;
 758    uint8_t bank_mask : 4;
 759    bool bound_ctrl : 1;
 760 };
 761
 762 struct Interp_instruction : public Instruction {
 763    uint8_t attribute;
 764    uint8_t component;
 765 };
 766
 767 /**
 768  * Local and Global Data Sharing instructions
 769  * Operand(0): ADDR - VGPR which supplies the address.
 770  * Operand(1): DATA0 - First data VGPR.
 771  * Operand(2): DATA1 - Second data VGPR.
 772  * Operand(n-1): M0 - LDS size.
 773  * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
 774  *
 775  */
 776 struct DS_instruction : public Instruction {
 777    int16_t offset0;
 778    int8_t offset1;
 779    bool gds;
 780 };
 781
 782 /**
 783  * Vector Memory Untyped-buffer Instructions
 784  * Operand(0): VADDR - Address source. Can carry an index and/or offset
 785  * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 786  * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 787  * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 788  *
 789  */
 790 struct MUBUF_instruction : public Instruction {
 791    uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
 792    bool offen : 1; /* Supply an offset from VGPR (VADDR) */
 793    bool idxen : 1; /* Supply an index from VGPR (VADDR) */
 794    bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
 795    bool glc : 1; /* globally coherent */
 796    bool dlc : 1; /* NAVI: device level coherent */
 797    bool slc : 1; /* system level coherent */
 798    bool tfe : 1; /* texture fail enable */
 799    bool lds : 1; /* Return read-data to LDS instead of VGPRs */
 800    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 801    bool can_reorder : 1;
 802    barrier_interaction barrier;
 803 };
 804
 805 /**
 806  * Vector Memory Typed-buffer Instructions
 807  * Operand(0): VADDR - Address source. Can carry an index and/or offset
 808  * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
 809  * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
 810  * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
 811  *
 812  */
 813 struct MTBUF_instruction : public Instruction {
 814    uint16_t offset; /* Unsigned byte offset - 12 bit */
 815    uint8_t dfmt : 4; /* Data Format of data in memory buffer */
 816    uint8_t nfmt : 3; /* Numeric format of data in memory */
 817    bool offen : 1; /* Supply an offset from VGPR (VADDR) */
 818    bool idxen : 1; /* Supply an index from VGPR (VADDR) */
 819    bool glc : 1; /* globally coherent */
 820    bool dlc : 1; /* NAVI: device level coherent */
 821    bool slc : 1; /* system level coherent */
 822    bool tfe : 1; /* texture fail enable */
 823    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 824    bool can_reorder : 1;
 825    barrier_interaction barrier;
 826 };
 827
 828 /**
 829  * Vector Memory Image Instructions
 830  * Operand(0): VADDR - Address source. Can carry an offset or an index.
 831  * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
 832  * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
 833  * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
 834  *
 835  */
 836 struct MIMG_instruction : public Instruction {
 837    uint8_t dmask; /* Data VGPR enable mask */
 838    uint8_t dim : 3; /* NAVI: dimensionality */
 839    bool unrm : 1; /* Force address to be un-normalized */
 840    bool dlc : 1; /* NAVI: device level coherent */
 841    bool glc : 1; /* globally coherent */
 842    bool slc : 1; /* system level coherent */
 843    bool tfe : 1; /* texture fail enable */
 844    bool da : 1; /* declare an array */
 845    bool lwe : 1; /* Force data to be un-normalized */
 846    bool r128 : 1; /* NAVI: Texture resource size */
 847    bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
 848    bool d16 : 1; /* Convert 32-bit data to 16-bit data */
 849    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 850    bool can_reorder : 1;
 851    barrier_interaction barrier;
 852 };
 853
 854 /**
 855  * Flat/Scratch/Global Instructions
 856  * Operand(0): ADDR
 857  * Operand(1): SADDR
 858  * Operand(2) / Definition(0): DATA/VDST
 859  *
 860  */
 861 struct FLAT_instruction : public Instruction {
 862    uint16_t offset; /* Vega/Navi only */
 863    bool slc : 1; /* system level coherent */
 864    bool glc : 1; /* globally coherent */
 865    bool dlc : 1; /* NAVI: device level coherent */
 866    bool lds : 1;
 867    bool nv : 1;
 868    bool disable_wqm : 1; /* Require an exec mask without helper invocations */
 869    bool can_reorder : 1;
 870    barrier_interaction barrier;
 871 };
 872
 873 struct Export_instruction : public Instruction {
 874    uint8_t enabled_mask;
 875    uint8_t dest;
 876    bool compressed : 1;
 877    bool done : 1;
 878    bool valid_mask : 1;
 879 };
 880
 881 struct Pseudo_instruction : public Instruction {
 882    bool tmp_in_scc;
 883    PhysReg scratch_sgpr; /* might not be valid if it's not needed */
 884 };
 885
 886 struct Pseudo_branch_instruction : public Instruction {
 887    /* target[0] is the block index of the branch target.
 888     * For conditional branches, target[1] contains the fall-through alternative.
 889     * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
 890     */
 891    uint32_t target[2];
 892 };
 893
 894 struct Pseudo_barrier_instruction : public Instruction {
 895 };
 896
 897 enum ReduceOp {
 898    iadd32, iadd64,
 899    imul32, imul64,
 900    fadd32, fadd64,
 901    fmul32, fmul64,
 902    imin32, imin64,
 903    imax32, imax64,
 904    umin32, umin64,
 905    umax32, umax64,
 906    fmin32, fmin64,
 907    fmax32, fmax64,
 908    iand32, iand64,
 909    ior32, ior64,
 910    ixor32, ixor64,
 911    gfx10_wave64_bpermute
 912 };
 913
 914 /**
 915  * Subgroup Reduction Instructions, everything except for the data to be
 916  * reduced and the result as inserted by setup_reduce_temp().
 917  * Operand(0): data to be reduced
 918  * Operand(1): reduce temporary
 919  * Operand(2): vector temporary
 920  * Definition(0): result
 921  * Definition(1): scalar temporary
 922  * Definition(2): scalar identity temporary (not used to store identity on GFX10)
 923  * Definition(3): scc clobber
 924  * Definition(4): vcc clobber
 925  *
 926  */
 927 struct Pseudo_reduction_instruction : public Instruction {
 928    ReduceOp reduce_op;
 929    unsigned cluster_size; // must be 0 for scans
 930 };
 931
 932 struct instr_deleter_functor {
 933    void operator()(void* p) {
 934       free(p);
 935    }
 936 };
 937
 938 template<typename T>
 939 using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
 940
 941 template<typename T>
 942 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
 943 {
 944    std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
 945    char *data = (char*) calloc(1, size);
 946    T* inst = (T*) data;
 947
 948    inst->opcode = opcode;
 949    inst->format = format;
 950
 951    uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
 952    inst->operands = aco::span<Operand>(operands_offset, num_operands);
 953    uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
 954    inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
 955
 956    return inst;
 957 }
 958
 959 constexpr bool Instruction::usesModifiers() const noexcept
 960 {
 961    if (isDPP() || isSDWA())
 962       return true;
 963    if (!isVOP3())
 964       return false;
 965    const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
 966    for (unsigned i = 0; i < operands.size(); i++) {
 967       if (vop3->abs[i] || vop3->neg[i])
 968          return true;
 969    }
 970    return vop3->opsel || vop3->clamp || vop3->omod;
 971 }
 972
 973 constexpr bool is_phi(Instruction* instr)
 974 {
 975    return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
 976 }
 977
 978 static inline bool is_phi(aco_ptr<Instruction>& instr)
 979 {
 980    return is_phi(instr.get());
 981 }
 982
/* Returns the barrier_interaction value for an instruction. Only the
 * declaration is visible in this header. */
barrier_interaction get_barrier_interaction(Instruction* instr);

/* NOTE(review): presumably reports whether instr is unused according to the
 * counters in `uses` (cf. dead_code_analysis(), which returns the same
 * vector type) -- confirm against the definition. */
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
 986
/* Bit flags describing a basic block. Every enumerator is a distinct single
 * bit (bits 0..15), so several of them can be OR'd together into one mask.
 * NOTE(review): presumably the mask is what Block::kind stores -- confirm. */
enum block_kind {
   /* uniform indicates that leaving this block,
    * all active lanes stay active */
   block_kind_uniform = 1 << 0,
   block_kind_top_level = 1 << 1,
   block_kind_loop_preheader = 1 << 2,
   block_kind_loop_header = 1 << 3,
   block_kind_loop_exit = 1 << 4,
   block_kind_continue = 1 << 5,
   block_kind_break = 1 << 6,
   block_kind_continue_or_break = 1 << 7,
   block_kind_discard = 1 << 8,
   block_kind_branch = 1 << 9,
   block_kind_merge = 1 << 10,
   block_kind_invert = 1 << 11,
   block_kind_uses_discard_if = 1 << 12,
   block_kind_needs_lowering = 1 << 13,
   block_kind_uses_demote = 1 << 14,
   block_kind_export_end = 1 << 15, /* highest bit that still fits into 16 bits */
};
1007
1008
1009 struct RegisterDemand {
1010    constexpr RegisterDemand() = default;
1011    constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1012       : vgpr{v}, sgpr{s} {}
1013    int16_t vgpr = 0;
1014    int16_t sgpr = 0;
1015
1016    constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1017       return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1018    }
1019
1020    constexpr bool exceeds(const RegisterDemand other) const noexcept {
1021       return vgpr > other.vgpr || sgpr > other.sgpr;
1022    }
1023
1024    constexpr RegisterDemand operator+(const Temp t) const noexcept {
1025       if (t.type() == RegType::sgpr)
1026          return RegisterDemand( vgpr, sgpr + t.size() );
1027       else
1028          return RegisterDemand( vgpr + t.size(), sgpr );
1029    }
1030
1031    constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1032       return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1033    }
1034
1035    constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1036       return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1037    }
1038
1039    constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1040       vgpr += other.vgpr;
1041       sgpr += other.sgpr;
1042       return *this;
1043    }
1044
1045    constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1046       vgpr -= other.vgpr;
1047       sgpr -= other.sgpr;
1048       return *this;
1049    }
1050
1051    constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1052       if (t.type() == RegType::sgpr)
1053          sgpr += t.size();
1054       else
1055          vgpr += t.size();
1056       return *this;
1057    }
1058
1059    constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1060       if (t.type() == RegType::sgpr)
1061          sgpr -= t.size();
1062       else
1063          vgpr -= t.size();
1064       return *this;
1065    }
1066
1067    constexpr void update(const RegisterDemand other) noexcept {
1068       vgpr = std::max(vgpr, other.vgpr);
1069       sgpr = std::max(sgpr, other.sgpr);
1070    }
1071
1072 };
1073
1074 /* CFG */
1075 struct Block {
1076    float_mode fp_mode;
1077    unsigned index;
1078    unsigned offset = 0;
1079    std::vector<aco_ptr<Instruction>> instructions;
1080    std::vector<unsigned> logical_preds;
1081    std::vector<unsigned> linear_preds;
1082    std::vector<unsigned> logical_succs;
1083    std::vector<unsigned> linear_succs;
1084    RegisterDemand register_demand = RegisterDemand();
1085    uint16_t loop_nest_depth = 0;
1086    uint16_t kind = 0;
1087    int logical_idom = -1;
1088    int linear_idom = -1;
1089    Temp live_out_exec = Temp();
1090
1091    /* this information is needed for predecessors to blocks with phis when
1092     * moving out of ssa */
1093    bool scc_live_out = false;
1094    PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out != false */
1095
1096    Block(unsigned idx) : index(idx) {}
1097    Block() : index(0) {}
1098 };
1099
/* A Stage is a 16-bit mask: bits 0..6 (sw_mask) name the software (API)
 * stage or stages contained in a shader, bits 7..13 (hw_mask) name the
 * hardware stage the shader is executed as. */
using Stage = uint16_t;

/* software stages */
static constexpr Stage sw_vs = 1 << 0;
static constexpr Stage sw_gs = 1 << 1;
static constexpr Stage sw_tcs = 1 << 2;
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
static constexpr Stage sw_gs_copy = 1 << 6;
static constexpr Stage sw_mask = 0x7f;

/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
static constexpr Stage hw_vs = 1 << 7;
static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_gs = 1 << 9;
static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
static constexpr Stage hw_hs = 1 << 11;
static constexpr Stage hw_fs = 1 << 12;
static constexpr Stage hw_cs = 1 << 13;
static constexpr Stage hw_mask = 0x7f << 7;

/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
/* GFX9 (and GFX10 if NGG isn't used) */
static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
/* pre-GFX9 */
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
/* The stage in front of a separate geometry shader runs as hardware ES (like
 * vertex_es above), not as hardware GS. */
static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
1143
1144 class Program final {
1145 public:
1146    float_mode next_fp_mode;
1147    std::vector<Block> blocks;
1148    RegisterDemand max_reg_demand = RegisterDemand();
1149    uint16_t num_waves = 0;
1150    uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1151    ac_shader_config* config;
1152    struct radv_shader_info *info;
1153    enum chip_class chip_class;
1154    enum radeon_family family;
1155    unsigned wave_size;
1156    RegClass lane_mask;
1157    Stage stage; /* Stage */
1158    bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1159    bool needs_wqm = false; /* there exists a p_wqm instruction */
1160    bool wb_smem_l1_on_end = false;
1161
1162    std::vector<uint8_t> constant_data;
1163    Temp private_segment_buffer;
1164    Temp scratch_offset;
1165
1166    uint16_t min_waves = 0;
1167    uint16_t lds_alloc_granule;
1168    uint32_t lds_limit; /* in bytes */
1169    uint16_t vgpr_limit;
1170    uint16_t sgpr_limit;
1171    uint16_t physical_sgprs;
1172    uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1173    uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1174
1175    bool needs_vcc = false;
1176    bool needs_xnack_mask = false;
1177    bool needs_flat_scr = false;
1178
1179    uint32_t allocateId()
1180    {
1181       assert(allocationID <= 16777215);
1182       return allocationID++;
1183    }
1184
1185    uint32_t peekAllocationId()
1186    {
1187       return allocationID;
1188    }
1189
1190    void setAllocationId(uint32_t id)
1191    {
1192       allocationID = id;
1193    }
1194
1195    Block* create_and_insert_block() {
1196       blocks.emplace_back(blocks.size());
1197       blocks.back().fp_mode = next_fp_mode;
1198       return &blocks.back();
1199    }
1200
1201    Block* insert_block(Block&& block) {
1202       block.index = blocks.size();
1203       block.fp_mode = next_fp_mode;
1204       blocks.emplace_back(std::move(block));
1205       return &blocks.back();
1206    }
1207
1208 private:
1209    uint32_t allocationID = 1;
1210 };
1211
/* Liveness information; this is the type returned by live_var_analysis(). */
struct live {
   /* live temps out per block */
   std::vector<std::set<Temp>> live_out;
   /* register demand (sgpr/vgpr) per instruction per block */
   std::vector<std::vector<RegisterDemand>> register_demand;
};
1218
/* Entry points of the individual compiler stages. Only the declarations are
 * part of this header; what each stage does has to be looked up at its
 * definition. */

/* instruction selection from NIR */
void select_program(Program *program,
                    unsigned shader_count,
                    struct nir_shader *const *shaders,
                    ac_shader_config* config,
                    struct radv_shader_args *args);
void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
                           ac_shader_config* config,
                           struct radv_shader_args *args);

/* analyses and transformations operating on a Program */
void lower_wqm(Program* program, live& live_vars,
               const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
void dominator_tree(Program* program);
void insert_exec_mask(Program *program);
void value_numbering(Program* program);
void optimize(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
/* note: live_out_per_block is taken by value, i.e. the argument is copied (or moved) */
void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
void insert_wait_states(Program* program);
void insert_NOPs(Program* program);

/* binary emission and disassembly */
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
void print_asm(Program *program, std::vector<uint32_t>& binary,
               unsigned exec_size, std::ostream& out);

/* IR validation; messages go to `output` */
void validate(Program* program, FILE *output);
bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1253 #ifndef NDEBUG
1254 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1255 #else
1256 #define perfwarn(program, cond, msg, ...) do {} while(0)
1257 #endif
1258
/* print a single instruction / a whole program to `output` */
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);

/* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);

/* get the number of sgprs/vgprs that must be allocated in order to address a given number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);

/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1272
1273 typedef struct {
1274    const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
1275    const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1276    const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1277    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1278    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1279    const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
1280    const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1281    const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1282 } Info;
1283
1284 extern const Info instr_info;
1285
1286 }
1287
1288 #endif /* ACO_IR_H */
1289