aco: Setup correct HW stages when tessellation is used.
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
45 extern uint64_t debug_flags;
46
47 enum {
48 DEBUG_VALIDATE = 0x1,
49 DEBUG_VALIDATE_RA = 0x2,
50 DEBUG_PERFWARN = 0x4,
51 };
52
53 /**
54 * Representation of the instruction's microcode encoding format
55 * Note: Some Vector ALU Formats can be combined, such that:
56 * - VOP2* | VOP3A represents a VOP2 instruction in the VOP3A encoding.
57 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitives.
58 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
59 *
60 * (*) The same is applicable for VOP1 and VOPC instructions.
61 */
62 enum class Format : std::uint16_t {
63 /* Pseudo Instruction Format */
64 PSEUDO = 0,
65 /* Scalar ALU & Control Formats */
66 SOP1 = 1,
67 SOP2 = 2,
68 SOPK = 3,
69 SOPP = 4,
70 SOPC = 5,
71 /* Scalar Memory Format */
72 SMEM = 6,
73 /* LDS/GDS Format */
74 DS = 8,
75 /* Vector Memory Buffer Formats */
76 MTBUF = 9,
77 MUBUF = 10,
78 /* Vector Memory Image Format */
79 MIMG = 11,
80 /* Export Format */
81 EXP = 12,
82 /* Flat Formats */
83 FLAT = 13,
84 GLOBAL = 14,
85 SCRATCH = 15,
86
87 PSEUDO_BRANCH = 16,
88 PSEUDO_BARRIER = 17,
89 PSEUDO_REDUCTION = 18,
90
91 /* Vector ALU Formats */
92 VOP1 = 1 << 8,
93 VOP2 = 1 << 9,
94 VOPC = 1 << 10,
95 VOP3 = 1 << 11,
96 VOP3A = 1 << 11,
97 VOP3B = 1 << 11,
98 VOP3P = 1 << 12,
99 /* Vector Parameter Interpolation Format */
100 VINTRP = 1 << 13,
101 DPP = 1 << 14,
102 SDWA = 1 << 15,
103 };
104
105 enum barrier_interaction : uint8_t {
106 barrier_none = 0,
107 barrier_buffer = 0x1,
108 barrier_image = 0x2,
109 barrier_atomic = 0x4,
110 barrier_shared = 0x8,
111 /* used for geometry shaders to ensure vertex data writes are finished before
112 * the GS_DONE s_sendmsg. */
113 barrier_gs_data = 0x10,
114 /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
115 barrier_gs_sendmsg = 0x20,
116 barrier_count = 6,
117 };
118
119 enum fp_round {
120 fp_round_ne = 0,
121 fp_round_pi = 1,
122 fp_round_ni = 2,
123 fp_round_tz = 3,
124 };
125
126 enum fp_denorm {
127 /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
128 * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
129 fp_denorm_flush = 0x0,
130 fp_denorm_keep = 0x3,
131 };
132
133 struct float_mode {
134 /* matches encoding of the MODE register */
135 union {
136 struct {
137 fp_round round32:2;
138 fp_round round16_64:2;
139 unsigned denorm32:2;
140 unsigned denorm16_64:2;
141 };
142 uint8_t val = 0;
143 };
144 /* if false, optimizations which may remove infs/nan/-0.0 can be done */
145 bool preserve_signed_zero_inf_nan32:1;
146 bool preserve_signed_zero_inf_nan16_64:1;
147 /* if false, optimizations which may remove denormal flushing can be done */
148 bool must_flush_denorms32:1;
149 bool must_flush_denorms16_64:1;
150 bool care_about_round32:1;
151 bool care_about_round16_64:1;
152
153 /* Returns true if instructions using the mode "other" can safely use the
154 * current one instead. */
155 bool canReplace(float_mode other) const noexcept {
156 return val == other.val &&
157 (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
158 (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
159 (must_flush_denorms32 || !other.must_flush_denorms32) &&
160 (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
161 (care_about_round32 || !other.care_about_round32) &&
162 (care_about_round16_64 || !other.care_about_round16_64);
163 }
164 };
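/* Illustrative sketch (the helper name is hypothetical, only for demonstration):
 * code compiled for the mode "other" may be placed in a block using the mode
 * "current" exactly when current.canReplace(other) holds. */
inline bool example_fp_modes_compatible(const float_mode &current, const float_mode &other)
{
   return current.canReplace(other);
}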
165
166 constexpr Format asVOP3(Format format) {
167 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
168 };
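/* Example (for illustration only): asVOP3() simply ORs the VOP3 encoding bit
 * into the base format, so the combined value keeps the original format bit. */
static_assert((uint32_t) asVOP3(Format::VOP2) ==
              ((uint32_t) Format::VOP2 | (uint32_t) Format::VOP3),
              "VOP2 | VOP3 represents a VOP2 instruction in the VOP3 encoding");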
169
170 enum class RegType {
171 none = 0,
172 sgpr,
173 vgpr,
174 linear_vgpr,
175 };
176
177 struct RegClass {
178
179 enum RC : uint8_t {
180 s1 = 1,
181 s2 = 2,
182 s3 = 3,
183 s4 = 4,
184 s6 = 6,
185 s8 = 8,
186 s16 = 16,
187 v1 = s1 | (1 << 5),
188 v2 = s2 | (1 << 5),
189 v3 = s3 | (1 << 5),
190 v4 = s4 | (1 << 5),
191 v5 = 5 | (1 << 5),
192 v6 = 6 | (1 << 5),
193 v7 = 7 | (1 << 5),
194 v8 = 8 | (1 << 5),
195 /* these are used for WWM and spills to vgpr */
196 v1_linear = v1 | (1 << 6),
197 v2_linear = v2 | (1 << 6),
198 };
199
200 RegClass() = default;
201 constexpr RegClass(RC rc)
202 : rc(rc) {}
203 constexpr RegClass(RegType type, unsigned size)
204 : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
205
206 constexpr operator RC() const { return rc; }
207 explicit operator bool() = delete;
208
209 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
210 constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
211 constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
212 constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
213
214 private:
215 RC rc;
216 };
217
218 /* transitional helper expressions */
219 static constexpr RegClass s1{RegClass::s1};
220 static constexpr RegClass s2{RegClass::s2};
221 static constexpr RegClass s3{RegClass::s3};
222 static constexpr RegClass s4{RegClass::s4};
223 static constexpr RegClass s8{RegClass::s8};
224 static constexpr RegClass s16{RegClass::s16};
225 static constexpr RegClass v1{RegClass::v1};
226 static constexpr RegClass v2{RegClass::v2};
227 static constexpr RegClass v3{RegClass::v3};
228 static constexpr RegClass v4{RegClass::v4};
229 static constexpr RegClass v5{RegClass::v5};
230 static constexpr RegClass v6{RegClass::v6};
231 static constexpr RegClass v7{RegClass::v7};
232 static constexpr RegClass v8{RegClass::v8};
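/* A few sanity examples (illustrative only): the helper constants encode both
 * the register count and the register file. */
static_assert(v2.size() == 2 && v2.type() == RegType::vgpr, "v2 is a 2-dword VGPR class");
static_assert(s4.size() == 4 && s4.type() == RegType::sgpr, "s4 is a 4-dword SGPR class");
static_assert(v1.as_linear().is_linear(), "as_linear() yields a linear VGPR class");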
233
234 /**
235 * Temp Class
236 * Each temporary virtual register has a
237 * register class (i.e. size and type)
238 * and an SSA id.
239 */
240 struct Temp {
241 Temp() = default;
242 constexpr Temp(uint32_t id, RegClass cls) noexcept
243 : id_(id), reg_class(cls) {}
244
245 constexpr uint32_t id() const noexcept { return id_; }
246 constexpr RegClass regClass() const noexcept { return reg_class; }
247
248 constexpr unsigned size() const noexcept { return reg_class.size(); }
249 constexpr RegType type() const noexcept { return reg_class.type(); }
250 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
251
252 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
253 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
254 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
255
256 private:
257 uint32_t id_:24;
258 RegClass reg_class;
259 };
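/* Example (illustrative only): a Temp pairs an SSA id with a register class,
 * so its size/type queries simply forward to that class. */
static_assert(Temp(1, v2).size() == 2, "a v2 temporary occupies two VGPRs");
static_assert(Temp(1, s2).regClass() == s2, "regClass() returns the class the Temp was created with");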
260
261 /**
262 * PhysReg
263 * Represents the physical register for each
264 * Operand and Definition.
265 */
266 struct PhysReg {
267 constexpr PhysReg() = default;
268 explicit constexpr PhysReg(unsigned r) : reg(r) {}
269 constexpr operator unsigned() const { return reg; }
270
271 uint16_t reg = 0;
272 };
273
274 /* helper expressions for special registers */
275 static constexpr PhysReg m0{124};
276 static constexpr PhysReg vcc{106};
277 static constexpr PhysReg vcc_hi{107};
278 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
279 static constexpr PhysReg exec{126};
280 static constexpr PhysReg exec_lo{126};
281 static constexpr PhysReg exec_hi{127};
282 static constexpr PhysReg vccz{251};
283 static constexpr PhysReg execz{252};
284 static constexpr PhysReg scc{253};
285
286 /**
287 * Operand Class
288 * Initially, each Operand refers to either
289 * a temporary virtual register
290 * or a constant value.
291 * Temporary registers get mapped to physical registers during RA.
292 * Constant values are inlined into the instruction sequence.
293 */
294 class Operand final
295 {
296 public:
297 constexpr Operand()
298 : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
299 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
300
301 explicit Operand(Temp r) noexcept
302 {
303 data_.temp = r;
304 if (r.id()) {
305 isTemp_ = true;
306 } else {
307 isUndef_ = true;
308 setFixed(PhysReg{128});
309 }
310 };
311 explicit Operand(uint32_t v, bool is64bit = false) noexcept
312 {
313 data_.i = v;
314 isConstant_ = true;
315 is64BitConst_ = is64bit;
316 if (v <= 64)
317 setFixed(PhysReg{128 + v});
318 else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
319 setFixed(PhysReg{192 - v});
320 else if (v == 0x3f000000) /* 0.5 */
321 setFixed(PhysReg{240});
322 else if (v == 0xbf000000) /* -0.5 */
323 setFixed(PhysReg{241});
324 else if (v == 0x3f800000) /* 1.0 */
325 setFixed(PhysReg{242});
326 else if (v == 0xbf800000) /* -1.0 */
327 setFixed(PhysReg{243});
328 else if (v == 0x40000000) /* 2.0 */
329 setFixed(PhysReg{244});
330 else if (v == 0xc0000000) /* -2.0 */
331 setFixed(PhysReg{245});
332 else if (v == 0x40800000) /* 4.0 */
333 setFixed(PhysReg{246});
334 else if (v == 0xc0800000) /* -4.0 */
335 setFixed(PhysReg{247});
336 else { /* Literal Constant */
337 assert(!is64bit && "attempt to create a 64-bit literal constant");
338 setFixed(PhysReg{255});
339 }
340 };
341 explicit Operand(uint64_t v) noexcept
342 {
343 isConstant_ = true;
344 is64BitConst_ = true;
345 if (v <= 64) {
346 data_.i = (uint32_t) v;
347 setFixed(PhysReg{128 + (uint32_t) v});
348 } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
349 data_.i = (uint32_t) v;
350 setFixed(PhysReg{192 - (uint32_t) v});
351 } else if (v == 0x3FE0000000000000) { /* 0.5 */
352 data_.i = 0x3f000000;
353 setFixed(PhysReg{240});
354 } else if (v == 0xBFE0000000000000) { /* -0.5 */
355 data_.i = 0xbf000000;
356 setFixed(PhysReg{241});
357 } else if (v == 0x3FF0000000000000) { /* 1.0 */
358 data_.i = 0x3f800000;
359 setFixed(PhysReg{242});
360 } else if (v == 0xBFF0000000000000) { /* -1.0 */
361 data_.i = 0xbf800000;
362 setFixed(PhysReg{243});
363 } else if (v == 0x4000000000000000) { /* 2.0 */
364 data_.i = 0x40000000;
365 setFixed(PhysReg{244});
366 } else if (v == 0xC000000000000000) { /* -2.0 */
367 data_.i = 0xc0000000;
368 setFixed(PhysReg{245});
369 } else if (v == 0x4010000000000000) { /* 4.0 */
370 data_.i = 0x40800000;
371 setFixed(PhysReg{246});
372 } else if (v == 0xC010000000000000) { /* -4.0 */
373 data_.i = 0xc0800000;
374 setFixed(PhysReg{247});
375 } else { /* Literal Constant: we don't know if it is a long or double.*/
376 isConstant_ = 0;
377 assert(false && "attempt to create a 64-bit literal constant");
378 }
379 };
380 explicit Operand(RegClass type) noexcept
381 {
382 isUndef_ = true;
383 data_.temp = Temp(0, type);
384 setFixed(PhysReg{128});
385 };
386 explicit Operand(PhysReg reg, RegClass type) noexcept
387 {
388 data_.temp = Temp(0, type);
389 setFixed(reg);
390 }
391
392 constexpr bool isTemp() const noexcept
393 {
394 return isTemp_;
395 }
396
397 constexpr void setTemp(Temp t) noexcept {
398 assert(!isConstant_);
399 isTemp_ = true;
400 data_.temp = t;
401 }
402
403 constexpr Temp getTemp() const noexcept
404 {
405 return data_.temp;
406 }
407
408 constexpr uint32_t tempId() const noexcept
409 {
410 return data_.temp.id();
411 }
412
413 constexpr bool hasRegClass() const noexcept
414 {
415 return isTemp() || isUndefined();
416 }
417
418 constexpr RegClass regClass() const noexcept
419 {
420 return data_.temp.regClass();
421 }
422
423 constexpr unsigned size() const noexcept
424 {
425 if (isConstant())
426 return is64BitConst_ ? 2 : 1;
427 else
428 return data_.temp.size();
429 }
430
431 constexpr bool isFixed() const noexcept
432 {
433 return isFixed_;
434 }
435
436 constexpr PhysReg physReg() const noexcept
437 {
438 return reg_;
439 }
440
441 constexpr void setFixed(PhysReg reg) noexcept
442 {
443 isFixed_ = reg != unsigned(-1);
444 reg_ = reg;
445 }
446
447 constexpr bool isConstant() const noexcept
448 {
449 return isConstant_;
450 }
451
452 constexpr bool isLiteral() const noexcept
453 {
454 return isConstant() && reg_ == 255;
455 }
456
457 constexpr bool isUndefined() const noexcept
458 {
459 return isUndef_;
460 }
461
462 constexpr uint32_t constantValue() const noexcept
463 {
464 return data_.i;
465 }
466
467 constexpr bool constantEquals(uint32_t cmp) const noexcept
468 {
469 return isConstant() && constantValue() == cmp;
470 }
471
472 constexpr void setKill(bool flag) noexcept
473 {
474 isKill_ = flag;
475 if (!flag)
476 setFirstKill(false);
477 }
478
479 constexpr bool isKill() const noexcept
480 {
481 return isKill_ || isFirstKill();
482 }
483
484 constexpr void setFirstKill(bool flag) noexcept
485 {
486 isFirstKill_ = flag;
487 if (flag)
488 setKill(flag);
489 }
490
491 /* When there are multiple operands killing the same temporary,
492 * isFirstKill() only returns true for the first one. */
493 constexpr bool isFirstKill() const noexcept
494 {
495 return isFirstKill_;
496 }
497
498 private:
499 union {
500 uint32_t i;
501 float f;
502 Temp temp = Temp(0, s1);
503 } data_;
504 PhysReg reg_;
505 union {
506 struct {
507 uint8_t isTemp_:1;
508 uint8_t isFixed_:1;
509 uint8_t isConstant_:1;
510 uint8_t isKill_:1;
511 uint8_t isUndef_:1;
512 uint8_t isFirstKill_:1;
513 uint8_t is64BitConst_:1;
514 };
515 /* can't initialize bit-fields in c++11, so work around using a union */
516 uint8_t control_ = 0;
517 };
518 };
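/* Usage sketch (the function name is hypothetical): small integers and common
 * float values are turned into fixed inline constants, everything else becomes
 * a literal (register index 255). */
inline void example_operand_constants()
{
   Operand small{4u};        /* inline constant: fixed to PhysReg{128 + 4} */
   Operand one{0x3f800000u}; /* 1.0f: fixed to PhysReg{242} */
   Operand big{0x12345678u}; /* no inline encoding: becomes a literal */
   assert(small.physReg() == PhysReg{132});
   assert(one.physReg() == PhysReg{242});
   assert(big.isLiteral());
}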
519
520 /**
521 * Definition Class
522 * Definitions are the results of Instructions
523 * and refer to temporary virtual registers
524 * which are later mapped to physical registers
525 */
526 class Definition final
527 {
528 public:
529 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
530 Definition(uint32_t index, RegClass type) noexcept
531 : temp(index, type) {}
532 explicit Definition(Temp tmp) noexcept
533 : temp(tmp) {}
534 Definition(PhysReg reg, RegClass type) noexcept
535 : temp(Temp(0, type))
536 {
537 setFixed(reg);
538 }
539 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
540 : temp(Temp(tmpId, type))
541 {
542 setFixed(reg);
543 }
544
545 constexpr bool isTemp() const noexcept
546 {
547 return tempId() > 0;
548 }
549
550 constexpr Temp getTemp() const noexcept
551 {
552 return temp;
553 }
554
555 constexpr uint32_t tempId() const noexcept
556 {
557 return temp.id();
558 }
559
560 constexpr void setTemp(Temp t) noexcept {
561 temp = t;
562 }
563
564 constexpr RegClass regClass() const noexcept
565 {
566 return temp.regClass();
567 }
568
569 constexpr unsigned size() const noexcept
570 {
571 return temp.size();
572 }
573
574 constexpr bool isFixed() const noexcept
575 {
576 return isFixed_;
577 }
578
579 constexpr PhysReg physReg() const noexcept
580 {
581 return reg_;
582 }
583
584 constexpr void setFixed(PhysReg reg) noexcept
585 {
586 isFixed_ = 1;
587 reg_ = reg;
588 }
589
590 constexpr void setHint(PhysReg reg) noexcept
591 {
592 hasHint_ = 1;
593 reg_ = reg;
594 }
595
596 constexpr bool hasHint() const noexcept
597 {
598 return hasHint_;
599 }
600
601 constexpr void setKill(bool flag) noexcept
602 {
603 isKill_ = flag;
604 }
605
606 constexpr bool isKill() const noexcept
607 {
608 return isKill_;
609 }
610
611 private:
612 Temp temp = Temp(0, s1);
613 PhysReg reg_;
614 union {
615 struct {
616 uint8_t isFixed_:1;
617 uint8_t hasHint_:1;
618 uint8_t isKill_:1;
619 };
620 /* can't initialize bit-fields in c++11, so work around using a union */
621 uint8_t control_ = 0;
622 };
623 };
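/* Sketch (hypothetical helper name): most definitions wrap a temporary, but
 * clobbers of fixed registers such as scc or vcc use the PhysReg constructor,
 * which leaves the temp id at 0. */
inline Definition example_scc_clobber()
{
   return Definition(scc, s1);
}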
624
625 class Block;
626
627 struct Instruction {
628 aco_opcode opcode;
629 Format format;
630 uint32_t pass_flags;
631
632 aco::span<Operand> operands;
633 aco::span<Definition> definitions;
634
635 constexpr bool isVALU() const noexcept
636 {
637 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
638 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
639 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
640 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
641 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
642 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
643 }
644
645 constexpr bool isSALU() const noexcept
646 {
647 return format == Format::SOP1 ||
648 format == Format::SOP2 ||
649 format == Format::SOPC ||
650 format == Format::SOPK ||
651 format == Format::SOPP;
652 }
653
654 constexpr bool isVMEM() const noexcept
655 {
656 return format == Format::MTBUF ||
657 format == Format::MUBUF ||
658 format == Format::MIMG;
659 }
660
661 constexpr bool isDPP() const noexcept
662 {
663 return (uint16_t) format & (uint16_t) Format::DPP;
664 }
665
666 constexpr bool isVOP3() const noexcept
667 {
668 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
669 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
670 format == Format::VOP3P;
671 }
672
673 constexpr bool isSDWA() const noexcept
674 {
675 return (uint16_t) format & (uint16_t) Format::SDWA;
676 }
677
678 constexpr bool isFlatOrGlobal() const noexcept
679 {
680 return format == Format::FLAT || format == Format::GLOBAL;
681 }
682
683 constexpr bool usesModifiers() const noexcept;
684
685 constexpr bool reads_exec() const noexcept
686 {
687 for (const Operand& op : operands) {
688 if (op.isFixed() && op.physReg() == exec)
689 return true;
690 }
691 return false;
692 }
693 };
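/* Example (hypothetical helper name): the format predicates test encoding bits,
 * so a VOP2 instruction promoted to the VOP3 encoding still reports isVALU()
 * and additionally reports isVOP3(). */
inline bool example_is_plain_promoted_valu(const Instruction& instr)
{
   return instr.isVALU() && instr.isVOP3() && !instr.isDPP() && !instr.isSDWA();
}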
694
695 struct SOPK_instruction : public Instruction {
696 uint16_t imm;
697 };
698
699 struct SOPP_instruction : public Instruction {
700 uint32_t imm;
701 int block;
702 };
703
704 struct SOPC_instruction : public Instruction {
705 };
706
707 struct SOP1_instruction : public Instruction {
708 };
709
710 struct SOP2_instruction : public Instruction {
711 };
712
713 /**
714 * Scalar Memory Format:
715 * For s_(buffer_)load_dword*:
716 * Operand(0): SBASE - SGPR-pair which provides base address
717 * Operand(1): Offset - immediate (un)signed offset or SGPR
718 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
719 * Operand(n-1): SOffset - SGPR offset (Vega only)
720 *
721 * Having no operands is also valid for instructions such as s_dcache_inv.
722 *
723 */
724 struct SMEM_instruction : public Instruction {
725 bool glc : 1; /* VI+: globally coherent */
726 bool dlc : 1; /* NAVI: device level coherent */
727 bool nv : 1; /* VEGA only: Non-volatile */
728 bool can_reorder : 1;
729 bool disable_wqm : 1;
730 barrier_interaction barrier;
731 };
732
733 struct VOP1_instruction : public Instruction {
734 };
735
736 struct VOP2_instruction : public Instruction {
737 };
738
739 struct VOPC_instruction : public Instruction {
740 };
741
742 struct VOP3A_instruction : public Instruction {
743 bool abs[3];
744 bool neg[3];
745 uint8_t opsel : 4;
746 uint8_t omod : 2;
747 bool clamp : 1;
748 };
749
750 /**
751 * Data Parallel Primitives Format:
752 * This format can be used for VOP1, VOP2 or VOPC instructions.
753 * The swizzle applies to the src0 operand.
754 *
755 */
756 struct DPP_instruction : public Instruction {
757 bool abs[2];
758 bool neg[2];
759 uint16_t dpp_ctrl;
760 uint8_t row_mask : 4;
761 uint8_t bank_mask : 4;
762 bool bound_ctrl : 1;
763 };
764
765 struct Interp_instruction : public Instruction {
766 uint8_t attribute;
767 uint8_t component;
768 };
769
770 /**
771 * Local and Global Data Sharing instructions
772 * Operand(0): ADDR - VGPR which supplies the address.
773 * Operand(1): DATA0 - First data VGPR.
774 * Operand(2): DATA1 - Second data VGPR.
775 * Operand(n-1): M0 - LDS size.
776 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
777 *
778 */
779 struct DS_instruction : public Instruction {
780 int16_t offset0;
781 int8_t offset1;
782 bool gds;
783 };
784
785 /**
786 * Vector Memory Untyped-buffer Instructions
787 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
788 * Operand(1): VADDR - Address source. Can carry an index and/or offset
789 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
790 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
791 *
792 */
793 struct MUBUF_instruction : public Instruction {
794 uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
795 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
796 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
797 bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
798 bool glc : 1; /* globally coherent */
799 bool dlc : 1; /* NAVI: device level coherent */
800 bool slc : 1; /* system level coherent */
801 bool tfe : 1; /* texture fail enable */
802 bool lds : 1; /* Return read-data to LDS instead of VGPRs */
803 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
804 bool can_reorder : 1;
805 barrier_interaction barrier;
806 };
807
808 /**
809 * Vector Memory Typed-buffer Instructions
810 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
811 * Operand(1): VADDR - Address source. Can carry an index and/or offset
812 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
813 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
814 *
815 */
816 struct MTBUF_instruction : public Instruction {
817 uint16_t offset; /* Unsigned byte offset - 12 bit */
818 uint8_t dfmt : 4; /* Data Format of data in memory buffer */
819 uint8_t nfmt : 3; /* Numeric format of data in memory */
820 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
821 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
822 bool glc : 1; /* globally coherent */
823 bool dlc : 1; /* NAVI: device level coherent */
824 bool slc : 1; /* system level coherent */
825 bool tfe : 1; /* texture fail enable */
826 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
827 bool can_reorder : 1;
828 barrier_interaction barrier;
829 };
830
831 /**
832 * Vector Memory Image Instructions
833 * Operand(0): SRSRC - Scalar GPR that specifies the resource constant.
834 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
835 * or VDATA - Vector GPR for write data.
836 * Operand(2): VADDR - Address source. Can carry an offset or an index.
837 * Definition(0): VDATA - Vector GPR for read result.
838 *
839 */
840 struct MIMG_instruction : public Instruction {
841 uint8_t dmask; /* Data VGPR enable mask */
842 uint8_t dim : 3; /* NAVI: dimensionality */
843 bool unrm : 1; /* Force address to be un-normalized */
844 bool dlc : 1; /* NAVI: device level coherent */
845 bool glc : 1; /* globally coherent */
846 bool slc : 1; /* system level coherent */
847 bool tfe : 1; /* texture fail enable */
848 bool da : 1; /* declare an array */
849 bool lwe : 1; /* LOD warning enable */
850 bool r128 : 1; /* NAVI: Texture resource size */
851 bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
852 bool d16 : 1; /* Convert 32-bit data to 16-bit data */
853 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
854 bool can_reorder : 1;
855 barrier_interaction barrier;
856 };
857
858 /**
859 * Flat/Scratch/Global Instructions
860 * Operand(0): ADDR
861 * Operand(1): SADDR
862 * Operand(2) / Definition(0): DATA/VDST
863 *
864 */
865 struct FLAT_instruction : public Instruction {
866 uint16_t offset; /* Vega/Navi only */
867 bool slc : 1; /* system level coherent */
868 bool glc : 1; /* globally coherent */
869 bool dlc : 1; /* NAVI: device level coherent */
870 bool lds : 1;
871 bool nv : 1;
872 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
873 bool can_reorder : 1;
874 barrier_interaction barrier;
875 };
876
877 struct Export_instruction : public Instruction {
878 uint8_t enabled_mask;
879 uint8_t dest;
880 bool compressed : 1;
881 bool done : 1;
882 bool valid_mask : 1;
883 };
884
885 struct Pseudo_instruction : public Instruction {
886 bool tmp_in_scc;
887 PhysReg scratch_sgpr; /* might not be valid if it's not needed */
888 };
889
890 struct Pseudo_branch_instruction : public Instruction {
891 /* target[0] is the block index of the branch target.
892 * For conditional branches, target[1] contains the fall-through alternative.
893 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
894 */
895 uint32_t target[2];
896 };
897
898 struct Pseudo_barrier_instruction : public Instruction {
899 };
900
901 enum ReduceOp {
902 iadd32, iadd64,
903 imul32, imul64,
904 fadd32, fadd64,
905 fmul32, fmul64,
906 imin32, imin64,
907 imax32, imax64,
908 umin32, umin64,
909 umax32, umax64,
910 fmin32, fmin64,
911 fmax32, fmax64,
912 iand32, iand64,
913 ior32, ior64,
914 ixor32, ixor64,
915 gfx10_wave64_bpermute
916 };
917
918 /**
919 * Subgroup Reduction Instructions, everything except for the data to be
920 * reduced and the result as inserted by setup_reduce_temp().
921 * Operand(0): data to be reduced
922 * Operand(1): reduce temporary
923 * Operand(2): vector temporary
924 * Definition(0): result
925 * Definition(1): scalar temporary
926 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
927 * Definition(3): scc clobber
928 * Definition(4): vcc clobber
929 *
930 */
931 struct Pseudo_reduction_instruction : public Instruction {
932 ReduceOp reduce_op;
933 unsigned cluster_size; // must be 0 for scans
934 };
935
936 struct instr_deleter_functor {
937 void operator()(void* p) {
938 free(p);
939 }
940 };
941
942 template<typename T>
943 using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
944
945 template<typename T>
946 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
947 {
948 std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
949 char *data = (char*) calloc(1, size);
950 T* inst = (T*) data;
951
952 inst->opcode = opcode;
953 inst->format = format;
954
955 uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
956 inst->operands = aco::span<Operand>(operands_offset, num_operands);
957 uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
958 inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
959
960 return inst;
961 }
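/* Usage sketch (function name and opcode choice are for illustration only):
 * create_instruction<> allocates the instruction together with its operand and
 * definition arrays; the caller then fills them in. The operand layout follows
 * the SMEM description above. */
inline aco_ptr<Instruction> example_build_smem_load(Temp dst_s4, Temp sbase_s2, uint32_t const_offset)
{
   aco_ptr<SMEM_instruction> load{create_instruction<SMEM_instruction>(
      aco_opcode::s_load_dwordx4, Format::SMEM, 2, 1)};
   load->operands[0] = Operand(sbase_s2);     /* SBASE: SGPR-pair base address */
   load->operands[1] = Operand(const_offset); /* immediate offset */
   load->definitions[0] = Definition(dst_s4); /* SDATA: SGPRs receiving the result */
   return load;
}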
962
963 constexpr bool Instruction::usesModifiers() const noexcept
964 {
965 if (isDPP() || isSDWA())
966 return true;
967 if (!isVOP3())
968 return false;
969 const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
970 for (unsigned i = 0; i < operands.size(); i++) {
971 if (vop3->abs[i] || vop3->neg[i])
972 return true;
973 }
974 return vop3->opsel || vop3->clamp || vop3->omod;
975 }
976
977 constexpr bool is_phi(Instruction* instr)
978 {
979 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
980 }
981
982 static inline bool is_phi(aco_ptr<Instruction>& instr)
983 {
984 return is_phi(instr.get());
985 }
986
987 barrier_interaction get_barrier_interaction(Instruction* instr);
988
989 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
990
991 enum block_kind {
992 /* uniform indicates that, when leaving this block,
993 * all active lanes stay active */
994 block_kind_uniform = 1 << 0,
995 block_kind_top_level = 1 << 1,
996 block_kind_loop_preheader = 1 << 2,
997 block_kind_loop_header = 1 << 3,
998 block_kind_loop_exit = 1 << 4,
999 block_kind_continue = 1 << 5,
1000 block_kind_break = 1 << 6,
1001 block_kind_continue_or_break = 1 << 7,
1002 block_kind_discard = 1 << 8,
1003 block_kind_branch = 1 << 9,
1004 block_kind_merge = 1 << 10,
1005 block_kind_invert = 1 << 11,
1006 block_kind_uses_discard_if = 1 << 12,
1007 block_kind_needs_lowering = 1 << 13,
1008 block_kind_uses_demote = 1 << 14,
1009 block_kind_export_end = 1 << 15,
1010 };
1011
1012
1013 struct RegisterDemand {
1014 constexpr RegisterDemand() = default;
1015 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1016 : vgpr{v}, sgpr{s} {}
1017 int16_t vgpr = 0;
1018 int16_t sgpr = 0;
1019
1020 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1021 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1022 }
1023
1024 constexpr bool exceeds(const RegisterDemand other) const noexcept {
1025 return vgpr > other.vgpr || sgpr > other.sgpr;
1026 }
1027
1028 constexpr RegisterDemand operator+(const Temp t) const noexcept {
1029 if (t.type() == RegType::sgpr)
1030 return RegisterDemand( vgpr, sgpr + t.size() );
1031 else
1032 return RegisterDemand( vgpr + t.size(), sgpr );
1033 }
1034
1035 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1036 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1037 }
1038
1039 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1040 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1041 }
1042
1043 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1044 vgpr += other.vgpr;
1045 sgpr += other.sgpr;
1046 return *this;
1047 }
1048
1049 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1050 vgpr -= other.vgpr;
1051 sgpr -= other.sgpr;
1052 return *this;
1053 }
1054
1055 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1056 if (t.type() == RegType::sgpr)
1057 sgpr += t.size();
1058 else
1059 vgpr += t.size();
1060 return *this;
1061 }
1062
1063 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1064 if (t.type() == RegType::sgpr)
1065 sgpr -= t.size();
1066 else
1067 vgpr -= t.size();
1068 return *this;
1069 }
1070
1071 constexpr void update(const RegisterDemand other) noexcept {
1072 vgpr = std::max(vgpr, other.vgpr);
1073 sgpr = std::max(sgpr, other.sgpr);
1074 }
1075
1076 };
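/* Examples (illustrative only): RegisterDemand is a plain (vgpr, sgpr) pair
 * with component-wise arithmetic and comparison helpers. */
static_assert(RegisterDemand(2, 3) + RegisterDemand(1, 1) == RegisterDemand(3, 4),
              "operator+ adds the vgpr and sgpr demands component-wise");
static_assert(RegisterDemand(4, 0).exceeds(RegisterDemand(3, 8)),
              "exceeds() is true if either component is larger");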
1077
1078 /* CFG */
1079 struct Block {
1080 float_mode fp_mode;
1081 unsigned index;
1082 unsigned offset = 0;
1083 std::vector<aco_ptr<Instruction>> instructions;
1084 std::vector<unsigned> logical_preds;
1085 std::vector<unsigned> linear_preds;
1086 std::vector<unsigned> logical_succs;
1087 std::vector<unsigned> linear_succs;
1088 RegisterDemand register_demand = RegisterDemand();
1089 uint16_t loop_nest_depth = 0;
1090 uint16_t kind = 0;
1091 int logical_idom = -1;
1092 int linear_idom = -1;
1093 Temp live_out_exec = Temp();
1094
1095 /* this information is needed for predecessors to blocks with phis when
1096 * moving out of ssa */
1097 bool scc_live_out = false;
1098 PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out is true */
1099
1100 Block(unsigned idx) : index(idx) {}
1101 Block() : index(0) {}
1102 };
1103
1104 using Stage = uint16_t;
1105
1106 /* software stages */
1107 static constexpr Stage sw_vs = 1 << 0;
1108 static constexpr Stage sw_gs = 1 << 1;
1109 static constexpr Stage sw_tcs = 1 << 2;
1110 static constexpr Stage sw_tes = 1 << 3;
1111 static constexpr Stage sw_fs = 1 << 4;
1112 static constexpr Stage sw_cs = 1 << 5;
1113 static constexpr Stage sw_gs_copy = 1 << 6;
1114 static constexpr Stage sw_mask = 0x7f;
1115
1116 /* hardware stages (can't be OR'd with each other; they only act as a mask for conveniently testing several at once) */
1117 static constexpr Stage hw_vs = 1 << 7;
1118 static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
1119 static constexpr Stage hw_gs = 1 << 9;
1120 static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
1121 static constexpr Stage hw_hs = 1 << 11;
1122 static constexpr Stage hw_fs = 1 << 12;
1123 static constexpr Stage hw_cs = 1 << 13;
1124 static constexpr Stage hw_mask = 0x7f << 7;
1125
1126 /* possible settings of Program::stage */
1127 static constexpr Stage vertex_vs = sw_vs | hw_vs;
1128 static constexpr Stage fragment_fs = sw_fs | hw_fs;
1129 static constexpr Stage compute_cs = sw_cs | hw_cs;
1130 static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
1131 static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
1132 /* GFX10/NGG */
1133 static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
1134 static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1135 static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1136 static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1137 /* GFX9 (and GFX10 if NGG isn't used) */
1138 static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1139 static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1140 static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1141 /* pre-GFX9 */
1142 static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
1143 static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
1144 static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
1145 static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
1146 static constexpr Stage geometry_gs = sw_gs | hw_gs;
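/* Examples (illustrative only): a Program::stage value combines the software
 * stages being compiled with exactly one hardware stage. When tessellation is
 * used on GFX9+, for instance, the vertex shader runs on the hardware HS stage. */
static_assert((vertex_tess_control_hs & hw_mask) == hw_hs,
              "merged VS+TCS shaders execute on the hardware HS stage");
static_assert((vertex_tess_control_hs & sw_mask) == (sw_vs | sw_tcs),
              "the merged shader still contains the VS and TCS software stages");
static_assert((tess_eval_vs & hw_mask) == hw_vs,
              "a tessellation evaluation shader without GS runs on the hardware VS stage");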
1147
1148 class Program final {
1149 public:
1150 float_mode next_fp_mode;
1151 std::vector<Block> blocks;
1152 RegisterDemand max_reg_demand = RegisterDemand();
1153 uint16_t num_waves = 0;
1154 uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1155 ac_shader_config* config;
1156 struct radv_shader_info *info;
1157 enum chip_class chip_class;
1158 enum radeon_family family;
1159 unsigned wave_size;
1160 RegClass lane_mask;
1161 Stage stage; /* Stage */
1162 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1163 bool needs_wqm = false; /* there exists a p_wqm instruction */
1164 bool wb_smem_l1_on_end = false;
1165
1166 std::vector<uint8_t> constant_data;
1167 Temp private_segment_buffer;
1168 Temp scratch_offset;
1169
1170 uint16_t min_waves = 0;
1171 uint16_t lds_alloc_granule;
1172 uint32_t lds_limit; /* in bytes */
1173 uint16_t vgpr_limit;
1174 uint16_t sgpr_limit;
1175 uint16_t physical_sgprs;
1176 uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1177 uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1178
1179 bool needs_vcc = false;
1180 bool needs_xnack_mask = false;
1181 bool needs_flat_scr = false;
1182
1183 uint32_t allocateId()
1184 {
1185 assert(allocationID <= 16777215);
1186 return allocationID++;
1187 }
1188
1189 uint32_t peekAllocationId()
1190 {
1191 return allocationID;
1192 }
1193
1194 void setAllocationId(uint32_t id)
1195 {
1196 allocationID = id;
1197 }
1198
1199 Block* create_and_insert_block() {
1200 blocks.emplace_back(blocks.size());
1201 blocks.back().fp_mode = next_fp_mode;
1202 return &blocks.back();
1203 }
1204
1205 Block* insert_block(Block&& block) {
1206 block.index = blocks.size();
1207 block.fp_mode = next_fp_mode;
1208 blocks.emplace_back(std::move(block));
1209 return &blocks.back();
1210 }
1211
1212 private:
1213 uint32_t allocationID = 1;
1214 };
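/* Sketch (hypothetical helper name): newly created blocks inherit the FP mode
 * that is current at creation time, so the selector updates next_fp_mode before
 * starting a block. */
inline Block* example_start_block(Program* program, float_mode mode)
{
   program->next_fp_mode = mode;
   return program->create_and_insert_block();
}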
1215
1216 struct live {
1217 /* live temps out per block */
1218 std::vector<std::set<Temp>> live_out;
1219 /* register demand (sgpr/vgpr) per instruction per block */
1220 std::vector<std::vector<RegisterDemand>> register_demand;
1221 };
1222
1223 void select_program(Program *program,
1224 unsigned shader_count,
1225 struct nir_shader *const *shaders,
1226 ac_shader_config* config,
1227 struct radv_shader_args *args);
1228 void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
1229 ac_shader_config* config,
1230 struct radv_shader_args *args);
1231
1232 void lower_wqm(Program* program, live& live_vars,
1233 const struct radv_nir_compiler_options *options);
1234 void lower_bool_phis(Program* program);
1235 void calc_min_waves(Program* program);
1236 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1237 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1238 std::vector<uint16_t> dead_code_analysis(Program *program);
1239 void dominator_tree(Program* program);
1240 void insert_exec_mask(Program *program);
1241 void value_numbering(Program* program);
1242 void optimize(Program* program);
1243 void setup_reduce_temp(Program* program);
1244 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1245 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1246 void ssa_elimination(Program* program);
1247 void lower_to_hw_instr(Program* program);
1248 void schedule_program(Program* program, live& live_vars);
1249 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1250 void insert_wait_states(Program* program);
1251 void insert_NOPs(Program* program);
1252 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1253 void print_asm(Program *program, std::vector<uint32_t>& binary,
1254 unsigned exec_size, std::ostream& out);
1255 void validate(Program* program, FILE *output);
1256 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1257 #ifndef NDEBUG
1258 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1259 #else
1260 #define perfwarn(program, cond, msg, ...) do {} while(0)
1261 #endif
1262
1263 void aco_print_instr(Instruction *instr, FILE *output);
1264 void aco_print_program(Program *program, FILE *output);
1265
1266 /* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
1267 uint16_t get_extra_sgprs(Program *program);
1268
1269 /* get the number of sgprs/vgprs that must be allocated to address a given number of sgprs/vgprs */
1270 uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
1271 uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);
1272
1273 /* return number of addressable sgprs/vgprs for max_waves */
1274 uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
1275 uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1276
1277 typedef struct {
1278 const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
1279 const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1280 const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1281 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1282 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1283 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
1284 const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1285 const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1286 } Info;
1287
1288 extern const Info instr_info;
1289
1290 }
1291
1292 #endif /* ACO_IR_H */
1293