aco: add a late kill flag
[mesa.git] / src / amd / compiler / aco_ir.h
1 /*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25 #ifndef ACO_IR_H
26 #define ACO_IR_H
27
28 #include <vector>
29 #include <set>
30 #include <bitset>
31 #include <memory>
32
33 #include "nir.h"
34 #include "ac_binary.h"
35 #include "amd_family.h"
36 #include "aco_opcodes.h"
37 #include "aco_util.h"
38
39 struct radv_nir_compiler_options;
40 struct radv_shader_args;
41 struct radv_shader_info;
42
43 namespace aco {
44
45 extern uint64_t debug_flags;
46
47 enum {
48 DEBUG_VALIDATE = 0x1,
49 DEBUG_VALIDATE_RA = 0x2,
50 DEBUG_PERFWARN = 0x4,
51 };
52
53 /**
54 * Representation of the instruction's microcode encoding format
55 * Note: Some Vector ALU Formats can be combined, such that:
56 * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
57 * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
58 * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
59 *
60 * (*) The same is applicable for VOP1 and VOPC instructions.
61 */
62 enum class Format : std::uint16_t {
63 /* Pseudo Instruction Format */
64 PSEUDO = 0,
65 /* Scalar ALU & Control Formats */
66 SOP1 = 1,
67 SOP2 = 2,
68 SOPK = 3,
69 SOPP = 4,
70 SOPC = 5,
71 /* Scalar Memory Format */
72 SMEM = 6,
73 /* LDS/GDS Format */
74 DS = 8,
75 /* Vector Memory Buffer Formats */
76 MTBUF = 9,
77 MUBUF = 10,
78 /* Vector Memory Image Format */
79 MIMG = 11,
80 /* Export Format */
81 EXP = 12,
82 /* Flat Formats */
83 FLAT = 13,
84 GLOBAL = 14,
85 SCRATCH = 15,
86
87 PSEUDO_BRANCH = 16,
88 PSEUDO_BARRIER = 17,
89 PSEUDO_REDUCTION = 18,
90
91 /* Vector ALU Formats */
92 VOP1 = 1 << 8,
93 VOP2 = 1 << 9,
94 VOPC = 1 << 10,
95 VOP3 = 1 << 11,
96 VOP3A = 1 << 11,
97 VOP3B = 1 << 11,
98 VOP3P = 1 << 12,
99 /* Vector Parameter Interpolation Format */
100 VINTRP = 1 << 13,
101 DPP = 1 << 14,
102 SDWA = 1 << 15,
103 };
104
105 enum barrier_interaction : uint8_t {
106 barrier_none = 0,
107 barrier_buffer = 0x1,
108 barrier_image = 0x2,
109 barrier_atomic = 0x4,
110 barrier_shared = 0x8,
111 /* used for geometry shaders to ensure vertex data writes are before the
112 * GS_DONE s_sendmsg. */
113 barrier_gs_data = 0x10,
114 /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
115 barrier_gs_sendmsg = 0x20,
116 /* used by barriers. created by s_barrier */
117 barrier_barrier = 0x40,
118   barrier_count = 7, /* number of barrier_* flag bits above */
119 };
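/* Illustrative sketch (not part of the original header): these are bit flags,
 * so a single instruction can interact with several classes at once, e.g. an
 * atomic operating on a buffer:
 *
 *    barrier_interaction b =
 *       (barrier_interaction)(barrier_buffer | barrier_atomic);
 */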
120
121 enum fp_round {
122 fp_round_ne = 0,
123 fp_round_pi = 1,
124 fp_round_ni = 2,
125 fp_round_tz = 3,
126 };
127
128 enum fp_denorm {
129 /* Note that v_rcp_f32, v_exp_f32, v_log_f32, v_sqrt_f32, v_rsq_f32 and
130 * v_mad_f32/v_madak_f32/v_madmk_f32/v_mac_f32 always flush denormals. */
131 fp_denorm_flush = 0x0,
132 fp_denorm_keep = 0x3,
133 };
134
135 struct float_mode {
136 /* matches encoding of the MODE register */
137 union {
138 struct {
139 fp_round round32:2;
140 fp_round round16_64:2;
141 unsigned denorm32:2;
142 unsigned denorm16_64:2;
143 };
144 uint8_t val = 0;
145 };
146 /* if false, optimizations which may remove infs/nan/-0.0 can be done */
147 bool preserve_signed_zero_inf_nan32:1;
148 bool preserve_signed_zero_inf_nan16_64:1;
149 /* if false, optimizations which may remove denormal flushing can be done */
150 bool must_flush_denorms32:1;
151 bool must_flush_denorms16_64:1;
152 bool care_about_round32:1;
153 bool care_about_round16_64:1;
154
155 /* Returns true if instructions using the mode "other" can safely use the
156 * current one instead. */
157 bool canReplace(float_mode other) const noexcept {
158 return val == other.val &&
159 (preserve_signed_zero_inf_nan32 || !other.preserve_signed_zero_inf_nan32) &&
160 (preserve_signed_zero_inf_nan16_64 || !other.preserve_signed_zero_inf_nan16_64) &&
161 (must_flush_denorms32 || !other.must_flush_denorms32) &&
162 (must_flush_denorms16_64 || !other.must_flush_denorms16_64) &&
163 (care_about_round32 || !other.care_about_round32) &&
164 (care_about_round16_64 || !other.care_about_round16_64);
165 }
166 };
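/* Illustrative sketch of canReplace() (hypothetical values, names `strict` and
 * `relaxed` are only for illustration): a mode can replace another one if the
 * MODE register bits match and it is at least as strict in every respect.
 *
 *    float_mode strict, relaxed;
 *    strict.val = relaxed.val = 0;
 *    strict.preserve_signed_zero_inf_nan32  = true;
 *    relaxed.preserve_signed_zero_inf_nan32 = false;
 *    // ... all remaining flags equal ...
 *    // strict.canReplace(relaxed) == true
 *    // relaxed.canReplace(strict) == false
 */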
167
168 constexpr Format asVOP3(Format format) {
169 return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
170 };
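/* Illustrative sketch: asVOP3() keeps the base encoding bit and adds the VOP3
 * bit, so a VOP2 instruction promoted to its VOP3A encoding still reports both.
 *
 *    Format f = asVOP3(Format::VOP2);
 *    // (uint16_t)f == ((uint16_t)Format::VOP2 | (uint16_t)Format::VOP3A)
 *    // an Instruction with format f satisfies both isVALU() and isVOP3()
 */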
171
172 enum class RegType {
173 none = 0,
174 sgpr,
175 vgpr,
176 linear_vgpr,
177 };
178
179 struct RegClass {
180
181 enum RC : uint8_t {
182 s1 = 1,
183 s2 = 2,
184 s3 = 3,
185 s4 = 4,
186 s6 = 6,
187 s8 = 8,
188 s16 = 16,
189 v1 = s1 | (1 << 5),
190 v2 = s2 | (1 << 5),
191 v3 = s3 | (1 << 5),
192 v4 = s4 | (1 << 5),
193 v5 = 5 | (1 << 5),
194 v6 = 6 | (1 << 5),
195 v7 = 7 | (1 << 5),
196 v8 = 8 | (1 << 5),
197 /* these are used for WWM and spills to vgpr */
198 v1_linear = v1 | (1 << 6),
199 v2_linear = v2 | (1 << 6),
200 };
201
202 RegClass() = default;
203 constexpr RegClass(RC rc)
204 : rc(rc) {}
205 constexpr RegClass(RegType type, unsigned size)
206 : rc((RC) ((type == RegType::vgpr ? 1 << 5 : 0) | size)) {}
207
208 constexpr operator RC() const { return rc; }
209 explicit operator bool() = delete;
210
211 constexpr RegType type() const { return rc <= RC::s16 ? RegType::sgpr : RegType::vgpr; }
212 constexpr unsigned size() const { return (unsigned) rc & 0x1F; }
213 constexpr bool is_linear() const { return rc <= RC::s16 || rc & (1 << 6); }
214 constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
215
216 private:
217 RC rc;
218 };
219
220 /* transitional helper expressions */
221 static constexpr RegClass s1{RegClass::s1};
222 static constexpr RegClass s2{RegClass::s2};
223 static constexpr RegClass s3{RegClass::s3};
224 static constexpr RegClass s4{RegClass::s4};
225 static constexpr RegClass s8{RegClass::s8};
226 static constexpr RegClass s16{RegClass::s16};
227 static constexpr RegClass v1{RegClass::v1};
228 static constexpr RegClass v2{RegClass::v2};
229 static constexpr RegClass v3{RegClass::v3};
230 static constexpr RegClass v4{RegClass::v4};
231 static constexpr RegClass v5{RegClass::v5};
232 static constexpr RegClass v6{RegClass::v6};
233 static constexpr RegClass v7{RegClass::v7};
234 static constexpr RegClass v8{RegClass::v8};
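/* Illustrative sketch of the RC encoding above: the low five bits hold the
 * size in dwords, bit 5 marks vgpr, bit 6 marks linear.
 *
 *    RegClass(RegType::vgpr, 2) == v2        // true
 *    v2.size() == 2                          // true
 *    v2.type() == RegType::vgpr              // true
 *    v1.as_linear() == RegClass::v1_linear   // true; used for WWM/spill temps
 */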
235
236 /**
237 * Temp Class
238 * Each temporary virtual register has a
239 * register class (i.e. size and type)
240  * and an SSA id.
241 */
242 struct Temp {
243 Temp() = default;
244 constexpr Temp(uint32_t id, RegClass cls) noexcept
245 : id_(id), reg_class(cls) {}
246
247 constexpr uint32_t id() const noexcept { return id_; }
248 constexpr RegClass regClass() const noexcept { return reg_class; }
249
250 constexpr unsigned size() const noexcept { return reg_class.size(); }
251 constexpr RegType type() const noexcept { return reg_class.type(); }
252 constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
253
254 constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
255 constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
256 constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
257
258 private:
259 uint32_t id_:24;
260 RegClass reg_class;
261 };
262
263 /**
264 * PhysReg
265 * Represents the physical register for each
266 * Operand and Definition.
267 */
268 struct PhysReg {
269 constexpr PhysReg() = default;
270 explicit constexpr PhysReg(unsigned r) : reg(r) {}
271 constexpr operator unsigned() const { return reg; }
272
273 uint16_t reg = 0;
274 };
275
276 /* helper expressions for special registers */
277 static constexpr PhysReg m0{124};
278 static constexpr PhysReg vcc{106};
279 static constexpr PhysReg vcc_hi{107};
280 static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
281 static constexpr PhysReg exec{126};
282 static constexpr PhysReg exec_lo{126};
283 static constexpr PhysReg exec_hi{127};
284 static constexpr PhysReg vccz{251};
285 static constexpr PhysReg execz{252};
286 static constexpr PhysReg scc{253};
287
288 /**
289 * Operand Class
290  * Initially, each Operand refers either
291  * to a temporary virtual register
292  * or to a constant value.
293  * Temporary registers get mapped to physical registers during RA.
294  * Constant values are inlined into the instruction sequence.
295 */
296 class Operand final
297 {
298 public:
299 constexpr Operand()
300 : reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
301 isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
302 isLateKill_(false) {}
303
304 explicit Operand(Temp r) noexcept
305 {
306 data_.temp = r;
307 if (r.id()) {
308 isTemp_ = true;
309 } else {
310 isUndef_ = true;
311 setFixed(PhysReg{128});
312 }
313 };
314 explicit Operand(uint32_t v, bool is64bit = false) noexcept
315 {
316 data_.i = v;
317 isConstant_ = true;
318 is64BitConst_ = is64bit;
319 if (v <= 64)
320 setFixed(PhysReg{128 + v});
321 else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
322 setFixed(PhysReg{192 - v});
323 else if (v == 0x3f000000) /* 0.5 */
324 setFixed(PhysReg{240});
325 else if (v == 0xbf000000) /* -0.5 */
326 setFixed(PhysReg{241});
327 else if (v == 0x3f800000) /* 1.0 */
328 setFixed(PhysReg{242});
329 else if (v == 0xbf800000) /* -1.0 */
330 setFixed(PhysReg{243});
331 else if (v == 0x40000000) /* 2.0 */
332 setFixed(PhysReg{244});
333 else if (v == 0xc0000000) /* -2.0 */
334 setFixed(PhysReg{245});
335 else if (v == 0x40800000) /* 4.0 */
336 setFixed(PhysReg{246});
337 else if (v == 0xc0800000) /* -4.0 */
338 setFixed(PhysReg{247});
339 else { /* Literal Constant */
340 assert(!is64bit && "attempt to create a 64-bit literal constant");
341 setFixed(PhysReg{255});
342 }
343 };
344 explicit Operand(uint64_t v) noexcept
345 {
346 isConstant_ = true;
347 is64BitConst_ = true;
348 if (v <= 64) {
349 data_.i = (uint32_t) v;
350 setFixed(PhysReg{128 + (uint32_t) v});
351 } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
352 data_.i = (uint32_t) v;
353 setFixed(PhysReg{192 - (uint32_t) v});
354 } else if (v == 0x3FE0000000000000) { /* 0.5 */
355 data_.i = 0x3f000000;
356 setFixed(PhysReg{240});
357 } else if (v == 0xBFE0000000000000) { /* -0.5 */
358 data_.i = 0xbf000000;
359 setFixed(PhysReg{241});
360 } else if (v == 0x3FF0000000000000) { /* 1.0 */
361 data_.i = 0x3f800000;
362 setFixed(PhysReg{242});
363 } else if (v == 0xBFF0000000000000) { /* -1.0 */
364 data_.i = 0xbf800000;
365 setFixed(PhysReg{243});
366 } else if (v == 0x4000000000000000) { /* 2.0 */
367 data_.i = 0x40000000;
368 setFixed(PhysReg{244});
369 } else if (v == 0xC000000000000000) { /* -2.0 */
370 data_.i = 0xc0000000;
371 setFixed(PhysReg{245});
372 } else if (v == 0x4010000000000000) { /* 4.0 */
373 data_.i = 0x40800000;
374 setFixed(PhysReg{246});
375 } else if (v == 0xC010000000000000) { /* -4.0 */
376 data_.i = 0xc0800000;
377 setFixed(PhysReg{247});
378 } else { /* Literal Constant: we don't know if it is a long or double.*/
379 isConstant_ = 0;
380 assert(false && "attempt to create a 64-bit literal constant");
381 }
382 };
383 explicit Operand(RegClass type) noexcept
384 {
385 isUndef_ = true;
386 data_.temp = Temp(0, type);
387 setFixed(PhysReg{128});
388 };
389 explicit Operand(PhysReg reg, RegClass type) noexcept
390 {
391 data_.temp = Temp(0, type);
392 setFixed(reg);
393 }
394
395 constexpr bool isTemp() const noexcept
396 {
397 return isTemp_;
398 }
399
400 constexpr void setTemp(Temp t) noexcept {
401 assert(!isConstant_);
402 isTemp_ = true;
403 data_.temp = t;
404 }
405
406 constexpr Temp getTemp() const noexcept
407 {
408 return data_.temp;
409 }
410
411 constexpr uint32_t tempId() const noexcept
412 {
413 return data_.temp.id();
414 }
415
416 constexpr bool hasRegClass() const noexcept
417 {
418 return isTemp() || isUndefined();
419 }
420
421 constexpr RegClass regClass() const noexcept
422 {
423 return data_.temp.regClass();
424 }
425
426 constexpr unsigned size() const noexcept
427 {
428 if (isConstant())
429 return is64BitConst_ ? 2 : 1;
430 else
431 return data_.temp.size();
432 }
433
434 constexpr bool isFixed() const noexcept
435 {
436 return isFixed_;
437 }
438
439 constexpr PhysReg physReg() const noexcept
440 {
441 return reg_;
442 }
443
444 constexpr void setFixed(PhysReg reg) noexcept
445 {
446 isFixed_ = reg != unsigned(-1);
447 reg_ = reg;
448 }
449
450 constexpr bool isConstant() const noexcept
451 {
452 return isConstant_;
453 }
454
455 constexpr bool isLiteral() const noexcept
456 {
457 return isConstant() && reg_ == 255;
458 }
459
460 constexpr bool isUndefined() const noexcept
461 {
462 return isUndef_;
463 }
464
465 constexpr uint32_t constantValue() const noexcept
466 {
467 return data_.i;
468 }
469
470 constexpr bool constantEquals(uint32_t cmp) const noexcept
471 {
472 return isConstant() && constantValue() == cmp;
473 }
474
475 /* Indicates that the killed operand's live range intersects with the
476 * instruction's definitions. Unlike isKill() and isFirstKill(), this is
477 * not set by liveness analysis. */
478 constexpr void setLateKill(bool flag) noexcept
479 {
480 isLateKill_ = flag;
481 }
482
483 constexpr bool isLateKill() const noexcept
484 {
485 return isLateKill_;
486 }
487
488 constexpr void setKill(bool flag) noexcept
489 {
490 isKill_ = flag;
491 if (!flag)
492 setFirstKill(false);
493 }
494
495 constexpr bool isKill() const noexcept
496 {
497 return isKill_ || isFirstKill();
498 }
499
500 constexpr void setFirstKill(bool flag) noexcept
501 {
502 isFirstKill_ = flag;
503 if (flag)
504 setKill(flag);
505 }
506
507 /* When there are multiple operands killing the same temporary,
508  * isFirstKill() only returns true for the first one. */
509 constexpr bool isFirstKill() const noexcept
510 {
511 return isFirstKill_;
512 }
513
514 constexpr bool isKillBeforeDef() const noexcept
515 {
516 return isKill() && !isLateKill();
517 }
518
519 constexpr bool isFirstKillBeforeDef() const noexcept
520 {
521 return isFirstKill() && !isLateKill();
522 }
523
524 private:
525 union {
526 uint32_t i;
527 float f;
528 Temp temp = Temp(0, s1);
529 } data_;
530 PhysReg reg_;
531 union {
532 struct {
533 uint8_t isTemp_:1;
534 uint8_t isFixed_:1;
535 uint8_t isConstant_:1;
536 uint8_t isKill_:1;
537 uint8_t isUndef_:1;
538 uint8_t isFirstKill_:1;
539 uint8_t is64BitConst_:1;
540 uint8_t isLateKill_:1;
541 };
542 /* can't initialize bit-fields in c++11, so work around using a union */
543 uint8_t control_ = 0;
544 };
545 };
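/* Illustrative sketch (SSA ids are hypothetical): how Operands are typically
 * constructed, and what the late-kill flag added in this change is for.
 *
 *    Operand a(Temp(5, v1));      // temporary; RA assigns the physical register
 *    Operand b(4u);               // inline constant, pre-fixed to PhysReg{132}
 *    Operand c(0x12345678u);      // literal constant, pre-fixed to PhysReg{255}
 *
 *    // Late kill: the operand is killed by this instruction, but its register
 *    // must stay intact until after the definitions are written, so RA must
 *    // not assign any definition to the same register:
 *    a.setLateKill(true);
 *    // a.isKillBeforeDef() then stays false even once liveness marks a killed
 */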
546
547 /**
548 * Definition Class
549 * Definitions are the results of Instructions
550 * and refer to temporary virtual registers
551 * which are later mapped to physical registers
552 */
553 class Definition final
554 {
555 public:
556 constexpr Definition() : temp(Temp(0, s1)), reg_(0), isFixed_(0), hasHint_(0), isKill_(0) {}
557 Definition(uint32_t index, RegClass type) noexcept
558 : temp(index, type) {}
559 explicit Definition(Temp tmp) noexcept
560 : temp(tmp) {}
561 Definition(PhysReg reg, RegClass type) noexcept
562 : temp(Temp(0, type))
563 {
564 setFixed(reg);
565 }
566 Definition(uint32_t tmpId, PhysReg reg, RegClass type) noexcept
567 : temp(Temp(tmpId, type))
568 {
569 setFixed(reg);
570 }
571
572 constexpr bool isTemp() const noexcept
573 {
574 return tempId() > 0;
575 }
576
577 constexpr Temp getTemp() const noexcept
578 {
579 return temp;
580 }
581
582 constexpr uint32_t tempId() const noexcept
583 {
584 return temp.id();
585 }
586
587 constexpr void setTemp(Temp t) noexcept {
588 temp = t;
589 }
590
591 constexpr RegClass regClass() const noexcept
592 {
593 return temp.regClass();
594 }
595
596 constexpr unsigned size() const noexcept
597 {
598 return temp.size();
599 }
600
601 constexpr bool isFixed() const noexcept
602 {
603 return isFixed_;
604 }
605
606 constexpr PhysReg physReg() const noexcept
607 {
608 return reg_;
609 }
610
611 constexpr void setFixed(PhysReg reg) noexcept
612 {
613 isFixed_ = 1;
614 reg_ = reg;
615 }
616
617 constexpr void setHint(PhysReg reg) noexcept
618 {
619 hasHint_ = 1;
620 reg_ = reg;
621 }
622
623 constexpr bool hasHint() const noexcept
624 {
625 return hasHint_;
626 }
627
628 constexpr void setKill(bool flag) noexcept
629 {
630 isKill_ = flag;
631 }
632
633 constexpr bool isKill() const noexcept
634 {
635 return isKill_;
636 }
637
638 private:
639 Temp temp = Temp(0, s1);
640 PhysReg reg_;
641 union {
642 struct {
643 uint8_t isFixed_:1;
644 uint8_t hasHint_:1;
645 uint8_t isKill_:1;
646 };
647 /* can't initialize bit-fields in c++11, so work around using a union */
648 uint8_t control_ = 0;
649 };
650 };
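/* Illustrative sketch (SSA ids are hypothetical): a Definition usually wraps a
 * fresh Temp; fixing one to a physical register is used e.g. for a carry-out
 * that must land in VCC.
 *
 *    Definition dst(Temp(7, v1));   // virtual register, allocated by RA later
 *    Definition carry(vcc, s2);     // pre-fixed: carry.isFixed() is true and
 *                                   // carry.physReg() == vcc
 */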
651
652 class Block;
653
654 struct Instruction {
655 aco_opcode opcode;
656 Format format;
657 uint32_t pass_flags;
658
659 aco::span<Operand> operands;
660 aco::span<Definition> definitions;
661
662 constexpr bool isVALU() const noexcept
663 {
664 return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
665 || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
666 || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
667 || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
668 || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
669 || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
670 }
671
672 constexpr bool isSALU() const noexcept
673 {
674 return format == Format::SOP1 ||
675 format == Format::SOP2 ||
676 format == Format::SOPC ||
677 format == Format::SOPK ||
678 format == Format::SOPP;
679 }
680
681 constexpr bool isVMEM() const noexcept
682 {
683 return format == Format::MTBUF ||
684 format == Format::MUBUF ||
685 format == Format::MIMG;
686 }
687
688 constexpr bool isDPP() const noexcept
689 {
690 return (uint16_t) format & (uint16_t) Format::DPP;
691 }
692
693 constexpr bool isVOP3() const noexcept
694 {
695 return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
696 ((uint16_t) format & (uint16_t) Format::VOP3B) ||
697 format == Format::VOP3P;
698 }
699
700 constexpr bool isSDWA() const noexcept
701 {
702 return (uint16_t) format & (uint16_t) Format::SDWA;
703 }
704
705 constexpr bool isFlatOrGlobal() const noexcept
706 {
707 return format == Format::FLAT || format == Format::GLOBAL;
708 }
709
710 constexpr bool usesModifiers() const noexcept;
711
712 constexpr bool reads_exec() const noexcept
713 {
714 for (const Operand& op : operands) {
715 if (op.isFixed() && op.physReg() == exec)
716 return true;
717 }
718 return false;
719 }
720 };
721
722 struct SOPK_instruction : public Instruction {
723 uint16_t imm;
724 };
725
726 struct SOPP_instruction : public Instruction {
727 uint32_t imm;
728 int block;
729 };
730
731 struct SOPC_instruction : public Instruction {
732 };
733
734 struct SOP1_instruction : public Instruction {
735 };
736
737 struct SOP2_instruction : public Instruction {
738 };
739
740 /**
741 * Scalar Memory Format:
742 * For s_(buffer_)load_dword*:
743 * Operand(0): SBASE - SGPR-pair which provides base address
744 * Operand(1): Offset - immediate (un)signed offset or SGPR
745 * Operand(2) / Definition(0): SDATA - SGPR for read / write result
746 * Operand(n-1): SOffset - SGPR offset (Vega only)
747 *
748 * Having no operands is also valid for instructions such as s_dcache_inv.
749 *
750 */
751 struct SMEM_instruction : public Instruction {
752 bool glc : 1; /* VI+: globally coherent */
753 bool dlc : 1; /* NAVI: device level coherent */
754 bool nv : 1; /* VEGA only: Non-volatile */
755 bool can_reorder : 1;
756 bool disable_wqm : 1;
757 barrier_interaction barrier;
758 };
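/* Illustrative sketch of the operand layout described above, assuming
 * hypothetical s2 temporaries `base` (the SGPR-pair base address) and `dst`
 * (the result); create_instruction and aco_ptr are declared further below.
 *
 *    aco_ptr<SMEM_instruction> load{create_instruction<SMEM_instruction>(
 *       aco_opcode::s_load_dwordx2, Format::SMEM, 2, 1)};
 *    load->operands[0] = Operand(base);       // SBASE
 *    load->operands[1] = Operand(16u);        // immediate byte offset
 *    load->definitions[0] = Definition(dst);  // SDATA
 */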
759
760 struct VOP1_instruction : public Instruction {
761 };
762
763 struct VOP2_instruction : public Instruction {
764 };
765
766 struct VOPC_instruction : public Instruction {
767 };
768
769 struct VOP3A_instruction : public Instruction {
770 bool abs[3];
771 bool neg[3];
772 uint8_t opsel : 4;
773 uint8_t omod : 2;
774 bool clamp : 1;
775 };
776
777 /**
778 * Data Parallel Primitives Format:
779 * This format can be used for VOP1, VOP2 or VOPC instructions.
780 * The swizzle applies to the src0 operand.
781 *
782 */
783 struct DPP_instruction : public Instruction {
784 bool abs[2];
785 bool neg[2];
786 uint16_t dpp_ctrl;
787 uint8_t row_mask : 4;
788 uint8_t bank_mask : 4;
789 bool bound_ctrl : 1;
790 };
791
792 struct Interp_instruction : public Instruction {
793 uint8_t attribute;
794 uint8_t component;
795 };
796
797 /**
798 * Local and Global Data Sharing instructions
799 * Operand(0): ADDR - VGPR which supplies the address.
800 * Operand(1): DATA0 - First data VGPR.
801 * Operand(2): DATA1 - Second data VGPR.
802 * Operand(n-1): M0 - LDS size.
803 * Definition(0): VDST - Destination VGPR when results returned to VGPRs.
804 *
805 */
806 struct DS_instruction : public Instruction {
807 int16_t offset0;
808 int8_t offset1;
809 bool gds;
810 };
811
812 /**
813 * Vector Memory Untyped-buffer Instructions
814 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
815 * Operand(1): VADDR - Address source. Can carry an index and/or offset
816 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
817 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
818 *
819 */
820 struct MUBUF_instruction : public Instruction {
821 uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
822 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
823 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
824 bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
825 bool glc : 1; /* globally coherent */
826 bool dlc : 1; /* NAVI: device level coherent */
827 bool slc : 1; /* system level coherent */
828 bool tfe : 1; /* texture fail enable */
829 bool lds : 1; /* Return read-data to LDS instead of VGPRs */
830 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
831 bool can_reorder : 1;
832 barrier_interaction barrier;
833 };
834
835 /**
836 * Vector Memory Typed-buffer Instructions
837 * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
838 * Operand(1): VADDR - Address source. Can carry an index and/or offset
839 * Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
840 * Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
841 *
842 */
843 struct MTBUF_instruction : public Instruction {
844 uint16_t offset; /* Unsigned byte offset - 12 bit */
845 uint8_t dfmt : 4; /* Data Format of data in memory buffer */
846 uint8_t nfmt : 3; /* Numeric format of data in memory */
847 bool offen : 1; /* Supply an offset from VGPR (VADDR) */
848 bool idxen : 1; /* Supply an index from VGPR (VADDR) */
849 bool glc : 1; /* globally coherent */
850 bool dlc : 1; /* NAVI: device level coherent */
851 bool slc : 1; /* system level coherent */
852 bool tfe : 1; /* texture fail enable */
853 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
854 bool can_reorder : 1;
855 barrier_interaction barrier;
856 };
857
858 /**
859 * Vector Memory Image Instructions
860  * Operand(0): SRSRC - Scalar GPR that specifies the resource constant.
861 * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
862 * or VDATA - Vector GPR for write data.
863 * Operand(2): VADDR - Address source. Can carry an offset or an index.
864 * Definition(0): VDATA - Vector GPR for read result.
865 *
866 */
867 struct MIMG_instruction : public Instruction {
868 uint8_t dmask; /* Data VGPR enable mask */
869 uint8_t dim : 3; /* NAVI: dimensionality */
870 bool unrm : 1; /* Force address to be un-normalized */
871 bool dlc : 1; /* NAVI: device level coherent */
872 bool glc : 1; /* globally coherent */
873 bool slc : 1; /* system level coherent */
874 bool tfe : 1; /* texture fail enable */
875 bool da : 1; /* declare an array */
876   bool lwe : 1; /* LOD warning enable */
877 bool r128 : 1; /* NAVI: Texture resource size */
878 bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
879 bool d16 : 1; /* Convert 32-bit data to 16-bit data */
880 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
881 bool can_reorder : 1;
882 barrier_interaction barrier;
883 };
884
885 /**
886 * Flat/Scratch/Global Instructions
887 * Operand(0): ADDR
888 * Operand(1): SADDR
889 * Operand(2) / Definition(0): DATA/VDST
890 *
891 */
892 struct FLAT_instruction : public Instruction {
893 uint16_t offset; /* Vega/Navi only */
894 bool slc : 1; /* system level coherent */
895 bool glc : 1; /* globally coherent */
896 bool dlc : 1; /* NAVI: device level coherent */
897 bool lds : 1;
898 bool nv : 1;
899 bool disable_wqm : 1; /* Require an exec mask without helper invocations */
900 bool can_reorder : 1;
901 barrier_interaction barrier;
902 };
903
904 struct Export_instruction : public Instruction {
905 uint8_t enabled_mask;
906 uint8_t dest;
907 bool compressed : 1;
908 bool done : 1;
909 bool valid_mask : 1;
910 };
911
912 struct Pseudo_instruction : public Instruction {
913 bool tmp_in_scc;
914 PhysReg scratch_sgpr; /* might not be valid if it's not needed */
915 };
916
917 struct Pseudo_branch_instruction : public Instruction {
918 /* target[0] is the block index of the branch target.
919 * For conditional branches, target[1] contains the fall-through alternative.
920 * A value of 0 means the target has not been initialized (BB0 cannot be a branch target).
921 */
922 uint32_t target[2];
923 };
924
925 struct Pseudo_barrier_instruction : public Instruction {
926 };
927
928 enum ReduceOp {
929 iadd32, iadd64,
930 imul32, imul64,
931 fadd32, fadd64,
932 fmul32, fmul64,
933 imin32, imin64,
934 imax32, imax64,
935 umin32, umin64,
936 umax32, umax64,
937 fmin32, fmin64,
938 fmax32, fmax64,
939 iand32, iand64,
940 ior32, ior64,
941 ixor32, ixor64,
942 gfx10_wave64_bpermute
943 };
944
945 /**
946  * Subgroup Reduction Instructions: everything except for the data to be
947  * reduced and the result is inserted by setup_reduce_temp().
948 * Operand(0): data to be reduced
949 * Operand(1): reduce temporary
950 * Operand(2): vector temporary
951 * Definition(0): result
952 * Definition(1): scalar temporary
953 * Definition(2): scalar identity temporary (not used to store identity on GFX10)
954 * Definition(3): scc clobber
955 * Definition(4): vcc clobber
956 *
957 */
958 struct Pseudo_reduction_instruction : public Instruction {
959 ReduceOp reduce_op;
960 unsigned cluster_size; // must be 0 for scans
961 };
962
963 struct instr_deleter_functor {
964 void operator()(void* p) {
965 free(p);
966 }
967 };
968
969 template<typename T>
970 using aco_ptr = std::unique_ptr<T, instr_deleter_functor>;
971
972 template<typename T>
973 T* create_instruction(aco_opcode opcode, Format format, uint32_t num_operands, uint32_t num_definitions)
974 {
975 std::size_t size = sizeof(T) + num_operands * sizeof(Operand) + num_definitions * sizeof(Definition);
976 char *data = (char*) calloc(1, size);
977 T* inst = (T*) data;
978
979 inst->opcode = opcode;
980 inst->format = format;
981
982 uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
983 inst->operands = aco::span<Operand>(operands_offset, num_operands);
984 uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
985 inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
986
987 return inst;
988 }
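/* Illustrative sketch of typical usage: the Operand and Definition arrays live
 * in the same allocation directly behind the instruction struct, and the
 * result is normally owned by an aco_ptr so a single free() releases
 * everything at once. The SSA id below is hypothetical.
 *
 *    aco_ptr<VOP1_instruction> mov{create_instruction<VOP1_instruction>(
 *       aco_opcode::v_mov_b32, Format::VOP1, 1, 1)};
 *    mov->operands[0] = Operand(0u);                 // inline constant 0
 *    mov->definitions[0] = Definition(Temp(1, v1));
 */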
989
990 constexpr bool Instruction::usesModifiers() const noexcept
991 {
992 if (isDPP() || isSDWA())
993 return true;
994 if (!isVOP3())
995 return false;
996 const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
997 for (unsigned i = 0; i < operands.size(); i++) {
998 if (vop3->abs[i] || vop3->neg[i])
999 return true;
1000 }
1001 return vop3->opsel || vop3->clamp || vop3->omod;
1002 }
1003
1004 constexpr bool is_phi(Instruction* instr)
1005 {
1006 return instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_linear_phi;
1007 }
1008
1009 static inline bool is_phi(aco_ptr<Instruction>& instr)
1010 {
1011 return is_phi(instr.get());
1012 }
1013
1014 barrier_interaction get_barrier_interaction(Instruction* instr);
1015
1016 bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
1017
1018 enum block_kind {
1019   /* uniform indicates that when leaving this block,
1020    * all active lanes stay active */
1021 block_kind_uniform = 1 << 0,
1022 block_kind_top_level = 1 << 1,
1023 block_kind_loop_preheader = 1 << 2,
1024 block_kind_loop_header = 1 << 3,
1025 block_kind_loop_exit = 1 << 4,
1026 block_kind_continue = 1 << 5,
1027 block_kind_break = 1 << 6,
1028 block_kind_continue_or_break = 1 << 7,
1029 block_kind_discard = 1 << 8,
1030 block_kind_branch = 1 << 9,
1031 block_kind_merge = 1 << 10,
1032 block_kind_invert = 1 << 11,
1033 block_kind_uses_discard_if = 1 << 12,
1034 block_kind_needs_lowering = 1 << 13,
1035 block_kind_uses_demote = 1 << 14,
1036 block_kind_export_end = 1 << 15,
1037 };
1038
1039
1040 struct RegisterDemand {
1041 constexpr RegisterDemand() = default;
1042 constexpr RegisterDemand(const int16_t v, const int16_t s) noexcept
1043 : vgpr{v}, sgpr{s} {}
1044 int16_t vgpr = 0;
1045 int16_t sgpr = 0;
1046
1047 constexpr friend bool operator==(const RegisterDemand a, const RegisterDemand b) noexcept {
1048 return a.vgpr == b.vgpr && a.sgpr == b.sgpr;
1049 }
1050
1051 constexpr bool exceeds(const RegisterDemand other) const noexcept {
1052 return vgpr > other.vgpr || sgpr > other.sgpr;
1053 }
1054
1055 constexpr RegisterDemand operator+(const Temp t) const noexcept {
1056 if (t.type() == RegType::sgpr)
1057 return RegisterDemand( vgpr, sgpr + t.size() );
1058 else
1059 return RegisterDemand( vgpr + t.size(), sgpr );
1060 }
1061
1062 constexpr RegisterDemand operator+(const RegisterDemand other) const noexcept {
1063 return RegisterDemand(vgpr + other.vgpr, sgpr + other.sgpr);
1064 }
1065
1066 constexpr RegisterDemand operator-(const RegisterDemand other) const noexcept {
1067 return RegisterDemand(vgpr - other.vgpr, sgpr - other.sgpr);
1068 }
1069
1070 constexpr RegisterDemand& operator+=(const RegisterDemand other) noexcept {
1071 vgpr += other.vgpr;
1072 sgpr += other.sgpr;
1073 return *this;
1074 }
1075
1076 constexpr RegisterDemand& operator-=(const RegisterDemand other) noexcept {
1077 vgpr -= other.vgpr;
1078 sgpr -= other.sgpr;
1079 return *this;
1080 }
1081
1082 constexpr RegisterDemand& operator+=(const Temp t) noexcept {
1083 if (t.type() == RegType::sgpr)
1084 sgpr += t.size();
1085 else
1086 vgpr += t.size();
1087 return *this;
1088 }
1089
1090 constexpr RegisterDemand& operator-=(const Temp t) noexcept {
1091 if (t.type() == RegType::sgpr)
1092 sgpr -= t.size();
1093 else
1094 vgpr -= t.size();
1095 return *this;
1096 }
1097
1098 constexpr void update(const RegisterDemand other) noexcept {
1099 vgpr = std::max(vgpr, other.vgpr);
1100 sgpr = std::max(sgpr, other.sgpr);
1101 }
1102
1103 };
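/* Illustrative sketch of the demand arithmetic: adding a Temp bumps the
 * matching component by its size in dwords, update() takes the per-component
 * maximum, and exceeds() compares against a limit. Values are hypothetical.
 *
 *    RegisterDemand demand(2, 4);          // 2 vgprs, 4 sgprs
 *    demand += Temp(1, v2);                // vgpr = 4, sgpr = 4
 *    demand.update(RegisterDemand(3, 6));  // vgpr = 4, sgpr = 6
 *    bool too_big = demand.exceeds(RegisterDemand(256, 104));  // false
 */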
1104
1105 /* CFG */
1106 struct Block {
1107 float_mode fp_mode;
1108 unsigned index;
1109 unsigned offset = 0;
1110 std::vector<aco_ptr<Instruction>> instructions;
1111 std::vector<unsigned> logical_preds;
1112 std::vector<unsigned> linear_preds;
1113 std::vector<unsigned> logical_succs;
1114 std::vector<unsigned> linear_succs;
1115 RegisterDemand register_demand = RegisterDemand();
1116 uint16_t loop_nest_depth = 0;
1117 uint16_t kind = 0;
1118 int logical_idom = -1;
1119 int linear_idom = -1;
1120 Temp live_out_exec = Temp();
1121
1122 /* this information is needed for predecessors to blocks with phis when
1123 * moving out of ssa */
1124 bool scc_live_out = false;
1125   PhysReg scratch_sgpr = PhysReg(); /* only needs to be valid if scc_live_out is set */
1126
1127 Block(unsigned idx) : index(idx) {}
1128 Block() : index(0) {}
1129 };
1130
1131 using Stage = uint16_t;
1132
1133 /* software stages */
1134 static constexpr Stage sw_vs = 1 << 0;
1135 static constexpr Stage sw_gs = 1 << 1;
1136 static constexpr Stage sw_tcs = 1 << 2;
1137 static constexpr Stage sw_tes = 1 << 3;
1138 static constexpr Stage sw_fs = 1 << 4;
1139 static constexpr Stage sw_cs = 1 << 5;
1140 static constexpr Stage sw_gs_copy = 1 << 6;
1141 static constexpr Stage sw_mask = 0x7f;
1142
1143 /* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
1144 static constexpr Stage hw_vs = 1 << 7;
1145 static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
1146 static constexpr Stage hw_gs = 1 << 9;
1147 static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
1148 static constexpr Stage hw_hs = 1 << 11;
1149 static constexpr Stage hw_fs = 1 << 12;
1150 static constexpr Stage hw_cs = 1 << 13;
1151 static constexpr Stage hw_mask = 0x7f << 7;
1152
1153 /* possible settings of Program::stage */
1154 static constexpr Stage vertex_vs = sw_vs | hw_vs;
1155 static constexpr Stage fragment_fs = sw_fs | hw_fs;
1156 static constexpr Stage compute_cs = sw_cs | hw_cs;
1157 static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
1158 static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
1159 /* GFX10/NGG */
1160 static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
1161 static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1162 static constexpr Stage ngg_tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1163 static constexpr Stage ngg_vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1164 /* GFX9 (and GFX10 if NGG isn't used) */
1165 static constexpr Stage vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
1166 static constexpr Stage vertex_tess_control_hs = sw_vs | sw_tcs | hw_hs;
1167 static constexpr Stage tess_eval_geometry_gs = sw_tes | sw_gs | hw_gs;
1168 /* pre-GFX9 */
1169 static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tessellation control */
1170 static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
1171 static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
1172 static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tessellation evaluation before geometry */
1173 static constexpr Stage geometry_gs = sw_gs | hw_gs;
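/* Illustrative sketch: a Stage value combines the software stage(s) being
 * compiled with the hardware stage they run on, so both halves can be tested
 * independently.
 *
 *    Stage s = vertex_geometry_gs;        // merged VS+GS (GFX9+, non-NGG)
 *    bool has_gs      = s & sw_gs;        // true
 *    bool runs_as_gs  = s & hw_gs;        // true
 *    bool is_plain_vs = s == vertex_vs;   // false
 */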
1174
1175 class Program final {
1176 public:
1177 float_mode next_fp_mode;
1178 std::vector<Block> blocks;
1179 RegisterDemand max_reg_demand = RegisterDemand();
1180 uint16_t num_waves = 0;
1181 uint16_t max_waves = 0; /* maximum number of waves, regardless of register usage */
1182 ac_shader_config* config;
1183 struct radv_shader_info *info;
1184 enum chip_class chip_class;
1185 enum radeon_family family;
1186 unsigned wave_size;
1187 RegClass lane_mask;
1188 Stage stage; /* Stage */
1189 bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
1190 bool needs_wqm = false; /* there exists a p_wqm instruction */
1191 bool wb_smem_l1_on_end = false;
1192
1193 std::vector<uint8_t> constant_data;
1194 Temp private_segment_buffer;
1195 Temp scratch_offset;
1196
1197 uint16_t min_waves = 0;
1198 uint16_t lds_alloc_granule;
1199 uint32_t lds_limit; /* in bytes */
1200 uint16_t vgpr_limit;
1201 uint16_t sgpr_limit;
1202 uint16_t physical_sgprs;
1203 uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
1204 uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
1205
1206 bool needs_vcc = false;
1207 bool needs_xnack_mask = false;
1208 bool needs_flat_scr = false;
1209
1210 uint32_t allocateId()
1211 {
1212 assert(allocationID <= 16777215);
1213 return allocationID++;
1214 }
1215
1216 uint32_t peekAllocationId()
1217 {
1218 return allocationID;
1219 }
1220
1221 void setAllocationId(uint32_t id)
1222 {
1223 allocationID = id;
1224 }
1225
1226 Block* create_and_insert_block() {
1227 blocks.emplace_back(blocks.size());
1228 blocks.back().fp_mode = next_fp_mode;
1229 return &blocks.back();
1230 }
1231
1232 Block* insert_block(Block&& block) {
1233 block.index = blocks.size();
1234 block.fp_mode = next_fp_mode;
1235 blocks.emplace_back(std::move(block));
1236 return &blocks.back();
1237 }
1238
1239 private:
1240 uint32_t allocationID = 1;
1241 };
1242
1243 struct live {
1244 /* live temps out per block */
1245 std::vector<std::set<Temp>> live_out;
1246 /* register demand (sgpr/vgpr) per instruction per block */
1247 std::vector<std::vector<RegisterDemand>> register_demand;
1248 };
1249
1250 void select_program(Program *program,
1251 unsigned shader_count,
1252 struct nir_shader *const *shaders,
1253 ac_shader_config* config,
1254 struct radv_shader_args *args);
1255 void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
1256 ac_shader_config* config,
1257 struct radv_shader_args *args);
1258
1259 void lower_wqm(Program* program, live& live_vars,
1260 const struct radv_nir_compiler_options *options);
1261 void lower_bool_phis(Program* program);
1262 void calc_min_waves(Program* program);
1263 void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
1264 live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
1265 std::vector<uint16_t> dead_code_analysis(Program *program);
1266 void dominator_tree(Program* program);
1267 void insert_exec_mask(Program *program);
1268 void value_numbering(Program* program);
1269 void optimize(Program* program);
1270 void setup_reduce_temp(Program* program);
1271 void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1272 void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
1273 void ssa_elimination(Program* program);
1274 void lower_to_hw_instr(Program* program);
1275 void schedule_program(Program* program, live& live_vars);
1276 void spill(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
1277 void insert_wait_states(Program* program);
1278 void insert_NOPs(Program* program);
1279 unsigned emit_program(Program* program, std::vector<uint32_t>& code);
1280 void print_asm(Program *program, std::vector<uint32_t>& binary,
1281 unsigned exec_size, std::ostream& out);
1282 void validate(Program* program, FILE *output);
1283 bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
1284 #ifndef NDEBUG
1285 void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
1286 #else
1287 #define perfwarn(cond, msg, ...) do {} while(0)
1288 #endif
1289
1290 void aco_print_instr(Instruction *instr, FILE *output);
1291 void aco_print_program(Program *program, FILE *output);
1292
1293 /* utilities for dealing with register demand */
1294 RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
1295 RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
1296 RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);
1297
1298 /* number of sgprs that need to be allocated but might not be addressable as s0-s105 */
1299 uint16_t get_extra_sgprs(Program *program);
1300
1301 /* get the number of sgprs/vgprs that must be allocated to address a given number of sgprs/vgprs */
1302 uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
1303 uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);
1304
1305 /* return number of addressable sgprs/vgprs for max_waves */
1306 uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
1307 uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
1308
1309 typedef struct {
1310 const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
1311 const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
1312 const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
1313 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
1314 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
1315 const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
1316 const char *name[static_cast<int>(aco_opcode::num_opcodes)];
1317 const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
1318 } Info;
1319
1320 extern const Info instr_info;
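/* Illustrative sketch: instr_info is indexed by opcode, e.g. to query the
 * printable name or base encoding of an instruction:
 *
 *    const char *name = instr_info.name[(int)aco_opcode::v_mov_b32];
 *    Format base_fmt  = instr_info.format[(int)aco_opcode::v_mov_b32];
 */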
1321
1322 }
1323
1324 #endif /* ACO_IR_H */
1325