barrier_gs_data = 0x10,
/* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
barrier_gs_sendmsg = 0x20,
+ /* used by barriers. created by s_barrier */
+ barrier_barrier = 0x40,
barrier_count = 6,
};
return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
};
+constexpr Format asSDWA(Format format) {
+ assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
+ return (Format) ((uint32_t) Format::SDWA | (uint32_t) format);
+}
+
enum class RegType {
none = 0,
sgpr,
*/
struct PhysReg {
constexpr PhysReg() = default;
- explicit constexpr PhysReg(unsigned r) : reg(r) {}
- constexpr operator unsigned() const { return reg; }
-
- uint16_t reg = 0;
+ explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
+ constexpr unsigned reg() const { return reg_b >> 2; }
+ constexpr unsigned byte() const { return reg_b & 0x3; }
+ constexpr operator unsigned() const { return reg(); }
+ constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
+ constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
+ constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
+
+ uint16_t reg_b = 0;
};
/* helper expressions for special registers */
public:
constexpr Operand()
: reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
- isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
+ isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
+ isLateKill_(false) {}
explicit Operand(Temp r) noexcept
{
return isConstant() && constantValue() == cmp;
}
+ constexpr uint64_t constantValue64(bool signext=false) const noexcept
+ {
+ if (is64BitConst_) {
+ if (reg_ <= 192)
+ return reg_ - 128;
+ else if (reg_ <= 208)
+ return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
+
+ switch (reg_) {
+ case 240:
+ return 0x3FE0000000000000;
+ case 241:
+ return 0xBFE0000000000000;
+ case 242:
+ return 0x3FF0000000000000;
+ case 243:
+ return 0xBFF0000000000000;
+ case 244:
+ return 0x4000000000000000;
+ case 245:
+ return 0xC000000000000000;
+ case 246:
+ return 0x4010000000000000;
+ case 247:
+ return 0xC010000000000000;
+ }
+ }
+ return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
+ }
+
+ /* Indicates that the killed operand's live range intersects with the
+ * instruction's definitions. Unlike isKill() and isFirstKill(), this is
+ * not set by liveness analysis. */
+ constexpr void setLateKill(bool flag) noexcept
+ {
+ isLateKill_ = flag;
+ }
+
+ constexpr bool isLateKill() const noexcept
+ {
+ return isLateKill_;
+ }
+
constexpr void setKill(bool flag) noexcept
{
isKill_ = flag;
return isFirstKill_;
}
+ constexpr bool isKillBeforeDef() const noexcept
+ {
+ return isKill() && !isLateKill();
+ }
+
+ constexpr bool isFirstKillBeforeDef() const noexcept
+ {
+ return isFirstKill() && !isLateKill();
+ }
+
+ constexpr bool operator == (Operand other) const noexcept
+ {
+ if (other.size() != size())
+ return false;
+ if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
+ return false;
+ if (isFixed() && other.isFixed() && physReg() != other.physReg())
+ return false;
+ if (isLiteral())
+ return other.isLiteral() && other.constantValue() == constantValue();
+ else if (isConstant())
+ return other.isConstant() && other.physReg() == physReg();
+ else if (isUndefined())
+ return other.isUndefined() && other.regClass() == regClass();
+ else
+ return other.isTemp() && other.getTemp() == getTemp();
+ }
private:
union {
uint32_t i;
uint8_t isUndef_:1;
uint8_t isFirstKill_:1;
uint8_t is64BitConst_:1;
+ uint8_t isLateKill_:1;
};
/* can't initialize bit-fields in c++11, so work around using a union */
uint8_t control_ = 0;
bool bound_ctrl : 1;
};
+enum sdwa_sel : uint8_t {
+ /* masks */
+ sdwa_wordnum = 0x1,
+ sdwa_bytenum = 0x3,
+ sdwa_asuint = 0x7,
+
+ /* flags */
+ sdwa_isword = 0x4,
+ sdwa_sext = 0x8,
+
+ /* specific values */
+ sdwa_ubyte0 = 0,
+ sdwa_ubyte1 = 1,
+ sdwa_ubyte2 = 2,
+ sdwa_ubyte3 = 3,
+ sdwa_uword0 = sdwa_isword | 0,
+ sdwa_uword1 = sdwa_isword | 1,
+ sdwa_udword = 6,
+
+ sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
+ sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
+ sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
+ sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
+ sdwa_sword0 = sdwa_uword0 | sdwa_sext,
+ sdwa_sword1 = sdwa_uword1 | sdwa_sext,
+ sdwa_sdword = sdwa_udword | sdwa_sext,
+};
+
+/**
+ * Sub-Dword Addressing Format:
+ * This format can be used for VOP1, VOP2 or VOPC instructions.
+ *
+ * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
+ * the definition doesn't have to be VCC on GFX9+.
+ *
+ */
+struct SDWA_instruction : public Instruction {
+ /* these destination modifiers aren't available with VOPC except for
+ * clamp on GFX8 */
+ unsigned dst_sel:4;
+ bool dst_preserve:1;
+ bool clamp:1;
+ unsigned omod:2; /* GFX9+ */
+
+ unsigned sel[2];
+ bool neg[2];
+ bool abs[2];
+};
+
struct Interp_instruction : public Instruction {
uint8_t attribute;
uint8_t component;
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
-static constexpr Stage tess_eval_es = sw_tes | hw_gs; /* tesselation evaluation before geometry */
+static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
+enum statistic {
+ statistic_hash,
+ statistic_instructions,
+ statistic_copies,
+ statistic_branches,
+ statistic_cycles,
+ statistic_vmem_clauses,
+ statistic_smem_clauses,
+ statistic_vmem_score,
+ statistic_smem_score,
+ statistic_sgpr_presched,
+ statistic_vgpr_presched,
+ num_statistics
+};
+
class Program final {
public:
float_mode next_fp_mode;
uint16_t min_waves = 0;
uint16_t lds_alloc_granule;
uint32_t lds_limit; /* in bytes */
+ bool has_16bank_lds;
uint16_t vgpr_limit;
uint16_t sgpr_limit;
uint16_t physical_sgprs;
uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
+ unsigned workgroup_size; /* if known; otherwise UINT_MAX */
+
+ bool xnack_enabled = false;
bool needs_vcc = false;
- bool needs_xnack_mask = false;
bool needs_flat_scr = false;
+ bool collect_statistics = false;
+ uint32_t statistics[num_statistics];
+
uint32_t allocateId()
{
assert(allocationID <= 16777215);
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif
+void collect_presched_stats(Program *program);
+void collect_preasm_stats(Program *program);
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
+
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);
+/* utilities for dealing with register demand */
+RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
+RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
+RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);
+
/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);