barrier_image = 0x2,
barrier_atomic = 0x4,
barrier_shared = 0x8,
- barrier_count = 4,
+ /* used for geometry shaders to ensure vertex data writes are before the
+ * GS_DONE s_sendmsg. */
+ barrier_gs_data = 0x10,
+ /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
+ barrier_gs_sendmsg = 0x20,
+ /* used by barriers. created by s_barrier */
+ barrier_barrier = 0x40,
+ /* NOTE(review): seven interaction bits are defined above (0x1..0x40) but
+ * barrier_count is 6 — confirm barrier_barrier is deliberately excluded
+ * from arrays/loops sized by barrier_count. */
+ barrier_count = 6,
};
enum fp_round {
return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
};
+/* Returns the SDWA (Sub-Dword Addressing) variant of an encoding by OR-ing
+ * the SDWA format bit into the base format. Only VOP1/VOP2/VOPC support
+ * SDWA, hence the assert. */
+constexpr Format asSDWA(Format format) {
+ assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
+ return (Format) ((uint32_t) Format::SDWA | (uint32_t) format);
+}
+
enum class RegType {
none = 0,
sgpr,
*/
struct PhysReg {
constexpr PhysReg() = default;
- explicit constexpr PhysReg(unsigned r) : reg(r) {}
- constexpr operator unsigned() const { return reg; }
-
- uint16_t reg = 0;
+ /* PhysReg is now byte-addressed: reg_b = register index * 4 + byte offset,
+ * so an operand can name an individual byte of a register (e.g. for
+ * sub-dword/SDWA accesses). reg()/byte() split the address back apart;
+ * the comparison operators order by the full byte address. */
+ explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
+ constexpr unsigned reg() const { return reg_b >> 2; }
+ constexpr unsigned byte() const { return reg_b & 0x3; }
+ constexpr operator unsigned() const { return reg(); }
+ constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
+ constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
+ constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
+
+ uint16_t reg_b = 0;
};
/* helper expressions for special registers */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
+static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
+/* NOTE(review): 251/252 look like the read-only vccz/execz status encodings
+ * from the scalar source-operand space — verify against the ISA manual. */
+static constexpr PhysReg vccz{251};
+static constexpr PhysReg execz{252};
static constexpr PhysReg scc{253};
/**
public:
constexpr Operand()
: reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
- isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
+ isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
+ isLateKill_(false) {}
+ /* default-constructed Operand is an undefined operand (isUndef_) fixed to
+ * PhysReg{128}; the new isLateKill_ flag starts cleared. */
explicit Operand(Temp r) noexcept
{
setFixed(PhysReg{128});
}
};
+ /* Creates a 32-bit constant operand. is64bit tags the value as the
+ * 32-bit-encodable form of a 64-bit constant; 64-bit literal constants
+ * are unsupported, hence the assert below. */
- explicit Operand(uint32_t v) noexcept
+ explicit Operand(uint32_t v, bool is64bit = false) noexcept
{
data_.i = v;
isConstant_ = true;
+ is64BitConst_ = is64bit;
if (v <= 64)
setFixed(PhysReg{128 + v})
else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
setFixed(PhysReg{246});
+ /* NOTE(review): the branch above fixes [-16..-1] to PhysReg{246}, but
+ * negative inline constants live at 193..208 (cf. `192 - v` in the
+ * 64-bit ctor below) and 246 is an inline-float slot — this context line
+ * looks garbled or wrong; verify against the original file. */
else if (v == 0xc0800000) /* -4.0 */
setFixed(PhysReg{247});
- else /* Literal Constant */
+ else { /* Literal Constant */
+ assert(!is64bit && "attempt to create a 64-bit literal constant");
setFixed(PhysReg{255});
+ }
};
+ /* Creates a 64-bit constant operand. Only inline-encodable values are
+ * accepted (0..64, -16..-1, and +/-0.5, +/-1.0, +/-2.0, +/-4.0); each
+ * branch now also stores the equivalent 32-bit encoding in data_.i so
+ * constantValue() remains meaningful. 64-bit literals are rejected. */
explicit Operand(uint64_t v) noexcept
{
isConstant_ = true;
is64BitConst_ = true;
- if (v <= 64)
+ if (v <= 64) {
+ data_.i = (uint32_t) v;
setFixed(PhysReg{128 + (uint32_t) v});
- else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */
+ } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
+ data_.i = (uint32_t) v;
setFixed(PhysReg{192 - (uint32_t) v});
- else if (v == 0x3FE0000000000000) /* 0.5 */
+ } else if (v == 0x3FE0000000000000) { /* 0.5 */
+ data_.i = 0x3f000000;
setFixed(PhysReg{240});
- else if (v == 0xBFE0000000000000) /* -0.5 */
+ } else if (v == 0xBFE0000000000000) { /* -0.5 */
+ data_.i = 0xbf000000;
setFixed(PhysReg{241});
- else if (v == 0x3FF0000000000000) /* 1.0 */
+ } else if (v == 0x3FF0000000000000) { /* 1.0 */
+ data_.i = 0x3f800000;
setFixed(PhysReg{242});
- else if (v == 0xBFF0000000000000) /* -1.0 */
+ } else if (v == 0xBFF0000000000000) { /* -1.0 */
+ data_.i = 0xbf800000;
setFixed(PhysReg{243});
- else if (v == 0x4000000000000000) /* 2.0 */
+ } else if (v == 0x4000000000000000) { /* 2.0 */
+ data_.i = 0x40000000;
setFixed(PhysReg{244});
- else if (v == 0xC000000000000000) /* -2.0 */
+ } else if (v == 0xC000000000000000) { /* -2.0 */
+ data_.i = 0xc0000000;
setFixed(PhysReg{245});
- else if (v == 0x4010000000000000) /* 4.0 */
+ } else if (v == 0x4010000000000000) { /* 4.0 */
+ data_.i = 0x40800000;
setFixed(PhysReg{246});
- else if (v == 0xC010000000000000) /* -4.0 */
+ } else if (v == 0xC010000000000000) { /* -4.0 */
+ data_.i = 0xc0800000;
setFixed(PhysReg{247});
- else { /* Literal Constant: we don't know if it is a long or double.*/
+ } else { /* Literal Constant: we don't know if it is a long or double.*/
isConstant_ = 0;
assert(false && "attempt to create a 64-bit literal constant");
}
return isConstant() && constantValue() == cmp;
}
+ /* Returns the operand's constant widened to 64 bits. For 64-bit constants
+ * the value is decoded from the fixed register slot (128..192 -> 0..64,
+ * 193..208 -> -1..-16, 240..247 -> inline doubles); otherwise the stored
+ * 32-bit value is returned, optionally sign-extended. */
+ constexpr uint64_t constantValue64(bool signext=false) const noexcept
+ {
+ if (is64BitConst_) {
+ /* constants are always fixed to reg_ >= 128, so no lower bound needed */
+ if (reg_ <= 192)
+ return reg_ - 128;
+ else if (reg_ <= 208)
+ return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
+
+ switch (reg_) {
+ case 240:
+ return 0x3FE0000000000000;
+ case 241:
+ return 0xBFE0000000000000;
+ case 242:
+ return 0x3FF0000000000000;
+ case 243:
+ return 0xBFF0000000000000;
+ case 244:
+ return 0x4000000000000000;
+ case 245:
+ return 0xC000000000000000;
+ case 246:
+ return 0x4010000000000000;
+ case 247:
+ return 0xC010000000000000;
+ }
+ }
+ return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
+ }
+
+
+ /* Indicates that the killed operand's live range intersects with the
+ * instruction's definitions. Unlike isKill() and isFirstKill(), this is
+ * not set by liveness analysis. */
+ constexpr void setLateKill(bool flag) noexcept
+ {
+ isLateKill_ = flag;
+ }
+
+ /* see setLateKill() */
+ constexpr bool isLateKill() const noexcept
+ {
+ return isLateKill_;
+ }
+
constexpr void setKill(bool flag) noexcept
{
isKill_ = flag;
return isFirstKill_;
}
+ /* kill that completes before the instruction writes its definitions,
+ * i.e. the operand's register may be reused for a definition. */
+ constexpr bool isKillBeforeDef() const noexcept
+ {
+ return isKill() && !isLateKill();
+ }
+
+ /* first-kill variant of isKillBeforeDef() */
+ constexpr bool isFirstKillBeforeDef() const noexcept
+ {
+ return isFirstKill() && !isLateKill();
+ }
+
+
+ /* Structural equality: operands must agree on size, fixed-ness and
+ * kill-before-def, then on physical register when fixed, and finally on
+ * kind-specific identity (literal value, constant slot, undef regclass,
+ * or underlying Temp). */
+ constexpr bool operator == (Operand other) const noexcept
+ {
+ if (other.size() != size())
+ return false;
+ if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
+ return false;
+ /* (the both-fixed test is redundant after the flag comparison above,
+ * but harmless) */
+ if (isFixed() && other.isFixed() && physReg() != other.physReg())
+ return false;
+ if (isLiteral())
+ return other.isLiteral() && other.constantValue() == constantValue();
+ else if (isConstant())
+ return other.isConstant() && other.physReg() == physReg();
+ else if (isUndefined())
+ return other.isUndefined() && other.regClass() == regClass();
+ else
+ return other.isTemp() && other.getTemp() == getTemp();
+ }
private:
union {
uint32_t i;
uint8_t isUndef_:1;
uint8_t isFirstKill_:1;
uint8_t is64BitConst_:1;
+ uint8_t isLateKill_:1;
};
/* can't initialize bit-fields in c++11, so work around using a union */
uint8_t control_ = 0;
bool bound_ctrl : 1;
};
+/* Encodes an SDWA byte/word selection: bits [1:0] give the byte or word
+ * index, sdwa_isword (bit 2) switches to word granularity, and sdwa_sext
+ * (bit 3) requests sign extension; sdwa_udword selects the full dword. */
+enum sdwa_sel : uint8_t {
+ /* masks */
+ sdwa_wordnum = 0x1,
+ sdwa_bytenum = 0x3,
+ sdwa_asuint = 0x7,
+
+ /* flags */
+ sdwa_isword = 0x4,
+ sdwa_sext = 0x8,
+
+ /* specific values */
+ sdwa_ubyte0 = 0,
+ sdwa_ubyte1 = 1,
+ sdwa_ubyte2 = 2,
+ sdwa_ubyte3 = 3,
+ sdwa_uword0 = sdwa_isword | 0,
+ sdwa_uword1 = sdwa_isword | 1,
+ sdwa_udword = 6,
+
+ sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
+ sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
+ sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
+ sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
+ sdwa_sword0 = sdwa_uword0 | sdwa_sext,
+ sdwa_sword1 = sdwa_uword1 | sdwa_sext,
+ sdwa_sdword = sdwa_udword | sdwa_sext,
+};
+
+
+/**
+ * Sub-Dword Addressing Format:
+ * This format can be used for VOP1, VOP2 or VOPC instructions.
+ *
+ * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
+ * the definition doesn't have to be VCC on GFX9+.
+ *
+ */
+struct SDWA_instruction : public Instruction {
+ /* these destination modifiers aren't available with VOPC except for
+ * clamp on GFX8 */
+ unsigned dst_sel:4; /* sdwa_sel applied to the definition */
+ bool dst_preserve:1; /* presumably keeps the unselected dst bits — confirm */
+ bool clamp:1;
+ unsigned omod:2; /* GFX9+ */
+
+ unsigned sel[2]; /* sdwa_sel for each source operand */
+ bool neg[2]; /* per-source negate modifier */
+ bool abs[2]; /* per-source absolute-value modifier */
+};
+
+
struct Interp_instruction : public Instruction {
uint8_t attribute;
uint8_t component;
/**
* Vector Memory Untyped-buffer Instructions
- * Operand(0): VADDR - Address source. Can carry an index and/or offset
- * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
+ bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
bool glc : 1; /* globally coherent */
bool dlc : 1; /* NAVI: device level coherent */
bool slc : 1; /* system level coherent */
/**
* Vector Memory Typed-buffer Instructions
- * Operand(0): VADDR - Address source. Can carry an index and/or offset
- * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
/**
* Vector Memory Image Instructions
- * Operand(0): VADDR - Address source. Can carry an offset or an index.
- * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
- * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
- * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
+ * Operand(0) SRSRC - Scalar GPR that specifies the resource constant.
+ * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
+ * or VDATA - Vector GPR for write data.
+ * Operand(2): VADDR - Address source. Can carry an offset or an index.
+ * Definition(0): VDATA - Vector GPR for read result.
*
*/
struct MIMG_instruction : public Instruction {
return is_phi(instr.get());
}
-constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
-{
- switch (instr->format) {
- case Format::SMEM:
- return static_cast<SMEM_instruction*>(instr)->barrier;
- case Format::MUBUF:
- return static_cast<MUBUF_instruction*>(instr)->barrier;
- case Format::MIMG:
- return static_cast<MIMG_instruction*>(instr)->barrier;
- case Format::FLAT:
- case Format::GLOBAL:
- case Format::SCRATCH:
- return static_cast<FLAT_instruction*>(instr)->barrier;
- case Format::DS:
- return barrier_shared;
- default:
- return barrier_none;
- }
-}
+/* now defined out-of-line; previously an inline switch over instr->format */
+barrier_interaction get_barrier_interaction(Instruction* instr);
+
+/* NOTE(review): presumably `uses` counts remaining uses per temp id —
+ * confirm against the liveness/DCE code. */
+bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
enum block_kind {
/* uniform indicates that leaving this block,
block_kind_uses_discard_if = 1 << 12,
block_kind_needs_lowering = 1 << 13,
block_kind_uses_demote = 1 << 14,
+ block_kind_export_end = 1 << 15,
};
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
-static constexpr Stage sw_mask = 0x3f;
+/* GS copy shader — runs on the HW VS stage (see gs_copy_vs below) */
+static constexpr Stage sw_gs_copy = 1 << 6;
+static constexpr Stage sw_mask = 0x7f;
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
+/* hw stage bits shifted up by one to make room for sw_gs_copy */
-static constexpr Stage hw_vs = 1 << 6;
-static constexpr Stage hw_es = 1 << 7; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_gs = 1 << 8;
-static constexpr Stage hw_ls = 1 << 9; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_hs = 1 << 10;
-static constexpr Stage hw_fs = 1 << 11;
-static constexpr Stage hw_cs = 1 << 12;
-static constexpr Stage hw_mask = 0x7f << 6;
+static constexpr Stage hw_vs = 1 << 7;
+static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_gs = 1 << 9;
+static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_hs = 1 << 11;
+static constexpr Stage hw_fs = 1 << 12;
+static constexpr Stage hw_cs = 1 << 13;
+static constexpr Stage hw_mask = 0x7f << 7;
/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
+static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
-static constexpr Stage tess_eval_es = sw_tes | hw_gs; /* tesselation evaluation before geometry */
+/* fixed: TES-before-GS runs on the HW ES stage, not HW GS */
+static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
+/* Indices into Program::statistics, gathered when
+ * Program::collect_statistics is enabled. */
+enum statistic {
+ statistic_hash,
+ statistic_instructions,
+ statistic_copies,
+ statistic_branches,
+ statistic_cycles,
+ statistic_vmem_clauses,
+ statistic_smem_clauses,
+ statistic_vmem_score,
+ statistic_smem_score,
+ statistic_sgpr_presched,
+ statistic_vgpr_presched,
+ num_statistics
+};
+
+
class Program final {
public:
float_mode next_fp_mode;
uint16_t min_waves = 0;
uint16_t lds_alloc_granule;
uint32_t lds_limit; /* in bytes */
+ bool has_16bank_lds;
uint16_t vgpr_limit;
uint16_t sgpr_limit;
uint16_t physical_sgprs;
uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
+ unsigned workgroup_size; /* if known; otherwise UINT_MAX */
+
+ bool xnack_enabled = false;
bool needs_vcc = false;
- bool needs_xnack_mask = false;
bool needs_flat_scr = false;
+ bool collect_statistics = false;
+ uint32_t statistics[num_statistics];
+
uint32_t allocateId()
{
assert(allocationID <= 16777215);
struct nir_shader *const *shaders,
ac_shader_config* config,
struct radv_shader_args *args);
+void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args);
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif
+void collect_presched_stats(Program *program);
+void collect_preasm_stats(Program *program);
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
+
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);
+/* utilities for dealing with register demand */
+RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
+RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
+RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);
+
/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
+ const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
const char *name[static_cast<int>(aco_opcode::num_opcodes)];
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
} Info;