SDWA = 1 << 15,
};
-enum barrier_interaction {
+enum barrier_interaction : uint8_t {
 barrier_none = 0,
 barrier_buffer = 0x1,
 barrier_image = 0x2,
 barrier_atomic = 0x4,
 barrier_shared = 0x8,
- barrier_count = 4,
+ /* used for geometry shaders to ensure vertex data writes are before the
+ * GS_DONE s_sendmsg. */
+ barrier_gs_data = 0x10,
+ /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
+ barrier_gs_sendmsg = 0x20,
+ /* used by barriers. created by s_barrier */
+ barrier_barrier = 0x40,
+ /* number of distinct interaction bits above; loops of the form
+ * `for (i = 0; i < barrier_count; i++)` testing `1u << i` must be able
+ * to reach barrier_barrier (bit 6), so this must be 7, not 6. */
+ barrier_count = 7,
 };
enum fp_round {
return (Format) ((uint32_t) Format::VOP3 | (uint32_t) format);
};
+constexpr Format asSDWA(Format format) {
+ assert(format == Format::VOP1 || format == Format::VOP2 || format == Format::VOPC);
+ return (Format) ((uint32_t) Format::SDWA | (uint32_t) format);
+}
+
enum class RegType {
none = 0,
sgpr,
*/
struct PhysReg {
constexpr PhysReg() = default;
- explicit constexpr PhysReg(unsigned r) : reg(r) {}
- constexpr operator unsigned() const { return reg; }
-
- uint16_t reg = 0;
+ explicit constexpr PhysReg(unsigned r) : reg_b(r << 2) {}
+ constexpr unsigned reg() const { return reg_b >> 2; }
+ constexpr unsigned byte() const { return reg_b & 0x3; }
+ constexpr operator unsigned() const { return reg(); }
+ constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
+ constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
+ constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
+
+ uint16_t reg_b = 0;
};
/* helper expressions for special registers */
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
+static constexpr PhysReg vcc_hi{107};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
static constexpr PhysReg exec_hi{127};
+static constexpr PhysReg vccz{251};
+static constexpr PhysReg execz{252};
static constexpr PhysReg scc{253};
/**
public:
constexpr Operand()
: reg_(PhysReg{128}), isTemp_(false), isFixed_(true), isConstant_(false),
- isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false) {}
+ isKill_(false), isUndef_(true), isFirstKill_(false), is64BitConst_(false),
+ isLateKill_(false) {}
explicit Operand(Temp r) noexcept
{
setFixed(PhysReg{128});
}
};
- explicit Operand(uint32_t v) noexcept
+ explicit Operand(uint32_t v, bool is64bit = false) noexcept
{
data_.i = v;
isConstant_ = true;
+ is64BitConst_ = is64bit;
if (v <= 64)
setFixed(PhysReg{128 + v});
else if (v >= 0xFFFFFFF0) /* [-16 .. -1] */
setFixed(PhysReg{246});
else if (v == 0xc0800000) /* -4.0 */
setFixed(PhysReg{247});
- else if (v == 0x3e22f983) /* 1/(2*PI) */
- setFixed(PhysReg{248});
- else /* Literal Constant */
+ else { /* Literal Constant */
+ assert(!is64bit && "attempt to create a 64-bit literal constant");
setFixed(PhysReg{255});
+ }
};
explicit Operand(uint64_t v) noexcept
{
isConstant_ = true;
is64BitConst_ = true;
- if (v <= 64)
+ if (v <= 64) {
+ data_.i = (uint32_t) v;
setFixed(PhysReg{128 + (uint32_t) v});
- else if (v >= 0xFFFFFFFFFFFFFFF0) /* [-16 .. -1] */
+ } else if (v >= 0xFFFFFFFFFFFFFFF0) { /* [-16 .. -1] */
+ data_.i = (uint32_t) v;
setFixed(PhysReg{192 - (uint32_t) v});
- else if (v == 0x3FE0000000000000) /* 0.5 */
+ } else if (v == 0x3FE0000000000000) { /* 0.5 */
+ data_.i = 0x3f000000;
setFixed(PhysReg{240});
- else if (v == 0xBFE0000000000000) /* -0.5 */
+ } else if (v == 0xBFE0000000000000) { /* -0.5 */
+ data_.i = 0xbf000000;
setFixed(PhysReg{241});
- else if (v == 0x3FF0000000000000) /* 1.0 */
+ } else if (v == 0x3FF0000000000000) { /* 1.0 */
+ data_.i = 0x3f800000;
setFixed(PhysReg{242});
- else if (v == 0xBFF0000000000000) /* -1.0 */
+ } else if (v == 0xBFF0000000000000) { /* -1.0 */
+ data_.i = 0xbf800000;
setFixed(PhysReg{243});
- else if (v == 0x4000000000000000) /* 2.0 */
+ } else if (v == 0x4000000000000000) { /* 2.0 */
+ data_.i = 0x40000000;
setFixed(PhysReg{244});
- else if (v == 0xC000000000000000) /* -2.0 */
+ } else if (v == 0xC000000000000000) { /* -2.0 */
+ data_.i = 0xc0000000;
setFixed(PhysReg{245});
- else if (v == 0x4010000000000000) /* 4.0 */
+ } else if (v == 0x4010000000000000) { /* 4.0 */
+ data_.i = 0x40800000;
setFixed(PhysReg{246});
- else if (v == 0xC010000000000000) /* -4.0 */
+ } else if (v == 0xC010000000000000) { /* -4.0 */
+ data_.i = 0xc0800000;
setFixed(PhysReg{247});
- else if (v == 0x3fc45f306dc9c882) /* 1/(2*PI) */
- setFixed(PhysReg{248});
- else { /* Literal Constant: we don't know if it is a long or double.*/
+ } else { /* Literal Constant: we don't know if it is a long or double.*/
isConstant_ = 0;
assert(false && "attempt to create a 64-bit literal constant");
}
return isConstant() && constantValue() == cmp;
}
+ constexpr uint64_t constantValue64(bool signext=false) const noexcept
+ {
+ if (is64BitConst_) {
+ if (reg_ <= 192)
+ return reg_ - 128;
+ else if (reg_ <= 208)
+ return 0xFFFFFFFFFFFFFFFF - (reg_ - 193);
+
+ switch (reg_) {
+ case 240:
+ return 0x3FE0000000000000;
+ case 241:
+ return 0xBFE0000000000000;
+ case 242:
+ return 0x3FF0000000000000;
+ case 243:
+ return 0xBFF0000000000000;
+ case 244:
+ return 0x4000000000000000;
+ case 245:
+ return 0xC000000000000000;
+ case 246:
+ return 0x4010000000000000;
+ case 247:
+ return 0xC010000000000000;
+ }
+ }
+ return (signext && (data_.i & 0x80000000u) ? 0xffffffff00000000ull : 0ull) | data_.i;
+ }
+
+ /* Indicates that the killed operand's live range intersects with the
+ * instruction's definitions. Unlike isKill() and isFirstKill(), this is
+ * not set by liveness analysis. */
+ constexpr void setLateKill(bool flag) noexcept
+ {
+ isLateKill_ = flag;
+ }
+
+ constexpr bool isLateKill() const noexcept
+ {
+ return isLateKill_;
+ }
+
constexpr void setKill(bool flag) noexcept
{
isKill_ = flag;
return isFirstKill_;
}
+ constexpr bool isKillBeforeDef() const noexcept
+ {
+ return isKill() && !isLateKill();
+ }
+
+ constexpr bool isFirstKillBeforeDef() const noexcept
+ {
+ return isFirstKill() && !isLateKill();
+ }
+
+ constexpr bool operator == (Operand other) const noexcept
+ {
+ if (other.size() != size())
+ return false;
+ if (isFixed() != other.isFixed() || isKillBeforeDef() != other.isKillBeforeDef())
+ return false;
+ if (isFixed() && other.isFixed() && physReg() != other.physReg())
+ return false;
+ if (isLiteral())
+ return other.isLiteral() && other.constantValue() == constantValue();
+ else if (isConstant())
+ return other.isConstant() && other.physReg() == physReg();
+ else if (isUndefined())
+ return other.isUndefined() && other.regClass() == regClass();
+ else
+ return other.isTemp() && other.getTemp() == getTemp();
+ }
private:
union {
uint32_t i;
uint8_t isUndef_:1;
uint8_t isFirstKill_:1;
uint8_t is64BitConst_:1;
+ uint8_t isLateKill_:1;
};
/* can't initialize bit-fields in c++11, so work around using a union */
uint8_t control_ = 0;
*
*/
struct SMEM_instruction : public Instruction {
- bool glc; /* VI+: globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool nv; /* VEGA only: Non-volatile */
- bool can_reorder;
- bool disable_wqm;
+ bool glc : 1; /* VI+: globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool nv : 1; /* VEGA only: Non-volatile */
+ bool can_reorder : 1;
+ bool disable_wqm : 1;
barrier_interaction barrier;
};
struct VOP3A_instruction : public Instruction {
bool abs[3];
- bool opsel[4];
- bool clamp;
- unsigned omod;
bool neg[3];
+ uint8_t opsel : 4;
+ uint8_t omod : 2;
+ bool clamp : 1;
};
/**
*
*/
struct DPP_instruction : public Instruction {
- uint16_t dpp_ctrl;
- uint8_t row_mask;
- uint8_t bank_mask;
bool abs[2];
bool neg[2];
- bool bound_ctrl;
+ uint16_t dpp_ctrl;
+ uint8_t row_mask : 4;
+ uint8_t bank_mask : 4;
+ bool bound_ctrl : 1;
+};
+
+enum sdwa_sel : uint8_t {
+ /* masks */
+ sdwa_wordnum = 0x1,
+ sdwa_bytenum = 0x3,
+ sdwa_asuint = 0x7,
+
+ /* flags */
+ sdwa_isword = 0x4,
+ sdwa_sext = 0x8,
+
+ /* specific values */
+ sdwa_ubyte0 = 0,
+ sdwa_ubyte1 = 1,
+ sdwa_ubyte2 = 2,
+ sdwa_ubyte3 = 3,
+ sdwa_uword0 = sdwa_isword | 0,
+ sdwa_uword1 = sdwa_isword | 1,
+ sdwa_udword = 6,
+
+ sdwa_sbyte0 = sdwa_ubyte0 | sdwa_sext,
+ sdwa_sbyte1 = sdwa_ubyte1 | sdwa_sext,
+ sdwa_sbyte2 = sdwa_ubyte2 | sdwa_sext,
+ sdwa_sbyte3 = sdwa_ubyte3 | sdwa_sext,
+ sdwa_sword0 = sdwa_uword0 | sdwa_sext,
+ sdwa_sword1 = sdwa_uword1 | sdwa_sext,
+ sdwa_sdword = sdwa_udword | sdwa_sext,
+};
+
+/**
+ * Sub-Dword Addressing Format:
+ * This format can be used for VOP1, VOP2 or VOPC instructions.
+ *
+ * omod and SGPR/constant operands are only available on GFX9+. For VOPC,
+ * the definition doesn't have to be VCC on GFX9+.
+ *
+ */
+struct SDWA_instruction : public Instruction {
+ /* these destination modifiers aren't available with VOPC except for
+ * clamp on GFX8 */
+ unsigned dst_sel:4;
+ bool dst_preserve:1;
+ bool clamp:1;
+ unsigned omod:2; /* GFX9+ */
+
+ unsigned sel[2];
+ bool neg[2];
+ bool abs[2];
};
struct Interp_instruction : public Instruction {
- unsigned attribute;
- unsigned component;
+ uint8_t attribute;
+ uint8_t component;
};
/**
/**
* Vector Memory Untyped-buffer Instructions
- * Operand(0): VADDR - Address source. Can carry an index and/or offset
- * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
*/
struct MUBUF_instruction : public Instruction {
- unsigned offset; /* Unsigned byte offset - 12 bit */
- bool offen; /* Supply an offset from VGPR (VADDR) */
- bool idxen; /* Supply an index from VGPR (VADDR) */
- bool glc; /* globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool slc; /* system level coherent */
- bool tfe; /* texture fail enable */
- bool lds; /* Return read-data to LDS instead of VGPRs */
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
+ bool offen : 1; /* Supply an offset from VGPR (VADDR) */
+ bool idxen : 1; /* Supply an index from VGPR (VADDR) */
+ bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
+ bool glc : 1; /* globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool slc : 1; /* system level coherent */
+ bool tfe : 1; /* texture fail enable */
+ bool lds : 1; /* Return read-data to LDS instead of VGPRs */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
/**
* Vector Memory Typed-buffer Instructions
- * Operand(0): VADDR - Address source. Can carry an index and/or offset
- * Operand(1): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(0): SRSRC - Specifies which SGPR supplies T# (resource constant)
+ * Operand(1): VADDR - Address source. Can carry an index and/or offset
* Operand(2): SOFFSET - SGPR to supply unsigned byte offset. (SGPR, M0, or inline constant)
* Operand(3) / Definition(0): VDATA - Vector GPR for write result / read data
*
*/
struct MTBUF_instruction : public Instruction {
+ uint16_t offset; /* Unsigned byte offset - 12 bit */
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
uint8_t nfmt : 3; /* Numeric format of data in memory */
- unsigned offset; /* Unsigned byte offset - 12 bit */
- bool offen; /* Supply an offset from VGPR (VADDR) */
- bool idxen; /* Supply an index from VGPR (VADDR) */
- bool glc; /* globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool slc; /* system level coherent */
- bool tfe; /* texture fail enable */
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ bool offen : 1; /* Supply an offset from VGPR (VADDR) */
+ bool idxen : 1; /* Supply an index from VGPR (VADDR) */
+ bool glc : 1; /* globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool slc : 1; /* system level coherent */
+ bool tfe : 1; /* texture fail enable */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
/**
* Vector Memory Image Instructions
- * Operand(0): VADDR - Address source. Can carry an offset or an index.
- * Operand(1): SRSRC - Scalar GPR that specifies the resource constant.
- * Operand(2): SSAMP - Scalar GPR that specifies sampler constant.
- * Operand(3) / Definition(0): VDATA - Vector GPR for read / write result.
+ * Operand(0): SRSRC - Scalar GPR that specifies the resource constant.
+ * Operand(1): SSAMP - Scalar GPR that specifies sampler constant.
+ * or VDATA - Vector GPR for write data.
+ * Operand(2): VADDR - Address source. Can carry an offset or an index.
+ * Definition(0): VDATA - Vector GPR for read result.
*
*/
struct MIMG_instruction : public Instruction {
- unsigned dmask; /* Data VGPR enable mask */
- unsigned dim; /* NAVI: dimensionality */
- bool unrm; /* Force address to be un-normalized */
- bool dlc; /* NAVI: device level coherent */
- bool glc; /* globally coherent */
- bool slc; /* system level coherent */
- bool tfe; /* texture fail enable */
- bool da; /* declare an array */
- bool lwe; /* Force data to be un-normalized */
- bool r128; /* NAVI: Texture resource size */
- bool a16; /* VEGA, NAVI: Address components are 16-bits */
- bool d16; /* Convert 32-bit data to 16-bit data */
- bool disable_wqm; /* Require an exec mask without helper invocations */
- bool can_reorder;
+ uint8_t dmask; /* Data VGPR enable mask */
+ uint8_t dim : 3; /* NAVI: dimensionality */
+ bool unrm : 1; /* Force address to be un-normalized */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool glc : 1; /* globally coherent */
+ bool slc : 1; /* system level coherent */
+ bool tfe : 1; /* texture fail enable */
+ bool da : 1; /* declare an array */
+ bool lwe : 1; /* LOD warning enable */
+ bool r128 : 1; /* NAVI: Texture resource size */
+ bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
+ bool d16 : 1; /* Convert 32-bit data to 16-bit data */
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
barrier_interaction barrier;
};
*/
struct FLAT_instruction : public Instruction {
uint16_t offset; /* Vega/Navi only */
- bool slc; /* system level coherent */
- bool glc; /* globally coherent */
- bool dlc; /* NAVI: device level coherent */
- bool lds;
- bool nv;
+ bool slc : 1; /* system level coherent */
+ bool glc : 1; /* globally coherent */
+ bool dlc : 1; /* NAVI: device level coherent */
+ bool lds : 1;
+ bool nv : 1;
+ bool disable_wqm : 1; /* Require an exec mask without helper invocations */
+ bool can_reorder : 1;
+ barrier_interaction barrier;
};
struct Export_instruction : public Instruction {
- unsigned enabled_mask;
- unsigned dest;
- bool compressed;
- bool done;
- bool valid_mask;
+ uint8_t enabled_mask;
+ uint8_t dest;
+ bool compressed : 1;
+ bool done : 1;
+ bool valid_mask : 1;
};
struct Pseudo_instruction : public Instruction {
inst->opcode = opcode;
inst->format = format;
- inst->operands = aco::span<Operand>((Operand*)(data + sizeof(T)), num_operands);
- inst->definitions = aco::span<Definition>((Definition*)inst->operands.end(), num_definitions);
+ uint16_t operands_offset = data + sizeof(T) - (char*)&inst->operands;
+ inst->operands = aco::span<Operand>(operands_offset, num_operands);
+ uint16_t definitions_offset = (char*)inst->operands.end() - (char*)&inst->definitions;
+ inst->definitions = aco::span<Definition>(definitions_offset, num_definitions);
return inst;
}
return false;
const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
for (unsigned i = 0; i < operands.size(); i++) {
- if (vop3->abs[i] || vop3->opsel[i] || vop3->neg[i])
+ if (vop3->abs[i] || vop3->neg[i])
return true;
}
- return vop3->opsel[3] || vop3->clamp || vop3->omod;
+ return vop3->opsel || vop3->clamp || vop3->omod;
}
constexpr bool is_phi(Instruction* instr)
return is_phi(instr.get());
}
-constexpr barrier_interaction get_barrier_interaction(Instruction* instr)
-{
- switch (instr->format) {
- case Format::SMEM:
- return static_cast<SMEM_instruction*>(instr)->barrier;
- case Format::MUBUF:
- return static_cast<MUBUF_instruction*>(instr)->barrier;
- case Format::MIMG:
- return static_cast<MIMG_instruction*>(instr)->barrier;
- case Format::FLAT:
- case Format::GLOBAL:
- return barrier_buffer;
- case Format::DS:
- return barrier_shared;
- default:
- return barrier_none;
- }
-}
+barrier_interaction get_barrier_interaction(Instruction* instr);
+
+bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
enum block_kind {
/* uniform indicates that leaving this block,
block_kind_uses_discard_if = 1 << 12,
block_kind_needs_lowering = 1 << 13,
block_kind_uses_demote = 1 << 14,
+ block_kind_export_end = 1 << 15,
};
static constexpr Stage sw_tes = 1 << 3;
static constexpr Stage sw_fs = 1 << 4;
static constexpr Stage sw_cs = 1 << 5;
-static constexpr Stage sw_mask = 0x3f;
+static constexpr Stage sw_gs_copy = 1 << 6;
+static constexpr Stage sw_mask = 0x7f;
/* hardware stages (can't be OR'd, just a mask for convenience when testing multiple) */
-static constexpr Stage hw_vs = 1 << 6;
-static constexpr Stage hw_es = 1 << 7; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_gs = 1 << 8;
-static constexpr Stage hw_ls = 1 << 9; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
-static constexpr Stage hw_hs = 1 << 10;
-static constexpr Stage hw_fs = 1 << 11;
-static constexpr Stage hw_cs = 1 << 12;
-static constexpr Stage hw_mask = 0x7f << 6;
+static constexpr Stage hw_vs = 1 << 7;
+static constexpr Stage hw_es = 1 << 8; /* not on GFX9. combined into GS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_gs = 1 << 9;
+static constexpr Stage hw_ls = 1 << 10; /* not on GFX9. combined into HS on GFX9 (and GFX10/legacy). */
+static constexpr Stage hw_hs = 1 << 11;
+static constexpr Stage hw_fs = 1 << 12;
+static constexpr Stage hw_cs = 1 << 13;
+static constexpr Stage hw_mask = 0x7f << 7;
/* possible settings of Program::stage */
static constexpr Stage vertex_vs = sw_vs | hw_vs;
static constexpr Stage fragment_fs = sw_fs | hw_fs;
static constexpr Stage compute_cs = sw_cs | hw_cs;
static constexpr Stage tess_eval_vs = sw_tes | hw_vs;
+static constexpr Stage gs_copy_vs = sw_gs_copy | hw_vs;
/* GFX10/NGG */
static constexpr Stage ngg_vertex_gs = sw_vs | hw_gs;
static constexpr Stage ngg_vertex_geometry_gs = sw_vs | sw_gs | hw_gs;
static constexpr Stage vertex_ls = sw_vs | hw_ls; /* vertex before tesselation control */
static constexpr Stage vertex_es = sw_vs | hw_es; /* vertex before geometry */
static constexpr Stage tess_control_hs = sw_tcs | hw_hs;
-static constexpr Stage tess_eval_es = sw_tes | hw_gs; /* tesselation evaluation before geometry */
+static constexpr Stage tess_eval_es = sw_tes | hw_es; /* tesselation evaluation before geometry */
static constexpr Stage geometry_gs = sw_gs | hw_gs;
+enum statistic {
+ statistic_hash,
+ statistic_instructions,
+ statistic_copies,
+ statistic_branches,
+ statistic_cycles,
+ statistic_vmem_clauses,
+ statistic_smem_clauses,
+ statistic_vmem_score,
+ statistic_smem_score,
+ statistic_sgpr_presched,
+ statistic_vgpr_presched,
+ num_statistics
+};
+
class Program final {
public:
float_mode next_fp_mode;
enum chip_class chip_class;
enum radeon_family family;
unsigned wave_size;
+ RegClass lane_mask;
Stage stage; /* Stage */
bool needs_exact = false; /* there exists an instruction with disable_wqm = true */
bool needs_wqm = false; /* there exists a p_wqm instruction */
Temp private_segment_buffer;
Temp scratch_offset;
+ uint16_t min_waves = 0;
uint16_t lds_alloc_granule;
uint32_t lds_limit; /* in bytes */
+ bool has_16bank_lds;
uint16_t vgpr_limit;
uint16_t sgpr_limit;
uint16_t physical_sgprs;
uint16_t sgpr_alloc_granule; /* minus one. must be power of two */
+ uint16_t vgpr_alloc_granule; /* minus one. must be power of two */
+ unsigned workgroup_size; /* if known; otherwise UINT_MAX */
+
+ bool xnack_enabled = false;
bool needs_vcc = false;
- bool needs_xnack_mask = false;
bool needs_flat_scr = false;
+ bool collect_statistics = false;
+ uint32_t statistics[num_statistics];
+
uint32_t allocateId()
{
assert(allocationID <= 16777215);
struct nir_shader *const *shaders,
ac_shader_config* config,
struct radv_shader_args *args);
+void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args);
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);
void lower_bool_phis(Program* program);
+void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
std::vector<uint16_t> dead_code_analysis(Program *program);
#ifndef NDEBUG
void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
#else
-#define perfwarn(program, cond, msg, ...)
+#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif
+void collect_presched_stats(Program *program);
+void collect_preasm_stats(Program *program);
+void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
+
void aco_print_instr(Instruction *instr, FILE *output);
void aco_print_program(Program *program, FILE *output);
+/* utilities for dealing with register demand */
+RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
+RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);
+RegisterDemand get_demand_before(RegisterDemand demand, aco_ptr<Instruction>& instr, aco_ptr<Instruction>& instr_before);
+
/* number of sgprs that need to be allocated but might notbe addressable as s0-s105 */
uint16_t get_extra_sgprs(Program *program);
-/* get number of sgprs allocated required to address a number of sgprs */
+/* get number of sgprs/vgprs that need to be allocated to address a number of sgprs/vgprs */
uint16_t get_sgpr_alloc(Program *program, uint16_t addressable_sgprs);
+uint16_t get_vgpr_alloc(Program *program, uint16_t addressable_vgprs);
-/* return number of addressable SGPRs for max_waves */
+/* return number of addressable sgprs/vgprs for max_waves */
uint16_t get_addr_sgpr_from_waves(Program *program, uint16_t max_waves);
+uint16_t get_addr_vgpr_from_waves(Program *program, uint16_t max_waves);
typedef struct {
+ const int16_t opcode_gfx7[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx9[static_cast<int>(aco_opcode::num_opcodes)];
const int16_t opcode_gfx10[static_cast<int>(aco_opcode::num_opcodes)];
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_input_modifiers;
const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> can_use_output_modifiers;
+ const std::bitset<static_cast<int>(aco_opcode::num_opcodes)> is_atomic;
const char *name[static_cast<int>(aco_opcode::num_opcodes)];
const aco::Format format[static_cast<int>(aco_opcode::num_opcodes)];
} Info;