#include <vector>
#include <set>
+#include <unordered_set>
#include <bitset>
#include <memory>
PSEUDO_REDUCTION = 18,
/* Vector ALU Formats */
+ VOP3P = 19,
VOP1 = 1 << 8,
VOP2 = 1 << 9,
VOPC = 1 << 10,
VOP3 = 1 << 11,
VOP3A = 1 << 11,
VOP3B = 1 << 11,
- VOP3P = 1 << 12,
/* Vector Parameter Interpolation Format */
- VINTRP = 1 << 13,
- DPP = 1 << 14,
- SDWA = 1 << 15,
+ VINTRP = 1 << 12,
+ DPP = 1 << 13,
+ SDWA = 1 << 14,
};
enum barrier_interaction : uint8_t {
barrier_gs_sendmsg = 0x20,
/* used by barriers. created by s_barrier */
barrier_barrier = 0x40,
- barrier_count = 6,
+ barrier_count = 7, /* highest flag visible here is 0x40 (bit 6), so this presumably counts 7 flag bits */
};
enum fp_round {
constexpr RegClass as_linear() const { return RegClass((RC) (rc | (1 << 6))); }
constexpr RegClass as_subdword() const { return RegClass((RC) (rc | 1 << 7)); }
+ static constexpr RegClass get(RegType type, unsigned bytes) { /* builds a RegClass of the given type from a size expressed in bytes */
+ if (type == RegType::sgpr) {
+ return RegClass(type, DIV_ROUND_UP(bytes, 4u)); /* sgpr: size argument is bytes / 4 via DIV_ROUND_UP (presumably rounding up; macro defined outside this view) */
+ } else {
+ return bytes % 4u ? RegClass(type, bytes).as_subdword() : /* not a multiple of 4: constructed from the byte count and marked sub-dword */
+ RegClass(type, bytes / 4u); /* multiple of 4: size argument is bytes / 4 */
+ }
+ }
+
private:
RC rc;
};
* and SSA id.
*/
struct Temp {
- Temp() noexcept : id_(0), reg_class(RegType::sgpr, 0) {}
+ Temp() noexcept : id_(0), reg_class(0) {} /* default: id 0 and raw register-class value 0 */
constexpr Temp(uint32_t id, RegClass cls) noexcept
- : id_(id), reg_class(cls) {}
+ : id_(id), reg_class(uint8_t(cls)) {} /* the class is converted to an 8-bit value for storage in the bitfield */
constexpr uint32_t id() const noexcept { return id_; }
- constexpr RegClass regClass() const noexcept { return reg_class; }
+ constexpr RegClass regClass() const noexcept { return (RegClass::RC)reg_class; } /* rebuilds a RegClass from the stored 8-bit value */
- constexpr unsigned bytes() const noexcept { return reg_class.bytes(); }
- constexpr unsigned size() const noexcept { return reg_class.size(); }
- constexpr RegType type() const noexcept { return reg_class.type(); }
- constexpr bool is_linear() const noexcept { return reg_class.is_linear(); }
+ constexpr unsigned bytes() const noexcept { return regClass().bytes(); } /* these accessors go through regClass() because reg_class is a plain integer bitfield */
+ constexpr unsigned size() const noexcept { return regClass().size(); }
+ constexpr RegType type() const noexcept { return regClass().type(); }
+ constexpr bool is_linear() const noexcept { return regClass().is_linear(); }
constexpr bool operator <(Temp other) const noexcept { return id() < other.id(); }
constexpr bool operator==(Temp other) const noexcept { return id() == other.id(); }
constexpr bool operator!=(Temp other) const noexcept { return id() != other.id(); }
private:
- uint32_t id_:24;
- RegClass reg_class;
+ uint32_t id_: 24; /* 24-bit id */
+ uint32_t reg_class : 8; /* 8-bit register-class value, declared with the same uint32_t type as id_ (24 + 8 = 32 bits) */
};
/**
constexpr bool operator==(PhysReg other) const { return reg_b == other.reg_b; }
constexpr bool operator!=(PhysReg other) const { return reg_b != other.reg_b; }
constexpr bool operator <(PhysReg other) const { return reg_b < other.reg_b; }
+ constexpr PhysReg advance(unsigned bytes) const { PhysReg res = *this; res.reg_b += bytes; return res; } /* returns a copy whose reg_b is increased by `bytes`; *this is left unchanged */
uint16_t reg_b = 0;
};
};
};
-class Block;
+struct Block; /* forward declaration; presumably switched to `struct` to match the class-key of the definition (not visible here) */
struct Instruction {
aco_opcode opcode;
|| ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
|| ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
|| ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
- || ((uint16_t) format & (uint16_t) Format::VOP3P) == (uint16_t) Format::VOP3P;
+ || format == Format::VOP3P;
}
constexpr bool isSALU() const noexcept
constexpr bool isVOP3() const noexcept
{
return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
- ((uint16_t) format & (uint16_t) Format::VOP3B) ||
- format == Format::VOP3P;
+ ((uint16_t) format & (uint16_t) Format::VOP3B);
}
constexpr bool isSDWA() const noexcept
return false;
}
};
+static_assert(sizeof(Instruction) == 16, "Unexpected padding"); /* pins the base size; the per-format asserts in this file are expressed as sizeof(Instruction) + N */
struct SOPK_instruction : public Instruction {
uint16_t imm;
+ uint16_t padding; /* explicit filler so imm + padding add exactly 4 bytes */
};
+static_assert(sizeof(SOPK_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); /* two uint16_t members on top of the base */
struct SOPP_instruction : public Instruction {
uint32_t imm;
int block;
};
+static_assert(sizeof(SOPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); /* imm (4 bytes) + block (holds only if int is 4 bytes) */
struct SOPC_instruction : public Instruction {
};
+static_assert(sizeof(SOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
struct SOP1_instruction : public Instruction {
};
+static_assert(sizeof(SOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
struct SOP2_instruction : public Instruction {
};
+static_assert(sizeof(SOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
/**
* Scalar Memory Format:
*
*/
struct SMEM_instruction : public Instruction {
+ barrier_interaction barrier; /* now declared ahead of the 1-bit flags */
bool glc : 1; /* VI+: globally coherent */
bool dlc : 1; /* NAVI: device level coherent */
bool nv : 1; /* VEGA only: Non-volatile */
bool can_reorder : 1;
bool disable_wqm : 1;
- barrier_interaction barrier;
+ uint32_t padding: 19; /* explicit padding bits */
};
+static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); /* barrier (uint8_t enum) + 5 flag bits + 19 padding bits = 32 bits */
struct VOP1_instruction : public Instruction {
};
+static_assert(sizeof(VOP1_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
struct VOP2_instruction : public Instruction {
};
+static_assert(sizeof(VOP2_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
struct VOPC_instruction : public Instruction {
};
+static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
struct VOP3A_instruction : public Instruction {
bool abs[3];
uint8_t opsel : 4;
uint8_t omod : 2;
bool clamp : 1;
+ uint32_t padding : 9; /* explicit padding bits after the 4 + 2 + 1 modifier bits above */
};
+static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); /* compilation fails if the members add anything other than 8 bytes */
+
+struct VOP3P_instruction : public Instruction { /* modifier fields for instructions whose format is Format::VOP3P */
+ bool neg_lo[3]; /* read per operand index (neg_lo[i]) by the modifier check in this file */
+ bool neg_hi[3]; /* read per operand index (neg_hi[i]) by the modifier check in this file */
+ uint8_t opsel_lo : 3;
+ uint8_t opsel_hi : 3;
+ bool clamp : 1;
+ uint32_t padding : 9; /* explicit padding: 3 + 3 + 1 + 9 = 16 bits of bitfields */
+};
+static_assert(sizeof(VOP3P_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); /* 6 bools + 16 bitfield bits = 8 bytes, assuming 1-byte bool */
/**
* Data Parallel Primitives Format:
uint8_t row_mask : 4;
uint8_t bank_mask : 4;
bool bound_ctrl : 1;
+ uint32_t padding : 7;
};
+static_assert(sizeof(DPP_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
enum sdwa_sel : uint8_t {
/* masks */
struct SDWA_instruction : public Instruction {
/* these destination modifiers aren't available with VOPC except for
* clamp on GFX8 */
- unsigned dst_sel:8;
- bool dst_preserve:1;
- bool clamp:1;
- unsigned omod:2; /* GFX9+ */
-
- unsigned sel[2];
+ uint8_t sel[2]; /* one byte per selector (previously unsigned) */
+ uint8_t dst_sel; /* a full byte instead of an 8-bit bitfield */
bool neg[2];
bool abs[2];
+ bool dst_preserve : 1;
+ bool clamp : 1;
+ uint8_t omod : 2; /* GFX9+ */
+ uint32_t padding : 4; /* explicit padding: 1 + 1 + 2 + 4 = 8 bits of bitfields */
};
+static_assert(sizeof(SDWA_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); /* 7 whole-byte members + 8 bitfield bits = 8 bytes, assuming 1-byte bool */
struct Interp_instruction : public Instruction {
uint8_t attribute;
uint8_t component;
+ uint16_t padding; /* explicit filler: 1 + 1 + 2 = 4 bytes */
};
+static_assert(sizeof(Interp_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); /* compilation fails if the members add anything other than 4 bytes */
/**
* Local and Global Data Sharing instructions
int8_t offset1;
bool gds;
};
+static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
/**
* Vector Memory Untyped-buffer Instructions
bool lds : 1; /* Return read-data to LDS instead of VGPRs */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
bool can_reorder : 1;
+ uint8_t padding : 2;
barrier_interaction barrier;
};
+static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
/**
* Vector Memory Typed-buffer Instructions
*/
struct MTBUF_instruction : public Instruction {
uint16_t offset; /* Unsigned byte offset - 12 bit */
+ barrier_interaction barrier; /* now declared directly after offset, ahead of the bitfields */
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
uint8_t nfmt : 3; /* Numeric format of data in memory */
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
bool tfe : 1; /* texture fail enable */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
bool can_reorder : 1;
- barrier_interaction barrier;
+ uint32_t padding : 25; /* explicit padding bits */
};
+static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); /* compilation fails if the members add anything other than 8 bytes */
/**
* Vector Memory Image Instructions
bool d16 : 1; /* Convert 32-bit data to 16-bit data */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
bool can_reorder : 1;
+ uint8_t padding : 1;
barrier_interaction barrier;
};
+static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
/**
* Flat/Scratch/Global Instructions
bool nv : 1;
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
bool can_reorder : 1;
+ uint8_t padding : 1;
barrier_interaction barrier;
};
+static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
struct Export_instruction : public Instruction {
uint8_t enabled_mask;
bool compressed : 1;
bool done : 1;
bool valid_mask : 1;
+ uint32_t padding : 13; /* explicit padding bits after the three 1-bit flags */
};
+static_assert(sizeof(Export_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); /* compilation fails if the members add anything other than 4 bytes */
struct Pseudo_instruction : public Instruction {
- bool tmp_in_scc;
PhysReg scratch_sgpr; /* might not be valid if it's not needed */
+ bool tmp_in_scc; /* now declared after scratch_sgpr */
+ uint8_t padding; /* explicit filler byte */
};
+static_assert(sizeof(Pseudo_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); /* scratch_sgpr + tmp_in_scc + padding must total 4 bytes */
struct Pseudo_branch_instruction : public Instruction {
/* target[0] is the block index of the branch target.
*/
uint32_t target[2];
};
+static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding"); /* two uint32_t targets = 8 bytes */
struct Pseudo_barrier_instruction : public Instruction {
};
-
-enum ReduceOp {
- iadd32, iadd64,
- imul32, imul64,
- fadd32, fadd64,
- fmul32, fmul64,
- imin32, imin64,
- imax32, imax64,
- umin32, umin64,
- umax32, umax64,
- fmin32, fmin64,
- fmax32, fmax64,
- iand32, iand64,
- ior32, ior64,
- ixor32, ixor64,
- gfx10_wave64_bpermute
+static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 0, "Unexpected padding"); /* declares no members of its own, so it must not grow the base */
+
+enum ReduceOp : uint16_t { /* fixed 16-bit underlying type, so a ReduceOp member occupies 2 bytes */
+ iadd8, iadd16, iadd32, iadd64, /* integer ops are listed with 8/16/32/64 suffixes */
+ imul8, imul16, imul32, imul64,
+ fadd16, fadd32, fadd64, /* f-prefixed ops are listed with 16/32/64 suffixes only */
+ fmul16, fmul32, fmul64,
+ imin8, imin16, imin32, imin64,
+ imax8, imax16, imax32, imax64,
+ umin8, umin16, umin32, umin64,
+ umax8, umax16, umax32, umax64,
+ fmin16, fmin32, fmin64,
+ fmax16, fmax32, fmax64,
+ iand8, iand16, iand32, iand64,
+ ior8, ior16, ior32, ior64,
+ ixor8, ixor16, ixor32, ixor64,
};
/**
*/
struct Pseudo_reduction_instruction : public Instruction {
ReduceOp reduce_op;
- unsigned cluster_size; // must be 0 for scans
+ uint16_t cluster_size; // must be 0 for scans
};
+static_assert(sizeof(Pseudo_reduction_instruction) == sizeof(Instruction) + 4, "Unexpected padding"); /* reduce_op (uint16_t-based enum) + cluster_size (uint16_t) = 4 bytes */
struct instr_deleter_functor {
void operator()(void* p) {
{
if (isDPP() || isSDWA())
return true;
- if (!isVOP3())
- return false;
- const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
- for (unsigned i = 0; i < operands.size(); i++) {
- if (vop3->abs[i] || vop3->neg[i])
- return true;
+
+ if (format == Format::VOP3P) {
+ const VOP3P_instruction *vop3p = static_cast<const VOP3P_instruction*>(this);
+ for (unsigned i = 0; i < operands.size(); i++) {
+ if (vop3p->neg_lo[i] || vop3p->neg_hi[i])
+ return true;
+ }
+ return vop3p->opsel_lo || vop3p->opsel_hi || vop3p->clamp;
+ } else if (isVOP3()) {
+ const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
+ for (unsigned i = 0; i < operands.size(); i++) {
+ if (vop3->abs[i] || vop3->neg[i])
+ return true;
+ }
+ return vop3->opsel || vop3->clamp || vop3->omod;
}
- return vop3->opsel || vop3->clamp || vop3->omod;
+ return false;
}
constexpr bool is_phi(Instruction* instr)
return is_phi(instr.get());
}
-barrier_interaction get_barrier_interaction(Instruction* instr);
+barrier_interaction get_barrier_interaction(const Instruction* instr); /* takes a pointer-to-const, so it can be called with const instructions */
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
uint32_t allocationID = 1;
};
+struct TempHash { /* hash functor for Temp, used as the hasher of TempSet */
+ std::size_t operator()(Temp t) const {
+ return t.id(); /* the id itself is the hash value; Temp equality also compares only id() */
+ }
+};
+using TempSet = std::unordered_set<Temp, TempHash>; /* unordered set of Temp hashed through TempHash */
+
struct live {
/* live temps out per block */
- std::vector<std::set<Temp>> live_out;
+ std::vector<TempSet> live_out; /* TempSet is a std::unordered_set, so iteration order over a block's live-outs is unspecified */
/* register demand (sgpr/vgpr) per instruction per block */
std::vector<std::vector<RegisterDemand>> register_demand;
};
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);
-void lower_bool_phis(Program* program);
+void lower_phis(Program* program);
void calc_min_waves(Program* program);
void update_vgpr_sgpr_demand(Program* program, const RegisterDemand new_demand);
live live_var_analysis(Program* program, const struct radv_nir_compiler_options *options);
void optimize(Program* program);
void setup_reduce_temp(Program* program);
void lower_to_cssa(Program* program, live& live_vars, const struct radv_nir_compiler_options *options);
-void register_allocation(Program *program, std::vector<std::set<Temp>> live_out_per_block);
+void register_allocation(Program *program, std::vector<TempSet>& live_out_per_block); /* live-out sets are passed by non-const reference rather than by value, so the vector is not copied per call */
void ssa_elimination(Program* program);
void lower_to_hw_instr(Program* program);
void schedule_program(Program* program, live& live_vars);
void collect_preasm_stats(Program *program);
void collect_postasm_stats(Program *program, const std::vector<uint32_t>& code);
-void aco_print_instr(Instruction *instr, FILE *output);
-void aco_print_program(Program *program, FILE *output);
+void aco_print_instr(const Instruction *instr, FILE *output); /* takes a pointer-to-const instruction */
+void aco_print_program(const Program *program, FILE *output); /* takes a pointer-to-const program */
/* utilities for dealing with register demand */
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);