#include "aco_opcodes.h"
#include "aco_util.h"
+#include "vulkan/radv_shader.h"
+
struct radv_nir_compiler_options;
struct radv_shader_args;
struct radv_shader_info;
extern uint64_t debug_flags;
enum {
- DEBUG_VALIDATE = 0x1,
+ DEBUG_VALIDATE_IR = 0x1,
DEBUG_VALIDATE_RA = 0x2,
DEBUG_PERFWARN = 0x4,
+ DEBUG_FORCE_WAITCNT = 0x8,
+ DEBUG_NO_VN = 0x10,
+ DEBUG_NO_OPT = 0x20,
+ DEBUG_NO_SCHED = 0x40,
};
/**
SDWA = 1 << 14,
};
-enum barrier_interaction : uint8_t {
- barrier_none = 0,
- barrier_buffer = 0x1,
- barrier_image = 0x2,
- barrier_atomic = 0x4,
- barrier_shared = 0x8,
- /* used for geometry shaders to ensure vertex data writes are before the
- * GS_DONE s_sendmsg. */
- barrier_gs_data = 0x10,
- /* used for geometry shaders to ensure s_sendmsg instructions are in-order. */
- barrier_gs_sendmsg = 0x20,
- /* used by barriers. created by s_barrier */
- barrier_barrier = 0x40,
- barrier_count = 7,
+enum storage_class : uint8_t {
+ storage_none = 0x0, /* no synchronization and can be reordered around aliasing stores */
+ storage_buffer = 0x1, /* SSBOs and global memory */
+ storage_atomic_counter = 0x2, /* not used for Vulkan */
+ storage_image = 0x4,
+ storage_shared = 0x8, /* or TCS output */
+ storage_vmem_output = 0x10, /* GS or TCS output stores using VMEM */
+ storage_scratch = 0x20,
+ storage_vgpr_spill = 0x40,
+ storage_count = 8,
+};
+
+enum memory_semantics : uint8_t {
+ semantic_none = 0x0,
+ /* for loads: don't move any access after this load to before this load (even other loads)
+ * for barriers: don't move any access after the barrier to before any
+ * atomics/control_barriers/sendmsg_gs_done before the barrier */
+ semantic_acquire = 0x1,
+ /* for stores: don't move any access before this store to after this store
+ * for barriers: don't move any access before the barrier to after any
+ * atomics/control_barriers/sendmsg_gs_done after the barrier */
+ semantic_release = 0x2,
+
+ /* the rest are for loads/stores/atomics only */
+ /* cannot be DCE'd or CSE'd */
+ semantic_volatile = 0x4,
+ /* does not interact with barriers and assumes this lane is the only lane
+ * accessing this memory */
+ semantic_private = 0x8,
+ /* this operation can be reordered around operations of the same storage. says nothing about barriers */
+ semantic_can_reorder = 0x10,
+ /* this is an atomic instruction (may only read or write memory) */
+ semantic_atomic = 0x20,
+ /* this instruction both reads and writes memory */
+ semantic_rmw = 0x40,
+
+ semantic_acqrel = semantic_acquire | semantic_release,
+ semantic_atomicrmw = semantic_volatile | semantic_atomic | semantic_rmw,
+};
+
+enum sync_scope : uint8_t {
+ scope_invocation = 0,
+ scope_subgroup = 1,
+ scope_workgroup = 2,
+ scope_queuefamily = 3,
+ scope_device = 4,
+};
+
+struct memory_sync_info {
+ memory_sync_info() : storage(storage_none), semantics(semantic_none), scope(scope_invocation) {}
+ memory_sync_info(int storage, int semantics=0, sync_scope scope=scope_invocation)
+ : storage((storage_class)storage), semantics((memory_semantics)semantics), scope(scope) {}
+
+ storage_class storage:8;
+ memory_semantics semantics:8;
+ sync_scope scope:8;
+
+ bool operator == (const memory_sync_info& rhs) const {
+ return storage == rhs.storage &&
+ semantics == rhs.semantics &&
+ scope == rhs.scope;
+ }
+
+ bool can_reorder() const {
+ if (semantics & semantic_acqrel)
+ return false;
+ /* Also check storage so that zero-initialized memory_sync_info can be
+ * reordered. */
+ return (!storage || (semantics & semantic_can_reorder)) && !(semantics & semantic_volatile);
+ }
};
+static_assert(sizeof(memory_sync_info) == 3, "Unexpected padding");
enum fp_round {
fp_round_ne = 0,
static constexpr PhysReg m0{124};
static constexpr PhysReg vcc{106};
static constexpr PhysReg vcc_hi{107};
+static constexpr PhysReg tba{108}; /* GFX6-GFX8 */
+static constexpr PhysReg tma{110}; /* GFX6-GFX8 */
+static constexpr PhysReg ttmp0{112};
+static constexpr PhysReg ttmp1{113};
+static constexpr PhysReg ttmp2{114};
+static constexpr PhysReg ttmp3{115};
+static constexpr PhysReg ttmp4{116};
+static constexpr PhysReg ttmp5{117};
+static constexpr PhysReg ttmp6{118};
+static constexpr PhysReg ttmp7{119};
+static constexpr PhysReg ttmp8{120};
+static constexpr PhysReg ttmp9{121};
+static constexpr PhysReg ttmp10{122};
+static constexpr PhysReg ttmp11{123};
static constexpr PhysReg sgpr_null{125}; /* GFX10+ */
static constexpr PhysReg exec{126};
static constexpr PhysReg exec_lo{126};
*
*/
struct SMEM_instruction : public Instruction {
- barrier_interaction barrier;
+ memory_sync_info sync;
bool glc : 1; /* VI+: globally coherent */
bool dlc : 1; /* NAVI: device level coherent */
bool nv : 1; /* VEGA only: Non-volatile */
- bool can_reorder : 1;
bool disable_wqm : 1;
bool prevent_overflow : 1; /* avoid overflow when combining additions */
- uint32_t padding: 18;
+ uint32_t padding: 3;
};
static_assert(sizeof(SMEM_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
*
*/
struct DS_instruction : public Instruction {
+ memory_sync_info sync;
+ bool gds;
int16_t offset0;
int8_t offset1;
- bool gds;
+ uint8_t padding;
};
-static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
+static_assert(sizeof(DS_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
/**
* Vector Memory Untyped-buffer Instructions
*
*/
struct MUBUF_instruction : public Instruction {
- uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
+ memory_sync_info sync;
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
bool idxen : 1; /* Supply an index from VGPR (VADDR) */
bool addr64 : 1; /* SI, CIK: Address size is 64-bit */
bool tfe : 1; /* texture fail enable */
bool lds : 1; /* Return read-data to LDS instead of VGPRs */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
- bool can_reorder : 1;
- bool swizzled:1;
- uint8_t padding : 1;
- barrier_interaction barrier;
+ uint16_t offset : 12; /* Unsigned byte offset - 12 bit */
+ bool swizzled : 1;
+ uint32_t padding1 : 18;
};
-static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
+static_assert(sizeof(MUBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
/**
* Vector Memory Typed-buffer Instructions
*
*/
struct MTBUF_instruction : public Instruction {
- uint16_t offset; /* Unsigned byte offset - 12 bit */
- barrier_interaction barrier;
+ memory_sync_info sync;
uint8_t dfmt : 4; /* Data Format of data in memory buffer */
uint8_t nfmt : 3; /* Numeric format of data in memory */
bool offen : 1; /* Supply an offset from VGPR (VADDR) */
bool slc : 1; /* system level coherent */
bool tfe : 1; /* texture fail enable */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
- bool can_reorder : 1;
- uint32_t padding : 25;
+ uint32_t padding : 10;
+ uint16_t offset; /* Unsigned byte offset - 12 bit */
};
static_assert(sizeof(MTBUF_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
*
*/
struct MIMG_instruction : public Instruction {
+ memory_sync_info sync;
uint8_t dmask; /* Data VGPR enable mask */
uint8_t dim : 3; /* NAVI: dimensionality */
bool unrm : 1; /* Force address to be un-normalized */
bool a16 : 1; /* VEGA, NAVI: Address components are 16-bits */
bool d16 : 1; /* Convert 32-bit data to 16-bit data */
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
- bool can_reorder : 1;
- uint8_t padding : 1;
- barrier_interaction barrier;
+ uint32_t padding : 18;
};
-static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
+static_assert(sizeof(MIMG_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
/**
* Flat/Scratch/Global Instructions
*
*/
struct FLAT_instruction : public Instruction {
- uint16_t offset; /* Vega/Navi only */
+ memory_sync_info sync;
bool slc : 1; /* system level coherent */
bool glc : 1; /* globally coherent */
bool dlc : 1; /* NAVI: device level coherent */
bool lds : 1;
bool nv : 1;
bool disable_wqm : 1; /* Require an exec mask without helper invocations */
- bool can_reorder : 1;
- uint8_t padding : 1;
- barrier_interaction barrier;
+ uint32_t padding0 : 2;
+ uint16_t offset; /* Vega/Navi only */
+ uint16_t padding1;
};
-static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
+static_assert(sizeof(FLAT_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct Export_instruction : public Instruction {
uint8_t enabled_mask;
static_assert(sizeof(Pseudo_branch_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct Pseudo_barrier_instruction : public Instruction {
+ memory_sync_info sync;
+ sync_scope exec_scope;
};
-static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
+static_assert(sizeof(Pseudo_barrier_instruction) == sizeof(Instruction) + 4, "Unexpected padding");
enum ReduceOp : uint16_t {
iadd8, iadd16, iadd32, iadd64,
return is_phi(instr.get());
}
-barrier_interaction get_barrier_interaction(const Instruction* instr);
+memory_sync_info get_sync_info(const Instruction* instr);
+
bool is_dead(const std::vector<uint16_t>& uses, Instruction *instr);
bool can_use_opsel(chip_class chip, aco_opcode op, int idx, bool high);
bool collect_statistics = false;
uint32_t statistics[num_statistics];
+ struct {
+ void (*func)(void *private_data,
+ enum radv_compiler_debug_level level,
+ const char *message);
+ void *private_data;
+ } debug;
+
uint32_t allocateId()
{
assert(allocationID <= 16777215);
void select_gs_copy_shader(Program *program, struct nir_shader *gs_shader,
ac_shader_config* config,
struct radv_shader_args *args);
+void select_trap_handler_shader(Program *program, struct nir_shader *shader,
+ ac_shader_config* config,
+ struct radv_shader_args *args);
void lower_wqm(Program* program, live& live_vars,
const struct radv_nir_compiler_options *options);
unsigned emit_program(Program* program, std::vector<uint32_t>& code);
void print_asm(Program *program, std::vector<uint32_t>& binary,
unsigned exec_size, std::ostream& out);
-void validate(Program* program, FILE *output);
-bool validate_ra(Program* program, const struct radv_nir_compiler_options *options, FILE *output);
+bool validate_ir(Program* program);
+bool validate_ra(Program* program, const struct radv_nir_compiler_options *options);
#ifndef NDEBUG
-void perfwarn(bool cond, const char *msg, Instruction *instr=NULL);
+void perfwarn(Program *program, bool cond, const char *msg, Instruction *instr=NULL);
#else
#define perfwarn(program, cond, msg, ...) do {} while(0)
#endif
void aco_print_instr(const Instruction *instr, FILE *output);
void aco_print_program(const Program *program, FILE *output);
+void _aco_perfwarn(Program *program, const char *file, unsigned line,
+ const char *fmt, ...);
+void _aco_err(Program *program, const char *file, unsigned line,
+ const char *fmt, ...);
+
+#define aco_perfwarn(program, ...) _aco_perfwarn(program, __FILE__, __LINE__, __VA_ARGS__)
+#define aco_err(program, ...) _aco_err(program, __FILE__, __LINE__, __VA_ARGS__)
+
/* utilities for dealing with register demand */
RegisterDemand get_live_changes(aco_ptr<Instruction>& instr);
RegisterDemand get_temp_registers(aco_ptr<Instruction>& instr);