#include <llvm-c/Core.h>
#include "compiler/nir/nir.h"
#include "amd_family.h"
+#include "ac_shader_util.h"
#ifdef __cplusplus
extern "C" {
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
-/* Combine these with & instead of |. */
-#define NOOP_WAITCNT 0xcf7f
-#define LGKM_CNT 0xc07f
-#define EXP_CNT 0xcf0f
-#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
struct ac_llvm_flow;
+struct ac_llvm_compiler;
+enum ac_float_mode;
+
/* Control-flow (if/else/loop) stack, kept behind a pointer so that the
 * two ac_llvm_context copies made by ac_nir_translate share one stack
 * (see the comment at the `flow` member of ac_llvm_context).
 * NOTE(review): presumably `stack` is a growable array of ac_llvm_flow
 * entries, `depth` the current nesting level, and `depth_max` the
 * allocated capacity — confirm against the .c implementation.
 */
+struct ac_llvm_flow_state {
+ struct ac_llvm_flow *stack;
+ unsigned depth_max;
+ unsigned depth;
+};
/* Per-shader LLVM IR building context: cached LLVM type/constant handles,
 * metadata kind IDs, target (chip/family) info, wave configuration, and
 * the shared control-flow stack. Initialized by ac_llvm_context_init and
 * torn down by ac_llvm_context_dispose. */
struct ac_llvm_context {
LLVMContextRef context;
/* Commonly used vector types, cached so builders need not re-create them. */
LLVMTypeRef v3i32;
LLVMTypeRef v4i32;
LLVMTypeRef v2f32;
+ LLVMTypeRef v3f32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
/* NOTE(review): presumably iN integer types where N equals wave_size and
 * ballot_mask_bits respectively — confirm in ac_llvm_context_init. */
+ LLVMTypeRef iN_wavemask;
+ LLVMTypeRef iN_ballotmask;
/* Commonly used constants. */
LLVMValueRef i8_0;
LLVMValueRef i8_1;
LLVMValueRef i1true;
LLVMValueRef i1false;
- struct ac_llvm_flow *flow;
- unsigned flow_depth;
- unsigned flow_depth_max;
+ /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
+ * are two ac_llvm_contexts. Declare a pointer here, so that the control
+ * flow stack is shared by both ac_llvm_contexts.
+ */
+ struct ac_llvm_flow_state *flow;
/* LLVM metadata kind IDs, registered once at context init. */
unsigned range_md_kind;
unsigned invariant_load_md_kind;
enum chip_class chip_class;
enum radeon_family family;
/* NOTE(review): wave_size is presumably the lane count per wave (32/64)
 * and ballot_mask_bits the width used for ballot results — confirm. */
+ unsigned wave_size;
+ unsigned ballot_mask_bits;
+
/* NOTE(review): presumably the LDS base global for this shader — confirm. */
LLVMValueRef lds;
};
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
- enum chip_class chip_class, enum radeon_family family);
+ struct ac_llvm_compiler *compiler,
+ enum chip_class chip_class, enum radeon_family family,
+ enum ac_float_mode float_mode, unsigned wave_size,
+ unsigned ballot_mask_bits);
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count);
+
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned start,
+ unsigned channels);
+
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
LLVMValueRef value,
unsigned num_channels);
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool swizzle_enable_hint);
void
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
- bool writeonly_memory);
+ unsigned cache_policy);
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- unsigned glc,
- unsigned slc,
+ unsigned cache_policy,
bool can_speculate,
bool allow_smem);
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
+ unsigned cache_policy,
bool can_speculate);
-/* load_format that handles the stride & element count better if idxen is
- * disabled by LLVM. */
-LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
- LLVMValueRef rsrc,
- LLVMValueRef vindex,
- LLVMValueRef voffset,
- unsigned num_channels,
- bool glc,
- bool can_speculate);
-
LLVMValueRef
ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
- bool glc);
+ unsigned cache_policy);
LLVMValueRef
ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
LLVMValueRef voffset,
LLVMValueRef soffset,
LLVMValueRef immoffset,
- bool glc);
+ unsigned cache_policy);
LLVMValueRef
ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate);
LLVMValueRef
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
+ unsigned cache_policy,
bool can_speculate);
+/* For ac_build_fetch_format.
+ *
+ * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
+ */
+enum {
+ AC_FETCH_FORMAT_FLOAT = 0, /* IEEE float, no conversion */
+ AC_FETCH_FORMAT_FIXED, /* fixed-point, converted to float */
+ AC_FETCH_FORMAT_UNORM, /* unsigned normalized to [0, 1] */
+ AC_FETCH_FORMAT_SNORM, /* signed normalized to [-1, 1] */
+ AC_FETCH_FORMAT_USCALED, /* unsigned int converted to float */
+ AC_FETCH_FORMAT_SSCALED, /* signed int converted to float */
+ AC_FETCH_FORMAT_UINT, /* unsigned integer, no conversion */
+ AC_FETCH_FORMAT_SINT, /* signed integer, no conversion */
+};
+
+LLVMValueRef
+ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
+ unsigned log_size,
+ unsigned num_channels,
+ unsigned format,
+ bool reverse,
+ bool known_aligned,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy,
+ bool can_speculate);
+
void
ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef vdata,
LLVMValueRef voffset,
LLVMValueRef soffset,
- bool glc,
- bool writeonly_memory);
+ unsigned cache_policy);
void
ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
LLVMValueRef vdata,
LLVMValueRef voffset,
LLVMValueRef soffset,
- bool glc,
- bool writeonly_memory);
+ unsigned cache_policy);
void
ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
- bool writeonly_memory);
+ unsigned cache_policy);
void
ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
unsigned num_channels,
unsigned dfmt,
unsigned nfmt,
- bool glc,
- bool slc,
- bool writeonly_memory);
+ unsigned cache_policy);
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
#define AC_SENDMSG_GS 2
#define AC_SENDMSG_GS_DONE 3
+#define AC_SENDMSG_GS_ALLOC_REQ 9
#define AC_SENDMSG_GS_OP_NOP (0 << 4)
#define AC_SENDMSG_GS_OP_CUT (1 << 4)
ac_atomic_and,
ac_atomic_or,
ac_atomic_xor,
-};
-
-enum ac_image_dim {
- ac_image_1d,
- ac_image_2d,
- ac_image_3d,
- ac_image_cube, // includes cube arrays
- ac_image_1darray,
- ac_image_2darray,
- ac_image_2dmsaa,
- ac_image_2darraymsaa,
+ ac_atomic_inc_wrap,
+ ac_atomic_dec_wrap,
};
/* These cache policy bits match the definitions used by the LLVM intrinsics. */
/* NOTE(review): these values are stored in ac_image_args.cache_policy,
 * a 3-bit bit-field — widen that field if a fourth bit is ever added. */
enum ac_image_cache_policy {
- ac_glc = 1 << 0,
- ac_slc = 1 << 1,
+ ac_glc = 1 << 0, /* per-CU cache control */
+ ac_slc = 1 << 1, /* global L2 cache control */
+ ac_dlc = 1 << 2, /* per-shader-array cache control */
};
struct ac_image_args {
enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
enum ac_image_dim dim : 3;
unsigned dmask : 4;
- unsigned cache_policy : 2;
+ unsigned cache_policy : 3;
bool unorm : 1;
bool level_zero : 1;
unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
struct ac_image_args *a);
+LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc);
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
LLVMValueRef args[2]);
LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
LLVMValueRef s1, LLVMValueRef s2);
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
LLVMValueRef
ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+ LLVMValueRef *args, unsigned num_args);
+
LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
LLVMValueRef ptr, LLVMValueRef val,
const char *sync_scope);
LLVMValueRef cmp, LLVMValueRef val,
const char *sync_scope);
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+ LLVMValueRef stencil, LLVMValueRef samplemask,
+ struct ac_export_args *args);
+
#ifdef __cplusplus
}
#endif