#define AC_LLVM_BUILD_H
#include <stdbool.h>
-#include <llvm-c/TargetMachine.h>
+#include <llvm-c/Core.h>
#include "compiler/nir/nir.h"
#include "amd_family.h"
extern "C" {
#endif
-#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
-
enum {
- AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+ AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
AC_ADDR_SPACE_GLOBAL = 1,
- AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+ AC_ADDR_SPACE_GDS = 2,
AC_ADDR_SPACE_LDS = 3,
- AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+ AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
-/* Combine these with & instead of |. */
-#define NOOP_WAITCNT 0xcf7f
-#define LGKM_CNT 0xc07f
-#define EXP_CNT 0xcf0f
-#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define AC_WAIT_LGKM (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_VLOAD (1 << 1) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
struct ac_llvm_flow;
LLVMTypeRef v3i32;
LLVMTypeRef v4i32;
LLVMTypeRef v2f32;
+ LLVMTypeRef v3f32;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
+ LLVMValueRef i8_0;
+ LLVMValueRef i8_1;
LLVMValueRef i16_0;
LLVMValueRef i16_1;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef i64_0;
LLVMValueRef i64_1;
+ LLVMValueRef f16_0;
+ LLVMValueRef f16_1;
LLVMValueRef f32_0;
LLVMValueRef f32_1;
LLVMValueRef f64_0;
enum chip_class chip_class;
enum radeon_family family;
+ unsigned wave_size;
LLVMValueRef lds;
};
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
- enum chip_class chip_class, enum radeon_family family);
+ enum chip_class chip_class, enum radeon_family family,
+ unsigned wave_size);
void
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
+LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
+ LLVMValueRef value);
LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count);
-LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
- LLVMValueRef value,
- unsigned src_channels, unsigned dst_channels);
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
LLVMValueRef value,
unsigned num_channels);
LLVMValueRef i,
LLVMValueRef j);
+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+ LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number,
+ LLVMValueRef params,
+ LLVMValueRef i,
+ LLVMValueRef j);
+
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
LLVMValueRef attr_number,
LLVMValueRef params);
+LLVMValueRef
+ac_build_gep_ptr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr,
+ LLVMValueRef index);
+
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- bool glc,
- bool slc,
- bool writeonly_memory,
+ unsigned cache_policy,
bool swizzle_enable_hint);
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef data,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ unsigned cache_policy);
+
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
LLVMValueRef voffset,
LLVMValueRef soffset,
unsigned inst_offset,
- unsigned glc,
- unsigned slc,
+ unsigned cache_policy,
bool can_speculate,
bool allow_smem);
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
+ unsigned cache_policy,
bool can_speculate);
/* load_format that handles the stride & element count better if idxen is
LLVMValueRef vindex,
LLVMValueRef voffset,
unsigned num_channels,
- bool glc,
+ unsigned cache_policy,
bool can_speculate);
LLVMValueRef
ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
- LLVMValueRef vindex,
LLVMValueRef voffset,
- LLVMValueRef soffset,
- LLVMValueRef immoffset,
- LLVMValueRef glc);
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned cache_policy);
+
+LLVMValueRef
+ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned cache_policy);
+
+LLVMValueRef
+ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy,
+ bool can_speculate);
+
+LLVMValueRef
+ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy,
+ bool can_speculate);
+
+/* For ac_build_fetch_format.
+ *
+ * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
+ */
+enum {
+ AC_FETCH_FORMAT_FLOAT = 0,
+ AC_FETCH_FORMAT_FIXED,
+ AC_FETCH_FORMAT_UNORM,
+ AC_FETCH_FORMAT_SNORM,
+ AC_FETCH_FORMAT_USCALED,
+ AC_FETCH_FORMAT_SSCALED,
+ AC_FETCH_FORMAT_UINT,
+ AC_FETCH_FORMAT_SINT,
+};
+
+LLVMValueRef
+ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
+ unsigned log_size,
+ unsigned num_channels,
+ unsigned format,
+ bool reverse,
+ bool known_aligned,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy,
+ bool can_speculate);
+
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy);
+
+void
+ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ unsigned cache_policy);
+
+void
+ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy);
+
+void
+ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ unsigned cache_policy);
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
#define AC_SENDMSG_GS 2
#define AC_SENDMSG_GS_DONE 3
+#define AC_SENDMSG_GS_ALLOC_REQ 9
#define AC_SENDMSG_GS_OP_NOP (0 << 4)
#define AC_SENDMSG_GS_OP_CUT (1 << 4)
LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
struct ac_export_args {
/* These cache policy bits match the definitions used by the LLVM intrinsics. */
enum ac_image_cache_policy {
- ac_glc = 1 << 0,
- ac_slc = 1 << 1,
+ ac_glc = 1 << 0, /* per-CU cache control */
+ ac_slc = 1 << 1, /* global L2 cache control */
+ ac_dlc = 1 << 2, /* per-shader-array cache control */
};
struct ac_image_args {
enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
enum ac_image_dim dim : 3;
unsigned dmask : 4;
- unsigned cache_policy : 2;
+ unsigned cache_policy : 3;
bool unorm : 1;
bool level_zero : 1;
unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
LLVMValueRef s1, LLVMValueRef s2);
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
+LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1, LLVMValueRef src2,
+ unsigned bitsize);
+
LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
+void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
int lable_id);
void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
LLVMValueRef
ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
+/**
+ * Common arguments for a scan/reduce operation that accumulates per-wave
+ * values across an entire workgroup, while respecting the order of waves.
+ */
+struct ac_wg_scan {
+ bool enable_reduce;
+ bool enable_exclusive;
+ bool enable_inclusive;
+ nir_op op;
+ LLVMValueRef src; /* clobbered! */
+ LLVMValueRef result_reduce;
+ LLVMValueRef result_exclusive;
+ LLVMValueRef result_inclusive;
+ LLVMValueRef extra;
+ LLVMValueRef waveidx;
+ LLVMValueRef numwaves; /* only needed for "reduce" operations */
+
+ /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
+ LLVMValueRef scratch;
+ unsigned maxwaves;
+};
+
+void
+ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+
+void
+ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+
LLVMValueRef
ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
LLVMValueRef
ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
+LLVMValueRef
+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef
+ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef
+ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
+
+LLVMValueRef
+ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
+
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+ LLVMValueRef *args, unsigned num_args);
+
+LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
+ LLVMValueRef ptr, LLVMValueRef val,
+ const char *sync_scope);
+
+LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMValueRef cmp, LLVMValueRef val,
+ const char *sync_scope);
+
#ifdef __cplusplus
}
#endif