#include <stdbool.h>
#include <llvm-c/TargetMachine.h>
-
+#include "compiler/nir/nir.h"
#include "amd_family.h"
#ifdef __cplusplus
#endif
enum {
- AC_CONST_ADDR_SPACE = 2, /* CONST is the only address space that selects SMEM loads */
- AC_LOCAL_ADDR_SPACE = 3,
+ AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
+ AC_ADDR_SPACE_GLOBAL = 1,
+ AC_ADDR_SPACE_GDS = 2,
+ AC_ADDR_SPACE_LDS = 3,
+ AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
+ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT 0xcf7f
+#define LGKM_CNT 0xc07f
+#define EXP_CNT 0xcf0f
+#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+
+struct ac_llvm_flow;
+
struct ac_llvm_context {
LLVMContextRef context;
LLVMModuleRef module;
LLVMTypeRef i16;
LLVMTypeRef i32;
LLVMTypeRef i64;
+ LLVMTypeRef intptr;
LLVMTypeRef f16;
LLVMTypeRef f32;
LLVMTypeRef f64;
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
+ LLVMValueRef i8_0;
+ LLVMValueRef i8_1;
+ LLVMValueRef i16_0;
+ LLVMValueRef i16_1;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef i64_0;
LLVMValueRef i64_1;
+ LLVMValueRef f16_0;
+ LLVMValueRef f16_1;
LLVMValueRef f32_0;
LLVMValueRef f32_1;
LLVMValueRef f64_0;
LLVMValueRef i1true;
LLVMValueRef i1false;
+ struct ac_llvm_flow *flow;
+ unsigned flow_depth;
+ unsigned flow_depth_max;
+
unsigned range_md_kind;
unsigned invariant_load_md_kind;
unsigned uniform_md_kind;
};
void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
enum chip_class chip_class, enum radeon_family family);
+void
+ac_llvm_context_dispose(struct ac_llvm_context *ctx);
+
int
ac_get_llvm_num_components(LLVMValueRef value);
+int
+ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
+
LLVMValueRef
ac_llvm_extract_elem(struct ac_llvm_context *ac,
LLVMValueRef value,
LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
unsigned count_incoming, LLVMValueRef *values,
LLVMBasicBlockRef *blocks);
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
LLVMValueRef *pvgpr);
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
LLVMValueRef value,
unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
LLVMValueRef
ac_build_fdiv(struct ac_llvm_context *ctx,
LLVMValueRef num,
LLVMValueRef den);
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef post_shift);
+
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
bool is_deriv, bool is_array, bool is_lod,
LLVMValueRef i,
LLVMValueRef j);
+LLVMValueRef
+ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
+ LLVMValueRef llvm_chan,
+ LLVMValueRef attr_number,
+ LLVMValueRef params,
+ LLVMValueRef i,
+ LLVMValueRef j);
+
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
LLVMValueRef parameter,
LLVMValueRef attr_number,
LLVMValueRef params);
+LLVMValueRef
+ac_build_gep_ptr(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr,
+ LLVMValueRef index);
+
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,
LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMValueRef index);
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
void
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
bool slc,
bool writeonly_memory,
bool swizzle_enable_hint);
+
+void
+ac_build_buffer_store_format(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef data,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ bool glc,
+ bool writeonly_memory);
+
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
LLVMValueRef rsrc,
bool glc,
bool can_speculate);
+/* load_format that handles the stride & element count better if idxen is
+ * disabled by LLVM. */
+LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ unsigned num_channels,
+ bool glc,
+ bool can_speculate);
+
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ bool glc);
+
+LLVMValueRef
+ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ bool glc);
+
+LLVMValueRef
+ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool can_speculate);
+
+LLVMValueRef
+ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool can_speculate);
+
+void
+ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ bool glc,
+ bool writeonly_memory);
+
+void
+ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ bool glc,
+ bool writeonly_memory);
+
+void
+ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory);
+
+void
+ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vdata,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ unsigned num_channels,
+ unsigned dfmt,
+ unsigned nfmt,
+ bool glc,
+ bool slc,
+ bool writeonly_memory);
+
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
LLVMValueRef b);
LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
+LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
struct ac_export_args {
void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);
+void ac_build_export_null(struct ac_llvm_context *ctx);
+
enum ac_image_opcode {
ac_image_sample,
ac_image_gather4,
ac_image_load,
ac_image_load_mip,
+ ac_image_store,
+ ac_image_store_mip,
ac_image_get_lod,
ac_image_get_resinfo,
+ ac_image_atomic,
+ ac_image_atomic_cmpswap,
+};
+
+enum ac_atomic_op {
+ ac_atomic_swap,
+ ac_atomic_add,
+ ac_atomic_sub,
+ ac_atomic_smin,
+ ac_atomic_umin,
+ ac_atomic_smax,
+ ac_atomic_umax,
+ ac_atomic_and,
+ ac_atomic_or,
+ ac_atomic_xor,
+};
+
+enum ac_image_dim {
+ ac_image_1d,
+ ac_image_2d,
+ ac_image_3d,
+ ac_image_cube, // includes cube arrays
+ ac_image_1darray,
+ ac_image_2darray,
+ ac_image_2dmsaa,
+ ac_image_2darraymsaa,
+};
+
+/* These cache policy bits match the definitions used by the LLVM intrinsics. */
+enum ac_image_cache_policy {
+ ac_glc = 1 << 0,
+ ac_slc = 1 << 1,
};
struct ac_image_args {
- enum ac_image_opcode opcode;
- bool level_zero;
- bool bias;
- bool lod;
- bool deriv;
- bool compare;
- bool offset;
+ enum ac_image_opcode opcode : 4;
+ enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
+ enum ac_image_dim dim : 3;
+ unsigned dmask : 4;
+ unsigned cache_policy : 2;
+ bool unorm : 1;
+ bool level_zero : 1;
+ unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
LLVMValueRef resource;
LLVMValueRef sampler;
- LLVMValueRef addr;
- unsigned dmask;
- bool unorm;
- bool da;
+ LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
+ LLVMValueRef offset;
+ LLVMValueRef bias;
+ LLVMValueRef compare;
+ LLVMValueRef derivs[6];
+ LLVMValueRef coords[4];
+ LLVMValueRef lod; // also used by ac_image_get_resinfo
};
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
LLVMValueRef offset, LLVMValueRef width,
bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
-void ac_get_image_intr_name(const char *base_name,
- LLVMTypeRef data_type,
- LLVMTypeRef coords_type,
- LLVMTypeRef rsrc_type,
- char *out_name, unsigned out_len);
+LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ LLVMValueRef src1, LLVMValueRef src2,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+ LLVMValueRef src0);
void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
LLVMValueRef main_fn,
LLVMValueRef src0);
LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
+LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
+
+void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
+void ac_build_break(struct ac_llvm_context *ctx);
+void ac_build_continue(struct ac_llvm_context *ctx);
+void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
+void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
+void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
+void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
+void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
+ int lable_id);
+void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
+ int lable_id);
+
+LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
+ const char *name);
+LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
+ const char *name);
+
+LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMTypeRef type);
+
+LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
+ unsigned count);
+
+LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
+ unsigned rshift, unsigned bitwidth);
+
+void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
+ LLVMValueRef *addr, bool is_array_tex);
+
+LLVMValueRef
+ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
+
+LLVMValueRef
+ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
+
+LLVMValueRef
+ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
+
+LLVMValueRef
+ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
+
+LLVMValueRef
+ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
+
+LLVMValueRef
+ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
+
+LLVMValueRef
+ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
+
+/**
+ * Common arguments for a scan/reduce operation that accumulates per-wave
+ * values across an entire workgroup, while respecting the order of waves.
+ */
+struct ac_wg_scan {
+ bool enable_reduce;
+ bool enable_exclusive;
+ bool enable_inclusive;
+ nir_op op;
+ LLVMValueRef src; /* clobbered! */
+ LLVMValueRef result_reduce;
+ LLVMValueRef result_exclusive;
+ LLVMValueRef result_inclusive;
+ LLVMValueRef extra;
+ LLVMValueRef waveidx;
+ LLVMValueRef numwaves; /* only needed for "reduce" operations */
+
+ /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
+ LLVMValueRef scratch;
+ unsigned maxwaves;
+};
+
+void
+ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+
+void
+ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+void
+ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
+
+LLVMValueRef
+ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
+ unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
+
+LLVMValueRef
+ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
+
+LLVMValueRef
+ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef
+ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
+ unsigned bitsize);
+
+LLVMValueRef
+ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
+
+LLVMValueRef
+ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
#ifdef __cplusplus
}