#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
enum {
- /* CONST is the only address space that selects SMEM loads */
- AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2,
- AC_LOCAL_ADDR_SPACE = 3,
- AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */
+ AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+ AC_ADDR_SPACE_GLOBAL = 1,
+ AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+ AC_ADDR_SPACE_LDS = 3,
+ AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+ AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT 0xcf7f
+#define LGKM_CNT 0xc07f
+#define EXP_CNT 0xcf0f
+#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+
struct ac_llvm_flow;
struct ac_llvm_context {
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
+ LLVMValueRef i16_0;
+ LLVMValueRef i16_1;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef i64_0;
};
void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
enum chip_class chip_class, enum radeon_family family);
void
LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
unsigned count_incoming, LLVMValueRef *values,
LLVMBasicBlockRef *blocks);
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
LLVMValueRef *pvgpr);
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count);
+LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned src_channels, unsigned dst_channels);
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
LLVMValueRef value,
unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
LLVMValueRef
ac_build_fdiv(struct ac_llvm_context *ctx,
LLVMValueRef num,
LLVMValueRef den);
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef post_shift);
+
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
bool is_deriv, bool is_array, bool is_lod,
ac_build_gep0(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,
LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMValueRef index);
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
void
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
bool glc,
bool can_speculate);
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ LLVMValueRef glc);
+
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
LLVMValueRef offset, LLVMValueRef width,
bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+ LLVMValueRef src0);
+
void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
LLVMValueRef main_fn,
uint8_t *vs_output_param_offset,