#define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
enum {
- /* CONST is the only address space that selects SMEM loads */
- AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2,
- AC_LOCAL_ADDR_SPACE = 3,
- AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */
+ AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+ AC_ADDR_SPACE_GLOBAL = 1,
+ AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+ AC_ADDR_SPACE_LDS = 3,
+ AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+	AC_ADDR_SPACE_CONST_32BIT = 6, /* Same as CONST, but the pointer type has 32 bits. */
};
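/* Usage sketch (hedged; elem_type is a placeholder, not part of this header):
 * these address spaces are selected purely through the LLVM pointer type,
 * e.g. a 32-bit constant pointer type would be built as
 *
 *    LLVMTypeRef ptr_type =
 *       LLVMPointerType(elem_type, AC_ADDR_SPACE_CONST_32BIT);
 */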
+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT 0xcf7f
+#define LGKM_CNT 0xc07f
+#define EXP_CNT 0xcf0f
+#define VM_CNT 0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
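/* Example (illustrative only): to wait on both the LGKM and VM counters,
 * AND the masks together and pass the result to ac_build_waitcnt(), declared
 * below, e.g. ac_build_waitcnt(ctx, LGKM_CNT & VM_CNT). NOOP_WAITCNT waits
 * on nothing.
 */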
+
struct ac_llvm_flow;
struct ac_llvm_context {
LLVMTypeRef v4f32;
LLVMTypeRef v8i32;
+ LLVMValueRef i16_0;
+ LLVMValueRef i16_1;
LLVMValueRef i32_0;
LLVMValueRef i32_1;
LLVMValueRef i64_0;
};
void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
enum chip_class chip_class, enum radeon_family family);
LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
unsigned count_incoming, LLVMValueRef *values,
LLVMBasicBlockRef *blocks);
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
LLVMValueRef *pvgpr);
ac_build_gather_values(struct ac_llvm_context *ctx,
LLVMValueRef *values,
unsigned value_count);
+LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
+ LLVMValueRef value,
+ unsigned src_channels, unsigned dst_channels);
LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
LLVMValueRef value,
unsigned num_channels);
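/* Sketch, assuming the usual behavior of these expand helpers (channels
 * beyond src_channels are filled with undef):
 *
 *    LLVMValueRef rg   = ...;                                  // <2 x float>
 *    LLVMValueRef rgba = ac_build_expand_to_vec4(ctx, rg, 2);  // <4 x float>
 */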
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
LLVMValueRef
ac_build_fdiv(struct ac_llvm_context *ctx,
LLVMValueRef num,
LLVMValueRef den);
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef pre_shift,
+ LLVMValueRef post_shift,
+ LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+ LLVMValueRef num,
+ LLVMValueRef multiplier,
+ LLVMValueRef post_shift);
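/* Hedged summary: these emit the standard divide-by-invariant-unsigned-integer
 * sequence using a precomputed (pre_shift, multiplier, increment, post_shift)
 * tuple, roughly
 *
 *    quotient = (uint32_t)((((uint64_t)(num >> pre_shift)) * multiplier
 *                           + increment) >> 32) >> post_shift;
 *
 * The _nuw and _u31_d_not_one variants omit steps that are redundant when the
 * corresponding preconditions on num and the divisor hold; see the
 * implementation for the exact IR that is emitted.
 */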
+
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
bool is_deriv, bool is_array, bool is_lod,
ac_build_gep0(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr,
LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+ LLVMValueRef index);
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+ LLVMValueRef base_ptr, LLVMValueRef index);
void
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
bool glc,
bool can_speculate);
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+ LLVMValueRef rsrc,
+ LLVMValueRef vindex,
+ LLVMValueRef voffset,
+ LLVMValueRef soffset,
+ LLVMValueRef immoffset,
+ LLVMValueRef glc);
+
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx);
ac_image_gather4,
ac_image_load,
ac_image_load_mip,
+ ac_image_store,
+ ac_image_store_mip,
ac_image_get_lod,
ac_image_get_resinfo,
+ ac_image_atomic,
+ ac_image_atomic_cmpswap,
+};
+
+enum ac_atomic_op {
+ ac_atomic_swap,
+ ac_atomic_add,
+ ac_atomic_sub,
+ ac_atomic_smin,
+ ac_atomic_umin,
+ ac_atomic_smax,
+ ac_atomic_umax,
+ ac_atomic_and,
+ ac_atomic_or,
+ ac_atomic_xor,
+};
+
+enum ac_image_dim {
+ ac_image_1d,
+ ac_image_2d,
+ ac_image_3d,
+ ac_image_cube, // includes cube arrays
+ ac_image_1darray,
+ ac_image_2darray,
+ ac_image_2dmsaa,
+ ac_image_2darraymsaa,
+};
+
+/* These cache policy bits match the definitions used by the LLVM intrinsics. */
+enum ac_image_cache_policy {
+ ac_glc = 1 << 0,
+ ac_slc = 1 << 1,
};
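/* Both bits can be set at once on a call site, e.g. (illustrative only):
 *
 *    args.cache_policy = ac_glc | ac_slc;
 *
 * where args is a struct ac_image_args, declared below.
 */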
struct ac_image_args {
- enum ac_image_opcode opcode;
- bool level_zero;
- bool bias;
- bool lod;
- bool deriv;
- bool compare;
- bool offset;
+ enum ac_image_opcode opcode : 4;
+ enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
+ enum ac_image_dim dim : 3;
+ unsigned dmask : 4;
+ unsigned cache_policy : 2;
+ bool unorm : 1;
+ bool level_zero : 1;
+ unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
LLVMValueRef resource;
LLVMValueRef sampler;
- LLVMValueRef addr;
- unsigned dmask;
- bool unorm;
- bool da;
+ LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
+ LLVMValueRef offset;
+ LLVMValueRef bias;
+ LLVMValueRef compare;
+ LLVMValueRef derivs[6];
+ LLVMValueRef coords[4];
+ LLVMValueRef lod; // also used by ac_image_get_resinfo
};
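/* Typical usage sketch for the restructured ac_image_args (rsrc, x and y are
 * hypothetical values; a plain 2D load with all four channels enabled):
 *
 *    struct ac_image_args args = {0};
 *    args.opcode    = ac_image_load;
 *    args.dim       = ac_image_2d;
 *    args.dmask     = 0xf;
 *    args.resource  = rsrc;
 *    args.coords[0] = x;
 *    args.coords[1] = y;
 *    LLVMValueRef result = ac_build_image_opcode(ctx, &args);
 */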
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
LLVMValueRef offset, LLVMValueRef width,
bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+ LLVMValueRef s1, LLVMValueRef s2);
void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
unsigned bitsize);
-void ac_get_image_intr_name(const char *base_name,
- LLVMTypeRef data_type,
- LLVMTypeRef coords_type,
- LLVMTypeRef rsrc_type,
- char *out_name, unsigned out_len);
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+ LLVMValueRef src0);
void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
LLVMValueRef main_fn,