radv: handle loading from shared pointers
[mesa.git] / src / amd / common / ac_llvm_build.h
index c583240e14bd86f2f76a4fc6552b74ce99720390..e90c8c21ad4ebf2dd462cd08616542357c57d9ca 100644 (file)
@@ -37,12 +37,20 @@ extern "C" {
 #define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
 
 enum {
-       /* CONST is the only address space that selects SMEM loads */
-       AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2,
-       AC_LOCAL_ADDR_SPACE = 3,
-       AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */
+       AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+       AC_ADDR_SPACE_GLOBAL = 1,
+       AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+       AC_ADDR_SPACE_LDS = 3,
+       AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+       AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
 };
 
+/* Combine these with & instead of |. */
+#define NOOP_WAITCNT   0xcf7f
+#define LGKM_CNT       0xc07f
+#define EXP_CNT                0xcf0f
+#define VM_CNT         0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+
 struct ac_llvm_flow;
 
 struct ac_llvm_context {
@@ -68,6 +76,8 @@ struct ac_llvm_context {
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
 
+       LLVMValueRef i16_0;
+       LLVMValueRef i16_1;
        LLVMValueRef i32_0;
        LLVMValueRef i32_1;
        LLVMValueRef i64_0;
@@ -97,7 +107,7 @@ struct ac_llvm_context {
 };
 
 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
                     enum chip_class chip_class, enum radeon_family family);
 
 void
@@ -118,6 +128,7 @@ unsigned ac_get_type_size(LLVMTypeRef type);
 
 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
 
@@ -133,6 +144,7 @@ ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
             unsigned count_incoming, LLVMValueRef *values,
             LLVMBasicBlockRef *blocks);
 
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
                                   LLVMValueRef *pvgpr);
 
@@ -161,15 +173,36 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
                       LLVMValueRef *values,
                       unsigned value_count);
+LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
+                            LLVMValueRef value,
+                            unsigned src_channels, unsigned dst_channels);
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
                                     LLVMValueRef value,
                                     unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
              LLVMValueRef num,
              LLVMValueRef den);
 
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+                               LLVMValueRef num,
+                               LLVMValueRef multiplier,
+                               LLVMValueRef pre_shift,
+                               LLVMValueRef post_shift,
+                               LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+                                   LLVMValueRef num,
+                                   LLVMValueRef multiplier,
+                                   LLVMValueRef pre_shift,
+                                   LLVMValueRef post_shift,
+                                   LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+                                             LLVMValueRef num,
+                                             LLVMValueRef multiplier,
+                                             LLVMValueRef post_shift);
+
 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
                       bool is_deriv, bool is_array, bool is_lod,
@@ -196,6 +229,8 @@ LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
              LLVMValueRef base_ptr,
              LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+                                 LLVMValueRef index);
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
@@ -208,6 +243,8 @@ LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
                                     LLVMValueRef base_ptr, LLVMValueRef index);
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
                                   LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
@@ -252,6 +289,15 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
                                                   bool glc,
                                                   bool can_speculate);
 
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+                           LLVMValueRef rsrc,
+                           LLVMValueRef vindex,
+                           LLVMValueRef voffset,
+                               LLVMValueRef soffset,
+                               LLVMValueRef immoffset,
+                               LLVMValueRef glc);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
@@ -313,25 +359,63 @@ enum ac_image_opcode {
        ac_image_gather4,
        ac_image_load,
        ac_image_load_mip,
+       ac_image_store,
+       ac_image_store_mip,
        ac_image_get_lod,
        ac_image_get_resinfo,
+       ac_image_atomic,
+       ac_image_atomic_cmpswap,
+};
+
+enum ac_atomic_op {
+       ac_atomic_swap,
+       ac_atomic_add,
+       ac_atomic_sub,
+       ac_atomic_smin,
+       ac_atomic_umin,
+       ac_atomic_smax,
+       ac_atomic_umax,
+       ac_atomic_and,
+       ac_atomic_or,
+       ac_atomic_xor,
+};
+
+enum ac_image_dim {
+       ac_image_1d,
+       ac_image_2d,
+       ac_image_3d,
+       ac_image_cube, // includes cube arrays
+       ac_image_1darray,
+       ac_image_2darray,
+       ac_image_2dmsaa,
+       ac_image_2darraymsaa,
+};
+
+/* These cache policy bits match the definitions used by the LLVM intrinsics. */
+enum ac_image_cache_policy {
+       ac_glc = 1 << 0,
+       ac_slc = 1 << 1,
 };
 
 struct ac_image_args {
-       enum ac_image_opcode opcode;
-       bool level_zero;
-       bool bias;
-       bool lod;
-       bool deriv;
-       bool compare;
-       bool offset;
+       enum ac_image_opcode opcode : 4;
+       enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
+       enum ac_image_dim dim : 3;
+       unsigned dmask : 4;
+       unsigned cache_policy : 2;
+       bool unorm : 1;
+       bool level_zero : 1;
+       unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
 
        LLVMValueRef resource;
        LLVMValueRef sampler;
-       LLVMValueRef addr;
-       unsigned dmask;
-       bool unorm;
-       bool da;
+       LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
+       LLVMValueRef offset;
+       LLVMValueRef bias;
+       LLVMValueRef compare;
+       LLVMValueRef derivs[6];
+       LLVMValueRef coords[4];
+       LLVMValueRef lod; // also used by ac_image_get_resinfo
 };
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
@@ -351,6 +435,10 @@ void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
                          LLVMValueRef offset, LLVMValueRef width,
                          bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+                          LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+                          LLVMValueRef s1, LLVMValueRef s2);
 
 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
 
@@ -363,11 +451,10 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
                            unsigned bitsize);
 
-void ac_get_image_intr_name(const char *base_name,
-                           LLVMTypeRef data_type,
-                           LLVMTypeRef coords_type,
-                           LLVMTypeRef rsrc_type,
-                           char *out_name, unsigned out_len);
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+                                      LLVMValueRef src0);
 
 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
                            LLVMValueRef main_fn,