radv: handle loading from shared pointers
[mesa.git] / src / amd / common / ac_llvm_build.h
index 026955a5556b591c8775735f49ff2a16b85b2d78..e90c8c21ad4ebf2dd462cd08616542357c57d9ca 100644 (file)
@@ -37,12 +37,20 @@ extern "C" {
 #define HAVE_32BIT_POINTERS (HAVE_LLVM >= 0x0700)
 
 enum {
-       /* CONST is the only address space that selects SMEM loads */
-       AC_CONST_ADDR_SPACE = HAVE_LLVM >= 0x700 ? 4 : 2,
-       AC_LOCAL_ADDR_SPACE = 3,
-       AC_CONST_32BIT_ADDR_SPACE = 6, /* same as CONST, but the pointer type has 32 bits */
+       AC_ADDR_SPACE_FLAT = HAVE_LLVM >= 0x0700 ? 0 : 4, /* Slower than global. */
+       AC_ADDR_SPACE_GLOBAL = 1,
+       AC_ADDR_SPACE_GDS = HAVE_LLVM >= 0x0700 ? 2 : 5,
+       AC_ADDR_SPACE_LDS = 3,
+       AC_ADDR_SPACE_CONST = HAVE_LLVM >= 0x0700 ? 4 : 2, /* Global allowing SMEM. */
+       AC_ADDR_SPACE_CONST_32BIT = 6, /* Same as CONST, but the pointer type has 32 bits. */
 };
 
+/* Each mask is NOOP_WAITCNT with one counter's bits cleared, so combine them with & instead of |. */
+#define NOOP_WAITCNT   0xcf7f
+#define LGKM_CNT       0xc07f
+#define EXP_CNT                0xcf0f
+#define VM_CNT         0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+
 struct ac_llvm_flow;
 
 struct ac_llvm_context {
@@ -68,6 +76,8 @@ struct ac_llvm_context {
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
 
+       LLVMValueRef i16_0;
+       LLVMValueRef i16_1;
        LLVMValueRef i32_0;
        LLVMValueRef i32_1;
        LLVMValueRef i64_0;
@@ -97,7 +107,7 @@ struct ac_llvm_context {
 };
 
 void
-ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context,
+ac_llvm_context_init(struct ac_llvm_context *ctx,
                     enum chip_class chip_class, enum radeon_family family);
 
 void
@@ -118,6 +128,7 @@ unsigned ac_get_type_size(LLVMTypeRef type);
 
 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
+LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
 
@@ -133,6 +144,7 @@ ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
             unsigned count_incoming, LLVMValueRef *values,
             LLVMBasicBlockRef *blocks);
 
+void ac_build_s_barrier(struct ac_llvm_context *ctx);
 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
                                   LLVMValueRef *pvgpr);
 
@@ -161,15 +173,36 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
                       LLVMValueRef *values,
                       unsigned value_count);
+LLVMValueRef ac_build_expand(struct ac_llvm_context *ctx,
+                            LLVMValueRef value,
+                            unsigned src_channels, unsigned dst_channels);
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
                                     LLVMValueRef value,
                                     unsigned num_channels);
+LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
 
 LLVMValueRef
 ac_build_fdiv(struct ac_llvm_context *ctx,
              LLVMValueRef num,
              LLVMValueRef den);
 
+LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
+                               LLVMValueRef num,
+                               LLVMValueRef multiplier,
+                               LLVMValueRef pre_shift,
+                               LLVMValueRef post_shift,
+                               LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
+                                   LLVMValueRef num,
+                                   LLVMValueRef multiplier,
+                                   LLVMValueRef pre_shift,
+                                   LLVMValueRef post_shift,
+                                   LLVMValueRef increment);
+LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
+                                             LLVMValueRef num,
+                                             LLVMValueRef multiplier,
+                                             LLVMValueRef post_shift);
+
 void
 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
                       bool is_deriv, bool is_array, bool is_lod,
@@ -196,6 +229,8 @@ LLVMValueRef
 ac_build_gep0(struct ac_llvm_context *ctx,
              LLVMValueRef base_ptr,
              LLVMValueRef index);
+LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
+                                 LLVMValueRef index);
 
 void
 ac_build_indexed_store(struct ac_llvm_context *ctx,
@@ -208,6 +243,8 @@ LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
                                     LLVMValueRef base_ptr, LLVMValueRef index);
 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
                                   LLVMValueRef base_ptr, LLVMValueRef index);
+LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
+                                  LLVMValueRef base_ptr, LLVMValueRef index);
 
 void
 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
@@ -252,6 +289,15 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
                                                   bool glc,
                                                   bool can_speculate);
 
+LLVMValueRef
+ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
+                           LLVMValueRef rsrc,
+                           LLVMValueRef vindex,
+                           LLVMValueRef voffset,
+                               LLVMValueRef soffset,
+                               LLVMValueRef immoffset,
+                               LLVMValueRef glc);
+
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
 
@@ -389,6 +435,10 @@ void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
                          LLVMValueRef offset, LLVMValueRef width,
                          bool is_signed);
+LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+                          LLVMValueRef s1, LLVMValueRef s2);
+LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
+                          LLVMValueRef s1, LLVMValueRef s2);
 
 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
 
@@ -401,6 +451,11 @@ LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
                            unsigned bitsize);
 
+LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
+
+LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
+                                      LLVMValueRef src0);
+
 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
                            LLVMValueRef main_fn,
                            uint8_t *vs_output_param_offset,