ac: add ac_build_image_get_sample_count from radeonsi
[mesa.git] / src / amd / common / ac_llvm_build.h
index 98f856106d67ec0615f964e0479160bd356e8e9c..013bf00041ac5876db24c1624b09f57abbfd1010 100644 (file)
@@ -29,6 +29,7 @@
 #include <llvm-c/Core.h>
 #include "compiler/nir/nir.h"
 #include "amd_family.h"
+#include "ac_shader_util.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -43,13 +44,19 @@ enum {
        AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
 };
 
-/* Combine these with & instead of |. */
-#define NOOP_WAITCNT   0xcf7f
-#define LGKM_CNT       0xc07f
-#define EXP_CNT                0xcf0f
-#define VM_CNT         0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define AC_WAIT_LGKM   (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_VLOAD  (1 << 1) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
 
 struct ac_llvm_flow;
+struct ac_llvm_compiler;
+enum ac_float_mode; /* NOTE(review): ISO C has no forward declaration of enums (this form is a GNU extension) - confirm every compiler of this header accepts it */
+
+struct ac_llvm_flow_state { /* control-flow stack bookkeeping, referenced through the `flow` pointer of ac_llvm_context */
+       struct ac_llvm_flow *stack; /* the control-flow stack itself (previously the `flow` member of ac_llvm_context) */
+       unsigned depth_max; /* replaces flow_depth_max; presumably the allocated capacity of `stack` - confirm in ac_llvm_build.c */
+       unsigned depth; /* replaces flow_depth; presumably the number of entries currently in use - confirm */
+};
 
 struct ac_llvm_context {
        LLVMContextRef context;
@@ -71,8 +78,11 @@ struct ac_llvm_context {
        LLVMTypeRef v3i32;
        LLVMTypeRef v4i32;
        LLVMTypeRef v2f32;
+       LLVMTypeRef v3f32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v8i32;
+       LLVMTypeRef iN_wavemask;
+       LLVMTypeRef iN_ballotmask;
 
        LLVMValueRef i8_0;
        LLVMValueRef i8_1;
@@ -91,9 +101,11 @@ struct ac_llvm_context {
        LLVMValueRef i1true;
        LLVMValueRef i1false;
 
-       struct ac_llvm_flow *flow;
-       unsigned flow_depth;
-       unsigned flow_depth_max;
+       /* Since ac_nir_translate makes a local copy of ac_llvm_context, there
+        * are two ac_llvm_contexts. Declare a pointer here, so that the control
+        * flow stack is shared by both ac_llvm_contexts.
+        */
+       struct ac_llvm_flow_state *flow;
 
        unsigned range_md_kind;
        unsigned invariant_load_md_kind;
@@ -105,12 +117,18 @@ struct ac_llvm_context {
        enum chip_class chip_class;
        enum radeon_family family;
 
+       unsigned wave_size;
+       unsigned ballot_mask_bits;
+
        LLVMValueRef lds;
 };
 
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx,
-                    enum chip_class chip_class, enum radeon_family family);
+                    struct ac_llvm_compiler *compiler,
+                    enum chip_class chip_class, enum radeon_family family,
+                    enum ac_float_mode float_mode, unsigned wave_size,
+                    unsigned ballot_mask_bits);
 
 void
 ac_llvm_context_dispose(struct ac_llvm_context *ctx);
@@ -177,6 +195,13 @@ LLVMValueRef
 ac_build_gather_values(struct ac_llvm_context *ctx,
                       LLVMValueRef *values,
                       unsigned value_count);
+
+LLVMValueRef
+ac_extract_components(struct ac_llvm_context *ctx,
+                     LLVMValueRef value,
+                     unsigned start, /* presumably the index of the first component taken from `value` - confirm */
+                     unsigned channels); /* presumably the number of components taken - confirm */
+
 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
                                     LLVMValueRef value,
                                     unsigned num_channels);
@@ -268,9 +293,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
-                           bool glc,
-                           bool slc,
-                           bool writeonly_memory,
+                           unsigned cache_policy,
                            bool swizzle_enable_hint);
 
 void
@@ -280,8 +303,7 @@ ac_build_buffer_store_format(struct ac_llvm_context *ctx,
                             LLVMValueRef vindex,
                             LLVMValueRef voffset,
                             unsigned num_channels,
-                            bool glc,
-                            bool writeonly_memory);
+                            unsigned cache_policy);
 
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
@@ -291,8 +313,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
-                    unsigned glc,
-                    unsigned slc,
+                    unsigned cache_policy,
                     bool can_speculate,
                     bool allow_smem);
 
@@ -301,26 +322,16 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef vindex,
                                         LLVMValueRef voffset,
                                         unsigned num_channels,
-                                        bool glc,
+                                        unsigned cache_policy,
                                         bool can_speculate);
 
-/* load_format that handles the stride & element count better if idxen is
- * disabled by LLVM. */
-LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
-                                                  LLVMValueRef rsrc,
-                                                  LLVMValueRef vindex,
-                                                  LLVMValueRef voffset,
-                                                  unsigned num_channels,
-                                                  bool glc,
-                                                  bool can_speculate);
-
 LLVMValueRef
 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
                            LLVMValueRef rsrc,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            LLVMValueRef immoffset,
-                           bool glc);
+                           unsigned cache_policy);
 
 LLVMValueRef
 ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
@@ -328,7 +339,7 @@ ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
                           LLVMValueRef voffset,
                           LLVMValueRef soffset,
                           LLVMValueRef immoffset,
-                          bool glc);
+                          unsigned cache_policy);
 
 LLVMValueRef
 ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
@@ -340,8 +351,7 @@ ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
                             unsigned num_channels,
                             unsigned dfmt,
                             unsigned nfmt,
-                            bool glc,
-                            bool slc,
+                            unsigned cache_policy,
                             bool can_speculate);
 
 LLVMValueRef
@@ -353,18 +363,45 @@ ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
                          unsigned num_channels,
                          unsigned dfmt,
                          unsigned nfmt,
-                         bool glc,
-                         bool slc,
+                         unsigned cache_policy,
                          bool can_speculate);
 
+/* For ac_build_fetch_format.
+ *
+ * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
+ */
+enum { /* NOTE(review): ac_build_fetch_format is not declared in this header diff; the `format` parameter of ac_build_opencoded_load_format presumably takes these values - confirm */
+       AC_FETCH_FORMAT_FLOAT = 0,
+       AC_FETCH_FORMAT_FIXED,
+       AC_FETCH_FORMAT_UNORM,
+       AC_FETCH_FORMAT_SNORM,
+       AC_FETCH_FORMAT_USCALED,
+       AC_FETCH_FORMAT_SSCALED,
+       AC_FETCH_FORMAT_UINT,
+       AC_FETCH_FORMAT_SINT,
+};
+
+LLVMValueRef
+ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
+                              unsigned log_size,
+                              unsigned num_channels,
+                              unsigned format,
+                              bool reverse,
+                              bool known_aligned,
+                              LLVMValueRef rsrc,
+                              LLVMValueRef vindex,
+                              LLVMValueRef voffset,
+                              LLVMValueRef soffset,
+                              unsigned cache_policy,
+                              bool can_speculate);
+
 void
 ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
                             LLVMValueRef rsrc,
                             LLVMValueRef vdata,
                             LLVMValueRef voffset,
                             LLVMValueRef soffset,
-                            bool glc,
-                            bool writeonly_memory);
+                            unsigned cache_policy);
 
 void
 ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
@@ -372,8 +409,7 @@ ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
                            LLVMValueRef vdata,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
-                           bool glc,
-                           bool writeonly_memory);
+                           unsigned cache_policy);
 
 void
 ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
@@ -386,9 +422,7 @@ ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
                              unsigned num_channels,
                              unsigned dfmt,
                              unsigned nfmt,
-                             bool glc,
-                             bool slc,
-                             bool writeonly_memory);
+                             unsigned cache_policy);
 
 void
 ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
@@ -400,9 +434,7 @@ ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
                           unsigned num_channels,
                           unsigned dfmt,
                           unsigned nfmt,
-                          bool glc,
-                          bool slc,
-                          bool writeonly_memory);
+                          unsigned cache_policy);
 
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
@@ -419,6 +451,7 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 
 #define AC_SENDMSG_GS 2
 #define AC_SENDMSG_GS_DONE 3
+#define AC_SENDMSG_GS_ALLOC_REQ 9
 
 #define AC_SENDMSG_GS_OP_NOP      (0 << 4)
 #define AC_SENDMSG_GS_OP_CUT      (1 << 4)
@@ -485,23 +518,15 @@ enum ac_atomic_op {
        ac_atomic_and,
        ac_atomic_or,
        ac_atomic_xor,
-};
-
-enum ac_image_dim {
-       ac_image_1d,
-       ac_image_2d,
-       ac_image_3d,
-       ac_image_cube, // includes cube arrays
-       ac_image_1darray,
-       ac_image_2darray,
-       ac_image_2dmsaa,
-       ac_image_2darraymsaa,
+       ac_atomic_inc_wrap,
+       ac_atomic_dec_wrap,
 };
 
 /* These cache policy bits match the definitions used by the LLVM intrinsics. */
 enum ac_image_cache_policy {
-       ac_glc = 1 << 0,
-       ac_slc = 1 << 1,
+       ac_glc = 1 << 0, /* per-CU cache control */
+       ac_slc = 1 << 1, /* global L2 cache control */
+       ac_dlc = 1 << 2, /* per-shader-array cache control */
 };
 
 struct ac_image_args {
@@ -509,7 +534,7 @@ struct ac_image_args {
        enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
        enum ac_image_dim dim : 3;
        unsigned dmask : 4;
-       unsigned cache_policy : 2;
+       unsigned cache_policy : 3;
        bool unorm : 1;
        bool level_zero : 1;
        unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
@@ -527,6 +552,8 @@ struct ac_image_args {
 
 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
                                   struct ac_image_args *a);
+LLVMValueRef ac_build_image_get_sample_count(struct ac_llvm_context *ctx,
+                                            LLVMValueRef rsrc); /* added from radeonsi (see commit subject); rsrc is presumably the image resource descriptor the sample count is read from - confirm */
 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
                                    LLVMValueRef args[2]);
 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
@@ -547,7 +574,7 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
                           LLVMValueRef s1, LLVMValueRef s2);
 
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags); /* wait_flags: presumably a bitmask of the AC_WAIT_* flags defined in this header - confirm */
 
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
                           unsigned bitsize);
@@ -694,6 +721,9 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
 LLVMValueRef
 ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
 
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+                          LLVMValueRef *args, unsigned num_args);
+
 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
                                 LLVMValueRef ptr, LLVMValueRef val,
                                 const char *sync_scope);
@@ -702,6 +732,11 @@ LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef
                                      LLVMValueRef cmp, LLVMValueRef val,
                                      const char *sync_scope);
 
+void
+ac_export_mrt_z(struct ac_llvm_context *ctx, LLVMValueRef depth,
+               LLVMValueRef stencil, LLVMValueRef samplemask,
+               struct ac_export_args *args);
+
 #ifdef __cplusplus
 }
 #endif