ac: initial Wave32 support in LLVM build helpers
[mesa.git] / src / amd / common / ac_llvm_build.h
index 5ed9a112457ae5f34e6fc3562fa5f6bf135ab5e5..588ef242c20b82db4fe8d49e6e919f0bdecc6960 100644 (file)
@@ -43,11 +43,9 @@ enum {
        AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
 };
 
-/* Combine these with & instead of |. */
-#define NOOP_WAITCNT   0xcf7f
-#define LGKM_CNT       0xc07f
-#define EXP_CNT                0xcf0f
-#define VM_CNT         0x0f70 /* On GFX9, vmcnt has 6 bits in [0:3] and [14:15] */
+#define AC_WAIT_LGKM   (1 << 0) /* LDS, GDS, constant, message */
+#define AC_WAIT_VLOAD  (1 << 1) /* VMEM load/sample instructions */
+#define AC_WAIT_VSTORE (1 << 2) /* VMEM store instructions */
 
 struct ac_llvm_flow;
 
@@ -105,13 +103,15 @@ struct ac_llvm_context {
 
        enum chip_class chip_class;
        enum radeon_family family;
+       unsigned wave_size;
 
        LLVMValueRef lds;
 };
 
 void
 ac_llvm_context_init(struct ac_llvm_context *ctx,
-                    enum chip_class chip_class, enum radeon_family family);
+                    enum chip_class chip_class, enum radeon_family family,
+                    unsigned wave_size);
 
 void
 ac_llvm_context_dispose(struct ac_llvm_context *ctx);
@@ -269,9 +269,7 @@ ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            unsigned inst_offset,
-                           bool glc,
-                           bool slc,
-                           bool writeonly_memory,
+                           unsigned cache_policy,
                            bool swizzle_enable_hint);
 
 void
@@ -281,8 +279,7 @@ ac_build_buffer_store_format(struct ac_llvm_context *ctx,
                             LLVMValueRef vindex,
                             LLVMValueRef voffset,
                             unsigned num_channels,
-                            bool glc,
-                            bool writeonly_memory);
+                            unsigned cache_policy);
 
 LLVMValueRef
 ac_build_buffer_load(struct ac_llvm_context *ctx,
@@ -292,8 +289,7 @@ ac_build_buffer_load(struct ac_llvm_context *ctx,
                     LLVMValueRef voffset,
                     LLVMValueRef soffset,
                     unsigned inst_offset,
-                    unsigned glc,
-                    unsigned slc,
+                    unsigned cache_policy,
                     bool can_speculate,
                     bool allow_smem);
 
@@ -302,7 +298,7 @@ LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
                                         LLVMValueRef vindex,
                                         LLVMValueRef voffset,
                                         unsigned num_channels,
-                                        bool glc,
+                                        unsigned cache_policy,
                                         bool can_speculate);
 
 /* load_format that handles the stride & element count better if idxen is
@@ -312,7 +308,7 @@ LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
                                                   LLVMValueRef vindex,
                                                   LLVMValueRef voffset,
                                                   unsigned num_channels,
-                                                  bool glc,
+                                                  unsigned cache_policy,
                                                   bool can_speculate);
 
 LLVMValueRef
@@ -321,7 +317,7 @@ ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
                            LLVMValueRef immoffset,
-                           bool glc);
+                           unsigned cache_policy);
 
 LLVMValueRef
 ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
@@ -329,7 +325,7 @@ ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
                           LLVMValueRef voffset,
                           LLVMValueRef soffset,
                           LLVMValueRef immoffset,
-                          bool glc);
+                          unsigned cache_policy);
 
 LLVMValueRef
 ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
@@ -341,8 +337,7 @@ ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
                             unsigned num_channels,
                             unsigned dfmt,
                             unsigned nfmt,
-                            bool glc,
-                            bool slc,
+                            unsigned cache_policy,
                             bool can_speculate);
 
 LLVMValueRef
@@ -354,8 +349,7 @@ ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
                          unsigned num_channels,
                          unsigned dfmt,
                          unsigned nfmt,
-                         bool glc,
-                         bool slc,
+                         unsigned cache_policy,
                          bool can_speculate);
 
 /* For ac_build_fetch_format.
@@ -384,8 +378,7 @@ ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
                               LLVMValueRef vindex,
                               LLVMValueRef voffset,
                               LLVMValueRef soffset,
-                              bool glc,
-                              bool slc,
+                              unsigned cache_policy,
                               bool can_speculate);
 
 void
@@ -394,8 +387,7 @@ ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
                             LLVMValueRef vdata,
                             LLVMValueRef voffset,
                             LLVMValueRef soffset,
-                            bool glc,
-                            bool writeonly_memory);
+                            unsigned cache_policy);
 
 void
 ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
@@ -403,8 +395,7 @@ ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
                            LLVMValueRef vdata,
                            LLVMValueRef voffset,
                            LLVMValueRef soffset,
-                           bool glc,
-                           bool writeonly_memory);
+                           unsigned cache_policy);
 
 void
 ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
@@ -417,9 +408,7 @@ ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
                              unsigned num_channels,
                              unsigned dfmt,
                              unsigned nfmt,
-                             bool glc,
-                             bool slc,
-                             bool writeonly_memory);
+                             unsigned cache_policy);
 
 void
 ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
@@ -431,9 +420,7 @@ ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
                           unsigned num_channels,
                           unsigned dfmt,
                           unsigned nfmt,
-                          bool glc,
-                          bool slc,
-                          bool writeonly_memory);
+                          unsigned cache_policy);
 
 LLVMValueRef
 ac_get_thread_id(struct ac_llvm_context *ctx);
@@ -450,6 +437,7 @@ ac_build_ddxy(struct ac_llvm_context *ctx,
 
 #define AC_SENDMSG_GS 2
 #define AC_SENDMSG_GS_DONE 3
+#define AC_SENDMSG_GS_ALLOC_REQ 9
 
 #define AC_SENDMSG_GS_OP_NOP      (0 << 4)
 #define AC_SENDMSG_GS_OP_CUT      (1 << 4)
@@ -531,8 +519,9 @@ enum ac_image_dim {
 
 /* These cache policy bits match the definitions used by the LLVM intrinsics. */
 enum ac_image_cache_policy {
-       ac_glc = 1 << 0,
-       ac_slc = 1 << 1,
+       ac_glc = 1 << 0, /* per-CU cache control */
+       ac_slc = 1 << 1, /* global L2 cache control */
+       ac_dlc = 1 << 2, /* per-shader-array cache control */
 };
 
 struct ac_image_args {
@@ -540,7 +529,7 @@ struct ac_image_args {
        enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
        enum ac_image_dim dim : 3;
        unsigned dmask : 4;
-       unsigned cache_policy : 2;
+       unsigned cache_policy : 3;
        bool unorm : 1;
        bool level_zero : 1;
        unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
@@ -578,7 +567,7 @@ LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
                           LLVMValueRef s1, LLVMValueRef s2);
 
-void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
+void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
 
 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
                           unsigned bitsize);
@@ -725,6 +714,9 @@ ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
 LLVMValueRef
 ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
 
+LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
+                          LLVMValueRef *args, unsigned num_args);
+
 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
                                 LLVMValueRef ptr, LLVMValueRef val,
                                 const char *sync_scope);