ac: add ac_build_{struct,raw}_tbuffer_store() helpers
[mesa.git] / src / amd / common / ac_llvm_util.c
index be2d92b4c0891ccd67648c377e71631e402462ad..69446863b95167b7435f3ca16240e28dab0e1cf5 100644 (file)
 #include "util/bitscan.h"
 #include <llvm-c/Core.h>
 #include <llvm-c/Support.h>
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/Scalar.h>
+#include <llvm-c/Transforms/Utils.h>
 #include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
 #include "util/u_math.h"
 
 #include <assert.h>
@@ -50,20 +54,27 @@ static void ac_init_llvm_target()
         * https://reviews.llvm.org/D26348
         *
         * "mesa" is the prefix for error messages.
+        *
+        * -global-isel-abort=2 is a no-op unless global isel has been enabled.
+        * This option tells the backend to fall back to SelectionDAG and print
+        * a diagnostic message if global isel fails.
         */
-       const char *argv[2] = { "mesa", "-simplifycfg-sink-common=false" };
-       LLVMParseCommandLineOptions(2, argv, NULL);
+       const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", "-global-isel-abort=2" };
+       LLVMParseCommandLineOptions(3, argv, NULL);
 }
 
 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
 
-LLVMTargetRef ac_get_llvm_target(const char *triple)
+/* Run ac_init_llvm_target() through call_once() on the file-level once-flag,
+ * so that repeated calls go through the same flag instead of invoking the
+ * initializer directly.
+ */
+void ac_init_llvm_once(void)
+{
+       call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
+}
+
+static LLVMTargetRef ac_get_llvm_target(const char *triple)
 {
        LLVMTargetRef target = NULL;
        char *err_message = NULL;
 
-       call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
-
        if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
                fprintf(stderr, "Cannot find target for triple %s ", triple);
                if (err_message) {
@@ -119,15 +130,20 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
        case CHIP_RAVEN:
                return "gfx902";
        case CHIP_VEGA12:
-               return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
+               return "gfx904";
+       case CHIP_VEGA20:
+               return "gfx906";
+       case CHIP_RAVEN2:
+               return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902";
        default:
                return "";
        }
 }
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
-                                             enum ac_target_machine_options tm_options,
-                                             const char **out_triple)
+static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+                                                    enum ac_target_machine_options tm_options,
+                                                    LLVMCodeGenOptLevel level,
+                                                    const char **out_triple)
 {
        assert(family >= CHIP_TAHITI);
        char features[256];
@@ -135,7 +151,8 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
        LLVMTargetRef target = ac_get_llvm_target(triple);
 
        snprintf(features, sizeof(features),
-                "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s",
+                "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
+                HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
                 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
                 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
                 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
@@ -146,15 +163,50 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
                                     triple,
                                     ac_get_llvm_processor_name(family),
                                     features,
-                                    LLVMCodeGenLevelDefault,
+                                    level,
                                     LLVMRelocDefault,
                                     LLVMCodeModelDefault);
 
        if (out_triple)
                *out_triple = triple;
+       if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
+               ac_enable_global_isel(tm);
        return tm;
 }
 
+/* Create an LLVM pass manager preloaded with a fixed sequence of passes.
+ *
+ * target_library_info: if non-NULL, it is added to the pass manager before
+ *                      any pass.
+ * check_ir: if true, the IR verifier pass is added ahead of the other passes.
+ *
+ * Returns the new pass manager, or NULL if LLVMCreatePassManager() failed.
+ */
+static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
+                                           bool check_ir)
+{
+       LLVMPassManagerRef passmgr = LLVMCreatePassManager();
+       if (!passmgr)
+               return NULL;
+
+       if (target_library_info)
+               LLVMAddTargetLibraryInfo(target_library_info,
+                                        passmgr);
+
+       if (check_ir)
+               LLVMAddVerifierPass(passmgr);
+       LLVMAddAlwaysInlinerPass(passmgr);
+       /* Normally, the pass manager runs all passes on one function before
+        * moving on to another. Adding a barrier no-op pass forces the pass
+        * manager to run the inliner on all functions first, which makes sure
+        * that the following passes are only run on the remaining non-inline
+        * functions, so it avoids useless work on dead inline functions.
+        */
+       ac_llvm_add_barrier_noop_pass(passmgr);
+       /* This pass should eliminate all the load and store instructions. */
+       LLVMAddPromoteMemoryToRegisterPass(passmgr);
+       LLVMAddScalarReplAggregatesPass(passmgr);
+       LLVMAddLICMPass(passmgr);
+       LLVMAddAggressiveDCEPass(passmgr);
+       LLVMAddCFGSimplificationPass(passmgr);
+       /* This is recommended by the instruction combining pass. */
+       LLVMAddEarlyCSEMemSSAPass(passmgr);
+       LLVMAddInstructionCombiningPass(passmgr);
+       return passmgr;
+}
+
 static const char *attr_to_str(enum ac_func_attr attr)
 {
    switch (attr) {
@@ -246,3 +298,54 @@ ac_count_scratch_private_memory(LLVMValueRef function)
 
        return private_mem_vgprs;
 }
+
+/* Fill in *compiler with the LLVM objects created for the given GPU family.
+ *
+ * The struct is zeroed first. A target machine is then created at
+ * LLVMCodeGenLevelDefault; if AC_TM_CREATE_LOW_OPT is set in tm_options, a
+ * second one is created at LLVMCodeGenLevelLess. Finally the target library
+ * info (for the triple reported by the first target machine) and the pass
+ * manager are created.
+ *
+ * Returns true on success. If the first target machine cannot be created,
+ * false is returned directly; on any later failure,
+ * ac_destroy_llvm_compiler() is called on *compiler before returning false.
+ */
+bool
+ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
+                     enum radeon_family family,
+                     enum ac_target_machine_options tm_options)
+{
+       const char *triple;
+       /* Zero every field so the fail path only disposes what was created. */
+       memset(compiler, 0, sizeof(*compiler));
+
+       compiler->tm = ac_create_target_machine(family, tm_options,
+                                               LLVMCodeGenLevelDefault,
+                                               &triple);
+       if (!compiler->tm)
+               return false;
+
+       if (tm_options & AC_TM_CREATE_LOW_OPT) {
+               /* The triple was already obtained above, so pass NULL here. */
+               compiler->low_opt_tm =
+                       ac_create_target_machine(family, tm_options,
+                                                LLVMCodeGenLevelLess, NULL);
+               if (!compiler->low_opt_tm)
+                       goto fail;
+       }
+
+       compiler->target_library_info =
+               ac_create_target_library_info(triple);
+       if (!compiler->target_library_info)
+               goto fail;
+
+       /* AC_TM_CHECK_IR is forwarded as the check_ir argument. */
+       compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
+                                             tm_options & AC_TM_CHECK_IR);
+       if (!compiler->passmgr)
+               goto fail;
+
+       return true;
+fail:
+       ac_destroy_llvm_compiler(compiler);
+       return false;
+}
+
+/* Dispose of every LLVM object held by *compiler and clear its fields.
+ *
+ * Each non-NULL member is released and then reset to NULL, so calling this
+ * function again on the same struct is a no-op. This matters because
+ * ac_init_llvm_compiler() calls it on its failure path: without the reset,
+ * *compiler would be left holding dangling handles, and a caller that
+ * destroys the compiler again during its own cleanup would double-free them.
+ *
+ * The pass manager and target library info are released before the target
+ * machines, mirroring the reverse of their creation order.
+ */
+void
+ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
+{
+       if (compiler->passmgr) {
+               LLVMDisposePassManager(compiler->passmgr);
+               compiler->passmgr = NULL;
+       }
+       if (compiler->target_library_info) {
+               ac_dispose_target_library_info(compiler->target_library_info);
+               compiler->target_library_info = NULL;
+       }
+       if (compiler->low_opt_tm) {
+               LLVMDisposeTargetMachine(compiler->low_opt_tm);
+               compiler->low_opt_tm = NULL;
+       }
+       if (compiler->tm) {
+               LLVMDisposeTargetMachine(compiler->tm);
+               compiler->tm = NULL;
+       }
+}