radv: add a workaround for Monster Hunter World and LLVM 7&8
[mesa.git] / src / amd / common / ac_llvm_util.c
index 2be2edf1e6acb9a58942eb91b961ab91b1a3894a..6063411310b849adedcb761f94cb9bf76fc04013 100644 (file)
 #include <llvm-c/Support.h>
 #include <llvm-c/Transforms/IPO.h>
 #include <llvm-c/Transforms/Scalar.h>
-#if HAVE_LLVM >= 0x0700
 #include <llvm-c/Transforms/Utils.h>
-#endif
 #include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
 #include "util/u_math.h"
 
 #include <assert.h>
@@ -55,9 +54,13 @@ static void ac_init_llvm_target()
         * https://reviews.llvm.org/D26348
         *
         * "mesa" is the prefix for error messages.
+        *
+        * -global-isel-abort=2 is a no-op unless global isel has been enabled.
+        * This option tells the backend to fall back to SelectionDAG and print
+        * a diagnostic message if global isel fails.
         */
-       const char *argv[2] = { "mesa", "-simplifycfg-sink-common=false" };
-       LLVMParseCommandLineOptions(2, argv, NULL);
+       const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", "-global-isel-abort=2" };
+       LLVMParseCommandLineOptions(3, argv, NULL);
 }
 
 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
@@ -67,7 +70,7 @@ void ac_init_llvm_once(void)
        call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
 }
 
-LLVMTargetRef ac_get_llvm_target(const char *triple)
+static LLVMTargetRef ac_get_llvm_target(const char *triple)
 {
        LLVMTargetRef target = NULL;
        char *err_message = NULL;
@@ -127,46 +130,53 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
        case CHIP_RAVEN:
                return "gfx902";
        case CHIP_VEGA12:
-               return HAVE_LLVM >= 0x0700 ? "gfx904" : "gfx902";
+               return "gfx904";
+       case CHIP_VEGA20:
+               return "gfx906";
+       case CHIP_RAVEN2:
+               return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902";
        default:
                return "";
        }
 }
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
-                                             enum ac_target_machine_options tm_options,
-                                             const char **out_triple)
+static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+                                                    enum ac_target_machine_options tm_options,
+                                                    LLVMCodeGenOptLevel level,
+                                                    const char **out_triple)
 {
        assert(family >= CHIP_TAHITI);
        char features[256];
        const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
        LLVMTargetRef target = ac_get_llvm_target(triple);
-       bool barrier_does_waitcnt = true; /* TODO: not for Vega20 */
 
        snprintf(features, sizeof(features),
-                "+DumpCode,+vgpr-spilling,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
+                "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s",
+                HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
                 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
                 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
                 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
                 tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "",
-                barrier_does_waitcnt ? ",+auto-waitcnt-before-barrier" : "");
-       
+                tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : "");
+
        LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
                                     target,
                                     triple,
                                     ac_get_llvm_processor_name(family),
                                     features,
-                                    LLVMCodeGenLevelDefault,
+                                    level,
                                     LLVMRelocDefault,
                                     LLVMCodeModelDefault);
 
        if (out_triple)
                *out_triple = triple;
+       if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
+               ac_enable_global_isel(tm);
        return tm;
 }
 
-LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
-                                    bool check_ir)
+static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
+                                           bool check_ir)
 {
        LLVMPassManagerRef passmgr = LLVMCreatePassManager();
        if (!passmgr)
@@ -179,6 +189,13 @@ LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_inf
        if (check_ir)
                LLVMAddVerifierPass(passmgr);
        LLVMAddAlwaysInlinerPass(passmgr);
+       /* Normally, the pass manager runs all passes on one function before
+        * moving onto another. Adding a barrier no-op pass forces the pass
+        * manager to run the inliner on all functions first, which makes sure
+        * that the following passes are only run on the remaining non-inline
+        * functions, so it removes useless work done on dead inline functions.
+        */
+       ac_llvm_add_barrier_noop_pass(passmgr);
        /* This pass should eliminate all the load and store instructions. */
        LLVMAddPromoteMemoryToRegisterPass(passmgr);
        LLVMAddScalarReplAggregatesPass(passmgr);
@@ -285,25 +302,31 @@ ac_count_scratch_private_memory(LLVMValueRef function)
 
 bool
 ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
-                     bool okay_to_leak_target_library_info,
                      enum radeon_family family,
                      enum ac_target_machine_options tm_options)
 {
        const char *triple;
        memset(compiler, 0, sizeof(*compiler));
 
-       compiler->tm = ac_create_target_machine(family,
-                                           tm_options, &triple);
+       compiler->tm = ac_create_target_machine(family, tm_options,
+                                               LLVMCodeGenLevelDefault,
+                                               &triple);
        if (!compiler->tm)
                return false;
 
-       if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) {
-               compiler->target_library_info =
-                       ac_create_target_library_info(triple);
-               if (!compiler->target_library_info)
+       if (tm_options & AC_TM_CREATE_LOW_OPT) {
+               compiler->low_opt_tm =
+                       ac_create_target_machine(family, tm_options,
+                                                LLVMCodeGenLevelLess, NULL);
+               if (!compiler->low_opt_tm)
                        goto fail;
        }
 
+       compiler->target_library_info =
+               ac_create_target_library_info(triple);
+       if (!compiler->target_library_info)
+               goto fail;
+
        compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
                                              tm_options & AC_TM_CHECK_IR);
        if (!compiler->passmgr)
@@ -320,11 +343,10 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
 {
        if (compiler->passmgr)
                LLVMDisposePassManager(compiler->passmgr);
-#if HAVE_LLVM >= 0x0700
-       /* This crashes on LLVM 5.0 and 6.0 and Ubuntu 18.04, so leak it there. */
        if (compiler->target_library_info)
                ac_dispose_target_library_info(compiler->target_library_info);
-#endif
+       if (compiler->low_opt_tm)
+               LLVMDisposeTargetMachine(compiler->low_opt_tm);
        if (compiler->tm)
                LLVMDisposeTargetMachine(compiler->tm);
 }