ac: add ac_build_{struct,raw}_tbuffer_store() helpers
[mesa.git] / src / amd / common / ac_llvm_util.c
index e2d5de24ad789d02251b30300f0dfb4b9191f8c7..69446863b95167b7435f3ca16240e28dab0e1cf5 100644 (file)
  */
 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
 #include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
 #include "util/bitscan.h"
 #include <llvm-c/Core.h>
 #include <llvm-c/Support.h>
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/Scalar.h>
+#include <llvm-c/Transforms/Utils.h>
 #include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
+#include "util/u_math.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -40,27 +46,35 @@ static void ac_init_llvm_target()
        LLVMInitializeAMDGPUTargetMC();
        LLVMInitializeAMDGPUAsmPrinter();
 
-       /*
-        * Workaround for bug in llvm 4.0 that causes image intrinsics
+       /* For inline assembly. */
+       LLVMInitializeAMDGPUAsmParser();
+
+       /* Workaround for bug in llvm 4.0 that causes image intrinsics
         * to disappear.
         * https://reviews.llvm.org/D26348
+        *
+        * "mesa" is the prefix for error messages.
+        *
+        * -global-isel-abort=2 is a no-op unless global isel has been enabled.
+        * This option tells the backend to fall back to SelectionDAG and print
+        * a diagnostic message if global isel fails.
         */
-#if HAVE_LLVM >= 0x0400
-       const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
-       LLVMParseCommandLineOptions(2, argv, NULL);
-#endif
-
+       const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", "-global-isel-abort=2" };
+       LLVMParseCommandLineOptions(3, argv, NULL);
 }
 
 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
 
+void ac_init_llvm_once(void) /* one-time LLVM target setup, guarded by call_once() */
+{
+       call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
+}
+
 static LLVMTargetRef ac_get_llvm_target(const char *triple)
 {
        LLVMTargetRef target = NULL;
        char *err_message = NULL;
 
-       call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
-
        if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
                fprintf(stderr, "Cannot find target for triple %s ", triple);
                if (err_message) {
@@ -72,7 +86,7 @@ static LLVMTargetRef ac_get_llvm_target(const char *triple)
        return target;
 }
 
-static const char *ac_get_llvm_processor_name(enum radeon_family family)
+const char *ac_get_llvm_processor_name(enum radeon_family family)
 {
        switch (family) {
        case CHIP_TAHITI:
@@ -105,65 +119,98 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family)
                return "fiji";
        case CHIP_STONEY:
                return "stoney";
-#if HAVE_LLVM == 0x0308
-       case CHIP_POLARIS10:
-               return "tonga";
-       case CHIP_POLARIS11:
-               return "tonga";
-#else
        case CHIP_POLARIS10:
                return "polaris10";
        case CHIP_POLARIS11:
+       case CHIP_POLARIS12:
+       case CHIP_VEGAM:
                return "polaris11";
-#endif
+       case CHIP_VEGA10:
+               return "gfx900";
+       case CHIP_RAVEN:
+               return "gfx902";
+       case CHIP_VEGA12:
+               return "gfx904";
+       case CHIP_VEGA20:
+               return "gfx906";
+       case CHIP_RAVEN2:
+               return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902";
        default:
                return "";
        }
 }
 
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill)
+static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+                                                    enum ac_target_machine_options tm_options,
+                                                    LLVMCodeGenOptLevel level,
+                                                    const char **out_triple) /* may be NULL */
 {
        assert(family >= CHIP_TAHITI);
-
-       const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
+       char features[256]; /* feature string handed to LLVMCreateTargetMachine() */
+       const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
        LLVMTargetRef target = ac_get_llvm_target(triple);
+       /* Assemble the feature list: fixed entries plus one per requested option. */
+       snprintf(features, sizeof(features),
+                "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
+                HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", /* omitted once HAVE_LLVM reaches 0x0800 */
+                tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
+                tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+                tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+                tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
+       /* NOTE(review): target is passed on below without a NULL check. */
        LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
                                     target,
                                     triple,
                                     ac_get_llvm_processor_name(family),
-                                    "+DumpCode,+vgpr-spilling,-fp32-denormals,-xnack",
-                                    LLVMCodeGenLevelDefault,
+                                    features,
+                                    level,
                                     LLVMRelocDefault,
                                     LLVMCodeModelDefault);
 
+       if (out_triple) /* NULL means the caller does not need the triple */
+               *out_triple = triple; /* points at a string literal; nothing to free */
+       if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
+               ac_enable_global_isel(tm); /* NOTE(review): tm is not checked for NULL before this call */
        return tm;
 }
 
-
-#if HAVE_LLVM < 0x0400
-static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
+static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
+                                           bool check_ir) /* true adds the IR verifier pass */
 {
-   switch (attr) {
-   case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
-   case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
-   case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
-   case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
-   case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
-   case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
-   case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
-   default:
-          fprintf(stderr, "Unhandled function attribute: %x\n", attr);
-          return 0;
-   }
-}
+       LLVMPassManagerRef passmgr = LLVMCreatePassManager();
+       if (!passmgr)
+               return NULL;
+
+       if (target_library_info) /* optional: skipped when NULL */
+               LLVMAddTargetLibraryInfo(target_library_info,
+                                        passmgr);
 
-#else
+       if (check_ir)
+               LLVMAddVerifierPass(passmgr);
+       LLVMAddAlwaysInlinerPass(passmgr);
+       /* Normally, the pass manager runs all passes on one function before
+        * moving on to another. Adding a barrier no-op pass forces the pass
+        * manager to run the inliner on all functions first, which ensures
+        * that the following passes only run on the remaining non-inline
+        * functions, avoiding useless work on dead inline functions.
+        */
+       ac_llvm_add_barrier_noop_pass(passmgr);
+       /* This pass should eliminate all the load and store instructions. */
+       LLVMAddPromoteMemoryToRegisterPass(passmgr);
+       LLVMAddScalarReplAggregatesPass(passmgr);
+       LLVMAddLICMPass(passmgr);
+       LLVMAddAggressiveDCEPass(passmgr);
+       LLVMAddCFGSimplificationPass(passmgr);
+       /* This is recommended by the instruction combining pass. */
+       LLVMAddEarlyCSEMemSSAPass(passmgr);
+       LLVMAddInstructionCombiningPass(passmgr);
+       return passmgr; /* passes run in the order they were added above */
+}
 
 static const char *attr_to_str(enum ac_func_attr attr)
 {
    switch (attr) {
    case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
-   case AC_FUNC_ATTR_BYVAL: return "byval";
    case AC_FUNC_ATTR_INREG: return "inreg";
    case AC_FUNC_ATTR_NOALIAS: return "noalias";
    case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
@@ -178,20 +225,10 @@ static const char *attr_to_str(enum ac_func_attr attr)
    }
 }
 
-#endif
-
 void
 ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
                      int attr_idx, enum ac_func_attr attr)
 {
-#if HAVE_LLVM < 0x0400
-   LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
-   if (attr_idx == -1) {
-      LLVMAddFunctionAttr(function, llvm_attr);
-   } else {
-      LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
-   }
-#else
    const char *attr_name = attr_to_str(attr);
    unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
                                                       strlen(attr_name));
@@ -201,7 +238,6 @@ ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
       LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
    else
       LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
-#endif
 }
 
 void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
@@ -223,3 +259,93 @@ ac_dump_module(LLVMModuleRef module)
        fprintf(stderr, "%s", str);
        LLVMDisposeMessage(str);
 }
+
+void
+ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
+                                    const char *name, unsigned value)
+{
+       char str[16]; /* "0x" + 8 hex digits + NUL fits, assuming 32-bit unsigned */
+       /* Attach 'value' to F as the attribute 'name', written as hexadecimal text. */
+       snprintf(str, sizeof(str), "0x%x", value);
+       LLVMAddTargetDependentFunctionAttr(F, name, str);
+}
+
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function)
+{
+       unsigned private_mem_vgprs = 0;
+
+       /* Walk every instruction of every basic block and sum the size of each alloca. */
+       LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
+       while (bb) {
+               LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+               while (next) {
+                       LLVMValueRef inst = next;
+                       next = LLVMGetNextInstruction(next);
+
+                       if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+                               continue; /* only allocas contribute to the total */
+
+                       LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
+                       /* No idea why LLVM aligns allocas to 4 elements. */
+                       unsigned alignment = LLVMGetAlignment(inst);
+                       unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); /* presumably bytes -> dwords; confirm ac_get_type_size() units */
+                       private_mem_vgprs += dw_size;
+               }
+               bb = LLVMGetNextBasicBlock(bb);
+       }
+
+       return private_mem_vgprs; /* 0 when the function contains no alloca */
+}
+
+bool
+ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
+                     enum radeon_family family,
+                     enum ac_target_machine_options tm_options)
+{
+       const char *triple;
+       memset(compiler, 0, sizeof(*compiler)); /* so the fail path sees NULL for anything not yet created */
+       /* Main target machine; it also reports the triple it was built with. */
+       compiler->tm = ac_create_target_machine(family, tm_options,
+                                               LLVMCodeGenLevelDefault,
+                                               &triple);
+       if (!compiler->tm)
+               return false; /* nothing else exists yet, so no cleanup is needed */
+       /* Optional second machine created with LLVMCodeGenLevelLess. */
+       if (tm_options & AC_TM_CREATE_LOW_OPT) {
+               compiler->low_opt_tm =
+                       ac_create_target_machine(family, tm_options,
+                                                LLVMCodeGenLevelLess, NULL);
+               if (!compiler->low_opt_tm)
+                       goto fail;
+       }
+
+       compiler->target_library_info =
+               ac_create_target_library_info(triple);
+       if (!compiler->target_library_info)
+               goto fail;
+
+       compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
+                                             tm_options & AC_TM_CHECK_IR);
+       if (!compiler->passmgr)
+               goto fail;
+
+       return true;
+fail: /* release whatever was created so far, then report failure */
+       ac_destroy_llvm_compiler(compiler);
+       return false;
+}
+
+void
+ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
+{
+       if (compiler->passmgr) /* each member is disposed only if it is non-NULL */
+               LLVMDisposePassManager(compiler->passmgr);
+       if (compiler->target_library_info)
+               ac_dispose_target_library_info(compiler->target_library_info);
+       if (compiler->low_opt_tm)
+               LLVMDisposeTargetMachine(compiler->low_opt_tm);
+       if (compiler->tm)
+               LLVMDisposeTargetMachine(compiler->tm); /* NOTE(review): members are not reset to NULL afterwards */
+}