X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Famd%2Fcommon%2Fac_llvm_util.c;h=6063411310b849adedcb761f94cb9bf76fc04013;hb=d7501834cd86f9ec0b7c3dea17448dc523e36390;hp=d14057fa7680441e68a8f03f1d49db8a6c9fd83f;hpb=09d7c7be4f1750a24670f8e7f9c555d88e47708f;p=mesa.git diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c index d14057fa768..6063411310b 100644 --- a/src/amd/common/ac_llvm_util.c +++ b/src/amd/common/ac_llvm_util.c @@ -24,10 +24,16 @@ */ /* based on pieces from si_pipe.c and radeon_llvm_emit.c */ #include "ac_llvm_util.h" +#include "ac_llvm_build.h" #include "util/bitscan.h" #include #include +#include +#include +#include #include "c11/threads.h" +#include "gallivm/lp_bld_misc.h" +#include "util/u_math.h" #include #include @@ -40,27 +46,35 @@ static void ac_init_llvm_target() LLVMInitializeAMDGPUTargetMC(); LLVMInitializeAMDGPUAsmPrinter(); - /* - * Workaround for bug in llvm 4.0 that causes image intrinsics + /* For inline assembly. */ + LLVMInitializeAMDGPUAsmParser(); + + /* Workaround for bug in llvm 4.0 that causes image intrinsics * to disappear. * https://reviews.llvm.org/D26348 + * + * "mesa" is the prefix for error messages. + * + * -global-isel-abort=2 is a no-op unless global isel has been enabled. + * This option tells the backend to fall-back to SelectionDAG and print + * a diagnostic message if global isel fails. */ -#if HAVE_LLVM >= 0x0400 - const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"}; - LLVMParseCommandLineOptions(2, argv, NULL); -#endif - + const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", "-global-isel-abort=2" }; + LLVMParseCommandLineOptions(3, argv, NULL); } static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT; +void ac_init_llvm_once(void) +{ + call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); +} + static LLVMTargetRef ac_get_llvm_target(const char *triple) { LLVMTargetRef target = NULL; char *err_message = NULL; - call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target); - if (LLVMGetTargetFromTriple(triple, &target, &err_message)) { fprintf(stderr, "Cannot find target for triple %s ", triple); if (err_message) { @@ -72,7 +86,7 @@ static LLVMTargetRef ac_get_llvm_target(const char *triple) return target; } -static const char *ac_get_llvm_processor_name(enum radeon_family family) +const char *ac_get_llvm_processor_name(enum radeon_family family) { switch (family) { case CHIP_TAHITI: @@ -109,16 +123,27 @@ static const char *ac_get_llvm_processor_name(enum radeon_family family) return "polaris10"; case CHIP_POLARIS11: case CHIP_POLARIS12: + case CHIP_VEGAM: return "polaris11"; case CHIP_VEGA10: - case CHIP_RAVEN: return "gfx900"; + case CHIP_RAVEN: + return "gfx902"; + case CHIP_VEGA12: + return "gfx904"; + case CHIP_VEGA20: + return "gfx906"; + case CHIP_RAVEN2: + return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902"; default: return ""; } } -LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac_target_machine_options tm_options) +static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, + enum ac_target_machine_options tm_options, + LLVMCodeGenOptLevel level, + const char **out_triple) { assert(family >= CHIP_TAHITI); char features[256]; @@ -126,47 +151,67 @@ LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, enum ac LLVMTargetRef target = ac_get_llvm_target(triple); snprintf(features, sizeof(features), - "+DumpCode,+vgpr-spilling,-fp32-denormals%s%s", - family >= CHIP_VEGA10 ? ",+xnack" : ",-xnack", - tm_options & AC_TM_SISCHED ? ",+si-scheduler" : ""); - + "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s", + HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling", + tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "", + tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "", + tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "", + tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "", + tm_options & AC_TM_NO_LOAD_STORE_OPT ? ",-load-store-opt" : ""); + LLVMTargetMachineRef tm = LLVMCreateTargetMachine( target, triple, ac_get_llvm_processor_name(family), features, - LLVMCodeGenLevelDefault, + level, LLVMRelocDefault, LLVMCodeModelDefault); + if (out_triple) + *out_triple = triple; + if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL) + ac_enable_global_isel(tm); return tm; } - -#if HAVE_LLVM < 0x0400 -static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr) +static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info, + bool check_ir) { - switch (attr) { - case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute; - case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute; - case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute; - case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute; - case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute; - case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute; - case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute; - default: - fprintf(stderr, "Unhandled function attribute: %x\n", attr); - return 0; - } -} + LLVMPassManagerRef passmgr = LLVMCreatePassManager(); + if (!passmgr) + return NULL; + + if (target_library_info) + LLVMAddTargetLibraryInfo(target_library_info, + passmgr); -#else + if (check_ir) + LLVMAddVerifierPass(passmgr); + LLVMAddAlwaysInlinerPass(passmgr); + /* Normally, the pass manager runs all passes on one function before + * moving onto another. Adding a barrier no-op pass forces the pass + * manager to run the inliner on all functions first, which makes sure + * that the following passes are only run on the remaining non-inline + * function, so it removes useless work done on dead inline functions. + */ + ac_llvm_add_barrier_noop_pass(passmgr); + /* This pass should eliminate all the load and store instructions. */ + LLVMAddPromoteMemoryToRegisterPass(passmgr); + LLVMAddScalarReplAggregatesPass(passmgr); + LLVMAddLICMPass(passmgr); + LLVMAddAggressiveDCEPass(passmgr); + LLVMAddCFGSimplificationPass(passmgr); + /* This is recommended by the instruction combining pass. */ + LLVMAddEarlyCSEMemSSAPass(passmgr); + LLVMAddInstructionCombiningPass(passmgr); + return passmgr; +} static const char *attr_to_str(enum ac_func_attr attr) { switch (attr) { case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline"; - case AC_FUNC_ATTR_BYVAL: return "byval"; case AC_FUNC_ATTR_INREG: return "inreg"; case AC_FUNC_ATTR_NOALIAS: return "noalias"; case AC_FUNC_ATTR_NOUNWIND: return "nounwind"; @@ -181,20 +226,10 @@ static const char *attr_to_str(enum ac_func_attr attr) } } -#endif - void ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, int attr_idx, enum ac_func_attr attr) { -#if HAVE_LLVM < 0x0400 - LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr); - if (attr_idx == -1) { - LLVMAddFunctionAttr(function, llvm_attr); - } else { - LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr); - } -#else const char *attr_name = attr_to_str(attr); unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name, strlen(attr_name)); @@ -204,7 +239,6 @@ ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function, LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr); else LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr); -#endif } void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function, @@ -229,10 +263,90 @@ ac_dump_module(LLVMModuleRef module) void ac_llvm_add_target_dep_function_attr(LLVMValueRef F, - const char *name, int value) + const char *name, unsigned value) { char str[16]; - snprintf(str, sizeof(str), "%i", value); + snprintf(str, sizeof(str), "0x%x", value); LLVMAddTargetDependentFunctionAttr(F, name, str); } + +unsigned +ac_count_scratch_private_memory(LLVMValueRef function) +{ + unsigned private_mem_vgprs = 0; + + /* Process all LLVM instructions. */ + LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function); + while (bb) { + LLVMValueRef next = LLVMGetFirstInstruction(bb); + + while (next) { + LLVMValueRef inst = next; + next = LLVMGetNextInstruction(next); + + if (LLVMGetInstructionOpcode(inst) != LLVMAlloca) + continue; + + LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); + /* No idea why LLVM aligns allocas to 4 elements. */ + unsigned alignment = LLVMGetAlignment(inst); + unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); + private_mem_vgprs += dw_size; + } + bb = LLVMGetNextBasicBlock(bb); + } + + return private_mem_vgprs; +} + +bool +ac_init_llvm_compiler(struct ac_llvm_compiler *compiler, + enum radeon_family family, + enum ac_target_machine_options tm_options) +{ + const char *triple; + memset(compiler, 0, sizeof(*compiler)); + + compiler->tm = ac_create_target_machine(family, tm_options, + LLVMCodeGenLevelDefault, + &triple); + if (!compiler->tm) + return false; + + if (tm_options & AC_TM_CREATE_LOW_OPT) { + compiler->low_opt_tm = + ac_create_target_machine(family, tm_options, + LLVMCodeGenLevelLess, NULL); + if (!compiler->low_opt_tm) + goto fail; + } + + compiler->target_library_info = + ac_create_target_library_info(triple); + if (!compiler->target_library_info) + goto fail; + + compiler->passmgr = ac_create_passmgr(compiler->target_library_info, + tm_options & AC_TM_CHECK_IR); + if (!compiler->passmgr) + goto fail; + + return true; +fail: + ac_destroy_llvm_compiler(compiler); + return false; +} + +void +ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler) +{ + if (compiler->passmgr) + LLVMDisposePassManager(compiler->passmgr); + if (compiler->target_library_info) + ac_dispose_target_library_info(compiler->target_library_info); + if (compiler->low_opt_tm) + LLVMDisposeTargetMachine(compiler->low_opt_tm); + if (compiler->tm) + LLVMDisposeTargetMachine(compiler->tm); +}