*/
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
#include "util/bitscan.h"
#include <llvm-c/Core.h>
#include <llvm-c/Support.h>
+#include <llvm-c/Transforms/IPO.h>
+#include <llvm-c/Transforms/Scalar.h>
+#include <llvm-c/Transforms/Utils.h>
#include "c11/threads.h"
+#include "gallivm/lp_bld_misc.h"
+#include "util/u_math.h"
#include <assert.h>
#include <stdio.h>
LLVMInitializeAMDGPUTargetMC();
LLVMInitializeAMDGPUAsmPrinter();
- /*
- * Workaround for bug in llvm 4.0 that causes image intrinsics
+ /* For inline assembly. */
+ LLVMInitializeAMDGPUAsmParser();
+
+ /* Workaround for bug in llvm 4.0 that causes image intrinsics
* to disappear.
* https://reviews.llvm.org/D26348
+ *
+ * "mesa" is the prefix for error messages.
+ *
+ * -global-isel-abort=2 is a no-op unless global isel has been enabled.
+ * This option tells the backend to fall-back to SelectionDAG and print
+ * a diagnostic message if global isel fails.
*/
-#if HAVE_LLVM >= 0x0400
- const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
- LLVMParseCommandLineOptions(2, argv, NULL);
-#endif
-
+ const char *argv[3] = { "mesa", "-simplifycfg-sink-common=false", "-global-isel-abort=2" };
+ LLVMParseCommandLineOptions(3, argv, NULL);
}
static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
+void ac_init_llvm_once(void)
+{
+ call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
+}
+
static LLVMTargetRef ac_get_llvm_target(const char *triple)
{
LLVMTargetRef target = NULL;
char *err_message = NULL;
- call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
-
if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
fprintf(stderr, "Cannot find target for triple %s ", triple);
if (err_message) {
return target;
}
-static const char *ac_get_llvm_processor_name(enum radeon_family family)
+const char *ac_get_llvm_processor_name(enum radeon_family family)
{
switch (family) {
case CHIP_TAHITI:
return "polaris10";
case CHIP_POLARIS11:
case CHIP_POLARIS12:
+ case CHIP_VEGAM:
return "polaris11";
case CHIP_VEGA10:
- case CHIP_RAVEN:
return "gfx900";
+ case CHIP_RAVEN:
+ return "gfx902";
+ case CHIP_VEGA12:
+ return "gfx904";
+ case CHIP_VEGA20:
+ return "gfx906";
+ case CHIP_RAVEN2:
+ return HAVE_LLVM >= 0x0800 ? "gfx909" : "gfx902";
default:
return "";
}
}
-LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill)
+static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
+ enum ac_target_machine_options tm_options,
+ LLVMCodeGenOptLevel level,
+ const char **out_triple)
{
assert(family >= CHIP_TAHITI);
-
- const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
+ char features[256];
+ const char *triple = (tm_options & AC_TM_SUPPORTS_SPILL) ? "amdgcn-mesa-mesa3d" : "amdgcn--";
LLVMTargetRef target = ac_get_llvm_target(triple);
+
+ snprintf(features, sizeof(features),
+ "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s",
+ HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
+ tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
+ tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
+ tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
+ tm_options & AC_TM_PROMOTE_ALLOCA_TO_SCRATCH ? ",-promote-alloca" : "");
+
LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
target,
triple,
ac_get_llvm_processor_name(family),
- "+DumpCode,+vgpr-spilling,-fp32-denormals,-xnack",
- LLVMCodeGenLevelDefault,
+ features,
+ level,
LLVMRelocDefault,
LLVMCodeModelDefault);
+ if (out_triple)
+ *out_triple = triple;
+ if (tm_options & AC_TM_ENABLE_GLOBAL_ISEL)
+ ac_enable_global_isel(tm);
return tm;
}
-
-#if HAVE_LLVM < 0x0400
-static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
+static LLVMPassManagerRef ac_create_passmgr(LLVMTargetLibraryInfoRef target_library_info,
+ bool check_ir)
{
- switch (attr) {
- case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
- case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
- case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
- case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
- case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
- case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
- case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
- default:
- fprintf(stderr, "Unhandled function attribute: %x\n", attr);
- return 0;
- }
-}
+ LLVMPassManagerRef passmgr = LLVMCreatePassManager();
+ if (!passmgr)
+ return NULL;
+
+ if (target_library_info)
+ LLVMAddTargetLibraryInfo(target_library_info,
+ passmgr);
-#else
+ if (check_ir)
+ LLVMAddVerifierPass(passmgr);
+ LLVMAddAlwaysInlinerPass(passmgr);
+ /* Normally, the pass manager runs all passes on one function before
+ * moving onto another. Adding a barrier no-op pass forces the pass
+ * manager to run the inliner on all functions first, which makes sure
+ * that the following passes are only run on the remaining non-inline
+ * function, so it removes useless work done on dead inline functions.
+ */
+ ac_llvm_add_barrier_noop_pass(passmgr);
+ /* This pass should eliminate all the load and store instructions. */
+ LLVMAddPromoteMemoryToRegisterPass(passmgr);
+ LLVMAddScalarReplAggregatesPass(passmgr);
+ LLVMAddLICMPass(passmgr);
+ LLVMAddAggressiveDCEPass(passmgr);
+ LLVMAddCFGSimplificationPass(passmgr);
+ /* This is recommended by the instruction combining pass. */
+ LLVMAddEarlyCSEMemSSAPass(passmgr);
+ LLVMAddInstructionCombiningPass(passmgr);
+ return passmgr;
+}
static const char *attr_to_str(enum ac_func_attr attr)
{
switch (attr) {
case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
- case AC_FUNC_ATTR_BYVAL: return "byval";
case AC_FUNC_ATTR_INREG: return "inreg";
case AC_FUNC_ATTR_NOALIAS: return "noalias";
case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
}
}
-#endif
-
void
ac_add_function_attr(LLVMContextRef ctx, LLVMValueRef function,
int attr_idx, enum ac_func_attr attr)
{
-#if HAVE_LLVM < 0x0400
- LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
- if (attr_idx == -1) {
- LLVMAddFunctionAttr(function, llvm_attr);
- } else {
- LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
- }
-#else
const char *attr_name = attr_to_str(attr);
unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
strlen(attr_name));
LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
else
LLVMAddCallSiteAttribute(function, attr_idx, llvm_attr);
-#endif
}
void ac_add_func_attributes(LLVMContextRef ctx, LLVMValueRef function,
void
ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
- const char *name, int value)
+ const char *name, unsigned value)
{
char str[16];
- snprintf(str, sizeof(str), "%i", value);
+ snprintf(str, sizeof(str), "0x%x", value);
LLVMAddTargetDependentFunctionAttr(F, name, str);
}
+
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function)
+{
+ unsigned private_mem_vgprs = 0;
+
+ /* Process all LLVM instructions. */
+ LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
+ while (bb) {
+ LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+ while (next) {
+ LLVMValueRef inst = next;
+ next = LLVMGetNextInstruction(next);
+
+ if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+ continue;
+
+ LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
+ /* No idea why LLVM aligns allocas to 4 elements. */
+ unsigned alignment = LLVMGetAlignment(inst);
+ unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
+ private_mem_vgprs += dw_size;
+ }
+ bb = LLVMGetNextBasicBlock(bb);
+ }
+
+ return private_mem_vgprs;
+}
+
+bool
+ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
+ enum radeon_family family,
+ enum ac_target_machine_options tm_options)
+{
+ const char *triple;
+ memset(compiler, 0, sizeof(*compiler));
+
+ compiler->tm = ac_create_target_machine(family, tm_options,
+ LLVMCodeGenLevelDefault,
+ &triple);
+ if (!compiler->tm)
+ return false;
+
+ if (tm_options & AC_TM_CREATE_LOW_OPT) {
+ compiler->low_opt_tm =
+ ac_create_target_machine(family, tm_options,
+ LLVMCodeGenLevelLess, NULL);
+ if (!compiler->low_opt_tm)
+ goto fail;
+ }
+
+ compiler->target_library_info =
+ ac_create_target_library_info(triple);
+ if (!compiler->target_library_info)
+ goto fail;
+
+ compiler->passmgr = ac_create_passmgr(compiler->target_library_info,
+ tm_options & AC_TM_CHECK_IR);
+ if (!compiler->passmgr)
+ goto fail;
+
+ return true;
+fail:
+ ac_destroy_llvm_compiler(compiler);
+ return false;
+}
+
+void
+ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
+{
+ if (compiler->passmgr)
+ LLVMDisposePassManager(compiler->passmgr);
+ if (compiler->target_library_info)
+ ac_dispose_target_library_info(compiler->target_library_info);
+ if (compiler->low_opt_tm)
+ LLVMDisposeTargetMachine(compiler->low_opt_tm);
+ if (compiler->tm)
+ LLVMDisposeTargetMachine(compiler->tm);
+}