snprintf(features, sizeof(features),
"+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s%s",
HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
- family >= CHIP_NAVI10 ? ",+wavefrontsize64,-wavefrontsize32" : "",
+ family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
+ ",+wavefrontsize64,-wavefrontsize32" : "",
tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
goto fail;
}
+ if (family >= CHIP_NAVI10) {
+ assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
+ compiler->tm_wave32 = ac_create_target_machine(family,
+ tm_options | AC_TM_WAVE32,
+ LLVMCodeGenLevelDefault,
+ NULL);
+ if (!compiler->tm_wave32)
+ goto fail;
+ }
+
compiler->target_library_info =
ac_create_target_library_info(triple);
if (!compiler->target_library_info)
ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
{
ac_destroy_llvm_passes(compiler->passes);
+ ac_destroy_llvm_passes(compiler->passes_wave32);
ac_destroy_llvm_passes(compiler->low_opt_passes);
if (compiler->passmgr)
LLVMDisposeTargetMachine(compiler->low_opt_tm);
if (compiler->tm)
LLVMDisposeTargetMachine(compiler->tm);
+ if (compiler->tm_wave32)
+ LLVMDisposeTargetMachine(compiler->tm_wave32);
}
AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
AC_TM_CREATE_LOW_OPT = (1 << 7),
AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
+ AC_TM_WAVE32 = (1 << 9),
};
enum ac_float_mode {
LLVMTargetMachineRef tm;
struct ac_compiler_passes *passes;
+ /* Wave32 compiler for GFX10. */
+ LLVMTargetMachineRef tm_wave32;
+ struct ac_compiler_passes *passes_wave32;
+
/* Optional compiler for faster compilation with fewer optimizations.
* LLVM modules can be created with "tm" too. There is no difference.
*/