ac: add Wave32 LLVM target machine
author: Marek Olšák <marek.olsak@amd.com>
Fri, 12 Jul 2019 21:14:11 +0000 (17:14 -0400)
committer: Marek Olšák <marek.olsak@amd.com>
Sat, 20 Jul 2019 00:16:19 +0000 (20:16 -0400)
Reviewed-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
src/amd/common/ac_llvm_util.c
src/amd/common/ac_llvm_util.h

diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c
index 1cf51b79c765c662374c76fdc4a4e943db54f13a..e4a353a4967f862ca74467440eaf3a187d366699 100644
@@ -157,7 +157,8 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
        snprintf(features, sizeof(features),
                 "+DumpCode,-fp32-denormals,+fp64-denormals%s%s%s%s%s%s%s",
                 HAVE_LLVM >= 0x0800 ? "" : ",+vgpr-spilling",
-                family >= CHIP_NAVI10 ? ",+wavefrontsize64,-wavefrontsize32" : "",
+                family >= CHIP_NAVI10 && !(tm_options & AC_TM_WAVE32) ?
+                        ",+wavefrontsize64,-wavefrontsize32" : "",
                 tm_options & AC_TM_SISCHED ? ",+si-scheduler" : "",
                 tm_options & AC_TM_FORCE_ENABLE_XNACK ? ",+xnack" : "",
                 tm_options & AC_TM_FORCE_DISABLE_XNACK ? ",-xnack" : "",
@@ -337,6 +338,16 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
                        goto fail;
        }
 
+       if (family >= CHIP_NAVI10) {
+               assert(!(tm_options & AC_TM_CREATE_LOW_OPT));
+               compiler->tm_wave32 = ac_create_target_machine(family,
+                                                              tm_options | AC_TM_WAVE32,
+                                                              LLVMCodeGenLevelDefault,
+                                                              NULL);
+               if (!compiler->tm_wave32)
+                       goto fail;
+       }
+
        compiler->target_library_info =
                ac_create_target_library_info(triple);
        if (!compiler->target_library_info)
@@ -357,6 +368,7 @@ void
 ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
 {
        ac_destroy_llvm_passes(compiler->passes);
+       ac_destroy_llvm_passes(compiler->passes_wave32);
        ac_destroy_llvm_passes(compiler->low_opt_passes);
 
        if (compiler->passmgr)
@@ -367,4 +379,6 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
                LLVMDisposeTargetMachine(compiler->low_opt_tm);
        if (compiler->tm)
                LLVMDisposeTargetMachine(compiler->tm);
+       if (compiler->tm_wave32)
+               LLVMDisposeTargetMachine(compiler->tm_wave32);
 }
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h
index 0c900885de5ce827397a47c67b82a6d4fd7a80ac..8209b6dcabcecb8081089f6bc3c1b3b0b29977d6 100644
@@ -65,6 +65,7 @@ enum ac_target_machine_options {
        AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
        AC_TM_CREATE_LOW_OPT = (1 << 7),
        AC_TM_NO_LOAD_STORE_OPT = (1 << 8),
+       AC_TM_WAVE32 = (1 << 9),
 };
 
 enum ac_float_mode {
@@ -82,6 +83,10 @@ struct ac_llvm_compiler {
        LLVMTargetMachineRef            tm;
        struct ac_compiler_passes       *passes;
 
+       /* Wave32 compiler for GFX10. */
+       LLVMTargetMachineRef            tm_wave32;
+       struct ac_compiler_passes       *passes_wave32;
+
        /* Optional compiler for faster compilation with fewer optimizations.
         * LLVM modules can be created with "tm" too. There is no difference.
         */