ac,radeonsi: reduce optimizations for complex compute shaders on older APUs (v2)

author Marek Olšák <marek.olsak@amd.com>

Fri, 20 Jul 2018 02:55:49 +0000 (22:55 -0400)

committer Marek Olšák <marek.olsak@amd.com>

Wed, 1 Aug 2018 19:25:18 +0000 (15:25 -0400)
author Marek Olšák <marek.olsak@amd.com>
Fri, 20 Jul 2018 02:55:49 +0000 (22:55 -0400)
committer Marek Olšák <marek.olsak@amd.com>
Wed, 1 Aug 2018 19:25:18 +0000 (15:25 -0400)
diff --git a/src/amd/common/ac_llvm_util.c b/src/amd/common/ac_llvm_util.c

index 678bc34e6f8ff0aedffd73bbc2df24d1bf51a2f7..10e1ca99d41f2b13219ef589097aa0ca09c79cb2 100644 (file)
--- a/src/amd/common/ac_llvm_util.c
+++ b/src/amd/common/ac_llvm_util.c
@@ -142,6 +142,7 @@ const char *ac_get_llvm_processor_name(enum radeon_family family)
  
  static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
                                                      enum ac_target_machine_options tm_options,
+                                                    LLVMCodeGenOptLevel level,
                                                      const char **out_triple)
  {
         assert(family >= CHIP_TAHITI);
@@ -163,7 +164,7 @@ static LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family,
                                      triple,
                                      ac_get_llvm_processor_name(family),
                                      features,
-                                    LLVMCodeGenLevelDefault,
+                                    level,
                                      LLVMRelocDefault,
                                      LLVMCodeModelDefault);
  
@@ -308,11 +309,20 @@ ac_init_llvm_compiler(struct ac_llvm_compiler *compiler,
         const char *triple;
         memset(compiler, 0, sizeof(*compiler));
  
-       compiler->tm = ac_create_target_machine(family,
-                                           tm_options, &triple);
+       compiler->tm = ac_create_target_machine(family, tm_options,
+                                               LLVMCodeGenLevelDefault,
+                                               &triple);
         if (!compiler->tm)
                 return false;
  
+       if (tm_options & AC_TM_CREATE_LOW_OPT) {
+               compiler->low_opt_tm =
+                       ac_create_target_machine(family, tm_options,
+                                                LLVMCodeGenLevelLess, NULL);
+               if (!compiler->low_opt_tm)
+                       goto fail;
+       }
+
         if (okay_to_leak_target_library_info || (HAVE_LLVM >= 0x0700)) {
                 compiler->target_library_info =
                         ac_create_target_library_info(triple);
@@ -341,6 +351,8 @@ ac_destroy_llvm_compiler(struct ac_llvm_compiler *compiler)
         if (compiler->target_library_info)
                 ac_dispose_target_library_info(compiler->target_library_info);
  #endif
+       if (compiler->low_opt_tm)
+               LLVMDisposeTargetMachine(compiler->low_opt_tm);
         if (compiler->tm)
                 LLVMDisposeTargetMachine(compiler->tm);
  }
diff --git a/src/amd/common/ac_llvm_util.h b/src/amd/common/ac_llvm_util.h

index d4dea4dfde6b19465b618dd17c06d93520d45490..eaf5f21876b99b610765566fff28b4f4500713db 100644 (file)
--- a/src/amd/common/ac_llvm_util.h
+++ b/src/amd/common/ac_llvm_util.h
@@ -64,6 +64,7 @@ enum ac_target_machine_options {
         AC_TM_PROMOTE_ALLOCA_TO_SCRATCH = (1 << 4),
         AC_TM_CHECK_IR = (1 << 5),
         AC_TM_ENABLE_GLOBAL_ISEL = (1 << 6),
+       AC_TM_CREATE_LOW_OPT = (1 << 7),
  };
  
  enum ac_float_mode {
@@ -74,10 +75,18 @@ enum ac_float_mode {
  
  /* Per-thread persistent LLVM objects. */
  struct ac_llvm_compiler {
-       LLVMTargetMachineRef            tm;
         LLVMTargetLibraryInfoRef        target_library_info;
         LLVMPassManagerRef              passmgr;
+
+       /* Default compiler. */
+       LLVMTargetMachineRef            tm;
         struct ac_compiler_passes       *passes;
+
+       /* Optional compiler for faster compilation with fewer optimizations.
+        * LLVM modules can be created with "tm" too. There is no difference.
+        */
+       LLVMTargetMachineRef            low_opt_tm; /* uses -O1 instead of -O2 */
+       struct ac_compiler_passes       *low_opt_passes;
  };
  
  const char *ac_get_llvm_processor_name(enum radeon_family family);
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c b/src/gallium/drivers/radeonsi/si_pipe.c

index 9e3a579d7439900c37268845a771b68ea68a4b92..cc05d2f8de3b25a4e54f6a6af8038377be94401a 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -108,22 +108,32 @@ static const struct debug_named_value debug_options[] = {
  static void si_init_compiler(struct si_screen *sscreen,
                              struct ac_llvm_compiler *compiler)
  {
+       /* Only create the less-optimizing version of the compiler on APUs
+        * predating Ryzen (Raven). */
+       bool create_low_opt_compiler = !sscreen->info.has_dedicated_vram &&
+                                      sscreen->info.chip_class <= VI;
+
         enum ac_target_machine_options tm_options =
                 (sscreen->debug_flags & DBG(SI_SCHED) ? AC_TM_SISCHED : 0) |
                 (sscreen->debug_flags & DBG(GISEL) ? AC_TM_ENABLE_GLOBAL_ISEL : 0) |
                 (sscreen->info.chip_class >= GFX9 ? AC_TM_FORCE_ENABLE_XNACK : 0) |
                 (sscreen->info.chip_class < GFX9 ? AC_TM_FORCE_DISABLE_XNACK : 0) |
                 (!sscreen->llvm_has_working_vgpr_indexing ? AC_TM_PROMOTE_ALLOCA_TO_SCRATCH : 0) |
-               (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0);
+               (sscreen->debug_flags & DBG(CHECK_IR) ? AC_TM_CHECK_IR : 0) |
+               (create_low_opt_compiler ? AC_TM_CREATE_LOW_OPT : 0);
  
         ac_init_llvm_once();
         ac_init_llvm_compiler(compiler, true, sscreen->info.family, tm_options);
         compiler->passes = ac_create_llvm_passes(compiler->tm);
+
+       if (compiler->low_opt_tm)
+               compiler->low_opt_passes = ac_create_llvm_passes(compiler->low_opt_tm);
  }
  
  static void si_destroy_compiler(struct ac_llvm_compiler *compiler)
  {
         ac_destroy_llvm_passes(compiler->passes);
+       ac_destroy_llvm_passes(compiler->low_opt_passes);
         ac_destroy_llvm_compiler(compiler);
  }
  
diff --git a/src/gallium/drivers/radeonsi/si_shader.c b/src/gallium/drivers/radeonsi/si_shader.c

index 43ba23ff49483aa89bad7a00bd10d5580f875a97..405833d3ba73e0afa2d4d2d6026044b2374460e7 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -5645,7 +5645,8 @@ static int si_compile_llvm(struct si_screen *sscreen,
                            LLVMModuleRef mod,
                            struct pipe_debug_callback *debug,
                            unsigned processor,
-                          const char *name)
+                          const char *name,
+                          bool less_optimized)
  {
         int r = 0;
         unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
@@ -5667,7 +5668,8 @@ static int si_compile_llvm(struct si_screen *sscreen,
         }
  
         if (!si_replace_shader(count, binary)) {
-               r = si_llvm_compile(mod, binary, compiler, debug);
+               r = si_llvm_compile(mod, binary, compiler, debug,
+                                   less_optimized);
                 if (r)
                         return r;
         }
@@ -5884,7 +5886,7 @@ si_generate_gs_copy_shader(struct si_screen *sscreen,
                             &ctx.shader->config, ctx.compiler,
                             ctx.ac.module,
                             debug, PIPE_SHADER_GEOMETRY,
-                           "GS Copy Shader");
+                           "GS Copy Shader", false);
         if (!r) {
                 if (si_can_dump_shader(sscreen, PIPE_SHADER_GEOMETRY))
                         fprintf(stderr, "GS Copy Shader:\n");
@@ -6790,6 +6792,22 @@ static void si_build_wrapper_function(struct si_shader_context *ctx,
         LLVMBuildRetVoid(builder);
  }
  
+static bool si_should_optimize_less(struct ac_llvm_compiler *compiler,
+                                   struct si_shader_selector *sel)
+{
+       if (!compiler->low_opt_passes)
+               return false;
+
+       /* low_opt_passes only exists on APUs predating Raven; assume a slow CPU. */
+       assert(!sel->screen->info.has_dedicated_vram &&
+              sel->screen->info.chip_class <= VI);
+
+       /* Only huge compute shaders qualify: the threshold targets a crazy dEQP
+        * test containing 2597 memory opcodes, mostly buffer stores. */
+       return sel->type == PIPE_SHADER_COMPUTE &&
+              sel->info.num_memory_instructions > 1000;
+}
+
  int si_compile_tgsi_shader(struct si_screen *sscreen,
                            struct ac_llvm_compiler *compiler,
                            struct si_shader *shader,
@@ -7022,7 +7040,8 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
  
         /* Compile to bytecode. */
         r = si_compile_llvm(sscreen, &shader->binary, &shader->config, compiler,
-                           ctx.ac.module, debug, ctx.type, "TGSI shader");
+                           ctx.ac.module, debug, ctx.type, "TGSI shader",
+                           si_should_optimize_less(compiler, shader->selector));
         si_llvm_dispose(&ctx);
         if (r) {
                 fprintf(stderr, "LLVM failed to compile shader\n");
@@ -7189,7 +7208,7 @@ si_get_shader_part(struct si_screen *sscreen,
         si_llvm_optimize_module(&ctx);
  
         if (si_compile_llvm(sscreen, &result->binary, &result->config, compiler,
-                           ctx.ac.module, debug, ctx.type, name)) {
+                           ctx.ac.module, debug, ctx.type, name, false)) {
                 FREE(result);
                 result = NULL;
                 goto out;
diff --git a/src/gallium/drivers/radeonsi/si_shader_internal.h b/src/gallium/drivers/radeonsi/si_shader_internal.h

index 21e325c2d82666390c6b8bf5b1edf2cbb1bf9934..36351391d95b676ac9cff71a9a15c57f77b16a36 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_internal.h
+++ b/src/gallium/drivers/radeonsi/si_shader_internal.h
@@ -217,7 +217,8 @@ si_shader_context_from_abi(struct ac_shader_abi *abi)
  
  unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
                          struct ac_llvm_compiler *compiler,
-                        struct pipe_debug_callback *debug);
+                        struct pipe_debug_callback *debug,
+                        bool less_optimized);
  
  LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
                           enum tgsi_opcode_type type);
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

index b486be257495d862763c63230086ef784866fdb4..b9ed0fc3ab00b9493835c2956f8dedde8a750c92 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -82,8 +82,12 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
   */
  unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
                          struct ac_llvm_compiler *compiler,
-                        struct pipe_debug_callback *debug)
+                        struct pipe_debug_callback *debug,
+                        bool less_optimized)
  {
+       struct ac_compiler_passes *passes =
+               less_optimized && compiler->low_opt_passes ?
+                       compiler->low_opt_passes : compiler->passes;
         struct si_llvm_diagnostics diag;
         LLVMContextRef llvm_ctx;
  
@@ -96,7 +100,7 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
  
         /* Compile IR. */
-       if (!ac_compile_module_to_binary(compiler->passes, M, binary))
+       if (!ac_compile_module_to_binary(passes, M, binary))
                 diag.retval = 1;
  
         if (diag.retval != 0)
author	Marek Olšák <marek.olsak@amd.com>
	Fri, 20 Jul 2018 02:55:49 +0000 (22:55 -0400)
committer	Marek Olšák <marek.olsak@amd.com>
	Wed, 1 Aug 2018 19:25:18 +0000 (15:25 -0400)
src/amd/common/ac_llvm_util.c		patch | blob | history
src/amd/common/ac_llvm_util.h		patch | blob | history
src/gallium/drivers/radeonsi/si_pipe.c		patch | blob | history
src/gallium/drivers/radeonsi/si_shader.c		patch | blob | history
src/gallium/drivers/radeonsi/si_shader_internal.h		patch | blob | history
src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c		patch | blob | history