ac: add ac_count_scratch_private_memory()
author: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Thu, 1 Mar 2018 21:12:54 +0000 (22:12 +0100)
committer: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Tue, 6 Mar 2018 09:38:38 +0000 (10:38 +0100)
Imported from RadeonSI.

Signed-off-by: Samuel Pitoiset <samuel.pitoiset@gmail.com>
Reviewed-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_llvm_util.c
src/amd/common/ac_llvm_util.h
src/gallium/drivers/radeonsi/si_shader.c

index b88c4e4979f51fbc32d7e93d7fac570dd9015f85..3530bf088bef9fd28fcf2fa2be383294d7a79808 100644 (file)
  */
 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
 #include "ac_llvm_util.h"
+#include "ac_llvm_build.h"
 #include "util/bitscan.h"
 #include <llvm-c/Core.h>
 #include <llvm-c/Support.h>
 #include "c11/threads.h"
+#include "util/u_math.h"
 
 #include <assert.h>
 #include <stdio.h>
@@ -207,3 +209,32 @@ ac_llvm_add_target_dep_function_attr(LLVMValueRef F,
        snprintf(str, sizeof(str), "%i", value);
        LLVMAddTargetDependentFunctionAttr(F, name, str);
 }
+
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function)
+{
+       unsigned private_mem_vgprs = 0; /* running total; this is the return value */
+
+       /* Process all LLVM instructions of every basic block, summing a size for each alloca. */
+       LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(function);
+       while (bb) {
+               LLVMValueRef next = LLVMGetFirstInstruction(bb);
+
+               while (next) {
+                       LLVMValueRef inst = next;
+                       next = LLVMGetNextInstruction(next); /* advance first so "continue" is safe */
+
+                       if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
+                               continue; /* only alloca instructions contribute */
+
+                       LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst)); /* pointee of the alloca's result type */
+                       /* No idea why LLVM aligns allocas to 4 elements. */
+                       unsigned alignment = LLVMGetAlignment(inst);
+                       unsigned dw_size = align(ac_get_type_size(type) / 4, alignment); /* NOTE(review): size is divided by 4, alignment is used as returned -- confirm units match */
+                       private_mem_vgprs += dw_size;
+               }
+               bb = LLVMGetNextBasicBlock(bb);
+       }
+
+       return private_mem_vgprs;
+}
index 3cf385a33edeb5606155c6f8e9081d30ddbb6f08..5329bb1b702f9f6c35f102800118ab901202c0c0 100644 (file)
@@ -105,6 +105,9 @@ ac_get_store_intr_attribs(bool writeonly_memory)
                                  AC_FUNC_ATTR_WRITEONLY;
 }
 
+unsigned
+ac_count_scratch_private_memory(LLVMValueRef function); /* sum over all allocas in function of align(type size / 4, alloca alignment) */
+
 #ifdef __cplusplus
 }
 #endif
index 2ae2544e3f71aa549fc1a621e8ac42bce649bd66..2e57eca6e54157b461121a4a14c1d0ddc4c62b7e 100644 (file)
@@ -5981,32 +5981,6 @@ static void si_optimize_vs_outputs(struct si_shader_context *ctx)
                               &shader->info.nr_param_exports);
 }
 
-static void si_count_scratch_private_memory(struct si_shader_context *ctx)
-{
-       ctx->shader->config.private_mem_vgprs = 0;
-
-       /* Process all LLVM instructions. */
-       LLVMBasicBlockRef bb = LLVMGetFirstBasicBlock(ctx->main_fn);
-       while (bb) {
-               LLVMValueRef next = LLVMGetFirstInstruction(bb);
-
-               while (next) {
-                       LLVMValueRef inst = next;
-                       next = LLVMGetNextInstruction(next);
-
-                       if (LLVMGetInstructionOpcode(inst) != LLVMAlloca)
-                               continue;
-
-                       LLVMTypeRef type = LLVMGetElementType(LLVMTypeOf(inst));
-                       /* No idea why LLVM aligns allocas to 4 elements. */
-                       unsigned alignment = LLVMGetAlignment(inst);
-                       unsigned dw_size = align(ac_get_type_size(type) / 4, alignment);
-                       ctx->shader->config.private_mem_vgprs += dw_size;
-               }
-               bb = LLVMGetNextBasicBlock(bb);
-       }
-}
-
 static void si_init_exec_from_input(struct si_shader_context *ctx,
                                    unsigned param, unsigned bitoffset)
 {
@@ -6953,8 +6927,10 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
        si_optimize_vs_outputs(&ctx);
 
        if ((debug && debug->debug_message) ||
-           si_can_dump_shader(sscreen, ctx.type))
-               si_count_scratch_private_memory(&ctx);
+           si_can_dump_shader(sscreen, ctx.type)) {
+               ctx.shader->config.private_mem_vgprs =
+                       ac_count_scratch_private_memory(ctx.main_fn);
+       }
 
        /* Compile to bytecode. */
        r = si_compile_llvm(sscreen, &shader->binary, &shader->config, tm,