ac: initial Wave32 support in LLVM build helpers

[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
diff --git a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

index 90cc2e0d98196a387f3728559c0b33ee94ac022b..9a9f3d63cc6c6bf64c5d424749df29599934eab8 100644 (file)
--- a/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
+++ b/src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
@@ -1,5 +1,6 @@
  /*
   * Copyright 2016 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
   *
   * Permission is hereby granted, free of charge, to any person obtaining a
   * copy of this software and associated documentation files (the "Software"),
@@ -23,23 +24,8 @@
  
  #include "si_shader_internal.h"
  #include "si_pipe.h"
-
-#include "gallivm/lp_bld_const.h"
-#include "gallivm/lp_bld_gather.h"
-#include "gallivm/lp_bld_flow.h"
-#include "gallivm/lp_bld_init.h"
-#include "gallivm/lp_bld_intr.h"
-#include "gallivm/lp_bld_misc.h"
-#include "gallivm/lp_bld_swizzle.h"
-#include "tgsi/tgsi_info.h"
-#include "tgsi/tgsi_parse.h"
-#include "util/u_math.h"
+#include "ac_llvm_util.h"
  #include "util/u_memory.h"
-#include "util/u_debug.h"
-
-#include <stdio.h>
-#include <llvm-c/Transforms/IPO.h>
-#include <llvm-c/Transforms/Scalar.h>
  
  enum si_llvm_calling_convention {
         RADEON_LLVM_AMDGPU_VS = 87,
@@ -94,17 +80,16 @@ static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
   *
   * @returns 0 for success, 1 for failure
   */
-unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
-                        LLVMTargetMachineRef tm,
-                        struct pipe_debug_callback *debug)
+unsigned si_llvm_compile(LLVMModuleRef M, struct si_shader_binary *binary,
+                        struct ac_llvm_compiler *compiler,
+                        struct pipe_debug_callback *debug,
+                        bool less_optimized)
  {
+       struct ac_compiler_passes *passes =
+               less_optimized && compiler->low_opt_passes ?
+                       compiler->low_opt_passes : compiler->passes;
         struct si_llvm_diagnostics diag;
-       char *err;
         LLVMContextRef llvm_ctx;
-       LLVMMemoryBufferRef out_buffer;
-       unsigned buffer_size;
-       const char *buffer_data;
-       LLVMBool mem_err;
  
         diag.debug = debug;
         diag.retval = 0;
@@ -114,38 +99,25 @@ unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
  
         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
  
-       /* Compile IR*/
-       mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
-                                                                &out_buffer);
-
-       /* Process Errors/Warnings */
-       if (mem_err) {
-               fprintf(stderr, "%s: %s", __FUNCTION__, err);
-               pipe_debug_message(debug, SHADER_INFO,
-                                  "LLVM emit error: %s", err);
-               FREE(err);
+       /* Compile IR. */
+       if (!ac_compile_module_to_elf(passes, M, (char **)&binary->elf_buffer,
+                                     &binary->elf_size))
                 diag.retval = 1;
-               goto out;
-       }
  
-       /* Extract Shader Code*/
-       buffer_size = LLVMGetBufferSize(out_buffer);
-       buffer_data = LLVMGetBufferStart(out_buffer);
-
-       if (!ac_elf_read(buffer_data, buffer_size, binary)) {
-               fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
-               diag.retval = 1;
-       }
-
-       /* Clean up */
-       LLVMDisposeMemoryBuffer(out_buffer);
-
-out:
         if (diag.retval != 0)
                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
         return diag.retval;
  }
  
+void si_shader_binary_clean(struct si_shader_binary *binary)
+{
+       free((void *)binary->elf_buffer);
+       binary->elf_buffer = NULL;
+
+       free(binary->llvm_ir_string);
+       binary->llvm_ir_string = NULL;
+}
+
  LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
                           enum tgsi_opcode_type type)
  {
@@ -192,7 +164,7 @@ LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
         LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
         LLVMValueRef cc;
  
-       if (util_is_power_of_two(num)) {
+       if (util_is_power_of_two_or_zero(num)) {
                 index = LLVMBuildAnd(builder, index, c_max, "");
         } else {
                 /* In theory, this MAX pattern should result in code that is
@@ -290,7 +262,6 @@ get_pointer_into_array(struct si_shader_context *ctx,
  {
         unsigned array_id;
         struct tgsi_array_info *array;
-       LLVMBuilderRef builder = ctx->ac.builder;
         LLVMValueRef idxs[2];
         LLVMValueRef index;
         LLVMValueRef alloca;
@@ -328,15 +299,10 @@ get_pointer_into_array(struct si_shader_context *ctx,
          */
         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
  
-       index = LLVMBuildMul(
-               builder, index,
-               LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
-               "");
-       index = LLVMBuildAdd(
-               builder, index,
-               LLVMConstInt(ctx->i32,
-                            util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
-               "");
+       index = ac_build_imad(&ctx->ac, index,
+                             LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
+                             LLVMConstInt(ctx->i32,
+                                          util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0));
         idxs[0] = ctx->i32_0;
         idxs[1] = index;
         return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
@@ -349,18 +315,11 @@ si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
                          LLVMValueRef ptr2)
  {
         struct si_shader_context *ctx = si_shader_context(bld_base);
-       LLVMValueRef result;
-
-       result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
-
-       result = LLVMBuildInsertElement(ctx->ac.builder,
-                                       result,
-                                       ac_to_integer(&ctx->ac, ptr),
-                                       ctx->i32_0, "");
-       result = LLVMBuildInsertElement(ctx->ac.builder,
-                                       result,
-                                       ac_to_integer(&ctx->ac, ptr2),
-                                       ctx->i32_1, "");
+       LLVMValueRef values[2] = {
+               ac_to_integer(&ctx->ac, ptr),
+               ac_to_integer(&ctx->ac, ptr2),
+       };
+       LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
         return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
  }
  
@@ -368,18 +327,21 @@ static LLVMValueRef
  emit_array_fetch(struct lp_build_tgsi_context *bld_base,
                  unsigned File, enum tgsi_opcode_type type,
                  struct tgsi_declaration_range range,
-                unsigned swizzle)
+                unsigned swizzle_in)
  {
         struct si_shader_context *ctx = si_shader_context(bld_base);
         unsigned i, size = range.Last - range.First + 1;
         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
         LLVMValueRef result = LLVMGetUndef(vec);
-
+       unsigned swizzle = swizzle_in;
         struct tgsi_full_src_register tmp_reg = {};
         tmp_reg.Register.File = File;
+       if (tgsi_type_is_64bit(type))
+               swizzle |= (swizzle_in + 1) << 16;
  
         for (i = 0; i < size; ++i) {
                 tmp_reg.Register.Index = i + range.First;
+
                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
                 result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
                         LLVMConstInt(ctx->i32, i, 0), "array_vector");
@@ -496,19 +458,20 @@ get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
  LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                                 const struct tgsi_full_src_register *reg,
                                 enum tgsi_opcode_type type,
-                               unsigned swizzle)
+                               unsigned swizzle_in)
  {
         struct si_shader_context *ctx = si_shader_context(bld_base);
         LLVMBuilderRef builder = ctx->ac.builder;
         LLVMValueRef result = NULL, ptr, ptr2;
+       unsigned swizzle = swizzle_in & 0xffff;
  
-       if (swizzle == ~0) {
+       if (swizzle_in == ~0) {
                 LLVMValueRef values[TGSI_NUM_CHANNELS];
                 unsigned chan;
                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
                 }
-               return lp_build_gather_values(&ctx->gallivm, values,
+               return ac_build_gather_values(&ctx->ac, values,
                                               TGSI_NUM_CHANNELS);
         }
  
@@ -527,7 +490,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
                                                         ctx->i32_0);
                         result = LLVMConstInsertElement(result,
-                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
+                                                       ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
                                                         ctx->i32_1);
                         return LLVMConstBitCast(result, ctype);
                 } else {
@@ -554,7 +517,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
  
                 if (tgsi_type_is_64bit(type)) {
                         ptr = result;
-                       ptr2 = input[swizzle + 1];
+                       ptr2 = input[swizzle_in >> 16];
                         return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                         ptr, ptr2);
                 }
@@ -566,7 +529,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
                 if (tgsi_type_is_64bit(type)) {
-                       ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
+                       ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
                         return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                         LLVMBuildLoad(builder, ptr, ""),
                                                         LLVMBuildLoad(builder, ptr2, ""));
@@ -577,7 +540,7 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
         case TGSI_FILE_OUTPUT:
                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
                 if (tgsi_type_is_64bit(type)) {
-                       ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
+                       ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
                         return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                         LLVMBuildLoad(builder, ptr, ""),
                                                         LLVMBuildLoad(builder, ptr2, ""));
@@ -595,11 +558,12 @@ LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
  static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
                                        const struct tgsi_full_src_register *reg,
                                        enum tgsi_opcode_type type,
-                                      unsigned swizzle)
+                                      unsigned swizzle_in)
  {
         struct si_shader_context *ctx = si_shader_context(bld_base);
         LLVMBuilderRef builder = ctx->ac.builder;
         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
+       unsigned swizzle = swizzle_in & 0xffff;
  
         if (tgsi_type_is_64bit(type)) {
                 LLVMValueRef lo, hi;
@@ -609,7 +573,7 @@ static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
                 lo = LLVMBuildExtractElement(
                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
                 hi = LLVMBuildExtractElement(
-                       builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
+                       builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");
  
                 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
                                                 lo, hi);
@@ -638,9 +602,8 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
                         unsigned chan;
                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-                                ctx->addrs[idx][chan] = lp_build_alloca_undef(
-                                       &ctx->gallivm,
-                                       ctx->i32, "");
+                                ctx->addrs[idx][chan] = ac_build_alloca_undef(
+                                       &ctx->ac, ctx->i32, "");
                         }
                 }
                 break;
@@ -685,7 +648,7 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                          */
                         if (array_size > 16 ||
                             !ctx->screen->llvm_has_working_vgpr_indexing) {
-                               array_alloca = lp_build_alloca_undef(&ctx->gallivm,
+                               array_alloca = ac_build_alloca_undef(&ctx->ac,
                                         LLVMArrayType(ctx->f32,
                                                       array_size), "array");
                                 ctx->temp_array_allocas[id] = array_alloca;
@@ -698,12 +661,12 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                 }
                 if (!array_alloca) {
                         for (i = 0; i < decl_size; ++i) {
-#ifdef DEBUG
+#ifndef NDEBUG
                                 snprintf(name, sizeof(name), "TEMP%d.%c",
                                          first + i / 4, "xyzw"[i % 4]);
  #endif
                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
-                                       lp_build_alloca_undef(&ctx->gallivm,
+                                       ac_build_alloca_undef(&ctx->ac,
                                                               ctx->f32,
                                                               name);
                         }
@@ -721,15 +684,14 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                                  * a shader ever reads from a channel that
                                  * it never writes to.
                                  */
-                               ctx->undef_alloca = lp_build_alloca_undef(
-                                       &ctx->gallivm,
-                                       ctx->f32, "undef");
+                               ctx->undef_alloca = ac_build_alloca_undef(
+                                       &ctx->ac, ctx->f32, "undef");
                         }
  
                         for (i = 0; i < decl_size; ++i) {
                                 LLVMValueRef ptr;
                                 if (writemask & (1 << (i % 4))) {
-#ifdef DEBUG
+#ifndef NDEBUG
                                         snprintf(name, sizeof(name), "TEMP%d.%c",
                                                  first + i / 4, "xyzw"[i % 4]);
  #endif
@@ -783,13 +745,12 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                         if (ctx->outputs[idx][0])
                                 continue;
                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
-#ifdef DEBUG
+#ifndef NDEBUG
                                 snprintf(name, sizeof(name), "OUT%d.%c",
                                          idx, "xyzw"[chan % 4]);
  #endif
-                               ctx->outputs[idx][chan] = lp_build_alloca_undef(
-                                       &ctx->gallivm,
-                                       ctx->f32, name);
+                               ctx->outputs[idx][chan] = ac_build_alloca_undef(
+                                       &ctx->ac, ctx->f32, name);
                         }
                 }
                 break;
@@ -988,7 +949,7 @@ static void emit_immediate(struct lp_build_tgsi_context *bld_base,
  
  void si_llvm_context_init(struct si_shader_context *ctx,
                           struct si_screen *sscreen,
-                         LLVMTargetMachineRef tm)
+                         struct ac_llvm_compiler *compiler)
  {
         struct lp_type type;
  
@@ -999,31 +960,20 @@ void si_llvm_context_init(struct si_shader_context *ctx,
          */
         memset(ctx, 0, sizeof(*ctx));
         ctx->screen = sscreen;
-       ctx->tm = tm;
-
-       ctx->gallivm.context = LLVMContextCreate();
-       ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
-                                               ctx->gallivm.context);
-       LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
+       ctx->compiler = compiler;
  
-       LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
-       char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
-       LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
-       LLVMDisposeTargetData(data_layout);
-       LLVMDisposeMessage(data_layout_str);
+       ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family, 64);
+       ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);
  
-       bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
         enum ac_float_mode float_mode =
-               unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
-                               AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
+               sscreen->debug_flags & DBG(UNSAFE_MATH) ?
+                       AC_FLOAT_MODE_UNSAFE_FP_MATH :
+                       AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
+       ctx->ac.builder = ac_create_builder(ctx->ac.context, float_mode);
  
-       ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
-                                                float_mode);
-
-       ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
-                            sscreen->info.chip_class, sscreen->info.family);
-       ctx->ac.module = ctx->gallivm.module;
-       ctx->ac.builder = ctx->gallivm.builder;
+       ctx->gallivm.context = ctx->ac.context;
+       ctx->gallivm.module = ctx->ac.module;
+       ctx->gallivm.builder = ctx->ac.builder;
  
         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
  
@@ -1073,6 +1023,8 @@ void si_llvm_context_init(struct si_shader_context *ctx,
  
         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
+       ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
+       ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
  }
  
  /* Set the context to a certain TGSI shader. Can be called repeatedly
@@ -1153,7 +1105,7 @@ void si_llvm_create_func(struct si_shader_context *ctx,
         LLVMTypeRef main_fn_type, ret_type;
         LLVMBasicBlockRef main_fn_body;
         enum si_llvm_calling_convention call_conv;
-       unsigned real_shader_type;
+       enum pipe_shader_type real_shader_type;
  
         if (num_return_elems)
                 ret_type = LLVMStructTypeInContext(ctx->ac.context,
@@ -1176,7 +1128,7 @@ void si_llvm_create_func(struct si_shader_context *ctx,
         if (ctx->screen->info.chip_class >= GFX9) {
                 if (ctx->shader->key.as_ls)
                         real_shader_type = PIPE_SHADER_TESS_CTRL;
-               else if (ctx->shader->key.as_es)
+               else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
                         real_shader_type = PIPE_SHADER_GEOMETRY;
         }
  
@@ -1186,8 +1138,7 @@ void si_llvm_create_func(struct si_shader_context *ctx,
                 call_conv = RADEON_LLVM_AMDGPU_VS;
                 break;
         case PIPE_SHADER_TESS_CTRL:
-               call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
-                                                 RADEON_LLVM_AMDGPU_VS;
+               call_conv = RADEON_LLVM_AMDGPU_HS;
                 break;
         case PIPE_SHADER_GEOMETRY:
                 call_conv = RADEON_LLVM_AMDGPU_GS;
@@ -1207,44 +1158,14 @@ void si_llvm_create_func(struct si_shader_context *ctx,
  
  void si_llvm_optimize_module(struct si_shader_context *ctx)
  {
-       struct gallivm_state *gallivm = &ctx->gallivm;
-       const char *triple = LLVMGetTarget(gallivm->module);
-       LLVMTargetLibraryInfoRef target_library_info;
-
         /* Dump LLVM IR before any optimization passes */
         if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
             si_can_dump_shader(ctx->screen, ctx->type))
                 LLVMDumpModule(ctx->gallivm.module);
  
-       /* Create the pass manager */
-       gallivm->passmgr = LLVMCreatePassManager();
-
-       target_library_info = gallivm_create_target_library_info(triple);
-       LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
-
-       if (si_extra_shader_checks(ctx->screen, ctx->type))
-               LLVMAddVerifierPass(gallivm->passmgr);
-
-       LLVMAddAlwaysInlinerPass(gallivm->passmgr);
-
-       /* This pass should eliminate all the load and store instructions */
-       LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
-
-       /* Add some optimization passes */
-       LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
-       LLVMAddLICMPass(gallivm->passmgr);
-       LLVMAddAggressiveDCEPass(gallivm->passmgr);
-       LLVMAddCFGSimplificationPass(gallivm->passmgr);
-       /* This is recommended by the instruction combining pass. */
-       LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
-       LLVMAddInstructionCombiningPass(gallivm->passmgr);
-
         /* Run the pass */
-       LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
-
+       LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
         LLVMDisposeBuilder(ctx->ac.builder);
-       LLVMDisposePassManager(gallivm->passmgr);
-       gallivm_dispose_target_library_info(target_library_info);
  }
  
  void si_llvm_dispose(struct si_shader_context *ctx)