src/gallium/drivers/radeonsi/si_shader_llvm.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * on the rights to use, copy, modify, merge, publish, distribute, sub
   9  * license, and/or sell copies of the Software, and to permit persons to whom
  10  * the Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 #include "si_shader_internal.h"
  26 #include "si_pipe.h"
  27 #include "ac_rtld.h"
  28 #include "sid.h"
  29
  30 #include "tgsi/tgsi_from_mesa.h"
  31 #include "util/u_memory.h"
  32
  33 struct si_llvm_diagnostics {
  34         struct pipe_debug_callback *debug;
  35         unsigned retval;
  36 };
  37
  38 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
  39 {
  40         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
  41         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
  42         const char *severity_str = NULL;
  43
  44         switch (severity) {
  45         case LLVMDSError:
  46                 severity_str = "error";
  47                 break;
  48         case LLVMDSWarning:
  49                 severity_str = "warning";
  50                 break;
  51         case LLVMDSRemark:
  52         case LLVMDSNote:
  53         default:
  54                 return;
  55         }
  56
  57         char *description = LLVMGetDiagInfoDescription(di);
  58
  59         pipe_debug_message(diag->debug, SHADER_INFO,
  60                            "LLVM diagnostic (%s): %s", severity_str, description);
  61
  62         if (severity == LLVMDSError) {
  63                 diag->retval = 1;
  64                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
  65         }
  66
  67         LLVMDisposeMessage(description);
  68 }
  69
  70 int si_compile_llvm(struct si_screen *sscreen,
  71                     struct si_shader_binary *binary,
  72                     struct ac_shader_config *conf,
  73                     struct ac_llvm_compiler *compiler,
  74                     struct ac_llvm_context *ac,
  75                     struct pipe_debug_callback *debug,
  76                     enum pipe_shader_type shader_type,
  77                     const char *name,
  78                     bool less_optimized)
  79 {
  80         unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
  81
  82         if (si_can_dump_shader(sscreen, shader_type)) {
  83                 fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
  84
  85                 if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
  86                         fprintf(stderr, "%s LLVM IR:\n\n", name);
  87                         ac_dump_module(ac->module);
  88                         fprintf(stderr, "\n");
  89                 }
  90         }
  91
  92         if (sscreen->record_llvm_ir) {
  93                 char *ir = LLVMPrintModuleToString(ac->module);
  94                 binary->llvm_ir_string = strdup(ir);
  95                 LLVMDisposeMessage(ir);
  96         }
  97
  98         if (!si_replace_shader(count, binary)) {
  99                 struct ac_compiler_passes *passes = compiler->passes;
 100
 101                 if (ac->wave_size == 32)
 102                         passes = compiler->passes_wave32;
 103                 else if (less_optimized && compiler->low_opt_passes)
 104                         passes = compiler->low_opt_passes;
 105
 106                 struct si_llvm_diagnostics diag = {debug};
 107                 LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag);
 108
 109                 if (!ac_compile_module_to_elf(passes, ac->module,
 110                                               (char **)&binary->elf_buffer,
 111                                               &binary->elf_size))
 112                         diag.retval = 1;
 113
 114                 if (diag.retval != 0) {
 115                         pipe_debug_message(debug, SHADER_INFO, "LLVM compilation failed");
 116                         return diag.retval;
 117                 }
 118         }
 119
 120         struct ac_rtld_binary rtld;
 121         if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
 122                         .info = &sscreen->info,
 123                         .shader_type = tgsi_processor_to_shader_stage(shader_type),
 124                         .wave_size = ac->wave_size,
 125                         .num_parts = 1,
 126                         .elf_ptrs = &binary->elf_buffer,
 127                         .elf_sizes = &binary->elf_size }))
 128                 return -1;
 129
 130         bool ok = ac_rtld_read_config(&rtld, conf);
 131         ac_rtld_close(&rtld);
 132         if (!ok)
 133                 return -1;
 134
 135         /* Enable 64-bit and 16-bit denormals, because there is no performance
 136          * cost.
 137          *
 138          * If denormals are enabled, all floating-point output modifiers are
 139          * ignored.
 140          *
 141          * Don't enable denormals for 32-bit floats, because:
 142          * - Floating-point output modifiers would be ignored by the hw.
 143          * - Some opcodes don't support denormals, such as v_mad_f32. We would
 144          *   have to stop using those.
 145          * - GFX6 & GFX7 would be very slow.
 146          */
 147         conf->float_mode |= V_00B028_FP_64_DENORMS;
 148
 149         return 0;
 150 }
 151
 152 void si_llvm_context_init(struct si_shader_context *ctx,
 153                           struct si_screen *sscreen,
 154                           struct ac_llvm_compiler *compiler,
 155                           unsigned wave_size)
 156 {
 157         memset(ctx, 0, sizeof(*ctx));
 158         ctx->screen = sscreen;
 159         ctx->compiler = compiler;
 160
 161         ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
 162                              sscreen->info.family,
 163                              AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
 164                              wave_size, 64);
 165 }
 166
 167 void si_llvm_create_func(struct si_shader_context *ctx, const char *name,
 168                          LLVMTypeRef *return_types, unsigned num_return_elems,
 169                          unsigned max_workgroup_size)
 170 {
 171         LLVMTypeRef ret_type;
 172         enum ac_llvm_calling_convention call_conv;
 173         enum pipe_shader_type real_shader_type;
 174
 175         if (num_return_elems)
 176                 ret_type = LLVMStructTypeInContext(ctx->ac.context,
 177                                                    return_types,
 178                                                    num_return_elems, true);
 179         else
 180                 ret_type = ctx->ac.voidt;
 181
 182         real_shader_type = ctx->type;
 183
 184         /* LS is merged into HS (TCS), and ES is merged into GS. */
 185         if (ctx->screen->info.chip_class >= GFX9) {
 186                 if (ctx->shader->key.as_ls)
 187                         real_shader_type = PIPE_SHADER_TESS_CTRL;
 188                 else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
 189                         real_shader_type = PIPE_SHADER_GEOMETRY;
 190         }
 191
 192         switch (real_shader_type) {
 193         case PIPE_SHADER_VERTEX:
 194         case PIPE_SHADER_TESS_EVAL:
 195                 call_conv = AC_LLVM_AMDGPU_VS;
 196                 break;
 197         case PIPE_SHADER_TESS_CTRL:
 198                 call_conv = AC_LLVM_AMDGPU_HS;
 199                 break;
 200         case PIPE_SHADER_GEOMETRY:
 201                 call_conv = AC_LLVM_AMDGPU_GS;
 202                 break;
 203         case PIPE_SHADER_FRAGMENT:
 204                 call_conv = AC_LLVM_AMDGPU_PS;
 205                 break;
 206         case PIPE_SHADER_COMPUTE:
 207                 call_conv = AC_LLVM_AMDGPU_CS;
 208                 break;
 209         default:
 210                 unreachable("Unhandle shader type");
 211         }
 212
 213         /* Setup the function */
 214         ctx->return_type = ret_type;
 215         ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
 216                                      ret_type, ctx->ac.module);
 217         ctx->return_value = LLVMGetUndef(ctx->return_type);
 218
 219         if (ctx->screen->info.address32_hi) {
 220                 ac_llvm_add_target_dep_function_attr(ctx->main_fn,
 221                                                      "amdgpu-32bit-address-high-bits",
 222                                                      ctx->screen->info.address32_hi);
 223         }
 224
 225         LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
 226                                            "no-signed-zeros-fp-math",
 227                                            "true");
 228
 229         ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
 230 }
 231
 232 void si_llvm_optimize_module(struct si_shader_context *ctx)
 233 {
 234         /* Dump LLVM IR before any optimization passes */
 235         if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
 236             si_can_dump_shader(ctx->screen, ctx->type))
 237                 LLVMDumpModule(ctx->ac.module);
 238
 239         /* Run the pass */
 240         LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
 241         LLVMDisposeBuilder(ctx->ac.builder);
 242 }
 243
 244 void si_llvm_dispose(struct si_shader_context *ctx)
 245 {
 246         LLVMDisposeModule(ctx->ac.module);
 247         LLVMContextDispose(ctx->ac.context);
 248         ac_llvm_context_dispose(&ctx->ac);
 249 }
 250
 251 /**
 252  * Load a dword from a constant buffer.
 253  */
 254 LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx,
 255                                   LLVMValueRef resource, LLVMValueRef offset)
 256 {
 257         return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
 258                                     0, 0, true, true);
 259 }
 260
 261 void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
 262 {
 263         if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
 264                 LLVMBuildRetVoid(ctx->ac.builder);
 265         else
 266                 LLVMBuildRet(ctx->ac.builder, ret);
 267 }
 268
 269 LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
 270                                  struct ac_arg param, unsigned return_index)
 271 {
 272         return LLVMBuildInsertValue(ctx->ac.builder, ret,
 273                                     ac_get_arg(&ctx->ac, param),
 274                                     return_index, "");
 275 }
 276
 277 LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
 278                                        struct ac_arg param, unsigned return_index)
 279 {
 280         LLVMBuilderRef builder = ctx->ac.builder;
 281         LLVMValueRef p = ac_get_arg(&ctx->ac, param);
 282
 283         return LLVMBuildInsertValue(builder, ret,
 284                                     ac_to_float(&ctx->ac, p),
 285                                     return_index, "");
 286 }
 287
 288 LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
 289                                  struct ac_arg param, unsigned return_index)
 290 {
 291         LLVMBuilderRef builder = ctx->ac.builder;
 292         LLVMValueRef ptr = ac_get_arg(&ctx->ac, param);
 293         ptr = LLVMBuildPtrToInt(builder, ptr, ctx->ac.i32, "");
 294         return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
 295 }
 296
 297 LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
 298 {
 299         LLVMValueRef ptr[2], list;
 300         bool merged_shader = si_is_merged_shader(ctx);
 301
 302         ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
 303         list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
 304                                  ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
 305         return list;
 306 }
 307
 308 LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
 309                                    LLVMTypeRef type, LLVMValueRef val1,
 310                                    LLVMValueRef val2)
 311 {
 312         LLVMValueRef values[2] = {
 313                 ac_to_integer(&ctx->ac, val1),
 314                 ac_to_integer(&ctx->ac, val2),
 315         };
 316         LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
 317         return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
 318 }
 319
 320 void si_llvm_emit_barrier(struct si_shader_context *ctx)
 321 {
 322         /* GFX6 only (thanks to a hw bug workaround):
 323          * The real barrier instruction isn’t needed, because an entire patch
 324          * always fits into a single wave.
 325          */
 326         if (ctx->screen->info.chip_class == GFX6 &&
 327             ctx->type == PIPE_SHADER_TESS_CTRL) {
 328                 ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
 329                 return;
 330         }
 331
 332         ac_build_s_barrier(&ctx->ac);
 333 }
 334
 335 /* Ensure that the esgs ring is declared.
 336  *
 337  * We declare it with 64KB alignment as a hint that the
 338  * pointer value will always be 0.
 339  */
 340 void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
 341 {
 342         if (ctx->esgs_ring)
 343                 return;
 344
 345         assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
 346
 347         ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
 348                 ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
 349                 "esgs_ring",
 350                 AC_ADDR_SPACE_LDS);
 351         LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
 352         LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
 353 }
 354
 355 void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
 356                              unsigned bitoffset)
 357 {
 358         LLVMValueRef args[] = {
 359                 ac_get_arg(&ctx->ac, param),
 360                 LLVMConstInt(ctx->ac.i32, bitoffset, 0),
 361         };
 362         ac_build_intrinsic(&ctx->ac,
 363                            "llvm.amdgcn.init.exec.from.input",
 364                            ctx->ac.voidt, args, 2, AC_FUNC_ATTR_CONVERGENT);
 365 }