2 * Copyright 2016 Advanced Micro Devices, Inc.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 #include "si_shader_internal.h"
28 #include "ac_nir_to_llvm.h"
31 #include "tgsi/tgsi_from_mesa.h"
32 #include "util/u_memory.h"
/* Context handed to si_diagnostic_handler via the LLVM diagnostic callback.
 *
 * Fix: the struct as pasted here lost its "retval" member, which
 * si_compile_llvm demonstrably reads (diag.retval) to detect compile errors.
 */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval; /* set non-zero when an LLVMDSError diagnostic arrives */
};
39 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di
, void *context
)
41 struct si_llvm_diagnostics
*diag
= (struct si_llvm_diagnostics
*)context
;
42 LLVMDiagnosticSeverity severity
= LLVMGetDiagInfoSeverity(di
);
43 const char *severity_str
= NULL
;
47 severity_str
= "error";
50 severity_str
= "warning";
58 char *description
= LLVMGetDiagInfoDescription(di
);
60 pipe_debug_message(diag
->debug
, SHADER_INFO
,
61 "LLVM diagnostic (%s): %s", severity_str
, description
);
63 if (severity
== LLVMDSError
) {
65 fprintf(stderr
,"LLVM triggered Diagnostic Handler: %s\n", description
);
68 LLVMDisposeMessage(description
);
71 int si_compile_llvm(struct si_screen
*sscreen
,
72 struct si_shader_binary
*binary
,
73 struct ac_shader_config
*conf
,
74 struct ac_llvm_compiler
*compiler
,
75 struct ac_llvm_context
*ac
,
76 struct pipe_debug_callback
*debug
,
77 enum pipe_shader_type shader_type
,
81 unsigned count
= p_atomic_inc_return(&sscreen
->num_compilations
);
83 if (si_can_dump_shader(sscreen
, shader_type
)) {
84 fprintf(stderr
, "radeonsi: Compiling shader %d\n", count
);
86 if (!(sscreen
->debug_flags
& (DBG(NO_IR
) | DBG(PREOPT_IR
)))) {
87 fprintf(stderr
, "%s LLVM IR:\n\n", name
);
88 ac_dump_module(ac
->module
);
89 fprintf(stderr
, "\n");
93 if (sscreen
->record_llvm_ir
) {
94 char *ir
= LLVMPrintModuleToString(ac
->module
);
95 binary
->llvm_ir_string
= strdup(ir
);
96 LLVMDisposeMessage(ir
);
99 if (!si_replace_shader(count
, binary
)) {
100 struct ac_compiler_passes
*passes
= compiler
->passes
;
102 if (ac
->wave_size
== 32)
103 passes
= compiler
->passes_wave32
;
104 else if (less_optimized
&& compiler
->low_opt_passes
)
105 passes
= compiler
->low_opt_passes
;
107 struct si_llvm_diagnostics diag
= {debug
};
108 LLVMContextSetDiagnosticHandler(ac
->context
, si_diagnostic_handler
, &diag
);
110 if (!ac_compile_module_to_elf(passes
, ac
->module
,
111 (char **)&binary
->elf_buffer
,
115 if (diag
.retval
!= 0) {
116 pipe_debug_message(debug
, SHADER_INFO
, "LLVM compilation failed");
121 struct ac_rtld_binary rtld
;
122 if (!ac_rtld_open(&rtld
, (struct ac_rtld_open_info
){
123 .info
= &sscreen
->info
,
124 .shader_type
= tgsi_processor_to_shader_stage(shader_type
),
125 .wave_size
= ac
->wave_size
,
127 .elf_ptrs
= &binary
->elf_buffer
,
128 .elf_sizes
= &binary
->elf_size
}))
131 bool ok
= ac_rtld_read_config(&rtld
, conf
);
132 ac_rtld_close(&rtld
);
136 /* Enable 64-bit and 16-bit denormals, because there is no performance
139 * If denormals are enabled, all floating-point output modifiers are
142 * Don't enable denormals for 32-bit floats, because:
143 * - Floating-point output modifiers would be ignored by the hw.
144 * - Some opcodes don't support denormals, such as v_mad_f32. We would
145 * have to stop using those.
146 * - GFX6 & GFX7 would be very slow.
148 conf
->float_mode
|= V_00B028_FP_64_DENORMS
;
153 void si_llvm_context_init(struct si_shader_context
*ctx
,
154 struct si_screen
*sscreen
,
155 struct ac_llvm_compiler
*compiler
,
158 memset(ctx
, 0, sizeof(*ctx
));
159 ctx
->screen
= sscreen
;
160 ctx
->compiler
= compiler
;
162 ac_llvm_context_init(&ctx
->ac
, compiler
, sscreen
->info
.chip_class
,
163 sscreen
->info
.family
,
164 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH
,
168 void si_llvm_create_func(struct si_shader_context
*ctx
, const char *name
,
169 LLVMTypeRef
*return_types
, unsigned num_return_elems
,
170 unsigned max_workgroup_size
)
172 LLVMTypeRef ret_type
;
173 enum ac_llvm_calling_convention call_conv
;
174 enum pipe_shader_type real_shader_type
;
176 if (num_return_elems
)
177 ret_type
= LLVMStructTypeInContext(ctx
->ac
.context
,
179 num_return_elems
, true);
181 ret_type
= ctx
->ac
.voidt
;
183 real_shader_type
= ctx
->type
;
185 /* LS is merged into HS (TCS), and ES is merged into GS. */
186 if (ctx
->screen
->info
.chip_class
>= GFX9
) {
187 if (ctx
->shader
->key
.as_ls
)
188 real_shader_type
= PIPE_SHADER_TESS_CTRL
;
189 else if (ctx
->shader
->key
.as_es
|| ctx
->shader
->key
.as_ngg
)
190 real_shader_type
= PIPE_SHADER_GEOMETRY
;
193 switch (real_shader_type
) {
194 case PIPE_SHADER_VERTEX
:
195 case PIPE_SHADER_TESS_EVAL
:
196 call_conv
= AC_LLVM_AMDGPU_VS
;
198 case PIPE_SHADER_TESS_CTRL
:
199 call_conv
= AC_LLVM_AMDGPU_HS
;
201 case PIPE_SHADER_GEOMETRY
:
202 call_conv
= AC_LLVM_AMDGPU_GS
;
204 case PIPE_SHADER_FRAGMENT
:
205 call_conv
= AC_LLVM_AMDGPU_PS
;
207 case PIPE_SHADER_COMPUTE
:
208 call_conv
= AC_LLVM_AMDGPU_CS
;
211 unreachable("Unhandle shader type");
214 /* Setup the function */
215 ctx
->return_type
= ret_type
;
216 ctx
->main_fn
= ac_build_main(&ctx
->args
, &ctx
->ac
, call_conv
, name
,
217 ret_type
, ctx
->ac
.module
);
218 ctx
->return_value
= LLVMGetUndef(ctx
->return_type
);
220 if (ctx
->screen
->info
.address32_hi
) {
221 ac_llvm_add_target_dep_function_attr(ctx
->main_fn
,
222 "amdgpu-32bit-address-high-bits",
223 ctx
->screen
->info
.address32_hi
);
226 LLVMAddTargetDependentFunctionAttr(ctx
->main_fn
,
227 "no-signed-zeros-fp-math",
230 ac_llvm_set_workgroup_size(ctx
->main_fn
, max_workgroup_size
);
233 void si_llvm_optimize_module(struct si_shader_context
*ctx
)
235 /* Dump LLVM IR before any optimization passes */
236 if (ctx
->screen
->debug_flags
& DBG(PREOPT_IR
) &&
237 si_can_dump_shader(ctx
->screen
, ctx
->type
))
238 LLVMDumpModule(ctx
->ac
.module
);
241 LLVMRunPassManager(ctx
->compiler
->passmgr
, ctx
->ac
.module
);
242 LLVMDisposeBuilder(ctx
->ac
.builder
);
245 void si_llvm_dispose(struct si_shader_context
*ctx
)
247 LLVMDisposeModule(ctx
->ac
.module
);
248 LLVMContextDispose(ctx
->ac
.context
);
249 ac_llvm_context_dispose(&ctx
->ac
);
253 * Load a dword from a constant buffer.
255 LLVMValueRef
si_buffer_load_const(struct si_shader_context
*ctx
,
256 LLVMValueRef resource
, LLVMValueRef offset
)
258 return ac_build_buffer_load(&ctx
->ac
, resource
, 1, NULL
, offset
, NULL
,
262 void si_llvm_build_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
)
264 if (LLVMGetTypeKind(LLVMTypeOf(ret
)) == LLVMVoidTypeKind
)
265 LLVMBuildRetVoid(ctx
->ac
.builder
);
267 LLVMBuildRet(ctx
->ac
.builder
, ret
);
270 LLVMValueRef
si_insert_input_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
,
271 struct ac_arg param
, unsigned return_index
)
273 return LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
274 ac_get_arg(&ctx
->ac
, param
),
278 LLVMValueRef
si_insert_input_ret_float(struct si_shader_context
*ctx
, LLVMValueRef ret
,
279 struct ac_arg param
, unsigned return_index
)
281 LLVMBuilderRef builder
= ctx
->ac
.builder
;
282 LLVMValueRef p
= ac_get_arg(&ctx
->ac
, param
);
284 return LLVMBuildInsertValue(builder
, ret
,
285 ac_to_float(&ctx
->ac
, p
),
289 LLVMValueRef
si_insert_input_ptr(struct si_shader_context
*ctx
, LLVMValueRef ret
,
290 struct ac_arg param
, unsigned return_index
)
292 LLVMBuilderRef builder
= ctx
->ac
.builder
;
293 LLVMValueRef ptr
= ac_get_arg(&ctx
->ac
, param
);
294 ptr
= LLVMBuildPtrToInt(builder
, ptr
, ctx
->ac
.i32
, "");
295 return LLVMBuildInsertValue(builder
, ret
, ptr
, return_index
, "");
298 LLVMValueRef
si_prolog_get_rw_buffers(struct si_shader_context
*ctx
)
300 LLVMValueRef ptr
[2], list
;
301 bool merged_shader
= si_is_merged_shader(ctx
);
303 ptr
[0] = LLVMGetParam(ctx
->main_fn
, (merged_shader
? 8 : 0) + SI_SGPR_RW_BUFFERS
);
304 list
= LLVMBuildIntToPtr(ctx
->ac
.builder
, ptr
[0],
305 ac_array_in_const32_addr_space(ctx
->ac
.v4i32
), "");
309 LLVMValueRef
si_build_gather_64bit(struct si_shader_context
*ctx
,
310 LLVMTypeRef type
, LLVMValueRef val1
,
313 LLVMValueRef values
[2] = {
314 ac_to_integer(&ctx
->ac
, val1
),
315 ac_to_integer(&ctx
->ac
, val2
),
317 LLVMValueRef result
= ac_build_gather_values(&ctx
->ac
, values
, 2);
318 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, type
, "");
321 void si_llvm_emit_barrier(struct si_shader_context
*ctx
)
323 /* GFX6 only (thanks to a hw bug workaround):
324 * The real barrier instruction isn’t needed, because an entire patch
325 * always fits into a single wave.
327 if (ctx
->screen
->info
.chip_class
== GFX6
&&
328 ctx
->type
== PIPE_SHADER_TESS_CTRL
) {
329 ac_build_waitcnt(&ctx
->ac
, AC_WAIT_LGKM
| AC_WAIT_VLOAD
| AC_WAIT_VSTORE
);
333 ac_build_s_barrier(&ctx
->ac
);
336 /* Ensure that the esgs ring is declared.
338 * We declare it with 64KB alignment as a hint that the
339 * pointer value will always be 0.
341 void si_llvm_declare_esgs_ring(struct si_shader_context
*ctx
)
346 assert(!LLVMGetNamedGlobal(ctx
->ac
.module
, "esgs_ring"));
348 ctx
->esgs_ring
= LLVMAddGlobalInAddressSpace(
349 ctx
->ac
.module
, LLVMArrayType(ctx
->ac
.i32
, 0),
352 LLVMSetLinkage(ctx
->esgs_ring
, LLVMExternalLinkage
);
353 LLVMSetAlignment(ctx
->esgs_ring
, 64 * 1024);
356 void si_init_exec_from_input(struct si_shader_context
*ctx
, struct ac_arg param
,
359 LLVMValueRef args
[] = {
360 ac_get_arg(&ctx
->ac
, param
),
361 LLVMConstInt(ctx
->ac
.i32
, bitoffset
, 0),
363 ac_build_intrinsic(&ctx
->ac
,
364 "llvm.amdgcn.init.exec.from.input",
365 ctx
->ac
.voidt
, args
, 2, AC_FUNC_ATTR_CONVERGENT
);
368 bool si_nir_build_llvm(struct si_shader_context
*ctx
, struct nir_shader
*nir
)
370 if (nir
->info
.stage
== MESA_SHADER_VERTEX
) {
371 si_llvm_load_vs_inputs(ctx
, nir
);
372 } else if (nir
->info
.stage
== MESA_SHADER_FRAGMENT
) {
373 unsigned colors_read
=
374 ctx
->shader
->selector
->info
.colors_read
;
375 LLVMValueRef main_fn
= ctx
->main_fn
;
377 LLVMValueRef undef
= LLVMGetUndef(ctx
->ac
.f32
);
379 unsigned offset
= SI_PARAM_POS_FIXED_PT
+ 1;
381 if (colors_read
& 0x0f) {
382 unsigned mask
= colors_read
& 0x0f;
383 LLVMValueRef values
[4];
384 values
[0] = mask
& 0x1 ? LLVMGetParam(main_fn
, offset
++) : undef
;
385 values
[1] = mask
& 0x2 ? LLVMGetParam(main_fn
, offset
++) : undef
;
386 values
[2] = mask
& 0x4 ? LLVMGetParam(main_fn
, offset
++) : undef
;
387 values
[3] = mask
& 0x8 ? LLVMGetParam(main_fn
, offset
++) : undef
;
389 ac_to_integer(&ctx
->ac
,
390 ac_build_gather_values(&ctx
->ac
, values
, 4));
392 if (colors_read
& 0xf0) {
393 unsigned mask
= (colors_read
& 0xf0) >> 4;
394 LLVMValueRef values
[4];
395 values
[0] = mask
& 0x1 ? LLVMGetParam(main_fn
, offset
++) : undef
;
396 values
[1] = mask
& 0x2 ? LLVMGetParam(main_fn
, offset
++) : undef
;
397 values
[2] = mask
& 0x4 ? LLVMGetParam(main_fn
, offset
++) : undef
;
398 values
[3] = mask
& 0x8 ? LLVMGetParam(main_fn
, offset
++) : undef
;
400 ac_to_integer(&ctx
->ac
,
401 ac_build_gather_values(&ctx
->ac
, values
, 4));
404 ctx
->abi
.interp_at_sample_force_center
=
405 ctx
->shader
->key
.mono
.u
.ps
.interpolate_at_sample_force_center
;
406 } else if (nir
->info
.stage
== MESA_SHADER_COMPUTE
) {
407 if (nir
->info
.cs
.user_data_components_amd
) {
408 ctx
->abi
.user_data
= ac_get_arg(&ctx
->ac
, ctx
->cs_user_data
);
409 ctx
->abi
.user_data
= ac_build_expand_to_vec4(&ctx
->ac
, ctx
->abi
.user_data
,
410 nir
->info
.cs
.user_data_components_amd
);
414 ctx
->abi
.inputs
= &ctx
->inputs
[0];
415 ctx
->abi
.clamp_shadow_reference
= true;
416 ctx
->abi
.robust_buffer_access
= true;
418 if (ctx
->shader
->selector
->info
.properties
[TGSI_PROPERTY_CS_LOCAL_SIZE
]) {
419 assert(gl_shader_stage_is_compute(nir
->info
.stage
));
420 si_declare_compute_memory(ctx
);
422 ac_nir_translate(&ctx
->ac
, &ctx
->abi
, &ctx
->args
, nir
);