4ddcbccfac0a5767bc2042881c24e647eaeac286
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_llvm.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27 #include "ac_rtld.h"
28 #include "ac_nir_to_llvm.h"
29 #include "sid.h"
30
31 #include "tgsi/tgsi_from_mesa.h"
32 #include "util/u_memory.h"
33
/* Context passed to si_diagnostic_handler via LLVM's diagnostic callback. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug; /* sink for forwarded diagnostics */
	unsigned retval; /* set to 1 by the handler when LLVM reports an error */
};
38
39 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
40 {
41 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
42 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
43 const char *severity_str = NULL;
44
45 switch (severity) {
46 case LLVMDSError:
47 severity_str = "error";
48 break;
49 case LLVMDSWarning:
50 severity_str = "warning";
51 break;
52 case LLVMDSRemark:
53 case LLVMDSNote:
54 default:
55 return;
56 }
57
58 char *description = LLVMGetDiagInfoDescription(di);
59
60 pipe_debug_message(diag->debug, SHADER_INFO,
61 "LLVM diagnostic (%s): %s", severity_str, description);
62
63 if (severity == LLVMDSError) {
64 diag->retval = 1;
65 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
66 }
67
68 LLVMDisposeMessage(description);
69 }
70
/**
 * Compile the LLVM module in \p ac to an ELF shader binary and read the
 * hardware register config back out of it.
 *
 * \param binary          receives the ELF buffer/size (and the LLVM IR string
 *                        when sscreen->record_llvm_ir is set)
 * \param conf            receives the shader config read from the ELF
 * \param debug           debug callback used for diagnostics
 * \param shader_type     used for dump filtering and rtld stage selection
 * \param name            human-readable name printed with IR dumps
 * \param less_optimized  use the low-optimization pass list if available
 * \return 0 on success, non-zero on failure
 */
int si_compile_llvm(struct si_screen *sscreen,
		    struct si_shader_binary *binary,
		    struct ac_shader_config *conf,
		    struct ac_llvm_compiler *compiler,
		    struct ac_llvm_context *ac,
		    struct pipe_debug_callback *debug,
		    enum pipe_shader_type shader_type,
		    const char *name,
		    bool less_optimized)
{
	unsigned count = p_atomic_inc_return(&sscreen->num_compilations);

	if (si_can_dump_shader(sscreen, shader_type)) {
		fprintf(stderr, "radeonsi: Compiling shader %d\n", count);

		if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
			fprintf(stderr, "%s LLVM IR:\n\n", name);
			ac_dump_module(ac->module);
			fprintf(stderr, "\n");
		}
	}

	if (sscreen->record_llvm_ir) {
		char *ir = LLVMPrintModuleToString(ac->module);
		binary->llvm_ir_string = strdup(ir);
		LLVMDisposeMessage(ir);
	}

	/* When si_replace_shader supplies a binary (returns true), the
	 * compile step is skipped entirely. */
	if (!si_replace_shader(count, binary)) {
		struct ac_compiler_passes *passes = compiler->passes;

		/* Pick the pass list matching the wave size / optimization
		 * level. */
		if (ac->wave_size == 32)
			passes = compiler->passes_wave32;
		else if (less_optimized && compiler->low_opt_passes)
			passes = compiler->low_opt_passes;

		/* diag.retval is set to 1 by si_diagnostic_handler on LLVM
		 * errors. */
		struct si_llvm_diagnostics diag = {debug};
		LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag);

		if (!ac_compile_module_to_elf(passes, ac->module,
					      (char **)&binary->elf_buffer,
					      &binary->elf_size))
			diag.retval = 1;

		if (diag.retval != 0) {
			pipe_debug_message(debug, SHADER_INFO, "LLVM compilation failed");
			return diag.retval;
		}
	}

	/* Parse the ELF (whether compiled or loaded from disk) to extract
	 * the shader config. */
	struct ac_rtld_binary rtld;
	if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
			.info = &sscreen->info,
			.shader_type = tgsi_processor_to_shader_stage(shader_type),
			.wave_size = ac->wave_size,
			.num_parts = 1,
			.elf_ptrs = &binary->elf_buffer,
			.elf_sizes = &binary->elf_size }))
		return -1;

	bool ok = ac_rtld_read_config(&rtld, conf);
	ac_rtld_close(&rtld);
	if (!ok)
		return -1;

	/* Enable 64-bit and 16-bit denormals, because there is no performance
	 * cost.
	 *
	 * If denormals are enabled, all floating-point output modifiers are
	 * ignored.
	 *
	 * Don't enable denormals for 32-bit floats, because:
	 * - Floating-point output modifiers would be ignored by the hw.
	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
	 *   have to stop using those.
	 * - GFX6 & GFX7 would be very slow.
	 */
	conf->float_mode |= V_00B028_FP_64_DENORMS;

	return 0;
}
152
153 void si_llvm_context_init(struct si_shader_context *ctx,
154 struct si_screen *sscreen,
155 struct ac_llvm_compiler *compiler,
156 unsigned wave_size)
157 {
158 memset(ctx, 0, sizeof(*ctx));
159 ctx->screen = sscreen;
160 ctx->compiler = compiler;
161
162 ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class,
163 sscreen->info.family,
164 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH,
165 wave_size, 64);
166 }
167
168 void si_llvm_create_func(struct si_shader_context *ctx, const char *name,
169 LLVMTypeRef *return_types, unsigned num_return_elems,
170 unsigned max_workgroup_size)
171 {
172 LLVMTypeRef ret_type;
173 enum ac_llvm_calling_convention call_conv;
174 enum pipe_shader_type real_shader_type;
175
176 if (num_return_elems)
177 ret_type = LLVMStructTypeInContext(ctx->ac.context,
178 return_types,
179 num_return_elems, true);
180 else
181 ret_type = ctx->ac.voidt;
182
183 real_shader_type = ctx->type;
184
185 /* LS is merged into HS (TCS), and ES is merged into GS. */
186 if (ctx->screen->info.chip_class >= GFX9) {
187 if (ctx->shader->key.as_ls)
188 real_shader_type = PIPE_SHADER_TESS_CTRL;
189 else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
190 real_shader_type = PIPE_SHADER_GEOMETRY;
191 }
192
193 switch (real_shader_type) {
194 case PIPE_SHADER_VERTEX:
195 case PIPE_SHADER_TESS_EVAL:
196 call_conv = AC_LLVM_AMDGPU_VS;
197 break;
198 case PIPE_SHADER_TESS_CTRL:
199 call_conv = AC_LLVM_AMDGPU_HS;
200 break;
201 case PIPE_SHADER_GEOMETRY:
202 call_conv = AC_LLVM_AMDGPU_GS;
203 break;
204 case PIPE_SHADER_FRAGMENT:
205 call_conv = AC_LLVM_AMDGPU_PS;
206 break;
207 case PIPE_SHADER_COMPUTE:
208 call_conv = AC_LLVM_AMDGPU_CS;
209 break;
210 default:
211 unreachable("Unhandle shader type");
212 }
213
214 /* Setup the function */
215 ctx->return_type = ret_type;
216 ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name,
217 ret_type, ctx->ac.module);
218 ctx->return_value = LLVMGetUndef(ctx->return_type);
219
220 if (ctx->screen->info.address32_hi) {
221 ac_llvm_add_target_dep_function_attr(ctx->main_fn,
222 "amdgpu-32bit-address-high-bits",
223 ctx->screen->info.address32_hi);
224 }
225
226 LLVMAddTargetDependentFunctionAttr(ctx->main_fn,
227 "no-signed-zeros-fp-math",
228 "true");
229
230 ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
231 }
232
233 void si_llvm_optimize_module(struct si_shader_context *ctx)
234 {
235 /* Dump LLVM IR before any optimization passes */
236 if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
237 si_can_dump_shader(ctx->screen, ctx->type))
238 LLVMDumpModule(ctx->ac.module);
239
240 /* Run the pass */
241 LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
242 LLVMDisposeBuilder(ctx->ac.builder);
243 }
244
/* Free the LLVM module and context owned by the shader context. */
void si_llvm_dispose(struct si_shader_context *ctx)
{
	LLVMDisposeModule(ctx->ac.module);
	LLVMContextDispose(ctx->ac.context);
	ac_llvm_context_dispose(&ctx->ac);
}
251
/**
 * Load a dword from a constant buffer.
 *
 * \param resource  buffer resource descriptor
 * \param offset    byte offset into the buffer
 * \return the loaded 32-bit value
 */
LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx,
				  LLVMValueRef resource, LLVMValueRef offset)
{
	return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL,
				    0, 0, true, true);
}
261
262 void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
263 {
264 if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
265 LLVMBuildRetVoid(ctx->ac.builder);
266 else
267 LLVMBuildRet(ctx->ac.builder, ret);
268 }
269
270 LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
271 struct ac_arg param, unsigned return_index)
272 {
273 return LLVMBuildInsertValue(ctx->ac.builder, ret,
274 ac_get_arg(&ctx->ac, param),
275 return_index, "");
276 }
277
278 LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
279 struct ac_arg param, unsigned return_index)
280 {
281 LLVMBuilderRef builder = ctx->ac.builder;
282 LLVMValueRef p = ac_get_arg(&ctx->ac, param);
283
284 return LLVMBuildInsertValue(builder, ret,
285 ac_to_float(&ctx->ac, p),
286 return_index, "");
287 }
288
289 LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
290 struct ac_arg param, unsigned return_index)
291 {
292 LLVMBuilderRef builder = ctx->ac.builder;
293 LLVMValueRef ptr = ac_get_arg(&ctx->ac, param);
294 ptr = LLVMBuildPtrToInt(builder, ptr, ctx->ac.i32, "");
295 return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
296 }
297
298 LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
299 {
300 LLVMValueRef ptr[2], list;
301 bool merged_shader = si_is_merged_shader(ctx);
302
303 ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
304 list = LLVMBuildIntToPtr(ctx->ac.builder, ptr[0],
305 ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
306 return list;
307 }
308
309 LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx,
310 LLVMTypeRef type, LLVMValueRef val1,
311 LLVMValueRef val2)
312 {
313 LLVMValueRef values[2] = {
314 ac_to_integer(&ctx->ac, val1),
315 ac_to_integer(&ctx->ac, val2),
316 };
317 LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
318 return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
319 }
320
321 void si_llvm_emit_barrier(struct si_shader_context *ctx)
322 {
323 /* GFX6 only (thanks to a hw bug workaround):
324 * The real barrier instruction isn’t needed, because an entire patch
325 * always fits into a single wave.
326 */
327 if (ctx->screen->info.chip_class == GFX6 &&
328 ctx->type == PIPE_SHADER_TESS_CTRL) {
329 ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
330 return;
331 }
332
333 ac_build_s_barrier(&ctx->ac);
334 }
335
336 /* Ensure that the esgs ring is declared.
337 *
338 * We declare it with 64KB alignment as a hint that the
339 * pointer value will always be 0.
340 */
341 void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
342 {
343 if (ctx->esgs_ring)
344 return;
345
346 assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
347
348 ctx->esgs_ring = LLVMAddGlobalInAddressSpace(
349 ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
350 "esgs_ring",
351 AC_ADDR_SPACE_LDS);
352 LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
353 LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
354 }
355
356 void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param,
357 unsigned bitoffset)
358 {
359 LLVMValueRef args[] = {
360 ac_get_arg(&ctx->ac, param),
361 LLVMConstInt(ctx->ac.i32, bitoffset, 0),
362 };
363 ac_build_intrinsic(&ctx->ac,
364 "llvm.amdgcn.init.exec.from.input",
365 ctx->ac.voidt, args, 2, AC_FUNC_ATTR_CONVERGENT);
366 }
367
368 /**
369 * Get the value of a shader input parameter and extract a bitfield.
370 */
371 static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx,
372 LLVMValueRef value, unsigned rshift,
373 unsigned bitwidth)
374 {
375 if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
376 value = ac_to_integer(&ctx->ac, value);
377
378 if (rshift)
379 value = LLVMBuildLShr(ctx->ac.builder, value,
380 LLVMConstInt(ctx->ac.i32, rshift, 0), "");
381
382 if (rshift + bitwidth < 32) {
383 unsigned mask = (1 << bitwidth) - 1;
384 value = LLVMBuildAnd(ctx->ac.builder, value,
385 LLVMConstInt(ctx->ac.i32, mask, 0), "");
386 }
387
388 return value;
389 }
390
391 LLVMValueRef si_unpack_param(struct si_shader_context *ctx,
392 struct ac_arg param, unsigned rshift,
393 unsigned bitwidth)
394 {
395 LLVMValueRef value = ac_get_arg(&ctx->ac, param);
396
397 return unpack_llvm_param(ctx, value, rshift, bitwidth);
398 }
399
400 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx,
401 unsigned swizzle)
402 {
403 if (swizzle > 0)
404 return ctx->ac.i32_0;
405
406 switch (ctx->type) {
407 case PIPE_SHADER_VERTEX:
408 return ac_get_arg(&ctx->ac, ctx->vs_prim_id);
409 case PIPE_SHADER_TESS_CTRL:
410 return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id);
411 case PIPE_SHADER_TESS_EVAL:
412 return ac_get_arg(&ctx->ac, ctx->args.tes_patch_id);
413 case PIPE_SHADER_GEOMETRY:
414 return ac_get_arg(&ctx->ac, ctx->args.gs_prim_id);
415 default:
416 assert(0);
417 return ctx->ac.i32_0;
418 }
419 }
420
421 LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
422 {
423 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
424
425 LLVMValueRef values[3];
426 LLVMValueRef result;
427 unsigned i;
428 unsigned *properties = ctx->shader->selector->info.properties;
429
430 if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
431 unsigned sizes[3] = {
432 properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
433 properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
434 properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]
435 };
436
437 for (i = 0; i < 3; ++i)
438 values[i] = LLVMConstInt(ctx->ac.i32, sizes[i], 0);
439
440 result = ac_build_gather_values(&ctx->ac, values, 3);
441 } else {
442 result = ac_get_arg(&ctx->ac, ctx->block_size);
443 }
444
445 return result;
446 }
447
448 void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
449 {
450 struct si_shader_selector *sel = ctx->shader->selector;
451 unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];
452
453 LLVMTypeRef i8p = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS);
454 LLVMValueRef var;
455
456 assert(!ctx->ac.lds);
457
458 var = LLVMAddGlobalInAddressSpace(ctx->ac.module,
459 LLVMArrayType(ctx->ac.i8, lds_size),
460 "compute_lds",
461 AC_ADDR_SPACE_LDS);
462 LLVMSetAlignment(var, 64 * 1024);
463
464 ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
465 }
466
467 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
468 {
469 if (nir->info.stage == MESA_SHADER_VERTEX) {
470 si_llvm_load_vs_inputs(ctx, nir);
471 } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
472 unsigned colors_read =
473 ctx->shader->selector->info.colors_read;
474 LLVMValueRef main_fn = ctx->main_fn;
475
476 LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32);
477
478 unsigned offset = SI_PARAM_POS_FIXED_PT + 1;
479
480 if (colors_read & 0x0f) {
481 unsigned mask = colors_read & 0x0f;
482 LLVMValueRef values[4];
483 values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
484 values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
485 values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
486 values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
487 ctx->abi.color0 =
488 ac_to_integer(&ctx->ac,
489 ac_build_gather_values(&ctx->ac, values, 4));
490 }
491 if (colors_read & 0xf0) {
492 unsigned mask = (colors_read & 0xf0) >> 4;
493 LLVMValueRef values[4];
494 values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
495 values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
496 values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
497 values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
498 ctx->abi.color1 =
499 ac_to_integer(&ctx->ac,
500 ac_build_gather_values(&ctx->ac, values, 4));
501 }
502
503 ctx->abi.interp_at_sample_force_center =
504 ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
505 } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
506 if (nir->info.cs.user_data_components_amd) {
507 ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
508 ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,
509 nir->info.cs.user_data_components_amd);
510 }
511 }
512
513 ctx->abi.inputs = &ctx->inputs[0];
514 ctx->abi.clamp_shadow_reference = true;
515 ctx->abi.robust_buffer_access = true;
516
517 if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
518 assert(gl_shader_stage_is_compute(nir->info.stage));
519 si_llvm_declare_compute_memory(ctx);
520 }
521 ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir);
522
523 return true;
524 }
525
/**
 * Given a list of shader part functions, build a wrapper function that
 * runs them in sequence to form a monolithic shader.
 *
 * \param parts                   the part functions, in execution order
 * \param num_parts               number of entries in \p parts
 * \param main_part               index of the part whose parameter types
 *                                define the wrapper's signature
 * \param next_shader_first_part  index of the first part of the second
 *                                merged shader; earlier parts run under a
 *                                thread-count condition
 */
void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
			       unsigned num_parts, unsigned main_part,
			       unsigned next_shader_first_part)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	/* PS epilog has one arg per color component; gfx9 merged shader
	 * prologs need to forward 40 SGPRs.
	 */
	LLVMValueRef initial[AC_MAX_ARGS], out[AC_MAX_ARGS];
	LLVMTypeRef function_type;
	unsigned num_first_params;
	unsigned num_out, initial_num_out;
	ASSERTED unsigned num_out_sgpr; /* used in debug checks */
	ASSERTED unsigned initial_num_out_sgpr; /* used in debug checks */
	unsigned num_sgprs, num_vgprs;
	unsigned gprs;

	memset(&ctx->args, 0, sizeof(ctx->args));

	/* All parts get inlined into the wrapper and hidden from the
	 * outside world. */
	for (unsigned i = 0; i < num_parts; ++i) {
		ac_add_function_attr(ctx->ac.context, parts[i], -1,
				     AC_FUNC_ATTR_ALWAYSINLINE);
		LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
	}

	/* The parameters of the wrapper function correspond to those of the
	 * first part in terms of SGPRs and VGPRs, but we use the types of the
	 * main part to get the right types. This is relevant for the
	 * dereferenceable attribute on descriptor table pointers.
	 */
	num_sgprs = 0;
	num_vgprs = 0;

	function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
	num_first_params = LLVMCountParamTypes(function_type);

	/* Count the first part's SGPRs and VGPRs in units of dwords. */
	for (unsigned i = 0; i < num_first_params; ++i) {
		LLVMValueRef param = LLVMGetParam(parts[0], i);

		if (ac_is_sgpr_param(param)) {
			assert(num_vgprs == 0);
			num_sgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
		} else {
			num_vgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
		}
	}

	/* Declare wrapper arguments, walking the main part's parameters
	 * until all counted GPR dwords are covered. */
	gprs = 0;
	while (gprs < num_sgprs + num_vgprs) {
		LLVMValueRef param = LLVMGetParam(parts[main_part], ctx->args.arg_count);
		LLVMTypeRef type = LLVMTypeOf(param);
		unsigned size = ac_get_type_size(type) / 4;

		/* This is going to get casted anyways, so we don't have to
		 * have the exact same type. But we do have to preserve the
		 * pointer-ness so that LLVM knows about it.
		 */
		enum ac_arg_type arg_type = AC_ARG_INT;
		if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
			type = LLVMGetElementType(type);

			if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
				if (LLVMGetVectorSize(type) == 4)
					arg_type = AC_ARG_CONST_DESC_PTR;
				else if (LLVMGetVectorSize(type) == 8)
					arg_type = AC_ARG_CONST_IMAGE_PTR;
				else
					assert(0);
			} else if (type == ctx->ac.f32) {
				arg_type = AC_ARG_CONST_FLOAT_PTR;
			} else {
				assert(0);
			}
		}

		ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR,
			   size, arg_type, NULL);

		assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
		assert(gprs + size <= num_sgprs + num_vgprs &&
		       (gprs >= num_sgprs || gprs + size <= num_sgprs));

		gprs += size;
	}

	/* Prepare the return type. */
	unsigned num_returns = 0;
	LLVMTypeRef returns[AC_MAX_ARGS], last_func_type, return_type;

	/* The wrapper returns whatever the last part returns. */
	last_func_type = LLVMGetElementType(LLVMTypeOf(parts[num_parts - 1]));
	return_type = LLVMGetReturnType(last_func_type);

	switch (LLVMGetTypeKind(return_type)) {
	case LLVMStructTypeKind:
		num_returns = LLVMCountStructElementTypes(return_type);
		assert(num_returns <= ARRAY_SIZE(returns));
		LLVMGetStructElementTypes(return_type, returns);
		break;
	case LLVMVoidTypeKind:
		break;
	default:
		unreachable("unexpected type");
	}

	si_llvm_create_func(ctx, "wrapper", returns, num_returns,
			    si_get_max_workgroup_size(ctx->shader));

	if (si_is_merged_shader(ctx))
		ac_init_exec_full_mask(&ctx->ac);

	/* Record the arguments of the function as if they were an output of
	 * a previous part.
	 */
	num_out = 0;
	num_out_sgpr = 0;

	for (unsigned i = 0; i < ctx->args.arg_count; ++i) {
		LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
		LLVMTypeRef param_type = LLVMTypeOf(param);
		LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
		unsigned size = ac_get_type_size(param_type) / 4;

		if (size == 1) {
			/* Pointers are carried as i32 words. */
			if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
				param = LLVMBuildPtrToInt(builder, param, ctx->ac.i32, "");
				param_type = ctx->ac.i32;
			}

			if (param_type != out_type)
				param = LLVMBuildBitCast(builder, param, out_type, "");
			out[num_out++] = param;
		} else {
			LLVMTypeRef vector_type = LLVMVectorType(out_type, size);

			if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
				param = LLVMBuildPtrToInt(builder, param, ctx->ac.i64, "");
				param_type = ctx->ac.i64;
			}

			if (param_type != vector_type)
				param = LLVMBuildBitCast(builder, param, vector_type, "");

			/* Split multi-dword params into dword scalars. */
			for (unsigned j = 0; j < size; ++j)
				out[num_out++] = LLVMBuildExtractElement(
					builder, param, LLVMConstInt(ctx->ac.i32, j, 0), "");
		}

		if (ctx->args.args[i].file == AC_ARG_SGPR)
			num_out_sgpr = num_out;
	}

	/* Remember the wrapper's own inputs so the second merged shader can
	 * restart from them (see below). */
	memcpy(initial, out, sizeof(out));
	initial_num_out = num_out;
	initial_num_out_sgpr = num_out_sgpr;

	/* Now chain the parts. */
	LLVMValueRef ret = NULL;
	for (unsigned part = 0; part < num_parts; ++part) {
		LLVMValueRef in[AC_MAX_ARGS];
		LLVMTypeRef ret_type;
		unsigned out_idx = 0;
		unsigned num_params = LLVMCountParams(parts[part]);

		/* Merged shaders are executed conditionally depending
		 * on the number of enabled threads passed in the input SGPRs. */
		if (si_is_multi_part_shader(ctx) && part == 0) {
			LLVMValueRef ena, count = initial[3];

			count = LLVMBuildAnd(builder, count,
					     LLVMConstInt(ctx->ac.i32, 0x7f, 0), "");
			ena = LLVMBuildICmp(builder, LLVMIntULT,
					    ac_get_thread_id(&ctx->ac), count, "");
			ac_build_ifcc(&ctx->ac, ena, 6506);
		}

		/* Derive arguments for the next part from outputs of the
		 * previous one.
		 */
		for (unsigned param_idx = 0; param_idx < num_params; ++param_idx) {
			LLVMValueRef param;
			LLVMTypeRef param_type;
			bool is_sgpr;
			unsigned param_size;
			LLVMValueRef arg = NULL;

			param = LLVMGetParam(parts[part], param_idx);
			param_type = LLVMTypeOf(param);
			param_size = ac_get_type_size(param_type) / 4;
			is_sgpr = ac_is_sgpr_param(param);

			if (is_sgpr) {
				ac_add_function_attr(ctx->ac.context, parts[part],
						     param_idx + 1, AC_FUNC_ATTR_INREG);
			} else if (out_idx < num_out_sgpr) {
				/* Skip returned SGPRs the current part doesn't
				 * declare on the input. */
				out_idx = num_out_sgpr;
			}

			assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));

			/* Reassemble multi-dword params from scalar dwords. */
			if (param_size == 1)
				arg = out[out_idx];
			else
				arg = ac_build_gather_values(&ctx->ac, &out[out_idx], param_size);

			if (LLVMTypeOf(arg) != param_type) {
				if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
					/* 32-bit const pointers fit in one
					 * dword; others need two. */
					if (LLVMGetPointerAddressSpace(param_type) ==
					    AC_ADDR_SPACE_CONST_32BIT) {
						arg = LLVMBuildBitCast(builder, arg, ctx->ac.i32, "");
						arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
					} else {
						arg = LLVMBuildBitCast(builder, arg, ctx->ac.i64, "");
						arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
					}
				} else {
					arg = LLVMBuildBitCast(builder, arg, param_type, "");
				}
			}

			in[param_idx] = arg;
			out_idx += param_size;
		}

		ret = ac_build_call(&ctx->ac, parts[part], in, num_params);

		if (si_is_multi_part_shader(ctx) &&
		    part + 1 == next_shader_first_part) {
			ac_build_endif(&ctx->ac, 6506);

			/* The second half of the merged shader should use
			 * the inputs from the toplevel (wrapper) function,
			 * not the return value from the last call.
			 *
			 * That's because the last call was executed condi-
			 * tionally, so we can't consume it in the main
			 * block.
			 */
			memcpy(out, initial, sizeof(initial));
			num_out = initial_num_out;
			num_out_sgpr = initial_num_out_sgpr;
			continue;
		}

		/* Extract the returned GPRs. */
		ret_type = LLVMTypeOf(ret);
		num_out = 0;
		num_out_sgpr = 0;

		if (LLVMGetTypeKind(ret_type) != LLVMVoidTypeKind) {
			assert(LLVMGetTypeKind(ret_type) == LLVMStructTypeKind);

			unsigned ret_size = LLVMCountStructElementTypes(ret_type);

			for (unsigned i = 0; i < ret_size; ++i) {
				LLVMValueRef val =
					LLVMBuildExtractValue(builder, ret, i, "");

				assert(num_out < ARRAY_SIZE(out));
				out[num_out++] = val;

				/* i32 return elements are SGPRs; they always
				 * come before the VGPR (f32) elements. */
				if (LLVMTypeOf(val) == ctx->ac.i32) {
					assert(num_out_sgpr + 1 == num_out);
					num_out_sgpr = num_out;
				}
			}
		}
	}

	/* Return the value from the last part. */
	if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
		LLVMBuildRetVoid(builder);
	else
		LLVMBuildRet(builder, ret);
}