src/gallium/drivers/radeonsi/si_shader_llvm.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  * All Rights Reserved.
   4  *
   5  * Permission is hereby granted, free of charge, to any person obtaining a
   6  * copy of this software and associated documentation files (the "Software"),
   7  * to deal in the Software without restriction, including without limitation
   8  * on the rights to use, copy, modify, merge, publish, distribute, sub
   9  * license, and/or sell copies of the Software, and to permit persons to whom
  10  * the Software is furnished to do so, subject to the following conditions:
  11  *
  12  * The above copyright notice and this permission notice (including the next
  13  * paragraph) shall be included in all copies or substantial portions of the
  14  * Software.
  15  *
  16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  19  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  20  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  21  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  22  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  23  */
  24
  25 #include "ac_nir_to_llvm.h"
  26 #include "ac_rtld.h"
  27 #include "si_pipe.h"
  28 #include "si_shader_internal.h"
  29 #include "sid.h"
  30 #include "tgsi/tgsi_from_mesa.h"
  31 #include "util/u_memory.h"
  32
  33 struct si_llvm_diagnostics {
  34    struct pipe_debug_callback *debug;
  35    unsigned retval;
  36 };
  37
  38 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
  39 {
  40    struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
  41    LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
  42    const char *severity_str = NULL;
  43
  44    switch (severity) {
  45    case LLVMDSError:
  46       severity_str = "error";
  47       break;
  48    case LLVMDSWarning:
  49       severity_str = "warning";
  50       break;
  51    case LLVMDSRemark:
  52    case LLVMDSNote:
  53    default:
  54       return;
  55    }
  56
  57    char *description = LLVMGetDiagInfoDescription(di);
  58
  59    pipe_debug_message(diag->debug, SHADER_INFO, "LLVM diagnostic (%s): %s", severity_str,
  60                       description);
  61
  62    if (severity == LLVMDSError) {
  63       diag->retval = 1;
  64       fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
  65    }
  66
  67    LLVMDisposeMessage(description);
  68 }
  69
  70 bool si_compile_llvm(struct si_screen *sscreen, struct si_shader_binary *binary,
  71                      struct ac_shader_config *conf, struct ac_llvm_compiler *compiler,
  72                      struct ac_llvm_context *ac, struct pipe_debug_callback *debug,
  73                      enum pipe_shader_type shader_type, const char *name, bool less_optimized)
  74 {
  75    unsigned count = p_atomic_inc_return(&sscreen->num_compilations);
  76
  77    if (si_can_dump_shader(sscreen, shader_type)) {
  78       fprintf(stderr, "radeonsi: Compiling shader %d\n", count);
  79
  80       if (!(sscreen->debug_flags & (DBG(NO_IR) | DBG(PREOPT_IR)))) {
  81          fprintf(stderr, "%s LLVM IR:\n\n", name);
  82          ac_dump_module(ac->module);
  83          fprintf(stderr, "\n");
  84       }
  85    }
  86
  87    if (sscreen->record_llvm_ir) {
  88       char *ir = LLVMPrintModuleToString(ac->module);
  89       binary->llvm_ir_string = strdup(ir);
  90       LLVMDisposeMessage(ir);
  91    }
  92
  93    if (!si_replace_shader(count, binary)) {
  94       struct ac_compiler_passes *passes = compiler->passes;
  95
  96       if (ac->wave_size == 32)
  97          passes = compiler->passes_wave32;
  98       else if (less_optimized && compiler->low_opt_passes)
  99          passes = compiler->low_opt_passes;
 100
 101       struct si_llvm_diagnostics diag = {debug};
 102       LLVMContextSetDiagnosticHandler(ac->context, si_diagnostic_handler, &diag);
 103
 104       if (!ac_compile_module_to_elf(passes, ac->module, (char **)&binary->elf_buffer,
 105                                     &binary->elf_size))
 106          diag.retval = 1;
 107
 108       if (diag.retval != 0) {
 109          pipe_debug_message(debug, SHADER_INFO, "LLVM compilation failed");
 110          return false;
 111       }
 112    }
 113
 114    struct ac_rtld_binary rtld;
 115    if (!ac_rtld_open(&rtld, (struct ac_rtld_open_info){
 116                                .info = &sscreen->info,
 117                                .shader_type = tgsi_processor_to_shader_stage(shader_type),
 118                                .wave_size = ac->wave_size,
 119                                .num_parts = 1,
 120                                .elf_ptrs = &binary->elf_buffer,
 121                                .elf_sizes = &binary->elf_size}))
 122       return false;
 123
 124    bool ok = ac_rtld_read_config(&sscreen->info, &rtld, conf);
 125    ac_rtld_close(&rtld);
 126    return ok;
 127 }
 128
 129 void si_llvm_context_init(struct si_shader_context *ctx, struct si_screen *sscreen,
 130                           struct ac_llvm_compiler *compiler, unsigned wave_size)
 131 {
 132    memset(ctx, 0, sizeof(*ctx));
 133    ctx->screen = sscreen;
 134    ctx->compiler = compiler;
 135
 136    ac_llvm_context_init(&ctx->ac, compiler, sscreen->info.chip_class, sscreen->info.family,
 137                         AC_FLOAT_MODE_DEFAULT_OPENGL, wave_size, 64);
 138 }
 139
 140 void si_llvm_create_func(struct si_shader_context *ctx, const char *name, LLVMTypeRef *return_types,
 141                          unsigned num_return_elems, unsigned max_workgroup_size)
 142 {
 143    LLVMTypeRef ret_type;
 144    enum ac_llvm_calling_convention call_conv;
 145    enum pipe_shader_type real_shader_type;
 146
 147    if (num_return_elems)
 148       ret_type = LLVMStructTypeInContext(ctx->ac.context, return_types, num_return_elems, true);
 149    else
 150       ret_type = ctx->ac.voidt;
 151
 152    real_shader_type = ctx->type;
 153
 154    /* LS is merged into HS (TCS), and ES is merged into GS. */
 155    if (ctx->screen->info.chip_class >= GFX9) {
 156       if (ctx->shader->key.as_ls)
 157          real_shader_type = PIPE_SHADER_TESS_CTRL;
 158       else if (ctx->shader->key.as_es || ctx->shader->key.as_ngg)
 159          real_shader_type = PIPE_SHADER_GEOMETRY;
 160    }
 161
 162    switch (real_shader_type) {
 163    case PIPE_SHADER_VERTEX:
 164    case PIPE_SHADER_TESS_EVAL:
 165       call_conv = AC_LLVM_AMDGPU_VS;
 166       break;
 167    case PIPE_SHADER_TESS_CTRL:
 168       call_conv = AC_LLVM_AMDGPU_HS;
 169       break;
 170    case PIPE_SHADER_GEOMETRY:
 171       call_conv = AC_LLVM_AMDGPU_GS;
 172       break;
 173    case PIPE_SHADER_FRAGMENT:
 174       call_conv = AC_LLVM_AMDGPU_PS;
 175       break;
 176    case PIPE_SHADER_COMPUTE:
 177       call_conv = AC_LLVM_AMDGPU_CS;
 178       break;
 179    default:
 180       unreachable("Unhandle shader type");
 181    }
 182
 183    /* Setup the function */
 184    ctx->return_type = ret_type;
 185    ctx->main_fn = ac_build_main(&ctx->args, &ctx->ac, call_conv, name, ret_type, ctx->ac.module);
 186    ctx->return_value = LLVMGetUndef(ctx->return_type);
 187
 188    if (ctx->screen->info.address32_hi) {
 189       ac_llvm_add_target_dep_function_attr(ctx->main_fn, "amdgpu-32bit-address-high-bits",
 190                                            ctx->screen->info.address32_hi);
 191    }
 192
 193    LLVMAddTargetDependentFunctionAttr(ctx->main_fn, "no-signed-zeros-fp-math", "true");
 194
 195    ac_llvm_set_workgroup_size(ctx->main_fn, max_workgroup_size);
 196 }
 197
 198 void si_llvm_optimize_module(struct si_shader_context *ctx)
 199 {
 200    /* Dump LLVM IR before any optimization passes */
 201    if (ctx->screen->debug_flags & DBG(PREOPT_IR) && si_can_dump_shader(ctx->screen, ctx->type))
 202       LLVMDumpModule(ctx->ac.module);
 203
 204    /* Run the pass */
 205    LLVMRunPassManager(ctx->compiler->passmgr, ctx->ac.module);
 206    LLVMDisposeBuilder(ctx->ac.builder);
 207 }
 208
 209 void si_llvm_dispose(struct si_shader_context *ctx)
 210 {
 211    LLVMDisposeModule(ctx->ac.module);
 212    LLVMContextDispose(ctx->ac.context);
 213    ac_llvm_context_dispose(&ctx->ac);
 214 }
 215
 216 /**
 217  * Load a dword from a constant buffer.
 218  */
 219 LLVMValueRef si_buffer_load_const(struct si_shader_context *ctx, LLVMValueRef resource,
 220                                   LLVMValueRef offset)
 221 {
 222    return ac_build_buffer_load(&ctx->ac, resource, 1, NULL, offset, NULL, 0, 0, true, true);
 223 }
 224
 225 void si_llvm_build_ret(struct si_shader_context *ctx, LLVMValueRef ret)
 226 {
 227    if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
 228       LLVMBuildRetVoid(ctx->ac.builder);
 229    else
 230       LLVMBuildRet(ctx->ac.builder, ret);
 231 }
 232
 233 LLVMValueRef si_insert_input_ret(struct si_shader_context *ctx, LLVMValueRef ret,
 234                                  struct ac_arg param, unsigned return_index)
 235 {
 236    return LLVMBuildInsertValue(ctx->ac.builder, ret, ac_get_arg(&ctx->ac, param), return_index, "");
 237 }
 238
 239 LLVMValueRef si_insert_input_ret_float(struct si_shader_context *ctx, LLVMValueRef ret,
 240                                        struct ac_arg param, unsigned return_index)
 241 {
 242    LLVMBuilderRef builder = ctx->ac.builder;
 243    LLVMValueRef p = ac_get_arg(&ctx->ac, param);
 244
 245    return LLVMBuildInsertValue(builder, ret, ac_to_float(&ctx->ac, p), return_index, "");
 246 }
 247
 248 LLVMValueRef si_insert_input_ptr(struct si_shader_context *ctx, LLVMValueRef ret,
 249                                  struct ac_arg param, unsigned return_index)
 250 {
 251    LLVMBuilderRef builder = ctx->ac.builder;
 252    LLVMValueRef ptr = ac_get_arg(&ctx->ac, param);
 253    ptr = LLVMBuildPtrToInt(builder, ptr, ctx->ac.i32, "");
 254    return LLVMBuildInsertValue(builder, ret, ptr, return_index, "");
 255 }
 256
 257 LLVMValueRef si_prolog_get_rw_buffers(struct si_shader_context *ctx)
 258 {
 259    LLVMValueRef ptr[2], list;
 260    bool merged_shader = si_is_merged_shader(ctx->shader);
 261
 262    ptr[0] = LLVMGetParam(ctx->main_fn, (merged_shader ? 8 : 0) + SI_SGPR_RW_BUFFERS);
 263    list =
 264       LLVMBuildIntToPtr(ctx->ac.builder, ptr[0], ac_array_in_const32_addr_space(ctx->ac.v4i32), "");
 265    return list;
 266 }
 267
 268 LLVMValueRef si_build_gather_64bit(struct si_shader_context *ctx, LLVMTypeRef type,
 269                                    LLVMValueRef val1, LLVMValueRef val2)
 270 {
 271    LLVMValueRef values[2] = {
 272       ac_to_integer(&ctx->ac, val1),
 273       ac_to_integer(&ctx->ac, val2),
 274    };
 275    LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
 276    return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
 277 }
 278
 279 void si_llvm_emit_barrier(struct si_shader_context *ctx)
 280 {
 281    /* GFX6 only (thanks to a hw bug workaround):
 282     * The real barrier instruction isn’t needed, because an entire patch
 283     * always fits into a single wave.
 284     */
 285    if (ctx->screen->info.chip_class == GFX6 && ctx->type == PIPE_SHADER_TESS_CTRL) {
 286       ac_build_waitcnt(&ctx->ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
 287       return;
 288    }
 289
 290    ac_build_s_barrier(&ctx->ac);
 291 }
 292
 293 /* Ensure that the esgs ring is declared.
 294  *
 295  * We declare it with 64KB alignment as a hint that the
 296  * pointer value will always be 0.
 297  */
 298 void si_llvm_declare_esgs_ring(struct si_shader_context *ctx)
 299 {
 300    if (ctx->esgs_ring)
 301       return;
 302
 303    assert(!LLVMGetNamedGlobal(ctx->ac.module, "esgs_ring"));
 304
 305    ctx->esgs_ring = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i32, 0),
 306                                                 "esgs_ring", AC_ADDR_SPACE_LDS);
 307    LLVMSetLinkage(ctx->esgs_ring, LLVMExternalLinkage);
 308    LLVMSetAlignment(ctx->esgs_ring, 64 * 1024);
 309 }
 310
 311 void si_init_exec_from_input(struct si_shader_context *ctx, struct ac_arg param, unsigned bitoffset)
 312 {
 313    LLVMValueRef args[] = {
 314       ac_get_arg(&ctx->ac, param),
 315       LLVMConstInt(ctx->ac.i32, bitoffset, 0),
 316    };
 317    ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.init.exec.from.input", ctx->ac.voidt, args, 2,
 318                       AC_FUNC_ATTR_CONVERGENT);
 319 }
 320
 321 /**
 322  * Get the value of a shader input parameter and extract a bitfield.
 323  */
 324 static LLVMValueRef unpack_llvm_param(struct si_shader_context *ctx, LLVMValueRef value,
 325                                       unsigned rshift, unsigned bitwidth)
 326 {
 327    if (LLVMGetTypeKind(LLVMTypeOf(value)) == LLVMFloatTypeKind)
 328       value = ac_to_integer(&ctx->ac, value);
 329
 330    if (rshift)
 331       value = LLVMBuildLShr(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, rshift, 0), "");
 332
 333    if (rshift + bitwidth < 32) {
 334       unsigned mask = (1 << bitwidth) - 1;
 335       value = LLVMBuildAnd(ctx->ac.builder, value, LLVMConstInt(ctx->ac.i32, mask, 0), "");
 336    }
 337
 338    return value;
 339 }
 340
 341 LLVMValueRef si_unpack_param(struct si_shader_context *ctx, struct ac_arg param, unsigned rshift,
 342                              unsigned bitwidth)
 343 {
 344    LLVMValueRef value = ac_get_arg(&ctx->ac, param);
 345
 346    return unpack_llvm_param(ctx, value, rshift, bitwidth);
 347 }
 348
 349 LLVMValueRef si_get_primitive_id(struct si_shader_context *ctx, unsigned swizzle)
 350 {
 351    if (swizzle > 0)
 352       return ctx->ac.i32_0;
 353
 354    switch (ctx->type) {
 355    case PIPE_SHADER_VERTEX:
 356       return ac_get_arg(&ctx->ac, ctx->vs_prim_id);
 357    case PIPE_SHADER_TESS_CTRL:
 358       return ac_get_arg(&ctx->ac, ctx->args.tcs_patch_id);
 359    case PIPE_SHADER_TESS_EVAL:
 360       return ac_get_arg(&ctx->ac, ctx->args.tes_patch_id);
 361    case PIPE_SHADER_GEOMETRY:
 362       return ac_get_arg(&ctx->ac, ctx->args.gs_prim_id);
 363    default:
 364       assert(0);
 365       return ctx->ac.i32_0;
 366    }
 367 }
 368
 369 LLVMValueRef si_llvm_get_block_size(struct ac_shader_abi *abi)
 370 {
 371    struct si_shader_context *ctx = si_shader_context_from_abi(abi);
 372
 373    LLVMValueRef values[3];
 374    LLVMValueRef result;
 375    unsigned i;
 376    unsigned *properties = ctx->shader->selector->info.properties;
 377
 378    if (properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH] != 0) {
 379       unsigned sizes[3] = {properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH],
 380                            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT],
 381                            properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH]};
 382
 383       for (i = 0; i < 3; ++i)
 384          values[i] = LLVMConstInt(ctx->ac.i32, sizes[i], 0);
 385
 386       result = ac_build_gather_values(&ctx->ac, values, 3);
 387    } else {
 388       result = ac_get_arg(&ctx->ac, ctx->block_size);
 389    }
 390
 391    return result;
 392 }
 393
 394 void si_llvm_declare_compute_memory(struct si_shader_context *ctx)
 395 {
 396    struct si_shader_selector *sel = ctx->shader->selector;
 397    unsigned lds_size = sel->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE];
 398
 399    LLVMTypeRef i8p = LLVMPointerType(ctx->ac.i8, AC_ADDR_SPACE_LDS);
 400    LLVMValueRef var;
 401
 402    assert(!ctx->ac.lds);
 403
 404    var = LLVMAddGlobalInAddressSpace(ctx->ac.module, LLVMArrayType(ctx->ac.i8, lds_size),
 405                                      "compute_lds", AC_ADDR_SPACE_LDS);
 406    LLVMSetAlignment(var, 64 * 1024);
 407
 408    ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, var, i8p, "");
 409 }
 410
 411 bool si_nir_build_llvm(struct si_shader_context *ctx, struct nir_shader *nir)
 412 {
 413    if (nir->info.stage == MESA_SHADER_VERTEX) {
 414       si_llvm_load_vs_inputs(ctx, nir);
 415    } else if (nir->info.stage == MESA_SHADER_FRAGMENT) {
 416       unsigned colors_read = ctx->shader->selector->info.colors_read;
 417       LLVMValueRef main_fn = ctx->main_fn;
 418
 419       LLVMValueRef undef = LLVMGetUndef(ctx->ac.f32);
 420
 421       unsigned offset = SI_PARAM_POS_FIXED_PT + 1;
 422
 423       if (colors_read & 0x0f) {
 424          unsigned mask = colors_read & 0x0f;
 425          LLVMValueRef values[4];
 426          values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
 427          values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
 428          values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
 429          values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
 430          ctx->abi.color0 = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
 431       }
 432       if (colors_read & 0xf0) {
 433          unsigned mask = (colors_read & 0xf0) >> 4;
 434          LLVMValueRef values[4];
 435          values[0] = mask & 0x1 ? LLVMGetParam(main_fn, offset++) : undef;
 436          values[1] = mask & 0x2 ? LLVMGetParam(main_fn, offset++) : undef;
 437          values[2] = mask & 0x4 ? LLVMGetParam(main_fn, offset++) : undef;
 438          values[3] = mask & 0x8 ? LLVMGetParam(main_fn, offset++) : undef;
 439          ctx->abi.color1 = ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, 4));
 440       }
 441
 442       ctx->abi.interp_at_sample_force_center =
 443          ctx->shader->key.mono.u.ps.interpolate_at_sample_force_center;
 444
 445       ctx->abi.kill_ps_if_inf_interp =
 446          (ctx->screen->debug_flags & DBG(KILL_PS_INF_INTERP)) &&
 447          (ctx->shader->selector->info.uses_persp_center ||
 448           ctx->shader->selector->info.uses_persp_centroid ||
 449           ctx->shader->selector->info.uses_persp_sample);
 450
 451    } else if (nir->info.stage == MESA_SHADER_COMPUTE) {
 452       if (nir->info.cs.user_data_components_amd) {
 453          ctx->abi.user_data = ac_get_arg(&ctx->ac, ctx->cs_user_data);
 454          ctx->abi.user_data = ac_build_expand_to_vec4(&ctx->ac, ctx->abi.user_data,
 455                                                       nir->info.cs.user_data_components_amd);
 456       }
 457    }
 458
 459    ctx->abi.inputs = &ctx->inputs[0];
 460    ctx->abi.clamp_shadow_reference = true;
 461    ctx->abi.robust_buffer_access = true;
 462    ctx->abi.convert_undef_to_zero = true;
 463    ctx->abi.clamp_div_by_zero = ctx->screen->options.clamp_div_by_zero;
 464
 465    if (ctx->shader->selector->info.properties[TGSI_PROPERTY_CS_LOCAL_SIZE]) {
 466       assert(gl_shader_stage_is_compute(nir->info.stage));
 467       si_llvm_declare_compute_memory(ctx);
 468    }
 469
 470    const struct si_shader_info *info = &ctx->shader->selector->info;
 471    for (unsigned i = 0; i < info->num_outputs; i++) {
 472       for (unsigned j = 0; j < 4; j++)
 473          ctx->abi.outputs[i * 4 + j] = ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
 474    }
 475
 476    ac_nir_translate(&ctx->ac, &ctx->abi, &ctx->args, nir);
 477
 478    return true;
 479 }
 480
 481 /**
 482  * Given a list of shader part functions, build a wrapper function that
 483  * runs them in sequence to form a monolithic shader.
 484  */
 485 void si_build_wrapper_function(struct si_shader_context *ctx, LLVMValueRef *parts,
 486                                unsigned num_parts, unsigned main_part,
 487                                unsigned next_shader_first_part)
 488 {
 489    LLVMBuilderRef builder = ctx->ac.builder;
 490    /* PS epilog has one arg per color component; gfx9 merged shader
 491     * prologs need to forward 40 SGPRs.
 492     */
 493    LLVMValueRef initial[AC_MAX_ARGS], out[AC_MAX_ARGS];
 494    LLVMTypeRef function_type;
 495    unsigned num_first_params;
 496    unsigned num_out, initial_num_out;
 497    ASSERTED unsigned num_out_sgpr;         /* used in debug checks */
 498    ASSERTED unsigned initial_num_out_sgpr; /* used in debug checks */
 499    unsigned num_sgprs, num_vgprs;
 500    unsigned gprs;
 501
 502    memset(&ctx->args, 0, sizeof(ctx->args));
 503
 504    for (unsigned i = 0; i < num_parts; ++i) {
 505       ac_add_function_attr(ctx->ac.context, parts[i], -1, AC_FUNC_ATTR_ALWAYSINLINE);
 506       LLVMSetLinkage(parts[i], LLVMPrivateLinkage);
 507    }
 508
 509    /* The parameters of the wrapper function correspond to those of the
 510     * first part in terms of SGPRs and VGPRs, but we use the types of the
 511     * main part to get the right types. This is relevant for the
 512     * dereferenceable attribute on descriptor table pointers.
 513     */
 514    num_sgprs = 0;
 515    num_vgprs = 0;
 516
 517    function_type = LLVMGetElementType(LLVMTypeOf(parts[0]));
 518    num_first_params = LLVMCountParamTypes(function_type);
 519
 520    for (unsigned i = 0; i < num_first_params; ++i) {
 521       LLVMValueRef param = LLVMGetParam(parts[0], i);
 522
 523       if (ac_is_sgpr_param(param)) {
 524          assert(num_vgprs == 0);
 525          num_sgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
 526       } else {
 527          num_vgprs += ac_get_type_size(LLVMTypeOf(param)) / 4;
 528       }
 529    }
 530
 531    gprs = 0;
 532    while (gprs < num_sgprs + num_vgprs) {
 533       LLVMValueRef param = LLVMGetParam(parts[main_part], ctx->args.arg_count);
 534       LLVMTypeRef type = LLVMTypeOf(param);
 535       unsigned size = ac_get_type_size(type) / 4;
 536
 537       /* This is going to get casted anyways, so we don't have to
 538        * have the exact same type. But we do have to preserve the
 539        * pointer-ness so that LLVM knows about it.
 540        */
 541       enum ac_arg_type arg_type = AC_ARG_INT;
 542       if (LLVMGetTypeKind(type) == LLVMPointerTypeKind) {
 543          type = LLVMGetElementType(type);
 544
 545          if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
 546             if (LLVMGetVectorSize(type) == 4)
 547                arg_type = AC_ARG_CONST_DESC_PTR;
 548             else if (LLVMGetVectorSize(type) == 8)
 549                arg_type = AC_ARG_CONST_IMAGE_PTR;
 550             else
 551                assert(0);
 552          } else if (type == ctx->ac.f32) {
 553             arg_type = AC_ARG_CONST_FLOAT_PTR;
 554          } else {
 555             assert(0);
 556          }
 557       }
 558
 559       ac_add_arg(&ctx->args, gprs < num_sgprs ? AC_ARG_SGPR : AC_ARG_VGPR, size, arg_type, NULL);
 560
 561       assert(ac_is_sgpr_param(param) == (gprs < num_sgprs));
 562       assert(gprs + size <= num_sgprs + num_vgprs &&
 563              (gprs >= num_sgprs || gprs + size <= num_sgprs));
 564
 565       gprs += size;
 566    }
 567
 568    /* Prepare the return type. */
 569    unsigned num_returns = 0;
 570    LLVMTypeRef returns[AC_MAX_ARGS], last_func_type, return_type;
 571
 572    last_func_type = LLVMGetElementType(LLVMTypeOf(parts[num_parts - 1]));
 573    return_type = LLVMGetReturnType(last_func_type);
 574
 575    switch (LLVMGetTypeKind(return_type)) {
 576    case LLVMStructTypeKind:
 577       num_returns = LLVMCountStructElementTypes(return_type);
 578       assert(num_returns <= ARRAY_SIZE(returns));
 579       LLVMGetStructElementTypes(return_type, returns);
 580       break;
 581    case LLVMVoidTypeKind:
 582       break;
 583    default:
 584       unreachable("unexpected type");
 585    }
 586
 587    si_llvm_create_func(ctx, "wrapper", returns, num_returns,
 588                        si_get_max_workgroup_size(ctx->shader));
 589
 590    if (si_is_merged_shader(ctx->shader))
 591       ac_init_exec_full_mask(&ctx->ac);
 592
 593    /* Record the arguments of the function as if they were an output of
 594     * a previous part.
 595     */
 596    num_out = 0;
 597    num_out_sgpr = 0;
 598
 599    for (unsigned i = 0; i < ctx->args.arg_count; ++i) {
 600       LLVMValueRef param = LLVMGetParam(ctx->main_fn, i);
 601       LLVMTypeRef param_type = LLVMTypeOf(param);
 602       LLVMTypeRef out_type = ctx->args.args[i].file == AC_ARG_SGPR ? ctx->ac.i32 : ctx->ac.f32;
 603       unsigned size = ac_get_type_size(param_type) / 4;
 604
 605       if (size == 1) {
 606          if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
 607             param = LLVMBuildPtrToInt(builder, param, ctx->ac.i32, "");
 608             param_type = ctx->ac.i32;
 609          }
 610
 611          if (param_type != out_type)
 612             param = LLVMBuildBitCast(builder, param, out_type, "");
 613          out[num_out++] = param;
 614       } else {
 615          LLVMTypeRef vector_type = LLVMVectorType(out_type, size);
 616
 617          if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
 618             param = LLVMBuildPtrToInt(builder, param, ctx->ac.i64, "");
 619             param_type = ctx->ac.i64;
 620          }
 621
 622          if (param_type != vector_type)
 623             param = LLVMBuildBitCast(builder, param, vector_type, "");
 624
 625          for (unsigned j = 0; j < size; ++j)
 626             out[num_out++] =
 627                LLVMBuildExtractElement(builder, param, LLVMConstInt(ctx->ac.i32, j, 0), "");
 628       }
 629
 630       if (ctx->args.args[i].file == AC_ARG_SGPR)
 631          num_out_sgpr = num_out;
 632    }
 633
 634    memcpy(initial, out, sizeof(out));
 635    initial_num_out = num_out;
 636    initial_num_out_sgpr = num_out_sgpr;
 637
 638    /* Now chain the parts. */
 639    LLVMValueRef ret = NULL;
 640    for (unsigned part = 0; part < num_parts; ++part) {
 641       LLVMValueRef in[AC_MAX_ARGS];
 642       LLVMTypeRef ret_type;
 643       unsigned out_idx = 0;
 644       unsigned num_params = LLVMCountParams(parts[part]);
 645
 646       /* Merged shaders are executed conditionally depending
 647        * on the number of enabled threads passed in the input SGPRs. */
 648       if (si_is_multi_part_shader(ctx->shader) && part == 0) {
 649          LLVMValueRef ena, count = initial[3];
 650
 651          count = LLVMBuildAnd(builder, count, LLVMConstInt(ctx->ac.i32, 0x7f, 0), "");
 652          ena = LLVMBuildICmp(builder, LLVMIntULT, ac_get_thread_id(&ctx->ac), count, "");
 653          ac_build_ifcc(&ctx->ac, ena, 6506);
 654       }
 655
 656       /* Derive arguments for the next part from outputs of the
 657        * previous one.
 658        */
 659       for (unsigned param_idx = 0; param_idx < num_params; ++param_idx) {
 660          LLVMValueRef param;
 661          LLVMTypeRef param_type;
 662          bool is_sgpr;
 663          unsigned param_size;
 664          LLVMValueRef arg = NULL;
 665
 666          param = LLVMGetParam(parts[part], param_idx);
 667          param_type = LLVMTypeOf(param);
 668          param_size = ac_get_type_size(param_type) / 4;
 669          is_sgpr = ac_is_sgpr_param(param);
 670
 671          if (is_sgpr) {
 672             ac_add_function_attr(ctx->ac.context, parts[part], param_idx + 1, AC_FUNC_ATTR_INREG);
 673          } else if (out_idx < num_out_sgpr) {
 674             /* Skip returned SGPRs the current part doesn't
 675              * declare on the input. */
 676             out_idx = num_out_sgpr;
 677          }
 678
 679          assert(out_idx + param_size <= (is_sgpr ? num_out_sgpr : num_out));
 680
 681          if (param_size == 1)
 682             arg = out[out_idx];
 683          else
 684             arg = ac_build_gather_values(&ctx->ac, &out[out_idx], param_size);
 685
 686          if (LLVMTypeOf(arg) != param_type) {
 687             if (LLVMGetTypeKind(param_type) == LLVMPointerTypeKind) {
 688                if (LLVMGetPointerAddressSpace(param_type) == AC_ADDR_SPACE_CONST_32BIT) {
 689                   arg = LLVMBuildBitCast(builder, arg, ctx->ac.i32, "");
 690                   arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
 691                } else {
 692                   arg = LLVMBuildBitCast(builder, arg, ctx->ac.i64, "");
 693                   arg = LLVMBuildIntToPtr(builder, arg, param_type, "");
 694                }
 695             } else {
 696                arg = LLVMBuildBitCast(builder, arg, param_type, "");
 697             }
 698          }
 699
 700          in[param_idx] = arg;
 701          out_idx += param_size;
 702       }
 703
 704       ret = ac_build_call(&ctx->ac, parts[part], in, num_params);
 705
 706       if (si_is_multi_part_shader(ctx->shader) && part + 1 == next_shader_first_part) {
 707          ac_build_endif(&ctx->ac, 6506);
 708
 709          /* The second half of the merged shader should use
 710           * the inputs from the toplevel (wrapper) function,
 711           * not the return value from the last call.
 712           *
 713           * That's because the last call was executed condi-
 714           * tionally, so we can't consume it in the main
 715           * block.
 716           */
 717          memcpy(out, initial, sizeof(initial));
 718          num_out = initial_num_out;
 719          num_out_sgpr = initial_num_out_sgpr;
 720          continue;
 721       }
 722
 723       /* Extract the returned GPRs. */
 724       ret_type = LLVMTypeOf(ret);
 725       num_out = 0;
 726       num_out_sgpr = 0;
 727
 728       if (LLVMGetTypeKind(ret_type) != LLVMVoidTypeKind) {
 729          assert(LLVMGetTypeKind(ret_type) == LLVMStructTypeKind);
 730
 731          unsigned ret_size = LLVMCountStructElementTypes(ret_type);
 732
 733          for (unsigned i = 0; i < ret_size; ++i) {
 734             LLVMValueRef val = LLVMBuildExtractValue(builder, ret, i, "");
 735
 736             assert(num_out < ARRAY_SIZE(out));
 737             out[num_out++] = val;
 738
 739             if (LLVMTypeOf(val) == ctx->ac.i32) {
 740                assert(num_out_sgpr + 1 == num_out);
 741                num_out_sgpr = num_out;
 742             }
 743          }
 744       }
 745    }
 746
 747    /* Return the value from the last part. */
 748    if (LLVMGetTypeKind(LLVMTypeOf(ret)) == LLVMVoidTypeKind)
 749       LLVMBuildRetVoid(builder);
 750    else
 751       LLVMBuildRet(builder, ret);
 752 }