radeonsi/gfx9: set correct LLVM calling conventions for merged shaders
[mesa.git] src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 #include <llvm-c/Support.h>
44
45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
46 */
47 struct si_llvm_flow {
48 /* Loop exit or next part of if/else/endif. */
49 LLVMBasicBlockRef next_block;
50 LLVMBasicBlockRef loop_entry_block;
51 };
52
53 enum si_llvm_calling_convention {
54 RADEON_LLVM_AMDGPU_VS = 87,
55 RADEON_LLVM_AMDGPU_GS = 88,
56 RADEON_LLVM_AMDGPU_PS = 89,
57 RADEON_LLVM_AMDGPU_CS = 90,
58 RADEON_LLVM_AMDGPU_HS = 93,
59 };
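/*
 * These numeric values are expected to mirror LLVM's CallingConv::AMDGPU_*
 * IDs of this era (amdgpu_vs = 87, amdgpu_gs = 88, amdgpu_ps = 89,
 * amdgpu_cs = 90, amdgpu_hs = 93); that correspondence is an assumption
 * based on LLVM's CallingConv.h, not something stated in this file.
 * amdgpu_hs only exists since LLVM 5.0, which is why si_llvm_create_func()
 * below falls back to the VS convention for TCS on older LLVM.
 */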
60
61 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
62 {
63 char str[16];
64
65 snprintf(str, sizeof(str), "%i", value);
66 LLVMAddTargetDependentFunctionAttr(F, name, str);
67 }
68
69 static void init_amdgpu_target()
70 {
71 gallivm_init_llvm_targets();
72 LLVMInitializeAMDGPUTargetInfo();
73 LLVMInitializeAMDGPUTarget();
74 LLVMInitializeAMDGPUTargetMC();
75 LLVMInitializeAMDGPUAsmPrinter();
76
77 /* For inline assembly. */
78 LLVMInitializeAMDGPUAsmParser();
79
80 if (HAVE_LLVM >= 0x0400) {
81 /*
82 * Workaround for bug in llvm 4.0 that causes image intrinsics
83 * to disappear.
84 * https://reviews.llvm.org/D26348
85 */
86 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
87 LLVMParseCommandLineOptions(2, argv, NULL);
88 }
89 }
90
91 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
92
93 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
94 {
95 LLVMTargetRef target = NULL;
96 char *err_message = NULL;
97
98 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
99
100 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
101 fprintf(stderr, "Cannot find target for triple %s ", triple);
102 if (err_message) {
103 fprintf(stderr, "%s\n", err_message);
104 }
105 LLVMDisposeMessage(err_message);
106 return NULL;
107 }
108 return target;
109 }
110
111 struct si_llvm_diagnostics {
112 struct pipe_debug_callback *debug;
113 unsigned retval;
114 };
115
116 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
117 {
118 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
119 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
120 char *description = LLVMGetDiagInfoDescription(di);
121 const char *severity_str = NULL;
122
123 switch (severity) {
124 case LLVMDSError:
125 severity_str = "error";
126 break;
127 case LLVMDSWarning:
128 severity_str = "warning";
129 break;
130 case LLVMDSRemark:
131 severity_str = "remark";
132 break;
133 case LLVMDSNote:
134 severity_str = "note";
135 break;
136 default:
137 severity_str = "unknown";
138 }
139
140 pipe_debug_message(diag->debug, SHADER_INFO,
141 "LLVM diagnostic (%s): %s", severity_str, description);
142
143 if (severity == LLVMDSError) {
144 diag->retval = 1;
145 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
146 }
147
148 LLVMDisposeMessage(description);
149 }
150
151 /**
152 * Compile an LLVM module to machine code.
153 *
154 * @returns 0 for success, 1 for failure
155 */
156 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
157 LLVMTargetMachineRef tm,
158 struct pipe_debug_callback *debug)
159 {
160 struct si_llvm_diagnostics diag;
161 char *err;
162 LLVMContextRef llvm_ctx;
163 LLVMMemoryBufferRef out_buffer;
164 unsigned buffer_size;
165 const char *buffer_data;
166 LLVMBool mem_err;
167
168 diag.debug = debug;
169 diag.retval = 0;
170
171 /* Set up the diagnostic handler. */
172 llvm_ctx = LLVMGetModuleContext(M);
173
174 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
175
176 /* Compile IR */
177 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
178 &out_buffer);
179
180 /* Process Errors/Warnings */
181 if (mem_err) {
182 fprintf(stderr, "%s: %s", __FUNCTION__, err);
183 pipe_debug_message(debug, SHADER_INFO,
184 "LLVM emit error: %s", err);
185 FREE(err);
186 diag.retval = 1;
187 goto out;
188 }
189
190 /* Extract shader code */
191 buffer_size = LLVMGetBufferSize(out_buffer);
192 buffer_data = LLVMGetBufferStart(out_buffer);
193
194 ac_elf_read(buffer_data, buffer_size, binary);
195
196 /* Clean up */
197 LLVMDisposeMemoryBuffer(out_buffer);
198
199 out:
200 if (diag.retval != 0)
201 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
202 return diag.retval;
203 }
204
205 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
206 enum tgsi_opcode_type type)
207 {
208 LLVMContextRef ctx = bld_base->base.gallivm->context;
209
210 switch (type) {
211 case TGSI_TYPE_UNSIGNED:
212 case TGSI_TYPE_SIGNED:
213 return LLVMInt32TypeInContext(ctx);
214 case TGSI_TYPE_UNSIGNED64:
215 case TGSI_TYPE_SIGNED64:
216 return LLVMInt64TypeInContext(ctx);
217 case TGSI_TYPE_DOUBLE:
218 return LLVMDoubleTypeInContext(ctx);
219 case TGSI_TYPE_UNTYPED:
220 case TGSI_TYPE_FLOAT:
221 return LLVMFloatTypeInContext(ctx);
222 default: break;
223 }
224 return 0;
225 }
226
227 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
228 enum tgsi_opcode_type type, LLVMValueRef value)
229 {
230 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
231 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
232
233 if (dst_type)
234 return LLVMBuildBitCast(builder, value, dst_type, "");
235 else
236 return value;
237 }
238
239 /**
240 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
241 * or an undefined value in the same interval otherwise.
242 */
243 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
244 LLVMValueRef index,
245 unsigned num)
246 {
247 struct gallivm_state *gallivm = &ctx->gallivm;
248 LLVMBuilderRef builder = gallivm->builder;
249 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
250 LLVMValueRef cc;
251
252 if (util_is_power_of_two(num)) {
253 index = LLVMBuildAnd(builder, index, c_max, "");
254 } else {
255 /* In theory, this MAX pattern should result in code that is
256 * as good as the bit-wise AND above.
257 *
258 * In practice, LLVM generates worse code (at the time of
259 * writing), because its value tracking is not strong enough.
260 */
261 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
262 index = LLVMBuildSelect(builder, cc, index, c_max, "");
263 }
264
265 return index;
266 }
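/*
 * Worked example for si_llvm_bound_index() (illustrative only):
 *   num = 8 (power of two):      index = index & 7
 *   num = 6 (not a power of two): index = (index <= 5) ? index : 5
 * Either way the result stays inside [0, num), which is all the callers
 * rely on.
 */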
267
268 static struct si_llvm_flow *
269 get_current_flow(struct si_shader_context *ctx)
270 {
271 if (ctx->flow_depth > 0)
272 return &ctx->flow[ctx->flow_depth - 1];
273 return NULL;
274 }
275
276 static struct si_llvm_flow *
277 get_innermost_loop(struct si_shader_context *ctx)
278 {
279 for (unsigned i = ctx->flow_depth; i > 0; --i) {
280 if (ctx->flow[i - 1].loop_entry_block)
281 return &ctx->flow[i - 1];
282 }
283 return NULL;
284 }
285
286 static struct si_llvm_flow *
287 push_flow(struct si_shader_context *ctx)
288 {
289 struct si_llvm_flow *flow;
290
291 if (ctx->flow_depth >= ctx->flow_depth_max) {
292 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
293 ctx->flow = REALLOC(ctx->flow,
294 ctx->flow_depth_max * sizeof(*ctx->flow),
295 new_max * sizeof(*ctx->flow));
296 ctx->flow_depth_max = new_max;
297 }
298
299 flow = &ctx->flow[ctx->flow_depth];
300 ctx->flow_depth++;
301
302 flow->next_block = NULL;
303 flow->loop_entry_block = NULL;
304 return flow;
305 }
306
307 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
308 LLVMValueRef value,
309 unsigned swizzle_x,
310 unsigned swizzle_y,
311 unsigned swizzle_z,
312 unsigned swizzle_w)
313 {
314 LLVMValueRef swizzles[4];
315 LLVMTypeRef i32t =
316 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
317
318 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
319 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
320 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
321 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
322
323 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
324 value,
325 LLVMGetUndef(LLVMTypeOf(value)),
326 LLVMConstVector(swizzles, 4), "");
327 }
328
329 /**
330 * Return the 1-based ID of the array covering the given temporary register
331 * index, or 0 if the register is not covered by any declared array.
332 */
333 static unsigned
334 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
335 unsigned reg_index,
336 const struct tgsi_ind_register *reg)
337 {
338 struct si_shader_context *ctx = si_shader_context(bld_base);
339 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
340 unsigned i;
341
342 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
343 return reg->ArrayID;
344
345 for (i = 0; i < num_arrays; i++) {
346 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
347
348 if (reg_index >= array->range.First && reg_index <= array->range.Last)
349 return i + 1;
350 }
351
352 return 0;
353 }
354
355 static struct tgsi_declaration_range
356 get_array_range(struct lp_build_tgsi_context *bld_base,
357 unsigned File, unsigned reg_index,
358 const struct tgsi_ind_register *reg)
359 {
360 struct si_shader_context *ctx = si_shader_context(bld_base);
361 struct tgsi_declaration_range range;
362
363 if (File == TGSI_FILE_TEMPORARY) {
364 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
365 if (array_id)
366 return ctx->temp_arrays[array_id - 1].range;
367 }
368
369 range.First = 0;
370 range.Last = bld_base->info->file_max[File];
371 return range;
372 }
373
374 static LLVMValueRef
375 emit_array_index(struct si_shader_context *ctx,
376 const struct tgsi_ind_register *reg,
377 unsigned offset)
378 {
379 struct gallivm_state *gallivm = &ctx->gallivm;
380
381 if (!reg) {
382 return LLVMConstInt(ctx->i32, offset, 0);
383 }
384 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
385 return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
386 }
387
388 /**
389 * For indirect registers, construct a pointer directly to the requested
390 * element using getelementptr if possible.
391 *
392 * Returns NULL if the insertelement/extractelement fallback for array access
393 * must be used.
394 */
395 static LLVMValueRef
396 get_pointer_into_array(struct si_shader_context *ctx,
397 unsigned file,
398 unsigned swizzle,
399 unsigned reg_index,
400 const struct tgsi_ind_register *reg_indirect)
401 {
402 unsigned array_id;
403 struct tgsi_array_info *array;
404 struct gallivm_state *gallivm = &ctx->gallivm;
405 LLVMBuilderRef builder = gallivm->builder;
406 LLVMValueRef idxs[2];
407 LLVMValueRef index;
408 LLVMValueRef alloca;
409
410 if (file != TGSI_FILE_TEMPORARY)
411 return NULL;
412
413 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
414 if (!array_id)
415 return NULL;
416
417 alloca = ctx->temp_array_allocas[array_id - 1];
418 if (!alloca)
419 return NULL;
420
421 array = &ctx->temp_arrays[array_id - 1];
422
423 if (!(array->writemask & (1 << swizzle)))
424 return ctx->undef_alloca;
425
426 index = emit_array_index(ctx, reg_indirect,
427 reg_index - ctx->temp_arrays[array_id - 1].range.First);
428
429 /* Ensure that the index is within a valid range, to guard against
430 * VM faults and overwriting critical data (e.g. spilled resource
431 * descriptors).
432 *
433 * TODO It should be possible to avoid the additional instructions
434 * if LLVM is changed so that it guarantees:
435 * 1. the scratch space descriptor isolates the current wave (this
436 * could even save the scratch offset SGPR at the cost of an
437 * additional SALU instruction)
438 * 2. the memory for allocas must be allocated at the _end_ of the
439 * scratch space (after spilled registers)
440 */
441 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
442
443 index = LLVMBuildMul(
444 builder, index,
445 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
446 "");
447 index = LLVMBuildAdd(
448 builder, index,
449 LLVMConstInt(ctx->i32,
450 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
451 "");
452 idxs[0] = ctx->i32_0;
453 idxs[1] = index;
454 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
455 }
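/*
 * For illustration of the packed-array indexing above: if the array's
 * writemask covers only .xz, each element occupies
 * util_bitcount(writemask) = 2 consecutive floats in the alloca, so
 * requesting channel z of relative element i yields the GEP index
 * i * 2 + 1 (the "+ 1" counts the written channels below z, i.e. x).
 * Channels the array never writes (here y and w) get ctx->undef_alloca
 * instead.
 */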
456
457 LLVMValueRef
458 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
459 enum tgsi_opcode_type type,
460 LLVMValueRef ptr,
461 LLVMValueRef ptr2)
462 {
463 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
464 LLVMValueRef result;
465
466 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
467
468 result = LLVMBuildInsertElement(builder,
469 result,
470 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
471 bld_base->int_bld.zero, "");
472 result = LLVMBuildInsertElement(builder,
473 result,
474 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
475 bld_base->int_bld.one, "");
476 return bitcast(bld_base, type, result);
477 }
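/*
 * For illustration: with the scalar SoA setup used here
 * (bld_base->base.type.length == 1), ptr and ptr2 carry the low and high
 * 32-bit halves; they are packed into a <2 x i32> vector and the final
 * bitcast() turns that into i64 or double, depending on 'type'.
 */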
478
479 static LLVMValueRef
480 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
481 unsigned File, enum tgsi_opcode_type type,
482 struct tgsi_declaration_range range,
483 unsigned swizzle)
484 {
485 struct si_shader_context *ctx = si_shader_context(bld_base);
486
487 LLVMBuilderRef builder = ctx->gallivm.builder;
488
489 unsigned i, size = range.Last - range.First + 1;
490 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
491 LLVMValueRef result = LLVMGetUndef(vec);
492
493 struct tgsi_full_src_register tmp_reg = {};
494 tmp_reg.Register.File = File;
495
496 for (i = 0; i < size; ++i) {
497 tmp_reg.Register.Index = i + range.First;
498 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
499 result = LLVMBuildInsertElement(builder, result, temp,
500 LLVMConstInt(ctx->i32, i, 0), "array_vector");
501 }
502 return result;
503 }
504
505 static LLVMValueRef
506 load_value_from_array(struct lp_build_tgsi_context *bld_base,
507 unsigned file,
508 enum tgsi_opcode_type type,
509 unsigned swizzle,
510 unsigned reg_index,
511 const struct tgsi_ind_register *reg_indirect)
512 {
513 struct si_shader_context *ctx = si_shader_context(bld_base);
514 struct gallivm_state *gallivm = &ctx->gallivm;
515 LLVMBuilderRef builder = gallivm->builder;
516 LLVMValueRef ptr;
517
518 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
519 if (ptr) {
520 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
521 if (tgsi_type_is_64bit(type)) {
522 LLVMValueRef ptr_hi, val_hi;
523 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
524 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
525 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
526 }
527
528 return val;
529 } else {
530 struct tgsi_declaration_range range =
531 get_array_range(bld_base, file, reg_index, reg_indirect);
532 LLVMValueRef index =
533 emit_array_index(ctx, reg_indirect, reg_index - range.First);
534 LLVMValueRef array =
535 emit_array_fetch(bld_base, file, type, range, swizzle);
536 return LLVMBuildExtractElement(builder, array, index, "");
537 }
538 }
539
540 static void
541 store_value_to_array(struct lp_build_tgsi_context *bld_base,
542 LLVMValueRef value,
543 unsigned file,
544 unsigned chan_index,
545 unsigned reg_index,
546 const struct tgsi_ind_register *reg_indirect)
547 {
548 struct si_shader_context *ctx = si_shader_context(bld_base);
549 struct gallivm_state *gallivm = &ctx->gallivm;
550 LLVMBuilderRef builder = gallivm->builder;
551 LLVMValueRef ptr;
552
553 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
554 if (ptr) {
555 LLVMBuildStore(builder, value, ptr);
556 } else {
557 unsigned i, size;
558 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
559 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
560 LLVMValueRef array =
561 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
562 LLVMValueRef temp_ptr;
563
564 array = LLVMBuildInsertElement(builder, array, value, index, "");
565
566 size = range.Last - range.First + 1;
567 for (i = 0; i < size; ++i) {
568 switch(file) {
569 case TGSI_FILE_OUTPUT:
570 temp_ptr = ctx->outputs[i + range.First][chan_index];
571 break;
572
573 case TGSI_FILE_TEMPORARY:
574 if (range.First + i >= ctx->temps_count)
575 continue;
576 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
577 break;
578
579 default:
580 continue;
581 }
582 value = LLVMBuildExtractElement(builder, array,
583 LLVMConstInt(ctx->i32, i, 0), "");
584 LLVMBuildStore(builder, value, temp_ptr);
585 }
586 }
587 }
588
589 /* If this returns true, preload FS inputs at the beginning of the shader.
590 * Otherwise, reload them at each use. It must return true if the shader uses
591 * both derivatives and KILL, because KILL can leave whole-quad mode (WQM),
592 * and a lazy input load emitted after that would no longer execute in WQM.
593 */
594 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
595 {
596 struct si_shader_selector *sel = ctx->shader->selector;
597
598 return sel->info.uses_derivatives &&
599 sel->info.uses_kill;
600 }
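/*
 * Example of why this matters (illustrative): a fragment shader that KILLs
 * some pixels and later takes a derivative of an interpolated input must
 * have loaded that input before the KILL; a lazy load emitted afterwards
 * would run outside whole-quad mode, so the helper lanes needed for the
 * derivative would not hold valid data.
 */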
601
602 static LLVMValueRef
603 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
604 unsigned chan)
605 {
606 struct si_shader_context *ctx = si_shader_context(bld_base);
607
608 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
609 return ctx->outputs[index][chan];
610 }
611
612 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
613 const struct tgsi_full_src_register *reg,
614 enum tgsi_opcode_type type,
615 unsigned swizzle)
616 {
617 struct si_shader_context *ctx = si_shader_context(bld_base);
618 LLVMBuilderRef builder = ctx->gallivm.builder;
619 LLVMValueRef result = NULL, ptr, ptr2;
620
621 if (swizzle == ~0) {
622 LLVMValueRef values[TGSI_NUM_CHANNELS];
623 unsigned chan;
624 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
625 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
626 }
627 return lp_build_gather_values(&ctx->gallivm, values,
628 TGSI_NUM_CHANNELS);
629 }
630
631 if (reg->Register.Indirect) {
632 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
633 swizzle, reg->Register.Index, &reg->Indirect);
634 return bitcast(bld_base, type, load);
635 }
636
637 switch(reg->Register.File) {
638 case TGSI_FILE_IMMEDIATE: {
639 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
640 if (tgsi_type_is_64bit(type)) {
641 result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
642 result = LLVMConstInsertElement(result,
643 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
644 ctx->i32_0);
645 result = LLVMConstInsertElement(result,
646 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
647 ctx->i32_1);
648 return LLVMConstBitCast(result, ctype);
649 } else {
650 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
651 }
652 }
653
654 case TGSI_FILE_INPUT: {
655 unsigned index = reg->Register.Index;
656 LLVMValueRef input[4];
657
658 /* I don't think doing this for vertex shaders is beneficial.
659 * For those, we want to make sure the VMEM loads are executed
660 * only once. Fragment shaders don't care much, because
661 * v_interp instructions are much cheaper than VMEM loads.
662 */
663 if (!si_preload_fs_inputs(ctx) &&
664 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
665 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
666 else
667 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
668
669 result = input[swizzle];
670
671 if (tgsi_type_is_64bit(type)) {
672 ptr = result;
673 ptr2 = input[swizzle + 1];
674 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
675 }
676 break;
677 }
678
679 case TGSI_FILE_TEMPORARY:
680 if (reg->Register.Index >= ctx->temps_count)
681 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
682 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
683 if (tgsi_type_is_64bit(type)) {
684 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
685 return si_llvm_emit_fetch_64bit(bld_base, type,
686 LLVMBuildLoad(builder, ptr, ""),
687 LLVMBuildLoad(builder, ptr2, ""));
688 }
689 result = LLVMBuildLoad(builder, ptr, "");
690 break;
691
692 case TGSI_FILE_OUTPUT:
693 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
694 if (tgsi_type_is_64bit(type)) {
695 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
696 return si_llvm_emit_fetch_64bit(bld_base, type,
697 LLVMBuildLoad(builder, ptr, ""),
698 LLVMBuildLoad(builder, ptr2, ""));
699 }
700 result = LLVMBuildLoad(builder, ptr, "");
701 break;
702
703 default:
704 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
705 }
706
707 return bitcast(bld_base, type, result);
708 }
709
710 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
711 const struct tgsi_full_src_register *reg,
712 enum tgsi_opcode_type type,
713 unsigned swizzle)
714 {
715 struct si_shader_context *ctx = si_shader_context(bld_base);
716 LLVMBuilderRef builder = ctx->gallivm.builder;
717 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
718
719 if (tgsi_type_is_64bit(type)) {
720 LLVMValueRef lo, hi;
721
722 assert(swizzle == 0 || swizzle == 2);
723
724 lo = LLVMBuildExtractElement(
725 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
726 hi = LLVMBuildExtractElement(
727 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
728
729 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
730 }
731
732 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
733 cval = LLVMBuildExtractElement(
734 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
735 } else {
736 assert(swizzle == 0);
737 }
738
739 return bitcast(bld_base, type, cval);
740 }
741
742 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
743 const struct tgsi_full_declaration *decl)
744 {
745 struct si_shader_context *ctx = si_shader_context(bld_base);
746 LLVMBuilderRef builder = ctx->gallivm.builder;
747 unsigned first, last, i;
748 switch(decl->Declaration.File) {
749 case TGSI_FILE_ADDRESS:
750 {
751 unsigned idx;
752 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
753 unsigned chan;
754 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
755 ctx->addrs[idx][chan] = lp_build_alloca_undef(
756 &ctx->gallivm,
757 ctx->i32, "");
758 }
759 }
760 break;
761 }
762
763 case TGSI_FILE_TEMPORARY:
764 {
765 char name[16] = "";
766 LLVMValueRef array_alloca = NULL;
767 unsigned decl_size;
768 unsigned writemask = decl->Declaration.UsageMask;
769 first = decl->Range.First;
770 last = decl->Range.Last;
771 decl_size = 4 * ((last - first) + 1);
772
773 if (decl->Declaration.Array) {
774 unsigned id = decl->Array.ArrayID - 1;
775 unsigned array_size;
776
777 writemask &= ctx->temp_arrays[id].writemask;
778 ctx->temp_arrays[id].writemask = writemask;
779 array_size = ((last - first) + 1) * util_bitcount(writemask);
780
781 /* If the array has more than 16 elements, store it
782 * in memory using an alloca that spans the entire
783 * array.
784 *
785 * Otherwise, store each array element individually.
786 * We will then generate vectors (per-channel, up to
787 * <16 x float> if the usagemask is a single bit) for
788 * indirect addressing.
789 *
790 * Note that 16 is the number of vector elements that
791 * LLVM will store in a register, so theoretically an
792 * array with up to 4 * 16 = 64 elements could be
793 * handled this way, but whether that's a good idea
794 * depends on VGPR register pressure elsewhere.
795 *
796 * FIXME: We shouldn't need to have the non-alloca
797 * code path for arrays. LLVM should be smart enough to
798 * promote allocas into registers when profitable.
799 */
800 if (array_size > 16 ||
801 /* TODO: VGPR indexing is buggy on GFX9. */
802 ctx->screen->b.chip_class == GFX9) {
803 array_alloca = LLVMBuildAlloca(builder,
804 LLVMArrayType(ctx->f32,
805 array_size), "array");
806 ctx->temp_array_allocas[id] = array_alloca;
807 }
808 }
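/*
 * Sizing example (illustrative): DCL TEMP[0..7] declared as an array with
 * usage mask .xy gives array_size = 8 * 2 = 16, which still uses the
 * per-channel allocas below on pre-GFX9 chips; the same range with mask
 * .xyzw gives 32 and takes the single-alloca path above.
 */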
809
810 if (!ctx->temps_count) {
811 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
812 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
813 }
814 if (!array_alloca) {
815 for (i = 0; i < decl_size; ++i) {
816 #ifdef DEBUG
817 snprintf(name, sizeof(name), "TEMP%d.%c",
818 first + i / 4, "xyzw"[i % 4]);
819 #endif
820 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
821 lp_build_alloca_undef(&ctx->gallivm,
822 ctx->f32,
823 name);
824 }
825 } else {
826 LLVMValueRef idxs[2] = {
827 ctx->i32_0,
828 NULL
829 };
830 unsigned j = 0;
831
832 if (writemask != TGSI_WRITEMASK_XYZW &&
833 !ctx->undef_alloca) {
834 /* Create a dummy alloca. We use it so that we
835 * have a pointer that is safe to load from if
836 * a shader ever reads from a channel that
837 * it never writes to.
838 */
839 ctx->undef_alloca = lp_build_alloca_undef(
840 &ctx->gallivm,
841 ctx->f32, "undef");
842 }
843
844 for (i = 0; i < decl_size; ++i) {
845 LLVMValueRef ptr;
846 if (writemask & (1 << (i % 4))) {
847 #ifdef DEBUG
848 snprintf(name, sizeof(name), "TEMP%d.%c",
849 first + i / 4, "xyzw"[i % 4]);
850 #endif
851 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
852 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
853 j++;
854 } else {
855 ptr = ctx->undef_alloca;
856 }
857 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
858 }
859 }
860 break;
861 }
862 case TGSI_FILE_INPUT:
863 {
864 unsigned idx;
865 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
866 if (ctx->load_input &&
867 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
868 ctx->input_decls[idx] = *decl;
869 ctx->input_decls[idx].Range.First = idx;
870 ctx->input_decls[idx].Range.Last = idx;
871 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
872
873 if (si_preload_fs_inputs(ctx) ||
874 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
875 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
876 &ctx->inputs[idx * 4]);
877 }
878 }
879 }
880 break;
881
882 case TGSI_FILE_SYSTEM_VALUE:
883 {
884 unsigned idx;
885 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
886 ctx->load_system_value(ctx, idx, decl);
887 }
888 }
889 break;
890
891 case TGSI_FILE_OUTPUT:
892 {
893 char name[16] = "";
894 unsigned idx;
895 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
896 unsigned chan;
897 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
898 if (ctx->outputs[idx][0])
899 continue;
900 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
901 #ifdef DEBUG
902 snprintf(name, sizeof(name), "OUT%d.%c",
903 idx, "xyzw"[chan % 4]);
904 #endif
905 ctx->outputs[idx][chan] = lp_build_alloca_undef(
906 &ctx->gallivm,
907 ctx->f32, name);
908 }
909 }
910 break;
911 }
912
913 case TGSI_FILE_MEMORY:
914 ctx->declare_memory_region(ctx, decl);
915 break;
916
917 default:
918 break;
919 }
920 }
921
922 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
923 const struct tgsi_full_instruction *inst,
924 const struct tgsi_opcode_info *info,
925 LLVMValueRef dst[4])
926 {
927 struct si_shader_context *ctx = si_shader_context(bld_base);
928 struct gallivm_state *gallivm = &ctx->gallivm;
929 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
930 LLVMBuilderRef builder = ctx->gallivm.builder;
931 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
932 unsigned chan, chan_index;
933 bool is_vec_store = false;
934 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
935
936 if (dst[0]) {
937 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
938 is_vec_store = (k == LLVMVectorTypeKind);
939 }
940
941 if (is_vec_store) {
942 LLVMValueRef values[4] = {};
943 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
944 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
945 values[chan] = LLVMBuildExtractElement(gallivm->builder,
946 dst[0], index, "");
947 }
948 bld_base->emit_store(bld_base, inst, info, values);
949 return;
950 }
951
952 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
953 LLVMValueRef value = dst[chan_index];
954
955 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
956 continue;
957 if (inst->Instruction.Saturate)
958 value = ac_build_clamp(&ctx->ac, value);
959
960 if (reg->Register.File == TGSI_FILE_ADDRESS) {
961 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
962 LLVMBuildStore(builder, value, temp_ptr);
963 continue;
964 }
965
966 if (!tgsi_type_is_64bit(dtype))
967 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
968
969 if (reg->Register.Indirect) {
970 unsigned file = reg->Register.File;
971 unsigned reg_index = reg->Register.Index;
972 store_value_to_array(bld_base, value, file, chan_index,
973 reg_index, &reg->Indirect);
974 } else {
975 switch(reg->Register.File) {
976 case TGSI_FILE_OUTPUT:
977 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
978 if (tgsi_type_is_64bit(dtype))
979 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
980 break;
981
982 case TGSI_FILE_TEMPORARY:
983 {
984 if (reg->Register.Index >= ctx->temps_count)
985 continue;
986
987 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
988 if (tgsi_type_is_64bit(dtype))
989 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
990
991 break;
992 }
993 default:
994 return;
995 }
996 if (!tgsi_type_is_64bit(dtype))
997 LLVMBuildStore(builder, value, temp_ptr);
998 else {
999 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1000 LLVMVectorType(ctx->i32, 2), "");
1001 LLVMValueRef val2;
1002 value = LLVMBuildExtractElement(builder, ptr,
1003 ctx->i32_0, "");
1004 val2 = LLVMBuildExtractElement(builder, ptr,
1005 ctx->i32_1, "");
1006
1007 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1008 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1009 }
1010 }
1011 }
1012 }
1013
1014 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1015 {
1016 char buf[32];
1017 /* Subtract 1 so that the number shown is that of the corresponding
1018 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1019 * the instruction number of the corresponding TGSI IF.
1020 */
1021 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1022 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1023 }
1024
1025 /* Append a basic block at the level of the parent flow.
1026 */
1027 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1028 const char *name)
1029 {
1030 struct gallivm_state *gallivm = &ctx->gallivm;
1031
1032 assert(ctx->flow_depth >= 1);
1033
1034 if (ctx->flow_depth >= 2) {
1035 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1036
1037 return LLVMInsertBasicBlockInContext(gallivm->context,
1038 flow->next_block, name);
1039 }
1040
1041 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1042 }
1043
1044 /* Emit a branch to the given default target for the current block if
1045 * applicable -- that is, if the current block does not already contain a
1046 * branch from a break or continue.
1047 */
1048 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1049 {
1050 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1051 LLVMBuildBr(builder, target);
1052 }
1053
1054 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1055 struct lp_build_tgsi_context *bld_base,
1056 struct lp_build_emit_data *emit_data)
1057 {
1058 struct si_shader_context *ctx = si_shader_context(bld_base);
1059 struct gallivm_state *gallivm = &ctx->gallivm;
1060 struct si_llvm_flow *flow = push_flow(ctx);
1061 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1062 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1063 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1064 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1065 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1066 }
1067
1068 static void brk_emit(const struct lp_build_tgsi_action *action,
1069 struct lp_build_tgsi_context *bld_base,
1070 struct lp_build_emit_data *emit_data)
1071 {
1072 struct si_shader_context *ctx = si_shader_context(bld_base);
1073 struct gallivm_state *gallivm = &ctx->gallivm;
1074 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1075
1076 LLVMBuildBr(gallivm->builder, flow->next_block);
1077 }
1078
1079 static void cont_emit(const struct lp_build_tgsi_action *action,
1080 struct lp_build_tgsi_context *bld_base,
1081 struct lp_build_emit_data *emit_data)
1082 {
1083 struct si_shader_context *ctx = si_shader_context(bld_base);
1084 struct gallivm_state *gallivm = &ctx->gallivm;
1085 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1086
1087 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1088 }
1089
1090 static void else_emit(const struct lp_build_tgsi_action *action,
1091 struct lp_build_tgsi_context *bld_base,
1092 struct lp_build_emit_data *emit_data)
1093 {
1094 struct si_shader_context *ctx = si_shader_context(bld_base);
1095 struct gallivm_state *gallivm = &ctx->gallivm;
1096 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1097 LLVMBasicBlockRef endif_block;
1098
1099 assert(!current_branch->loop_entry_block);
1100
1101 endif_block = append_basic_block(ctx, "ENDIF");
1102 emit_default_branch(gallivm->builder, endif_block);
1103
1104 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1105 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1106
1107 current_branch->next_block = endif_block;
1108 }
1109
1110 static void endif_emit(const struct lp_build_tgsi_action *action,
1111 struct lp_build_tgsi_context *bld_base,
1112 struct lp_build_emit_data *emit_data)
1113 {
1114 struct si_shader_context *ctx = si_shader_context(bld_base);
1115 struct gallivm_state *gallivm = &ctx->gallivm;
1116 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1117
1118 assert(!current_branch->loop_entry_block);
1119
1120 emit_default_branch(gallivm->builder, current_branch->next_block);
1121 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1122 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1123
1124 ctx->flow_depth--;
1125 }
1126
1127 static void endloop_emit(const struct lp_build_tgsi_action *action,
1128 struct lp_build_tgsi_context *bld_base,
1129 struct lp_build_emit_data *emit_data)
1130 {
1131 struct si_shader_context *ctx = si_shader_context(bld_base);
1132 struct gallivm_state *gallivm = &ctx->gallivm;
1133 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1134
1135 assert(current_loop->loop_entry_block);
1136
1137 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1138
1139 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1140 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1141 ctx->flow_depth--;
1142 }
1143
1144 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1145 struct lp_build_tgsi_context *bld_base,
1146 struct lp_build_emit_data *emit_data,
1147 LLVMValueRef cond)
1148 {
1149 struct si_shader_context *ctx = si_shader_context(bld_base);
1150 struct gallivm_state *gallivm = &ctx->gallivm;
1151 struct si_llvm_flow *flow = push_flow(ctx);
1152 LLVMBasicBlockRef if_block;
1153
1154 if_block = append_basic_block(ctx, "IF");
1155 flow->next_block = append_basic_block(ctx, "ELSE");
1156 set_basicblock_name(if_block, "if", bld_base->pc);
1157 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1158 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1159 }
1160
1161 static void if_emit(const struct lp_build_tgsi_action *action,
1162 struct lp_build_tgsi_context *bld_base,
1163 struct lp_build_emit_data *emit_data)
1164 {
1165 struct gallivm_state *gallivm = bld_base->base.gallivm;
1166 LLVMValueRef cond;
1167
1168 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1169 emit_data->args[0],
1170 bld_base->base.zero, "");
1171
1172 if_cond_emit(action, bld_base, emit_data, cond);
1173 }
1174
1175 static void uif_emit(const struct lp_build_tgsi_action *action,
1176 struct lp_build_tgsi_context *bld_base,
1177 struct lp_build_emit_data *emit_data)
1178 {
1179 struct gallivm_state *gallivm = bld_base->base.gallivm;
1180 LLVMValueRef cond;
1181
1182 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1183 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1184 bld_base->int_bld.zero, "");
1185
1186 if_cond_emit(action, bld_base, emit_data, cond);
1187 }
1188
1189 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1190 const struct tgsi_full_immediate *imm)
1191 {
1192 unsigned i;
1193 struct si_shader_context *ctx = si_shader_context(bld_base);
1194
1195 for (i = 0; i < 4; ++i) {
1196 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1197 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
1198 }
1199
1200 ctx->imms_num++;
1201 }
1202
1203 void si_llvm_context_init(struct si_shader_context *ctx,
1204 struct si_screen *sscreen,
1205 LLVMTargetMachineRef tm)
1206 {
1207 struct lp_type type;
1208
1209 /* Initialize the gallivm object:
1210 * We are only using the module, context, and builder fields of this struct.
1211 * This should be enough for us to be able to pass our gallivm struct to the
1212 * helper functions in the gallivm module.
1213 */
1214 memset(ctx, 0, sizeof(*ctx));
1215 ctx->screen = sscreen;
1216 ctx->tm = tm;
1217
1218 ctx->gallivm.context = LLVMContextCreate();
1219 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1220 ctx->gallivm.context);
1221 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1222
1223 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1224 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1225 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1226 LLVMDisposeTargetData(data_layout);
1227 LLVMDisposeMessage(data_layout_str);
1228
1229 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1230 enum lp_float_mode float_mode =
1231 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1232 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1233
1234 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1235 float_mode);
1236
1237 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1238 ctx->ac.module = ctx->gallivm.module;
1239 ctx->ac.builder = ctx->gallivm.builder;
1240
1241 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1242
1243 type.floating = true;
1244 type.fixed = false;
1245 type.sign = true;
1246 type.norm = false;
1247 type.width = 32;
1248 type.length = 1;
1249
1250 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1251 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1252 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1253 type.width *= 2;
1254 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1255 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1256 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1257
1258 bld_base->soa = 1;
1259 bld_base->emit_swizzle = emit_swizzle;
1260 bld_base->emit_declaration = emit_declaration;
1261 bld_base->emit_immediate = emit_immediate;
1262
1263 /* metadata allowing 2.5 ULP */
1264 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1265 "fpmath", 6);
1266 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1267 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1268 &arg, 1);
1269
1270 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1271 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1272 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1273 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1274 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1275 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1276 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1277 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1278
1279 si_shader_context_init_alu(&ctx->bld_base);
1280
1281 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1282 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1283 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1284 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1285 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1286 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1287 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1288 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1289 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1290 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1291 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1292
1293 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1294 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1295 }
1296
1297 /* Set the context to a certain TGSI shader. Can be called repeatedly
1298 * to change the shader. */
1299 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1300 struct si_shader *shader)
1301 {
1302 const struct tgsi_shader_info *info = NULL;
1303 const struct tgsi_token *tokens = NULL;
1304
1305 if (shader && shader->selector) {
1306 info = &shader->selector->info;
1307 tokens = shader->selector->tokens;
1308 }
1309
1310 ctx->shader = shader;
1311 ctx->type = info ? info->processor : -1;
1312 ctx->bld_base.info = info;
1313
1314 /* Clean up the old contents. */
1315 FREE(ctx->temp_arrays);
1316 ctx->temp_arrays = NULL;
1317 FREE(ctx->temp_array_allocas);
1318 ctx->temp_array_allocas = NULL;
1319
1320 FREE(ctx->imms);
1321 ctx->imms = NULL;
1322 ctx->imms_num = 0;
1323
1324 FREE(ctx->temps);
1325 ctx->temps = NULL;
1326 ctx->temps_count = 0;
1327
1328 if (!info || !tokens)
1329 return;
1330
1331 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1332 int size = info->array_max[TGSI_FILE_TEMPORARY];
1333
1334 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1335 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1336
1337 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1338 ctx->temp_arrays);
1339 }
1340 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1341 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1342 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1343 }
1344
1345 /* Re-set these to start with a clean slate. */
1346 ctx->bld_base.num_instructions = 0;
1347 ctx->bld_base.pc = 0;
1348 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1349
1350 ctx->bld_base.emit_store = si_llvm_emit_store;
1351 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1352 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1353 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1354 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1355 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1356 }
1357
1358 void si_llvm_create_func(struct si_shader_context *ctx,
1359 const char *name,
1360 LLVMTypeRef *return_types, unsigned num_return_elems,
1361 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1362 {
1363 LLVMTypeRef main_fn_type, ret_type;
1364 LLVMBasicBlockRef main_fn_body;
1365 enum si_llvm_calling_convention call_conv;
1366 unsigned real_shader_type;
1367
1368 if (num_return_elems)
1369 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1370 return_types,
1371 num_return_elems, true);
1372 else
1373 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1374
1375 /* Setup the function */
1376 ctx->return_type = ret_type;
1377 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1378 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1379 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1380 ctx->main_fn, "main_body");
1381 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1382
1383 real_shader_type = ctx->type;
1384
1385 /* LS is merged into HS (TCS), and ES is merged into GS. */
1386 if (ctx->screen->b.chip_class >= GFX9) {
1387 if (ctx->shader->key.as_ls)
1388 real_shader_type = PIPE_SHADER_TESS_CTRL;
1389 else if (ctx->shader->key.as_es)
1390 real_shader_type = PIPE_SHADER_GEOMETRY;
1391 }
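/*
 * Example (matching the commit subject): on GFX9 a vertex shader compiled
 * with key.as_ls is the first half of the merged LS-HS shader, so it must
 * be emitted with the HS calling convention selected below; likewise a VS
 * or TES with key.as_es becomes part of the merged ES-GS shader and uses
 * the GS convention.
 */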
1392
1393 switch (real_shader_type) {
1394 case PIPE_SHADER_VERTEX:
1395 case PIPE_SHADER_TESS_EVAL:
1396 call_conv = RADEON_LLVM_AMDGPU_VS;
1397 break;
1398 case PIPE_SHADER_TESS_CTRL:
1399 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1400 RADEON_LLVM_AMDGPU_VS;
1401 break;
1402 case PIPE_SHADER_GEOMETRY:
1403 call_conv = RADEON_LLVM_AMDGPU_GS;
1404 break;
1405 case PIPE_SHADER_FRAGMENT:
1406 call_conv = RADEON_LLVM_AMDGPU_PS;
1407 break;
1408 case PIPE_SHADER_COMPUTE:
1409 call_conv = RADEON_LLVM_AMDGPU_CS;
1410 break;
1411 default:
1412 unreachable("Unhandled shader type");
1413 }
1414
1415 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1416 }
1417
1418 void si_llvm_optimize_module(struct si_shader_context *ctx)
1419 {
1420 struct gallivm_state *gallivm = &ctx->gallivm;
1421 const char *triple = LLVMGetTarget(gallivm->module);
1422 LLVMTargetLibraryInfoRef target_library_info;
1423
1424 /* Dump LLVM IR before any optimization passes */
1425 if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1426 r600_can_dump_shader(&ctx->screen->b, ctx->type))
1427 LLVMDumpModule(ctx->gallivm.module);
1428
1429 /* Create the pass manager */
1430 gallivm->passmgr = LLVMCreatePassManager();
1431
1432 target_library_info = gallivm_create_target_library_info(triple);
1433 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1434
1435 if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1436 LLVMAddVerifierPass(gallivm->passmgr);
1437
1438 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1439
1440 /* This pass should eliminate all the load and store instructions */
1441 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1442
1443 /* Add some optimization passes */
1444 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1445 LLVMAddLICMPass(gallivm->passmgr);
1446 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1447 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1448 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1449
1450 /* Run the pass */
1451 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1452
1453 LLVMDisposeBuilder(gallivm->builder);
1454 LLVMDisposePassManager(gallivm->passmgr);
1455 gallivm_dispose_target_library_info(target_library_info);
1456 }
1457
1458 void si_llvm_dispose(struct si_shader_context *ctx)
1459 {
1460 LLVMDisposeModule(ctx->gallivm.module);
1461 LLVMContextDispose(ctx->gallivm.context);
1462 FREE(ctx->temp_arrays);
1463 ctx->temp_arrays = NULL;
1464 FREE(ctx->temp_array_allocas);
1465 ctx->temp_array_allocas = NULL;
1466 FREE(ctx->temps);
1467 ctx->temps = NULL;
1468 ctx->temps_count = 0;
1469 FREE(ctx->imms);
1470 ctx->imms = NULL;
1471 ctx->imms_num = 0;
1472 FREE(ctx->flow);
1473 ctx->flow = NULL;
1474 ctx->flow_depth_max = 0;
1475 }
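/*
 * Rough lifecycle sketch for the helpers in this file (hedged; the actual
 * driver flow lives elsewhere in radeonsi, and 'sscreen', 'tm', 'shader',
 * 'binary' and 'debug' below stand for whatever the caller already has):
 *
 *   struct si_shader_context ctx;
 *
 *   si_llvm_context_init(&ctx, sscreen, tm);
 *   si_llvm_context_set_tgsi(&ctx, shader);
 *   si_llvm_create_func(&ctx, "main", return_types, num_returns,
 *                       param_types, num_params);
 *   ... translate the TGSI instructions into LLVM IR ...
 *   si_llvm_optimize_module(&ctx);
 *   si_llvm_compile(ctx.gallivm.module, &binary, tm, debug);
 *   si_llvm_dispose(&ctx);
 */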