src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "si_shader_internal.h"
  25 #include "si_pipe.h"
  26
  27 #include "gallivm/lp_bld_const.h"
  28 #include "gallivm/lp_bld_gather.h"
  29 #include "gallivm/lp_bld_flow.h"
  30 #include "gallivm/lp_bld_init.h"
  31 #include "gallivm/lp_bld_intr.h"
  32 #include "gallivm/lp_bld_misc.h"
  33 #include "gallivm/lp_bld_swizzle.h"
  34 #include "tgsi/tgsi_info.h"
  35 #include "tgsi/tgsi_parse.h"
  36 #include "util/u_math.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include <stdio.h>
  41 #include <llvm-c/Transforms/IPO.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43 #include <llvm-c/Support.h>
  44
  45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
  46  */
  47 struct si_llvm_flow {
  48         /* Loop exit or next part of if/else/endif. */
  49         LLVMBasicBlockRef next_block;
  50         LLVMBasicBlockRef loop_entry_block;
  51 };
  52
  53 enum si_llvm_calling_convention {
  54         RADEON_LLVM_AMDGPU_VS = 87,
  55         RADEON_LLVM_AMDGPU_GS = 88,
  56         RADEON_LLVM_AMDGPU_PS = 89,
  57         RADEON_LLVM_AMDGPU_CS = 90,
  58 };
  59
  60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
  61 {
  62         char str[16];
  63
  64         snprintf(str, sizeof(str), "%i", value);
  65         LLVMAddTargetDependentFunctionAttr(F, name, str);
  66 }
  67
  68 static void init_amdgpu_target()
  69 {
  70         gallivm_init_llvm_targets();
  71         LLVMInitializeAMDGPUTargetInfo();
  72         LLVMInitializeAMDGPUTarget();
  73         LLVMInitializeAMDGPUTargetMC();
  74         LLVMInitializeAMDGPUAsmPrinter();
  75
  76         /* For inline assembly. */
  77         LLVMInitializeAMDGPUAsmParser();
  78
  79         if (HAVE_LLVM >= 0x0400) {
  80                 /*
  81                  * Workaround for bug in llvm 4.0 that causes image intrinsics
  82                  * to disappear.
  83                  * https://reviews.llvm.org/D26348
  84                  */
  85                 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
  86                 LLVMParseCommandLineOptions(2, argv, NULL);
  87         }
  88 }
  89
  90 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
  91
  92 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
  93 {
  94         LLVMTargetRef target = NULL;
  95         char *err_message = NULL;
  96
  97         call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
  98
  99         if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
 100                 fprintf(stderr, "Cannot find target for triple %s ", triple);
 101                 if (err_message) {
 102                         fprintf(stderr, "%s\n", err_message);
 103                 }
 104                 LLVMDisposeMessage(err_message);
 105                 return NULL;
 106         }
 107         return target;
 108 }
 109
 110 struct si_llvm_diagnostics {
 111         struct pipe_debug_callback *debug;
 112         unsigned retval;
 113 };
 114
 115 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 116 {
 117         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
 118         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
 119         char *description = LLVMGetDiagInfoDescription(di);
 120         const char *severity_str = NULL;
 121
 122         switch (severity) {
 123         case LLVMDSError:
 124                 severity_str = "error";
 125                 break;
 126         case LLVMDSWarning:
 127                 severity_str = "warning";
 128                 break;
 129         case LLVMDSRemark:
 130                 severity_str = "remark";
 131                 break;
 132         case LLVMDSNote:
 133                 severity_str = "note";
 134                 break;
 135         default:
 136                 severity_str = "unknown";
 137         }
 138
 139         pipe_debug_message(diag->debug, SHADER_INFO,
 140                            "LLVM diagnostic (%s): %s", severity_str, description);
 141
 142         if (severity == LLVMDSError) {
 143                 diag->retval = 1;
 144                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
 145         }
 146
 147         LLVMDisposeMessage(description);
 148 }
 149
 150 /**
 151  * Compile an LLVM module to machine code.
 152  *
 153  * @returns 0 for success, 1 for failure
 154  */
 155 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 156                          LLVMTargetMachineRef tm,
 157                          struct pipe_debug_callback *debug)
 158 {
 159         struct si_llvm_diagnostics diag;
 160         char *err;
 161         LLVMContextRef llvm_ctx;
 162         LLVMMemoryBufferRef out_buffer;
 163         unsigned buffer_size;
 164         const char *buffer_data;
 165         LLVMBool mem_err;
 166
 167         diag.debug = debug;
 168         diag.retval = 0;
 169
 170         /* Setup Diagnostic Handler*/
 171         llvm_ctx = LLVMGetModuleContext(M);
 172
 173         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 174
 175         /* Compile IR*/
 176         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
 177                                                                  &out_buffer);
 178
 179         /* Process Errors/Warnings */
 180         if (mem_err) {
 181                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
 182                 pipe_debug_message(debug, SHADER_INFO,
 183                                    "LLVM emit error: %s", err);
 184                 FREE(err);
 185                 diag.retval = 1;
 186                 goto out;
 187         }
 188
 189         /* Extract Shader Code*/
 190         buffer_size = LLVMGetBufferSize(out_buffer);
 191         buffer_data = LLVMGetBufferStart(out_buffer);
 192
 193         ac_elf_read(buffer_data, buffer_size, binary);
 194
 195         /* Clean up */
 196         LLVMDisposeMemoryBuffer(out_buffer);
 197
 198 out:
 199         if (diag.retval != 0)
 200                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
 201         return diag.retval;
 202 }
 203
 204 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 205                           enum tgsi_opcode_type type)
 206 {
 207         LLVMContextRef ctx = bld_base->base.gallivm->context;
 208
 209         switch (type) {
 210         case TGSI_TYPE_UNSIGNED:
 211         case TGSI_TYPE_SIGNED:
 212                 return LLVMInt32TypeInContext(ctx);
 213         case TGSI_TYPE_UNSIGNED64:
 214         case TGSI_TYPE_SIGNED64:
 215                 return LLVMInt64TypeInContext(ctx);
 216         case TGSI_TYPE_DOUBLE:
 217                 return LLVMDoubleTypeInContext(ctx);
 218         case TGSI_TYPE_UNTYPED:
 219         case TGSI_TYPE_FLOAT:
 220                 return LLVMFloatTypeInContext(ctx);
 221         default: break;
 222         }
 223         return 0;
 224 }
 225
 226 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 227                      enum tgsi_opcode_type type, LLVMValueRef value)
 228 {
 229         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 230         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 231
 232         if (dst_type)
 233                 return LLVMBuildBitCast(builder, value, dst_type, "");
 234         else
 235                 return value;
 236 }
 237
 238 /**
 239  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 240  * or an undefined value in the same interval otherwise.
 241  */
 242 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 243                                  LLVMValueRef index,
 244                                  unsigned num)
 245 {
 246         struct gallivm_state *gallivm = &ctx->gallivm;
 247         LLVMBuilderRef builder = gallivm->builder;
 248         LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
 249         LLVMValueRef cc;
 250
 251         if (util_is_power_of_two(num)) {
 252                 index = LLVMBuildAnd(builder, index, c_max, "");
 253         } else {
 254                 /* In theory, this MAX pattern should result in code that is
 255                  * as good as the bit-wise AND above.
 256                  *
 257                  * In practice, LLVM generates worse code (at the time of
 258                  * writing), because its value tracking is not strong enough.
 259                  */
 260                 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 261                 index = LLVMBuildSelect(builder, cc, index, c_max, "");
 262         }
 263
 264         return index;
 265 }
 266
 267 static struct si_llvm_flow *
 268 get_current_flow(struct si_shader_context *ctx)
 269 {
 270         if (ctx->flow_depth > 0)
 271                 return &ctx->flow[ctx->flow_depth - 1];
 272         return NULL;
 273 }
 274
 275 static struct si_llvm_flow *
 276 get_innermost_loop(struct si_shader_context *ctx)
 277 {
 278         for (unsigned i = ctx->flow_depth; i > 0; --i) {
 279                 if (ctx->flow[i - 1].loop_entry_block)
 280                         return &ctx->flow[i - 1];
 281         }
 282         return NULL;
 283 }
 284
 285 static struct si_llvm_flow *
 286 push_flow(struct si_shader_context *ctx)
 287 {
 288         struct si_llvm_flow *flow;
 289
 290         if (ctx->flow_depth >= ctx->flow_depth_max) {
 291                 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
 292                 ctx->flow = REALLOC(ctx->flow,
 293                                     ctx->flow_depth_max * sizeof(*ctx->flow),
 294                                     new_max * sizeof(*ctx->flow));
 295                 ctx->flow_depth_max = new_max;
 296         }
 297
 298         flow = &ctx->flow[ctx->flow_depth];
 299         ctx->flow_depth++;
 300
 301         flow->next_block = NULL;
 302         flow->loop_entry_block = NULL;
 303         return flow;
 304 }
 305
 306 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 307                                  LLVMValueRef value,
 308                                  unsigned swizzle_x,
 309                                  unsigned swizzle_y,
 310                                  unsigned swizzle_z,
 311                                  unsigned swizzle_w)
 312 {
 313         LLVMValueRef swizzles[4];
 314         LLVMTypeRef i32t =
 315                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 316
 317         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 318         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 319         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 320         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 321
 322         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 323                                       value,
 324                                       LLVMGetUndef(LLVMTypeOf(value)),
 325                                       LLVMConstVector(swizzles, 4), "");
 326 }
 327
 328 /**
 329  * Return the description of the array covering the given temporary register
 330  * index.
 331  */
 332 static unsigned
 333 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
 334                   unsigned reg_index,
 335                   const struct tgsi_ind_register *reg)
 336 {
 337         struct si_shader_context *ctx = si_shader_context(bld_base);
 338         unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
 339         unsigned i;
 340
 341         if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
 342                 return reg->ArrayID;
 343
 344         for (i = 0; i < num_arrays; i++) {
 345                 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 346
 347                 if (reg_index >= array->range.First && reg_index <= array->range.Last)
 348                         return i + 1;
 349         }
 350
 351         return 0;
 352 }
 353
 354 static struct tgsi_declaration_range
 355 get_array_range(struct lp_build_tgsi_context *bld_base,
 356                 unsigned File, unsigned reg_index,
 357                 const struct tgsi_ind_register *reg)
 358 {
 359         struct si_shader_context *ctx = si_shader_context(bld_base);
 360         struct tgsi_declaration_range range;
 361
 362         if (File == TGSI_FILE_TEMPORARY) {
 363                 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
 364                 if (array_id)
 365                         return ctx->temp_arrays[array_id - 1].range;
 366         }
 367
 368         range.First = 0;
 369         range.Last = bld_base->info->file_max[File];
 370         return range;
 371 }
 372
 373 static LLVMValueRef
 374 emit_array_index(struct si_shader_context *ctx,
 375                  const struct tgsi_ind_register *reg,
 376                  unsigned offset)
 377 {
 378         struct gallivm_state *gallivm = &ctx->gallivm;
 379
 380         if (!reg) {
 381                 return LLVMConstInt(ctx->i32, offset, 0);
 382         }
 383         LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
 384         return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
 385 }
 386
 387 /**
 388  * For indirect registers, construct a pointer directly to the requested
 389  * element using getelementptr if possible.
 390  *
 391  * Returns NULL if the insertelement/extractelement fallback for array access
 392  * must be used.
 393  */
 394 static LLVMValueRef
 395 get_pointer_into_array(struct si_shader_context *ctx,
 396                        unsigned file,
 397                        unsigned swizzle,
 398                        unsigned reg_index,
 399                        const struct tgsi_ind_register *reg_indirect)
 400 {
 401         unsigned array_id;
 402         struct tgsi_array_info *array;
 403         struct gallivm_state *gallivm = &ctx->gallivm;
 404         LLVMBuilderRef builder = gallivm->builder;
 405         LLVMValueRef idxs[2];
 406         LLVMValueRef index;
 407         LLVMValueRef alloca;
 408
 409         if (file != TGSI_FILE_TEMPORARY)
 410                 return NULL;
 411
 412         array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
 413         if (!array_id)
 414                 return NULL;
 415
 416         alloca = ctx->temp_array_allocas[array_id - 1];
 417         if (!alloca)
 418                 return NULL;
 419
 420         array = &ctx->temp_arrays[array_id - 1];
 421
 422         if (!(array->writemask & (1 << swizzle)))
 423                 return ctx->undef_alloca;
 424
 425         index = emit_array_index(ctx, reg_indirect,
 426                                  reg_index - ctx->temp_arrays[array_id - 1].range.First);
 427
 428         /* Ensure that the index is within a valid range, to guard against
 429          * VM faults and overwriting critical data (e.g. spilled resource
 430          * descriptors).
 431          *
 432          * TODO It should be possible to avoid the additional instructions
 433          * if LLVM is changed so that it guarantuees:
 434          * 1. the scratch space descriptor isolates the current wave (this
 435          *    could even save the scratch offset SGPR at the cost of an
 436          *    additional SALU instruction)
 437          * 2. the memory for allocas must be allocated at the _end_ of the
 438          *    scratch space (after spilled registers)
 439          */
 440         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
 441
 442         index = LLVMBuildMul(
 443                 builder, index,
 444                 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
 445                 "");
 446         index = LLVMBuildAdd(
 447                 builder, index,
 448                 LLVMConstInt(ctx->i32,
 449                              util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
 450                 "");
 451         idxs[0] = ctx->i32_0;
 452         idxs[1] = index;
 453         return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 454 }
 455
 456 LLVMValueRef
 457 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 458                          enum tgsi_opcode_type type,
 459                          LLVMValueRef ptr,
 460                          LLVMValueRef ptr2)
 461 {
 462         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 463         LLVMValueRef result;
 464
 465         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 466
 467         result = LLVMBuildInsertElement(builder,
 468                                         result,
 469                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
 470                                         bld_base->int_bld.zero, "");
 471         result = LLVMBuildInsertElement(builder,
 472                                         result,
 473                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
 474                                         bld_base->int_bld.one, "");
 475         return bitcast(bld_base, type, result);
 476 }
 477
 478 static LLVMValueRef
 479 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 480                  unsigned File, enum tgsi_opcode_type type,
 481                  struct tgsi_declaration_range range,
 482                  unsigned swizzle)
 483 {
 484         struct si_shader_context *ctx = si_shader_context(bld_base);
 485
 486         LLVMBuilderRef builder = ctx->gallivm.builder;
 487
 488         unsigned i, size = range.Last - range.First + 1;
 489         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 490         LLVMValueRef result = LLVMGetUndef(vec);
 491
 492         struct tgsi_full_src_register tmp_reg = {};
 493         tmp_reg.Register.File = File;
 494
 495         for (i = 0; i < size; ++i) {
 496                 tmp_reg.Register.Index = i + range.First;
 497                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 498                 result = LLVMBuildInsertElement(builder, result, temp,
 499                         LLVMConstInt(ctx->i32, i, 0), "array_vector");
 500         }
 501         return result;
 502 }
 503
 504 static LLVMValueRef
 505 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 506                       unsigned file,
 507                       enum tgsi_opcode_type type,
 508                       unsigned swizzle,
 509                       unsigned reg_index,
 510                       const struct tgsi_ind_register *reg_indirect)
 511 {
 512         struct si_shader_context *ctx = si_shader_context(bld_base);
 513         struct gallivm_state *gallivm = &ctx->gallivm;
 514         LLVMBuilderRef builder = gallivm->builder;
 515         LLVMValueRef ptr;
 516
 517         ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
 518         if (ptr) {
 519                 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
 520                 if (tgsi_type_is_64bit(type)) {
 521                         LLVMValueRef ptr_hi, val_hi;
 522                         ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
 523                         val_hi = LLVMBuildLoad(builder, ptr_hi, "");
 524                         val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
 525                 }
 526
 527                 return val;
 528         } else {
 529                 struct tgsi_declaration_range range =
 530                         get_array_range(bld_base, file, reg_index, reg_indirect);
 531                 LLVMValueRef index =
 532                         emit_array_index(ctx, reg_indirect, reg_index - range.First);
 533                 LLVMValueRef array =
 534                         emit_array_fetch(bld_base, file, type, range, swizzle);
 535                 return LLVMBuildExtractElement(builder, array, index, "");
 536         }
 537 }
 538
 539 static void
 540 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 541                      LLVMValueRef value,
 542                      unsigned file,
 543                      unsigned chan_index,
 544                      unsigned reg_index,
 545                      const struct tgsi_ind_register *reg_indirect)
 546 {
 547         struct si_shader_context *ctx = si_shader_context(bld_base);
 548         struct gallivm_state *gallivm = &ctx->gallivm;
 549         LLVMBuilderRef builder = gallivm->builder;
 550         LLVMValueRef ptr;
 551
 552         ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
 553         if (ptr) {
 554                 LLVMBuildStore(builder, value, ptr);
 555         } else {
 556                 unsigned i, size;
 557                 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
 558                 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
 559                 LLVMValueRef array =
 560                         emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
 561                 LLVMValueRef temp_ptr;
 562
 563                 array = LLVMBuildInsertElement(builder, array, value, index, "");
 564
 565                 size = range.Last - range.First + 1;
 566                 for (i = 0; i < size; ++i) {
 567                         switch(file) {
 568                         case TGSI_FILE_OUTPUT:
 569                                 temp_ptr = ctx->outputs[i + range.First][chan_index];
 570                                 break;
 571
 572                         case TGSI_FILE_TEMPORARY:
 573                                 if (range.First + i >= ctx->temps_count)
 574                                         continue;
 575                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 576                                 break;
 577
 578                         default:
 579                                 continue;
 580                         }
 581                         value = LLVMBuildExtractElement(builder, array,
 582                                 LLVMConstInt(ctx->i32, i, 0), "");
 583                         LLVMBuildStore(builder, value, temp_ptr);
 584                 }
 585         }
 586 }
 587
 588 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 589  * reload them at each use. This must be true if the shader is using
 590  * derivatives and KILL, because KILL can leave the WQM and then a lazy
 591  * input load isn't in the WQM anymore.
 592  */
 593 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
 594 {
 595         struct si_shader_selector *sel = ctx->shader->selector;
 596
 597         return sel->info.uses_derivatives &&
 598                sel->info.uses_kill;
 599 }
 600
 601 static LLVMValueRef
 602 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 603                unsigned chan)
 604 {
 605         struct si_shader_context *ctx = si_shader_context(bld_base);
 606
 607         assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
 608         return ctx->outputs[index][chan];
 609 }
 610
 611 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 612                                 const struct tgsi_full_src_register *reg,
 613                                 enum tgsi_opcode_type type,
 614                                 unsigned swizzle)
 615 {
 616         struct si_shader_context *ctx = si_shader_context(bld_base);
 617         LLVMBuilderRef builder = ctx->gallivm.builder;
 618         LLVMValueRef result = NULL, ptr, ptr2;
 619
 620         if (swizzle == ~0) {
 621                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 622                 unsigned chan;
 623                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 624                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
 625                 }
 626                 return lp_build_gather_values(&ctx->gallivm, values,
 627                                               TGSI_NUM_CHANNELS);
 628         }
 629
 630         if (reg->Register.Indirect) {
 631                 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
 632                                 swizzle, reg->Register.Index, &reg->Indirect);
 633                 return bitcast(bld_base, type, load);
 634         }
 635
 636         switch(reg->Register.File) {
 637         case TGSI_FILE_IMMEDIATE: {
 638                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 639                 if (tgsi_type_is_64bit(type)) {
 640                         result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
 641                         result = LLVMConstInsertElement(result,
 642                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 643                                                         ctx->i32_0);
 644                         result = LLVMConstInsertElement(result,
 645                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 646                                                         ctx->i32_1);
 647                         return LLVMConstBitCast(result, ctype);
 648                 } else {
 649                         return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 650                 }
 651         }
 652
 653         case TGSI_FILE_INPUT: {
 654                 unsigned index = reg->Register.Index;
 655                 LLVMValueRef input[4];
 656
 657                 /* I don't think doing this for vertex shaders is beneficial.
 658                  * For those, we want to make sure the VMEM loads are executed
 659                  * only once. Fragment shaders don't care much, because
 660                  * v_interp instructions are much cheaper than VMEM loads.
 661                  */
 662                 if (!si_preload_fs_inputs(ctx) &&
 663                     ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
 664                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
 665                 else
 666                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
 667
 668                 result = input[swizzle];
 669
 670                 if (tgsi_type_is_64bit(type)) {
 671                         ptr = result;
 672                         ptr2 = input[swizzle + 1];
 673                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 674                 }
 675                 break;
 676         }
 677
 678         case TGSI_FILE_TEMPORARY:
 679                 if (reg->Register.Index >= ctx->temps_count)
 680                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 681                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 682                 if (tgsi_type_is_64bit(type)) {
 683                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 684                         return si_llvm_emit_fetch_64bit(bld_base, type,
 685                                                         LLVMBuildLoad(builder, ptr, ""),
 686                                                         LLVMBuildLoad(builder, ptr2, ""));
 687                 }
 688                 result = LLVMBuildLoad(builder, ptr, "");
 689                 break;
 690
 691         case TGSI_FILE_OUTPUT:
 692                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
 693                 if (tgsi_type_is_64bit(type)) {
 694                         ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
 695                         return si_llvm_emit_fetch_64bit(bld_base, type,
 696                                                         LLVMBuildLoad(builder, ptr, ""),
 697                                                         LLVMBuildLoad(builder, ptr2, ""));
 698                 }
 699                 result = LLVMBuildLoad(builder, ptr, "");
 700                 break;
 701
 702         default:
 703                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 704         }
 705
 706         return bitcast(bld_base, type, result);
 707 }
 708
 709 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 710                                        const struct tgsi_full_src_register *reg,
 711                                        enum tgsi_opcode_type type,
 712                                        unsigned swizzle)
 713 {
 714         struct si_shader_context *ctx = si_shader_context(bld_base);
 715         LLVMBuilderRef builder = ctx->gallivm.builder;
 716         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 717
 718         if (tgsi_type_is_64bit(type)) {
 719                 LLVMValueRef lo, hi;
 720
 721                 assert(swizzle == 0 || swizzle == 2);
 722
 723                 lo = LLVMBuildExtractElement(
 724                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 725                 hi = LLVMBuildExtractElement(
 726                         builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
 727
 728                 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
 729         }
 730
 731         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 732                 cval = LLVMBuildExtractElement(
 733                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 734         } else {
 735                 assert(swizzle == 0);
 736         }
 737
 738         return bitcast(bld_base, type, cval);
 739 }
 740
 741 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 742                              const struct tgsi_full_declaration *decl)
 743 {
 744         struct si_shader_context *ctx = si_shader_context(bld_base);
 745         LLVMBuilderRef builder = ctx->gallivm.builder;
 746         unsigned first, last, i;
 747         switch(decl->Declaration.File) {
 748         case TGSI_FILE_ADDRESS:
 749         {
 750                  unsigned idx;
 751                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 752                         unsigned chan;
 753                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 754                                  ctx->addrs[idx][chan] = lp_build_alloca_undef(
 755                                         &ctx->gallivm,
 756                                         ctx->i32, "");
 757                         }
 758                 }
 759                 break;
 760         }
 761
 762         case TGSI_FILE_TEMPORARY:
 763         {
 764                 char name[16] = "";
 765                 LLVMValueRef array_alloca = NULL;
 766                 unsigned decl_size;
 767                 unsigned writemask = decl->Declaration.UsageMask;
 768                 first = decl->Range.First;
 769                 last = decl->Range.Last;
 770                 decl_size = 4 * ((last - first) + 1);
 771
 772                 if (decl->Declaration.Array) {
 773                         unsigned id = decl->Array.ArrayID - 1;
 774                         unsigned array_size;
 775
 776                         writemask &= ctx->temp_arrays[id].writemask;
 777                         ctx->temp_arrays[id].writemask = writemask;
 778                         array_size = ((last - first) + 1) * util_bitcount(writemask);
 779
 780                         /* If the array has more than 16 elements, store it
 781                          * in memory using an alloca that spans the entire
 782                          * array.
 783                          *
 784                          * Otherwise, store each array element individually.
 785                          * We will then generate vectors (per-channel, up to
 786                          * <16 x float> if the usagemask is a single bit) for
 787                          * indirect addressing.
 788                          *
 789                          * Note that 16 is the number of vector elements that
 790                          * LLVM will store in a register, so theoretically an
 791                          * array with up to 4 * 16 = 64 elements could be
 792                          * handled this way, but whether that's a good idea
 793                          * depends on VGPR register pressure elsewhere.
 794                          *
 795                          * FIXME: We shouldn't need to have the non-alloca
 796                          * code path for arrays. LLVM should be smart enough to
 797                          * promote allocas into registers when profitable.
 798                          */
 799                         if (array_size > 16 ||
 800                             /* TODO: VGPR indexing is buggy on GFX9. */
 801                             ctx->screen->b.chip_class == GFX9) {
 802                                 array_alloca = LLVMBuildAlloca(builder,
 803                                         LLVMArrayType(ctx->f32,
 804                                                       array_size), "array");
 805                                 ctx->temp_array_allocas[id] = array_alloca;
 806                         }
 807                 }
 808
 809                 if (!ctx->temps_count) {
 810                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 811                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 812                 }
 813                 if (!array_alloca) {
 814                         for (i = 0; i < decl_size; ++i) {
 815 #ifdef DEBUG
 816                                 snprintf(name, sizeof(name), "TEMP%d.%c",
 817                                          first + i / 4, "xyzw"[i % 4]);
 818 #endif
 819                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
 820                                         lp_build_alloca_undef(&ctx->gallivm,
 821                                                               ctx->f32,
 822                                                               name);
 823                         }
 824                 } else {
 825                         LLVMValueRef idxs[2] = {
 826                                 ctx->i32_0,
 827                                 NULL
 828                         };
 829                         unsigned j = 0;
 830
 831                         if (writemask != TGSI_WRITEMASK_XYZW &&
 832                             !ctx->undef_alloca) {
 833                                 /* Create a dummy alloca. We use it so that we
 834                                  * have a pointer that is safe to load from if
 835                                  * a shader ever reads from a channel that
 836                                  * it never writes to.
 837                                  */
 838                                 ctx->undef_alloca = lp_build_alloca_undef(
 839                                         &ctx->gallivm,
 840                                         ctx->f32, "undef");
 841                         }
 842
 843                         for (i = 0; i < decl_size; ++i) {
 844                                 LLVMValueRef ptr;
 845                                 if (writemask & (1 << (i % 4))) {
 846 #ifdef DEBUG
 847                                         snprintf(name, sizeof(name), "TEMP%d.%c",
 848                                                  first + i / 4, "xyzw"[i % 4]);
 849 #endif
 850                                         idxs[1] = LLVMConstInt(ctx->i32, j, 0);
 851                                         ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
 852                                         j++;
 853                                 } else {
 854                                         ptr = ctx->undef_alloca;
 855                                 }
 856                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 857                         }
 858                 }
 859                 break;
 860         }
 861         case TGSI_FILE_INPUT:
 862         {
 863                 unsigned idx;
 864                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 865                         if (ctx->load_input &&
 866                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
 867                                 ctx->input_decls[idx] = *decl;
 868                                 ctx->input_decls[idx].Range.First = idx;
 869                                 ctx->input_decls[idx].Range.Last = idx;
 870                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
 871
 872                                 if (si_preload_fs_inputs(ctx) ||
 873                                     bld_base->info->processor != PIPE_SHADER_FRAGMENT)
 874                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
 875                                                         &ctx->inputs[idx * 4]);
 876                         }
 877                 }
 878         }
 879         break;
 880
 881         case TGSI_FILE_SYSTEM_VALUE:
 882         {
 883                 unsigned idx;
 884                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 885                         ctx->load_system_value(ctx, idx, decl);
 886                 }
 887         }
 888         break;
 889
 890         case TGSI_FILE_OUTPUT:
 891         {
 892                 char name[16] = "";
 893                 unsigned idx;
 894                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 895                         unsigned chan;
 896                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 897                         if (ctx->outputs[idx][0])
 898                                 continue;
 899                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 900 #ifdef DEBUG
 901                                 snprintf(name, sizeof(name), "OUT%d.%c",
 902                                          idx, "xyzw"[chan % 4]);
 903 #endif
 904                                 ctx->outputs[idx][chan] = lp_build_alloca_undef(
 905                                         &ctx->gallivm,
 906                                         ctx->f32, name);
 907                         }
 908                 }
 909                 break;
 910         }
 911
 912         case TGSI_FILE_MEMORY:
 913                 ctx->declare_memory_region(ctx, decl);
 914                 break;
 915
 916         default:
 917                 break;
 918         }
 919 }
 920
 921 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 922                         const struct tgsi_full_instruction *inst,
 923                         const struct tgsi_opcode_info *info,
 924                         LLVMValueRef dst[4])
 925 {
 926         struct si_shader_context *ctx = si_shader_context(bld_base);
 927         struct gallivm_state *gallivm = &ctx->gallivm;
 928         const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 929         LLVMBuilderRef builder = ctx->gallivm.builder;
 930         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 931         unsigned chan, chan_index;
 932         bool is_vec_store = false;
 933         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 934
 935         if (dst[0]) {
 936                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 937                 is_vec_store = (k == LLVMVectorTypeKind);
 938         }
 939
 940         if (is_vec_store) {
 941                 LLVMValueRef values[4] = {};
 942                 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
 943                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
 944                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 945                                                         dst[0], index, "");
 946                 }
 947                 bld_base->emit_store(bld_base, inst, info, values);
 948                 return;
 949         }
 950
 951         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
 952                 LLVMValueRef value = dst[chan_index];
 953
 954                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 955                         continue;
 956                 if (inst->Instruction.Saturate)
 957                         value = ac_build_clamp(&ctx->ac, value);
 958
 959                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 960                         temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 961                         LLVMBuildStore(builder, value, temp_ptr);
 962                         continue;
 963                 }
 964
 965                 if (!tgsi_type_is_64bit(dtype))
 966                         value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 967
 968                 if (reg->Register.Indirect) {
 969                         unsigned file = reg->Register.File;
 970                         unsigned reg_index = reg->Register.Index;
 971                         store_value_to_array(bld_base, value, file, chan_index,
 972                                              reg_index, &reg->Indirect);
 973                 } else {
 974                         switch(reg->Register.File) {
 975                         case TGSI_FILE_OUTPUT:
 976                                 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
 977                                 if (tgsi_type_is_64bit(dtype))
 978                                         temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
 979                                 break;
 980
 981                         case TGSI_FILE_TEMPORARY:
 982                         {
 983                                 if (reg->Register.Index >= ctx->temps_count)
 984                                         continue;
 985
 986                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
 987                                 if (tgsi_type_is_64bit(dtype))
 988                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
 989
 990                                 break;
 991                         }
 992                         default:
 993                                 return;
 994                         }
 995                         if (!tgsi_type_is_64bit(dtype))
 996                                 LLVMBuildStore(builder, value, temp_ptr);
 997                         else {
 998                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
 999                                                                     LLVMVectorType(ctx->i32, 2), "");
1000                                 LLVMValueRef val2;
1001                                 value = LLVMBuildExtractElement(builder, ptr,
1002                                                                 ctx->i32_0, "");
1003                                 val2 = LLVMBuildExtractElement(builder, ptr,
1004                                                                ctx->i32_1, "");
1005
1006                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1007                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1008                         }
1009                 }
1010         }
1011 }
1012
1013 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1014 {
1015         char buf[32];
1016         /* Subtract 1 so that the number shown is that of the corresponding
1017          * opcode in the TGSI dump, e.g. an if block has the same suffix as
1018          * the instruction number of the corresponding TGSI IF.
1019          */
1020         snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1021         LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1022 }
1023
1024 /* Append a basic block at the level of the parent flow.
1025  */
1026 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1027                                             const char *name)
1028 {
1029         struct gallivm_state *gallivm = &ctx->gallivm;
1030
1031         assert(ctx->flow_depth >= 1);
1032
1033         if (ctx->flow_depth >= 2) {
1034                 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1035
1036                 return LLVMInsertBasicBlockInContext(gallivm->context,
1037                                                      flow->next_block, name);
1038         }
1039
1040         return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1041 }
1042
1043 /* Emit a branch to the given default target for the current block if
1044  * applicable -- that is, if the current block does not already contain a
1045  * branch from a break or continue.
1046  */
1047 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1048 {
1049         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1050                  LLVMBuildBr(builder, target);
1051 }
1052
1053 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1054                          struct lp_build_tgsi_context *bld_base,
1055                          struct lp_build_emit_data *emit_data)
1056 {
1057         struct si_shader_context *ctx = si_shader_context(bld_base);
1058         struct gallivm_state *gallivm = &ctx->gallivm;
1059         struct si_llvm_flow *flow = push_flow(ctx);
1060         flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1061         flow->next_block = append_basic_block(ctx, "ENDLOOP");
1062         set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1063         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1064         LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1065 }
1066
1067 static void brk_emit(const struct lp_build_tgsi_action *action,
1068                      struct lp_build_tgsi_context *bld_base,
1069                      struct lp_build_emit_data *emit_data)
1070 {
1071         struct si_shader_context *ctx = si_shader_context(bld_base);
1072         struct gallivm_state *gallivm = &ctx->gallivm;
1073         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1074
1075         LLVMBuildBr(gallivm->builder, flow->next_block);
1076 }
1077
1078 static void cont_emit(const struct lp_build_tgsi_action *action,
1079                       struct lp_build_tgsi_context *bld_base,
1080                       struct lp_build_emit_data *emit_data)
1081 {
1082         struct si_shader_context *ctx = si_shader_context(bld_base);
1083         struct gallivm_state *gallivm = &ctx->gallivm;
1084         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1085
1086         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1087 }
1088
1089 static void else_emit(const struct lp_build_tgsi_action *action,
1090                       struct lp_build_tgsi_context *bld_base,
1091                       struct lp_build_emit_data *emit_data)
1092 {
1093         struct si_shader_context *ctx = si_shader_context(bld_base);
1094         struct gallivm_state *gallivm = &ctx->gallivm;
1095         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1096         LLVMBasicBlockRef endif_block;
1097
1098         assert(!current_branch->loop_entry_block);
1099
1100         endif_block = append_basic_block(ctx, "ENDIF");
1101         emit_default_branch(gallivm->builder, endif_block);
1102
1103         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1104         set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1105
1106         current_branch->next_block = endif_block;
1107 }
1108
1109 static void endif_emit(const struct lp_build_tgsi_action *action,
1110                        struct lp_build_tgsi_context *bld_base,
1111                        struct lp_build_emit_data *emit_data)
1112 {
1113         struct si_shader_context *ctx = si_shader_context(bld_base);
1114         struct gallivm_state *gallivm = &ctx->gallivm;
1115         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1116
1117         assert(!current_branch->loop_entry_block);
1118
1119         emit_default_branch(gallivm->builder, current_branch->next_block);
1120         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1121         set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1122
1123         ctx->flow_depth--;
1124 }
1125
1126 static void endloop_emit(const struct lp_build_tgsi_action *action,
1127                          struct lp_build_tgsi_context *bld_base,
1128                          struct lp_build_emit_data *emit_data)
1129 {
1130         struct si_shader_context *ctx = si_shader_context(bld_base);
1131         struct gallivm_state *gallivm = &ctx->gallivm;
1132         struct si_llvm_flow *current_loop = get_current_flow(ctx);
1133
1134         assert(current_loop->loop_entry_block);
1135
1136         emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1137
1138         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1139         set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1140         ctx->flow_depth--;
1141 }
1142
1143 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1144                          struct lp_build_tgsi_context *bld_base,
1145                          struct lp_build_emit_data *emit_data,
1146                          LLVMValueRef cond)
1147 {
1148         struct si_shader_context *ctx = si_shader_context(bld_base);
1149         struct gallivm_state *gallivm = &ctx->gallivm;
1150         struct si_llvm_flow *flow = push_flow(ctx);
1151         LLVMBasicBlockRef if_block;
1152
1153         if_block = append_basic_block(ctx, "IF");
1154         flow->next_block = append_basic_block(ctx, "ELSE");
1155         set_basicblock_name(if_block, "if", bld_base->pc);
1156         LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1157         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1158 }
1159
1160 static void if_emit(const struct lp_build_tgsi_action *action,
1161                     struct lp_build_tgsi_context *bld_base,
1162                     struct lp_build_emit_data *emit_data)
1163 {
1164         struct gallivm_state *gallivm = bld_base->base.gallivm;
1165         LLVMValueRef cond;
1166
1167         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1168                         emit_data->args[0],
1169                         bld_base->base.zero, "");
1170
1171         if_cond_emit(action, bld_base, emit_data, cond);
1172 }
1173
1174 static void uif_emit(const struct lp_build_tgsi_action *action,
1175                      struct lp_build_tgsi_context *bld_base,
1176                      struct lp_build_emit_data *emit_data)
1177 {
1178         struct gallivm_state *gallivm = bld_base->base.gallivm;
1179         LLVMValueRef cond;
1180
1181         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1182                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1183                         bld_base->int_bld.zero, "");
1184
1185         if_cond_emit(action, bld_base, emit_data, cond);
1186 }
1187
1188 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1189                            const struct tgsi_full_immediate *imm)
1190 {
1191         unsigned i;
1192         struct si_shader_context *ctx = si_shader_context(bld_base);
1193
1194         for (i = 0; i < 4; ++i) {
1195                 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1196                                 LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
1197         }
1198
1199         ctx->imms_num++;
1200 }
1201
1202 void si_llvm_context_init(struct si_shader_context *ctx,
1203                           struct si_screen *sscreen,
1204                           LLVMTargetMachineRef tm)
1205 {
1206         struct lp_type type;
1207
1208         /* Initialize the gallivm object:
1209          * We are only using the module, context, and builder fields of this struct.
1210          * This should be enough for us to be able to pass our gallivm struct to the
1211          * helper functions in the gallivm module.
1212          */
1213         memset(ctx, 0, sizeof(*ctx));
1214         ctx->screen = sscreen;
1215         ctx->tm = tm;
1216
1217         ctx->gallivm.context = LLVMContextCreate();
1218         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1219                                                 ctx->gallivm.context);
1220         LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1221
1222         LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1223         char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1224         LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1225         LLVMDisposeTargetData(data_layout);
1226         LLVMDisposeMessage(data_layout_str);
1227
1228         bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1229         enum lp_float_mode float_mode =
1230                 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1231                                 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1232
1233         ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1234                                                  float_mode);
1235
1236         ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1237         ctx->ac.module = ctx->gallivm.module;
1238         ctx->ac.builder = ctx->gallivm.builder;
1239
1240         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1241
1242         type.floating = true;
1243         type.fixed = false;
1244         type.sign = true;
1245         type.norm = false;
1246         type.width = 32;
1247         type.length = 1;
1248
1249         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1250         lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1251         lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1252         type.width *= 2;
1253         lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1254         lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1255         lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1256
1257         bld_base->soa = 1;
1258         bld_base->emit_swizzle = emit_swizzle;
1259         bld_base->emit_declaration = emit_declaration;
1260         bld_base->emit_immediate = emit_immediate;
1261
1262         /* metadata allowing 2.5 ULP */
1263         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1264                                                        "fpmath", 6);
1265         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1266         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1267                                                      &arg, 1);
1268
1269         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1270         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1271         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1272         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1273         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1274         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1275         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1276         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1277
1278         si_shader_context_init_alu(&ctx->bld_base);
1279
1280         ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1281         ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1282         ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1283         ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1284         ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1285         ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1286         ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1287         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1288         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1289         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1290         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1291
1292         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1293         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1294 }
1295
1296 /* Set the context to a certain TGSI shader. Can be called repeatedly
1297  * to change the shader. */
1298 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1299                               struct si_shader *shader)
1300 {
1301         const struct tgsi_shader_info *info = NULL;
1302         const struct tgsi_token *tokens = NULL;
1303
1304         if (shader && shader->selector) {
1305                 info = &shader->selector->info;
1306                 tokens = shader->selector->tokens;
1307         }
1308
1309         ctx->shader = shader;
1310         ctx->type = info ? info->processor : -1;
1311         ctx->bld_base.info = info;
1312
1313         /* Clean up the old contents. */
1314         FREE(ctx->temp_arrays);
1315         ctx->temp_arrays = NULL;
1316         FREE(ctx->temp_array_allocas);
1317         ctx->temp_array_allocas = NULL;
1318
1319         FREE(ctx->imms);
1320         ctx->imms = NULL;
1321         ctx->imms_num = 0;
1322
1323         FREE(ctx->temps);
1324         ctx->temps = NULL;
1325         ctx->temps_count = 0;
1326
1327         if (!info || !tokens)
1328                 return;
1329
1330         if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1331                 int size = info->array_max[TGSI_FILE_TEMPORARY];
1332
1333                 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1334                 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1335
1336                 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1337                                  ctx->temp_arrays);
1338         }
1339         if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1340                 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1341                 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1342         }
1343
1344         /* Re-set these to start with a clean slate. */
1345         ctx->bld_base.num_instructions = 0;
1346         ctx->bld_base.pc = 0;
1347         memset(ctx->outputs, 0, sizeof(ctx->outputs));
1348
1349         ctx->bld_base.emit_store = si_llvm_emit_store;
1350         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1351         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1352         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1353         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1354         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1355 }
1356
1357 void si_llvm_create_func(struct si_shader_context *ctx,
1358                          const char *name,
1359                          LLVMTypeRef *return_types, unsigned num_return_elems,
1360                          LLVMTypeRef *ParamTypes, unsigned ParamCount)
1361 {
1362         LLVMTypeRef main_fn_type, ret_type;
1363         LLVMBasicBlockRef main_fn_body;
1364         enum si_llvm_calling_convention call_conv;
1365
1366         if (num_return_elems)
1367                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1368                                                    return_types,
1369                                                    num_return_elems, true);
1370         else
1371                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1372
1373         /* Setup the function */
1374         ctx->return_type = ret_type;
1375         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1376         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1377         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1378                         ctx->main_fn, "main_body");
1379         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1380
1381         switch (ctx->type) {
1382         case PIPE_SHADER_VERTEX:
1383         case PIPE_SHADER_TESS_CTRL:
1384         case PIPE_SHADER_TESS_EVAL:
1385                 call_conv = RADEON_LLVM_AMDGPU_VS;
1386                 break;
1387         case PIPE_SHADER_GEOMETRY:
1388                 call_conv = RADEON_LLVM_AMDGPU_GS;
1389                 break;
1390         case PIPE_SHADER_FRAGMENT:
1391                 call_conv = RADEON_LLVM_AMDGPU_PS;
1392                 break;
1393         case PIPE_SHADER_COMPUTE:
1394                 call_conv = RADEON_LLVM_AMDGPU_CS;
1395                 break;
1396         default:
1397                 unreachable("Unhandle shader type");
1398         }
1399
1400         LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1401 }
1402
1403 void si_llvm_optimize_module(struct si_shader_context *ctx)
1404 {
1405         struct gallivm_state *gallivm = &ctx->gallivm;
1406         const char *triple = LLVMGetTarget(gallivm->module);
1407         LLVMTargetLibraryInfoRef target_library_info;
1408
1409         /* Dump LLVM IR before any optimization passes */
1410         if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1411             r600_can_dump_shader(&ctx->screen->b, ctx->type))
1412                 LLVMDumpModule(ctx->gallivm.module);
1413
1414         /* Create the pass manager */
1415         gallivm->passmgr = LLVMCreatePassManager();
1416
1417         target_library_info = gallivm_create_target_library_info(triple);
1418         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1419
1420         if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1421                 LLVMAddVerifierPass(gallivm->passmgr);
1422
1423         LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1424
1425         /* This pass should eliminate all the load and store instructions */
1426         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1427
1428         /* Add some optimization passes */
1429         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1430         LLVMAddLICMPass(gallivm->passmgr);
1431         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1432         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1433         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1434
1435         /* Run the pass */
1436         LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1437
1438         LLVMDisposeBuilder(gallivm->builder);
1439         LLVMDisposePassManager(gallivm->passmgr);
1440         gallivm_dispose_target_library_info(target_library_info);
1441 }
1442
1443 void si_llvm_dispose(struct si_shader_context *ctx)
1444 {
1445         LLVMDisposeModule(ctx->gallivm.module);
1446         LLVMContextDispose(ctx->gallivm.context);
1447         FREE(ctx->temp_arrays);
1448         ctx->temp_arrays = NULL;
1449         FREE(ctx->temp_array_allocas);
1450         ctx->temp_array_allocas = NULL;
1451         FREE(ctx->temps);
1452         ctx->temps = NULL;
1453         ctx->temps_count = 0;
1454         FREE(ctx->imms);
1455         ctx->imms = NULL;
1456         ctx->imms_num = 0;
1457         FREE(ctx->flow);
1458         ctx->flow = NULL;
1459         ctx->flow_depth_max = 0;
1460 }