src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "si_shader_internal.h"
  25 #include "si_pipe.h"
  26
  27 #include "gallivm/lp_bld_const.h"
  28 #include "gallivm/lp_bld_gather.h"
  29 #include "gallivm/lp_bld_flow.h"
  30 #include "gallivm/lp_bld_init.h"
  31 #include "gallivm/lp_bld_intr.h"
  32 #include "gallivm/lp_bld_misc.h"
  33 #include "gallivm/lp_bld_swizzle.h"
  34 #include "tgsi/tgsi_info.h"
  35 #include "tgsi/tgsi_parse.h"
  36 #include "util/u_math.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include <stdio.h>
  41 #include <llvm-c/Transforms/IPO.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43
  44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
  45  */
  46 struct si_llvm_flow {
  47         /* Loop exit or next part of if/else/endif. */
  48         LLVMBasicBlockRef next_block;
  49         LLVMBasicBlockRef loop_entry_block;
  50 };
  51
  52 #define CPU_STRING_LEN 30
  53 #define FS_STRING_LEN 30
  54 #define TRIPLE_STRING_LEN 7
  55
  56 /**
  57  * Shader types for the LLVM backend.
  58  */
  59 enum si_llvm_shader_type {
  60         RADEON_LLVM_SHADER_PS = 0,
  61         RADEON_LLVM_SHADER_VS = 1,
  62         RADEON_LLVM_SHADER_GS = 2,
  63         RADEON_LLVM_SHADER_CS = 3,
  64 };
  65
  66 enum si_llvm_calling_convention {
  67         RADEON_LLVM_AMDGPU_VS = 87,
  68         RADEON_LLVM_AMDGPU_GS = 88,
  69         RADEON_LLVM_AMDGPU_PS = 89,
  70         RADEON_LLVM_AMDGPU_CS = 90,
  71 };
  72
  73 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
  74 {
  75         char str[16];
  76
  77         snprintf(str, sizeof(str), "%i", value);
  78         LLVMAddTargetDependentFunctionAttr(F, name, str);
  79 }
  80
  81 /**
  82  * Set the shader type we want to compile
  83  *
  84  * @param type shader type to set
  85  */
  86 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
  87 {
  88         enum si_llvm_shader_type llvm_type;
  89         enum si_llvm_calling_convention calling_conv;
  90
  91         switch (type) {
  92         case PIPE_SHADER_VERTEX:
  93         case PIPE_SHADER_TESS_CTRL:
  94         case PIPE_SHADER_TESS_EVAL:
  95                 llvm_type = RADEON_LLVM_SHADER_VS;
  96                 calling_conv = RADEON_LLVM_AMDGPU_VS;
  97                 break;
  98         case PIPE_SHADER_GEOMETRY:
  99                 llvm_type = RADEON_LLVM_SHADER_GS;
 100                 calling_conv = RADEON_LLVM_AMDGPU_GS;
 101                 break;
 102         case PIPE_SHADER_FRAGMENT:
 103                 llvm_type = RADEON_LLVM_SHADER_PS;
 104                 calling_conv = RADEON_LLVM_AMDGPU_PS;
 105                 break;
 106         case PIPE_SHADER_COMPUTE:
 107                 llvm_type = RADEON_LLVM_SHADER_CS;
 108                 calling_conv = RADEON_LLVM_AMDGPU_CS;
 109                 break;
 110         default:
 111                 unreachable("Unhandle shader type");
 112         }
 113
 114         if (HAVE_LLVM >= 0x309)
 115                 LLVMSetFunctionCallConv(F, calling_conv);
 116         else
 117                 si_llvm_add_attribute(F, "ShaderType", llvm_type);
 118 }
 119
 120 static void init_amdgpu_target()
 121 {
 122         gallivm_init_llvm_targets();
 123         LLVMInitializeAMDGPUTargetInfo();
 124         LLVMInitializeAMDGPUTarget();
 125         LLVMInitializeAMDGPUTargetMC();
 126         LLVMInitializeAMDGPUAsmPrinter();
 127 }
 128
 129 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
 130
 131 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
 132 {
 133         LLVMTargetRef target = NULL;
 134         char *err_message = NULL;
 135
 136         call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
 137
 138         if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
 139                 fprintf(stderr, "Cannot find target for triple %s ", triple);
 140                 if (err_message) {
 141                         fprintf(stderr, "%s\n", err_message);
 142                 }
 143                 LLVMDisposeMessage(err_message);
 144                 return NULL;
 145         }
 146         return target;
 147 }
 148
 149 struct si_llvm_diagnostics {
 150         struct pipe_debug_callback *debug;
 151         unsigned retval;
 152 };
 153
 154 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 155 {
 156         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
 157         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
 158         char *description = LLVMGetDiagInfoDescription(di);
 159         const char *severity_str = NULL;
 160
 161         switch (severity) {
 162         case LLVMDSError:
 163                 severity_str = "error";
 164                 break;
 165         case LLVMDSWarning:
 166                 severity_str = "warning";
 167                 break;
 168         case LLVMDSRemark:
 169                 severity_str = "remark";
 170                 break;
 171         case LLVMDSNote:
 172                 severity_str = "note";
 173                 break;
 174         default:
 175                 severity_str = "unknown";
 176         }
 177
 178         pipe_debug_message(diag->debug, SHADER_INFO,
 179                            "LLVM diagnostic (%s): %s", severity_str, description);
 180
 181         if (severity == LLVMDSError) {
 182                 diag->retval = 1;
 183                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
 184         }
 185
 186         LLVMDisposeMessage(description);
 187 }
 188
 189 /**
 190  * Compile an LLVM module to machine code.
 191  *
 192  * @returns 0 for success, 1 for failure
 193  */
 194 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 195                          LLVMTargetMachineRef tm,
 196                          struct pipe_debug_callback *debug)
 197 {
 198         struct si_llvm_diagnostics diag;
 199         char *err;
 200         LLVMContextRef llvm_ctx;
 201         LLVMMemoryBufferRef out_buffer;
 202         unsigned buffer_size;
 203         const char *buffer_data;
 204         LLVMBool mem_err;
 205
 206         diag.debug = debug;
 207         diag.retval = 0;
 208
 209         /* Setup Diagnostic Handler*/
 210         llvm_ctx = LLVMGetModuleContext(M);
 211
 212         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 213
 214         /* Compile IR*/
 215         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
 216                                                                  &out_buffer);
 217
 218         /* Process Errors/Warnings */
 219         if (mem_err) {
 220                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
 221                 pipe_debug_message(debug, SHADER_INFO,
 222                                    "LLVM emit error: %s", err);
 223                 FREE(err);
 224                 diag.retval = 1;
 225                 goto out;
 226         }
 227
 228         /* Extract Shader Code*/
 229         buffer_size = LLVMGetBufferSize(out_buffer);
 230         buffer_data = LLVMGetBufferStart(out_buffer);
 231
 232         ac_elf_read(buffer_data, buffer_size, binary);
 233
 234         /* Clean up */
 235         LLVMDisposeMemoryBuffer(out_buffer);
 236
 237 out:
 238         if (diag.retval != 0)
 239                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
 240         return diag.retval;
 241 }
 242
 243 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 244                           enum tgsi_opcode_type type)
 245 {
 246         LLVMContextRef ctx = bld_base->base.gallivm->context;
 247
 248         switch (type) {
 249         case TGSI_TYPE_UNSIGNED:
 250         case TGSI_TYPE_SIGNED:
 251                 return LLVMInt32TypeInContext(ctx);
 252         case TGSI_TYPE_UNSIGNED64:
 253         case TGSI_TYPE_SIGNED64:
 254                 return LLVMInt64TypeInContext(ctx);
 255         case TGSI_TYPE_DOUBLE:
 256                 return LLVMDoubleTypeInContext(ctx);
 257         case TGSI_TYPE_UNTYPED:
 258         case TGSI_TYPE_FLOAT:
 259                 return LLVMFloatTypeInContext(ctx);
 260         default: break;
 261         }
 262         return 0;
 263 }
 264
 265 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 266                      enum tgsi_opcode_type type, LLVMValueRef value)
 267 {
 268         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 269         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 270
 271         if (dst_type)
 272                 return LLVMBuildBitCast(builder, value, dst_type, "");
 273         else
 274                 return value;
 275 }
 276
 277 /**
 278  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 279  * or an undefined value in the same interval otherwise.
 280  */
 281 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 282                                  LLVMValueRef index,
 283                                  unsigned num)
 284 {
 285         struct gallivm_state *gallivm = &ctx->gallivm;
 286         LLVMBuilderRef builder = gallivm->builder;
 287         LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
 288         LLVMValueRef cc;
 289
 290         if (util_is_power_of_two(num)) {
 291                 index = LLVMBuildAnd(builder, index, c_max, "");
 292         } else {
 293                 /* In theory, this MAX pattern should result in code that is
 294                  * as good as the bit-wise AND above.
 295                  *
 296                  * In practice, LLVM generates worse code (at the time of
 297                  * writing), because its value tracking is not strong enough.
 298                  */
 299                 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 300                 index = LLVMBuildSelect(builder, cc, index, c_max, "");
 301         }
 302
 303         return index;
 304 }
 305
 306 static struct si_llvm_flow *
 307 get_current_flow(struct si_shader_context *ctx)
 308 {
 309         if (ctx->flow_depth > 0)
 310                 return &ctx->flow[ctx->flow_depth - 1];
 311         return NULL;
 312 }
 313
 314 static struct si_llvm_flow *
 315 get_innermost_loop(struct si_shader_context *ctx)
 316 {
 317         for (unsigned i = ctx->flow_depth; i > 0; --i) {
 318                 if (ctx->flow[i - 1].loop_entry_block)
 319                         return &ctx->flow[i - 1];
 320         }
 321         return NULL;
 322 }
 323
 324 static struct si_llvm_flow *
 325 push_flow(struct si_shader_context *ctx)
 326 {
 327         struct si_llvm_flow *flow;
 328
 329         if (ctx->flow_depth >= ctx->flow_depth_max) {
 330                 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
 331                 ctx->flow = REALLOC(ctx->flow,
 332                                     ctx->flow_depth_max * sizeof(*ctx->flow),
 333                                     new_max * sizeof(*ctx->flow));
 334                 ctx->flow_depth_max = new_max;
 335         }
 336
 337         flow = &ctx->flow[ctx->flow_depth];
 338         ctx->flow_depth++;
 339
 340         flow->next_block = NULL;
 341         flow->loop_entry_block = NULL;
 342         return flow;
 343 }
 344
 345 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 346                                  LLVMValueRef value,
 347                                  unsigned swizzle_x,
 348                                  unsigned swizzle_y,
 349                                  unsigned swizzle_z,
 350                                  unsigned swizzle_w)
 351 {
 352         LLVMValueRef swizzles[4];
 353         LLVMTypeRef i32t =
 354                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 355
 356         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 357         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 358         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 359         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 360
 361         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 362                                       value,
 363                                       LLVMGetUndef(LLVMTypeOf(value)),
 364                                       LLVMConstVector(swizzles, 4), "");
 365 }
 366
 367 /**
 368  * Return the description of the array covering the given temporary register
 369  * index.
 370  */
 371 static unsigned
 372 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
 373                   unsigned reg_index,
 374                   const struct tgsi_ind_register *reg)
 375 {
 376         struct si_shader_context *ctx = si_shader_context(bld_base);
 377         unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
 378         unsigned i;
 379
 380         if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
 381                 return reg->ArrayID;
 382
 383         for (i = 0; i < num_arrays; i++) {
 384                 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 385
 386                 if (reg_index >= array->range.First && reg_index <= array->range.Last)
 387                         return i + 1;
 388         }
 389
 390         return 0;
 391 }
 392
 393 static struct tgsi_declaration_range
 394 get_array_range(struct lp_build_tgsi_context *bld_base,
 395                 unsigned File, unsigned reg_index,
 396                 const struct tgsi_ind_register *reg)
 397 {
 398         struct si_shader_context *ctx = si_shader_context(bld_base);
 399         struct tgsi_declaration_range range;
 400
 401         if (File == TGSI_FILE_TEMPORARY) {
 402                 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
 403                 if (array_id)
 404                         return ctx->temp_arrays[array_id - 1].range;
 405         }
 406
 407         range.First = 0;
 408         range.Last = bld_base->info->file_max[File];
 409         return range;
 410 }
 411
 412 static LLVMValueRef
 413 emit_array_index(struct si_shader_context *ctx,
 414                  const struct tgsi_ind_register *reg,
 415                  unsigned offset)
 416 {
 417         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 418
 419         if (!reg) {
 420                 return lp_build_const_int32(gallivm, offset);
 421         }
 422         LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
 423         return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
 424 }
 425
 426 /**
 427  * For indirect registers, construct a pointer directly to the requested
 428  * element using getelementptr if possible.
 429  *
 430  * Returns NULL if the insertelement/extractelement fallback for array access
 431  * must be used.
 432  */
 433 static LLVMValueRef
 434 get_pointer_into_array(struct si_shader_context *ctx,
 435                        unsigned file,
 436                        unsigned swizzle,
 437                        unsigned reg_index,
 438                        const struct tgsi_ind_register *reg_indirect)
 439 {
 440         unsigned array_id;
 441         struct tgsi_array_info *array;
 442         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 443         LLVMBuilderRef builder = gallivm->builder;
 444         LLVMValueRef idxs[2];
 445         LLVMValueRef index;
 446         LLVMValueRef alloca;
 447
 448         if (file != TGSI_FILE_TEMPORARY)
 449                 return NULL;
 450
 451         array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
 452         if (!array_id)
 453                 return NULL;
 454
 455         alloca = ctx->temp_array_allocas[array_id - 1];
 456         if (!alloca)
 457                 return NULL;
 458
 459         array = &ctx->temp_arrays[array_id - 1];
 460
 461         if (!(array->writemask & (1 << swizzle)))
 462                 return ctx->undef_alloca;
 463
 464         index = emit_array_index(ctx, reg_indirect,
 465                                  reg_index - ctx->temp_arrays[array_id - 1].range.First);
 466
 467         /* Ensure that the index is within a valid range, to guard against
 468          * VM faults and overwriting critical data (e.g. spilled resource
 469          * descriptors).
 470          *
 471          * TODO It should be possible to avoid the additional instructions
 472          * if LLVM is changed so that it guarantuees:
 473          * 1. the scratch space descriptor isolates the current wave (this
 474          *    could even save the scratch offset SGPR at the cost of an
 475          *    additional SALU instruction)
 476          * 2. the memory for allocas must be allocated at the _end_ of the
 477          *    scratch space (after spilled registers)
 478          */
 479         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
 480
 481         index = LLVMBuildMul(
 482                 builder, index,
 483                 lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
 484                 "");
 485         index = LLVMBuildAdd(
 486                 builder, index,
 487                 lp_build_const_int32(
 488                         gallivm,
 489                         util_bitcount(array->writemask & ((1 << swizzle) - 1))),
 490                 "");
 491         idxs[0] = ctx->bld_base.uint_bld.zero;
 492         idxs[1] = index;
 493         return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 494 }
 495
 496 LLVMValueRef
 497 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 498                          enum tgsi_opcode_type type,
 499                          LLVMValueRef ptr,
 500                          LLVMValueRef ptr2)
 501 {
 502         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 503         LLVMValueRef result;
 504
 505         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 506
 507         result = LLVMBuildInsertElement(builder,
 508                                         result,
 509                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
 510                                         bld_base->int_bld.zero, "");
 511         result = LLVMBuildInsertElement(builder,
 512                                         result,
 513                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
 514                                         bld_base->int_bld.one, "");
 515         return bitcast(bld_base, type, result);
 516 }
 517
 518 static LLVMValueRef
 519 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 520                  unsigned File, enum tgsi_opcode_type type,
 521                  struct tgsi_declaration_range range,
 522                  unsigned swizzle)
 523 {
 524         struct si_shader_context *ctx = si_shader_context(bld_base);
 525         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 526
 527         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 528
 529         unsigned i, size = range.Last - range.First + 1;
 530         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 531         LLVMValueRef result = LLVMGetUndef(vec);
 532
 533         struct tgsi_full_src_register tmp_reg = {};
 534         tmp_reg.Register.File = File;
 535
 536         for (i = 0; i < size; ++i) {
 537                 tmp_reg.Register.Index = i + range.First;
 538                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 539                 result = LLVMBuildInsertElement(builder, result, temp,
 540                         lp_build_const_int32(gallivm, i), "array_vector");
 541         }
 542         return result;
 543 }
 544
 545 static LLVMValueRef
 546 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 547                       unsigned file,
 548                       enum tgsi_opcode_type type,
 549                       unsigned swizzle,
 550                       unsigned reg_index,
 551                       const struct tgsi_ind_register *reg_indirect)
 552 {
 553         struct si_shader_context *ctx = si_shader_context(bld_base);
 554         struct gallivm_state *gallivm = bld_base->base.gallivm;
 555         LLVMBuilderRef builder = gallivm->builder;
 556         LLVMValueRef ptr;
 557
 558         ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
 559         if (ptr) {
 560                 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
 561                 if (tgsi_type_is_64bit(type)) {
 562                         LLVMValueRef ptr_hi, val_hi;
 563                         ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
 564                         val_hi = LLVMBuildLoad(builder, ptr_hi, "");
 565                         val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
 566                 }
 567
 568                 return val;
 569         } else {
 570                 struct tgsi_declaration_range range =
 571                         get_array_range(bld_base, file, reg_index, reg_indirect);
 572                 LLVMValueRef index =
 573                         emit_array_index(ctx, reg_indirect, reg_index - range.First);
 574                 LLVMValueRef array =
 575                         emit_array_fetch(bld_base, file, type, range, swizzle);
 576                 return LLVMBuildExtractElement(builder, array, index, "");
 577         }
 578 }
 579
 580 static void
 581 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 582                      LLVMValueRef value,
 583                      unsigned file,
 584                      unsigned chan_index,
 585                      unsigned reg_index,
 586                      const struct tgsi_ind_register *reg_indirect)
 587 {
 588         struct si_shader_context *ctx = si_shader_context(bld_base);
 589         struct gallivm_state *gallivm = bld_base->base.gallivm;
 590         LLVMBuilderRef builder = gallivm->builder;
 591         LLVMValueRef ptr;
 592
 593         ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
 594         if (ptr) {
 595                 LLVMBuildStore(builder, value, ptr);
 596         } else {
 597                 unsigned i, size;
 598                 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
 599                 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
 600                 LLVMValueRef array =
 601                         emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
 602                 LLVMValueRef temp_ptr;
 603
 604                 array = LLVMBuildInsertElement(builder, array, value, index, "");
 605
 606                 size = range.Last - range.First + 1;
 607                 for (i = 0; i < size; ++i) {
 608                         switch(file) {
 609                         case TGSI_FILE_OUTPUT:
 610                                 temp_ptr = ctx->outputs[i + range.First][chan_index];
 611                                 break;
 612
 613                         case TGSI_FILE_TEMPORARY:
 614                                 if (range.First + i >= ctx->temps_count)
 615                                         continue;
 616                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 617                                 break;
 618
 619                         default:
 620                                 continue;
 621                         }
 622                         value = LLVMBuildExtractElement(builder, array,
 623                                 lp_build_const_int32(gallivm, i), "");
 624                         LLVMBuildStore(builder, value, temp_ptr);
 625                 }
 626         }
 627 }
 628
 629 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 630  * reload them at each use. This must be true if the shader is using
 631  * derivatives and KILL, because KILL can leave the WQM and then a lazy
 632  * input load isn't in the WQM anymore.
 633  */
 634 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
 635 {
 636         struct si_shader_selector *sel = ctx->shader->selector;
 637
 638         return sel->info.uses_derivatives &&
 639                sel->info.uses_kill;
 640 }
 641
 642 static LLVMValueRef
 643 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 644                unsigned chan)
 645 {
 646         struct si_shader_context *ctx = si_shader_context(bld_base);
 647
 648         assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
 649         return ctx->outputs[index][chan];
 650 }
 651
 652 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 653                                 const struct tgsi_full_src_register *reg,
 654                                 enum tgsi_opcode_type type,
 655                                 unsigned swizzle)
 656 {
 657         struct si_shader_context *ctx = si_shader_context(bld_base);
 658         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 659         LLVMValueRef result = NULL, ptr, ptr2;
 660
 661         if (swizzle == ~0) {
 662                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 663                 unsigned chan;
 664                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 665                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
 666                 }
 667                 return lp_build_gather_values(bld_base->base.gallivm, values,
 668                                               TGSI_NUM_CHANNELS);
 669         }
 670
 671         if (reg->Register.Indirect) {
 672                 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
 673                                 swizzle, reg->Register.Index, &reg->Indirect);
 674                 return bitcast(bld_base, type, load);
 675         }
 676
 677         switch(reg->Register.File) {
 678         case TGSI_FILE_IMMEDIATE: {
 679                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 680                 if (tgsi_type_is_64bit(type)) {
 681                         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 682                         result = LLVMConstInsertElement(result,
 683                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 684                                                         bld_base->int_bld.zero);
 685                         result = LLVMConstInsertElement(result,
 686                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 687                                                         bld_base->int_bld.one);
 688                         return LLVMConstBitCast(result, ctype);
 689                 } else {
 690                         return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 691                 }
 692         }
 693
 694         case TGSI_FILE_INPUT: {
 695                 unsigned index = reg->Register.Index;
 696                 LLVMValueRef input[4];
 697
 698                 /* I don't think doing this for vertex shaders is beneficial.
 699                  * For those, we want to make sure the VMEM loads are executed
 700                  * only once. Fragment shaders don't care much, because
 701                  * v_interp instructions are much cheaper than VMEM loads.
 702                  */
 703                 if (!si_preload_fs_inputs(ctx) &&
 704                     ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
 705                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
 706                 else
 707                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
 708
 709                 result = input[swizzle];
 710
 711                 if (tgsi_type_is_64bit(type)) {
 712                         ptr = result;
 713                         ptr2 = input[swizzle + 1];
 714                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 715                 }
 716                 break;
 717         }
 718
 719         case TGSI_FILE_TEMPORARY:
 720                 if (reg->Register.Index >= ctx->temps_count)
 721                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 722                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 723                 if (tgsi_type_is_64bit(type)) {
 724                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 725                         return si_llvm_emit_fetch_64bit(bld_base, type,
 726                                                         LLVMBuildLoad(builder, ptr, ""),
 727                                                         LLVMBuildLoad(builder, ptr2, ""));
 728                 }
 729                 result = LLVMBuildLoad(builder, ptr, "");
 730                 break;
 731
 732         case TGSI_FILE_OUTPUT:
 733                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
 734                 if (tgsi_type_is_64bit(type)) {
 735                         ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
 736                         return si_llvm_emit_fetch_64bit(bld_base, type,
 737                                                         LLVMBuildLoad(builder, ptr, ""),
 738                                                         LLVMBuildLoad(builder, ptr2, ""));
 739                 }
 740                 result = LLVMBuildLoad(builder, ptr, "");
 741                 break;
 742
 743         default:
 744                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 745         }
 746
 747         return bitcast(bld_base, type, result);
 748 }
 749
 750 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 751                                        const struct tgsi_full_src_register *reg,
 752                                        enum tgsi_opcode_type type,
 753                                        unsigned swizzle)
 754 {
 755         struct si_shader_context *ctx = si_shader_context(bld_base);
 756         struct gallivm_state *gallivm = bld_base->base.gallivm;
 757
 758         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 759         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 760                 cval = LLVMBuildExtractElement(gallivm->builder, cval,
 761                                                lp_build_const_int32(gallivm, swizzle), "");
 762         }
 763         return bitcast(bld_base, type, cval);
 764 }
 765
 766 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 767                              const struct tgsi_full_declaration *decl)
 768 {
 769         struct si_shader_context *ctx = si_shader_context(bld_base);
 770         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 771         unsigned first, last, i;
 772         switch(decl->Declaration.File) {
 773         case TGSI_FILE_ADDRESS:
 774         {
 775                  unsigned idx;
 776                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 777                         unsigned chan;
 778                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 779                                  ctx->addrs[idx][chan] = lp_build_alloca_undef(
 780                                         &ctx->gallivm,
 781                                         ctx->bld_base.uint_bld.elem_type, "");
 782                         }
 783                 }
 784                 break;
 785         }
 786
 787         case TGSI_FILE_TEMPORARY:
 788         {
 789                 char name[16] = "";
 790                 LLVMValueRef array_alloca = NULL;
 791                 unsigned decl_size;
 792                 unsigned writemask = decl->Declaration.UsageMask;
 793                 first = decl->Range.First;
 794                 last = decl->Range.Last;
 795                 decl_size = 4 * ((last - first) + 1);
 796
 797                 if (decl->Declaration.Array) {
 798                         unsigned id = decl->Array.ArrayID - 1;
 799                         unsigned array_size;
 800
 801                         writemask &= ctx->temp_arrays[id].writemask;
 802                         ctx->temp_arrays[id].writemask = writemask;
 803                         array_size = ((last - first) + 1) * util_bitcount(writemask);
 804
 805                         /* If the array has more than 16 elements, store it
 806                          * in memory using an alloca that spans the entire
 807                          * array.
 808                          *
 809                          * Otherwise, store each array element individually.
 810                          * We will then generate vectors (per-channel, up to
 811                          * <16 x float> if the usagemask is a single bit) for
 812                          * indirect addressing.
 813                          *
 814                          * Note that 16 is the number of vector elements that
 815                          * LLVM will store in a register, so theoretically an
 816                          * array with up to 4 * 16 = 64 elements could be
 817                          * handled this way, but whether that's a good idea
 818                          * depends on VGPR register pressure elsewhere.
 819                          *
 820                          * FIXME: We shouldn't need to have the non-alloca
 821                          * code path for arrays. LLVM should be smart enough to
 822                          * promote allocas into registers when profitable.
 823                          *
 824                          * LLVM 3.8 crashes with this.
 825                          */
 826                         if (HAVE_LLVM >= 0x0309 && array_size > 16) {
 827                                 array_alloca = LLVMBuildAlloca(builder,
 828                                         LLVMArrayType(bld_base->base.vec_type,
 829                                                       array_size), "array");
 830                                 ctx->temp_array_allocas[id] = array_alloca;
 831                         }
 832                 }
 833
 834                 if (!ctx->temps_count) {
 835                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 836                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 837                 }
 838                 if (!array_alloca) {
 839                         for (i = 0; i < decl_size; ++i) {
 840 #ifdef DEBUG
 841                                 snprintf(name, sizeof(name), "TEMP%d.%c",
 842                                          first + i / 4, "xyzw"[i % 4]);
 843 #endif
 844                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
 845                                         lp_build_alloca_undef(bld_base->base.gallivm,
 846                                                               bld_base->base.vec_type,
 847                                                               name);
 848                         }
 849                 } else {
 850                         LLVMValueRef idxs[2] = {
 851                                 bld_base->uint_bld.zero,
 852                                 NULL
 853                         };
 854                         unsigned j = 0;
 855
 856                         if (writemask != TGSI_WRITEMASK_XYZW &&
 857                             !ctx->undef_alloca) {
 858                                 /* Create a dummy alloca. We use it so that we
 859                                  * have a pointer that is safe to load from if
 860                                  * a shader ever reads from a channel that
 861                                  * it never writes to.
 862                                  */
 863                                 ctx->undef_alloca = lp_build_alloca_undef(
 864                                         bld_base->base.gallivm,
 865                                         bld_base->base.vec_type, "undef");
 866                         }
 867
 868                         for (i = 0; i < decl_size; ++i) {
 869                                 LLVMValueRef ptr;
 870                                 if (writemask & (1 << (i % 4))) {
 871 #ifdef DEBUG
 872                                         snprintf(name, sizeof(name), "TEMP%d.%c",
 873                                                  first + i / 4, "xyzw"[i % 4]);
 874 #endif
 875                                         idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
 876                                         ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
 877                                         j++;
 878                                 } else {
 879                                         ptr = ctx->undef_alloca;
 880                                 }
 881                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 882                         }
 883                 }
 884                 break;
 885         }
 886         case TGSI_FILE_INPUT:
 887         {
 888                 unsigned idx;
 889                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 890                         if (ctx->load_input &&
 891                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
 892                                 ctx->input_decls[idx] = *decl;
 893                                 ctx->input_decls[idx].Range.First = idx;
 894                                 ctx->input_decls[idx].Range.Last = idx;
 895                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
 896
 897                                 if (si_preload_fs_inputs(ctx) ||
 898                                     bld_base->info->processor != PIPE_SHADER_FRAGMENT)
 899                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
 900                                                         &ctx->inputs[idx * 4]);
 901                         }
 902                 }
 903         }
 904         break;
 905
 906         case TGSI_FILE_SYSTEM_VALUE:
 907         {
 908                 unsigned idx;
 909                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 910                         ctx->load_system_value(ctx, idx, decl);
 911                 }
 912         }
 913         break;
 914
 915         case TGSI_FILE_OUTPUT:
 916         {
 917                 char name[16] = "";
 918                 unsigned idx;
 919                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 920                         unsigned chan;
 921                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 922                         if (ctx->outputs[idx][0])
 923                                 continue;
 924                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 925 #ifdef DEBUG
 926                                 snprintf(name, sizeof(name), "OUT%d.%c",
 927                                          idx, "xyzw"[chan % 4]);
 928 #endif
 929                                 ctx->outputs[idx][chan] = lp_build_alloca_undef(
 930                                         &ctx->gallivm,
 931                                         ctx->bld_base.base.elem_type, name);
 932                         }
 933                 }
 934                 break;
 935         }
 936
 937         case TGSI_FILE_MEMORY:
 938                 ctx->declare_memory_region(ctx, decl);
 939                 break;
 940
 941         default:
 942                 break;
 943         }
 944 }
 945
 946 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 947                         const struct tgsi_full_instruction *inst,
 948                         const struct tgsi_opcode_info *info,
 949                         LLVMValueRef dst[4])
 950 {
 951         struct si_shader_context *ctx = si_shader_context(bld_base);
 952         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 953         const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 954         LLVMBuilderRef builder = ctx->bld_base.base.gallivm->builder;
 955         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 956         unsigned chan, chan_index;
 957         bool is_vec_store = false;
 958         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 959
 960         if (dst[0]) {
 961                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 962                 is_vec_store = (k == LLVMVectorTypeKind);
 963         }
 964
 965         if (is_vec_store) {
 966                 LLVMValueRef values[4] = {};
 967                 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
 968                         LLVMValueRef index = lp_build_const_int32(gallivm, chan);
 969                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 970                                                         dst[0], index, "");
 971                 }
 972                 bld_base->emit_store(bld_base, inst, info, values);
 973                 return;
 974         }
 975
 976         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
 977                 LLVMValueRef value = dst[chan_index];
 978
 979                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 980                         continue;
 981                 if (inst->Instruction.Saturate)
 982                         value = ac_build_clamp(&ctx->ac, value);
 983
 984                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 985                         temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 986                         LLVMBuildStore(builder, value, temp_ptr);
 987                         continue;
 988                 }
 989
 990                 if (!tgsi_type_is_64bit(dtype))
 991                         value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 992
 993                 if (reg->Register.Indirect) {
 994                         unsigned file = reg->Register.File;
 995                         unsigned reg_index = reg->Register.Index;
 996                         store_value_to_array(bld_base, value, file, chan_index,
 997                                              reg_index, &reg->Indirect);
 998                 } else {
 999                         switch(reg->Register.File) {
1000                         case TGSI_FILE_OUTPUT:
1001                                 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
1002                                 if (tgsi_type_is_64bit(dtype))
1003                                         temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
1004                                 break;
1005
1006                         case TGSI_FILE_TEMPORARY:
1007                         {
1008                                 if (reg->Register.Index >= ctx->temps_count)
1009                                         continue;
1010
1011                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1012                                 if (tgsi_type_is_64bit(dtype))
1013                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1014
1015                                 break;
1016                         }
1017                         default:
1018                                 return;
1019                         }
1020                         if (!tgsi_type_is_64bit(dtype))
1021                                 LLVMBuildStore(builder, value, temp_ptr);
1022                         else {
1023                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1024                                                                     LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
1025                                 LLVMValueRef val2;
1026                                 value = LLVMBuildExtractElement(builder, ptr,
1027                                                                 bld_base->uint_bld.zero, "");
1028                                 val2 = LLVMBuildExtractElement(builder, ptr,
1029                                                                 bld_base->uint_bld.one, "");
1030
1031                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1032                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1033                         }
1034                 }
1035         }
1036 }
1037
1038 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1039 {
1040         char buf[32];
1041         /* Subtract 1 so that the number shown is that of the corresponding
1042          * opcode in the TGSI dump, e.g. an if block has the same suffix as
1043          * the instruction number of the corresponding TGSI IF.
1044          */
1045         snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1046         LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1047 }
1048
1049 /* Append a basic block at the level of the parent flow.
1050  */
1051 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1052                                             const char *name)
1053 {
1054         struct gallivm_state *gallivm = &ctx->gallivm;
1055
1056         assert(ctx->flow_depth >= 1);
1057
1058         if (ctx->flow_depth >= 2) {
1059                 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1060
1061                 return LLVMInsertBasicBlockInContext(gallivm->context,
1062                                                      flow->next_block, name);
1063         }
1064
1065         return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1066 }
1067
1068 /* Emit a branch to the given default target for the current block if
1069  * applicable -- that is, if the current block does not already contain a
1070  * branch from a break or continue.
1071  */
1072 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1073 {
1074         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1075                  LLVMBuildBr(builder, target);
1076 }
1077
1078 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1079                          struct lp_build_tgsi_context *bld_base,
1080                          struct lp_build_emit_data *emit_data)
1081 {
1082         struct si_shader_context *ctx = si_shader_context(bld_base);
1083         struct gallivm_state *gallivm = bld_base->base.gallivm;
1084         struct si_llvm_flow *flow = push_flow(ctx);
1085         flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1086         flow->next_block = append_basic_block(ctx, "ENDLOOP");
1087         set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1088         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1089         LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1090 }
1091
1092 static void brk_emit(const struct lp_build_tgsi_action *action,
1093                      struct lp_build_tgsi_context *bld_base,
1094                      struct lp_build_emit_data *emit_data)
1095 {
1096         struct si_shader_context *ctx = si_shader_context(bld_base);
1097         struct gallivm_state *gallivm = bld_base->base.gallivm;
1098         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1099
1100         LLVMBuildBr(gallivm->builder, flow->next_block);
1101 }
1102
1103 static void cont_emit(const struct lp_build_tgsi_action *action,
1104                       struct lp_build_tgsi_context *bld_base,
1105                       struct lp_build_emit_data *emit_data)
1106 {
1107         struct si_shader_context *ctx = si_shader_context(bld_base);
1108         struct gallivm_state *gallivm = bld_base->base.gallivm;
1109         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1110
1111         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1112 }
1113
1114 static void else_emit(const struct lp_build_tgsi_action *action,
1115                       struct lp_build_tgsi_context *bld_base,
1116                       struct lp_build_emit_data *emit_data)
1117 {
1118         struct si_shader_context *ctx = si_shader_context(bld_base);
1119         struct gallivm_state *gallivm = bld_base->base.gallivm;
1120         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1121         LLVMBasicBlockRef endif_block;
1122
1123         assert(!current_branch->loop_entry_block);
1124
1125         endif_block = append_basic_block(ctx, "ENDIF");
1126         emit_default_branch(gallivm->builder, endif_block);
1127
1128         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1129         set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1130
1131         current_branch->next_block = endif_block;
1132 }
1133
1134 static void endif_emit(const struct lp_build_tgsi_action *action,
1135                        struct lp_build_tgsi_context *bld_base,
1136                        struct lp_build_emit_data *emit_data)
1137 {
1138         struct si_shader_context *ctx = si_shader_context(bld_base);
1139         struct gallivm_state *gallivm = bld_base->base.gallivm;
1140         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1141
1142         assert(!current_branch->loop_entry_block);
1143
1144         emit_default_branch(gallivm->builder, current_branch->next_block);
1145         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1146         set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1147
1148         ctx->flow_depth--;
1149 }
1150
1151 static void endloop_emit(const struct lp_build_tgsi_action *action,
1152                          struct lp_build_tgsi_context *bld_base,
1153                          struct lp_build_emit_data *emit_data)
1154 {
1155         struct si_shader_context *ctx = si_shader_context(bld_base);
1156         struct gallivm_state *gallivm = bld_base->base.gallivm;
1157         struct si_llvm_flow *current_loop = get_current_flow(ctx);
1158
1159         assert(current_loop->loop_entry_block);
1160
1161         emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1162
1163         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1164         set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1165         ctx->flow_depth--;
1166 }
1167
1168 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1169                          struct lp_build_tgsi_context *bld_base,
1170                          struct lp_build_emit_data *emit_data,
1171                          LLVMValueRef cond)
1172 {
1173         struct si_shader_context *ctx = si_shader_context(bld_base);
1174         struct gallivm_state *gallivm = bld_base->base.gallivm;
1175         struct si_llvm_flow *flow = push_flow(ctx);
1176         LLVMBasicBlockRef if_block;
1177
1178         if_block = append_basic_block(ctx, "IF");
1179         flow->next_block = append_basic_block(ctx, "ELSE");
1180         set_basicblock_name(if_block, "if", bld_base->pc);
1181         LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1182         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1183 }
1184
1185 static void if_emit(const struct lp_build_tgsi_action *action,
1186                     struct lp_build_tgsi_context *bld_base,
1187                     struct lp_build_emit_data *emit_data)
1188 {
1189         struct gallivm_state *gallivm = bld_base->base.gallivm;
1190         LLVMValueRef cond;
1191
1192         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1193                         emit_data->args[0],
1194                         bld_base->base.zero, "");
1195
1196         if_cond_emit(action, bld_base, emit_data, cond);
1197 }
1198
1199 static void uif_emit(const struct lp_build_tgsi_action *action,
1200                      struct lp_build_tgsi_context *bld_base,
1201                      struct lp_build_emit_data *emit_data)
1202 {
1203         struct gallivm_state *gallivm = bld_base->base.gallivm;
1204         LLVMValueRef cond;
1205
1206         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1207                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1208                         bld_base->int_bld.zero, "");
1209
1210         if_cond_emit(action, bld_base, emit_data, cond);
1211 }
1212
1213 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1214                            const struct tgsi_full_immediate *imm)
1215 {
1216         unsigned i;
1217         struct si_shader_context *ctx = si_shader_context(bld_base);
1218
1219         for (i = 0; i < 4; ++i) {
1220                 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1221                                 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
1222         }
1223
1224         ctx->imms_num++;
1225 }
1226
1227 void si_llvm_context_init(struct si_shader_context *ctx,
1228                           struct si_screen *sscreen,
1229                           struct si_shader *shader,
1230                           LLVMTargetMachineRef tm,
1231                           const struct tgsi_shader_info *info,
1232                           const struct tgsi_token *tokens)
1233 {
1234         struct lp_type type;
1235
1236         /* Initialize the gallivm object:
1237          * We are only using the module, context, and builder fields of this struct.
1238          * This should be enough for us to be able to pass our gallivm struct to the
1239          * helper functions in the gallivm module.
1240          */
1241         memset(ctx, 0, sizeof(*ctx));
1242         ctx->shader = shader;
1243         ctx->screen = sscreen;
1244         ctx->tm = tm;
1245         ctx->type = info ? info->processor : -1;
1246
1247         ctx->gallivm.context = LLVMContextCreate();
1248         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1249                                                 ctx->gallivm.context);
1250         LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1251
1252 #if HAVE_LLVM >= 0x0309
1253         LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1254         char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1255         LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1256         LLVMDisposeTargetData(data_layout);
1257         LLVMDisposeMessage(data_layout_str);
1258 #endif
1259
1260         bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1261         enum lp_float_mode float_mode =
1262                 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1263                                 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1264
1265         ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1266                                                  float_mode);
1267
1268         ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1269         ctx->ac.module = ctx->gallivm.module;
1270         ctx->ac.builder = ctx->gallivm.builder;
1271
1272         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1273
1274         bld_base->info = info;
1275
1276         if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1277                 int size = info->array_max[TGSI_FILE_TEMPORARY];
1278
1279                 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1280                 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1281
1282                 if (tokens)
1283                         tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1284                                          ctx->temp_arrays);
1285         }
1286
1287         if (info && info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1288                 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1289                 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1290         }
1291
1292         type.floating = true;
1293         type.fixed = false;
1294         type.sign = true;
1295         type.norm = false;
1296         type.width = 32;
1297         type.length = 1;
1298
1299         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1300         lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1301         lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1302         type.width *= 2;
1303         lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1304         lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1305         lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1306
1307         bld_base->soa = 1;
1308         bld_base->emit_store = si_llvm_emit_store;
1309         bld_base->emit_swizzle = emit_swizzle;
1310         bld_base->emit_declaration = emit_declaration;
1311         bld_base->emit_immediate = emit_immediate;
1312
1313         bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1314         bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1315         bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1316         bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1317         bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1318
1319         /* metadata allowing 2.5 ULP */
1320         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1321                                                        "fpmath", 6);
1322         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1323         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1324                                                      &arg, 1);
1325
1326         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1327         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1328         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1329         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1330         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1331         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1332         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1333         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1334
1335         si_shader_context_init_alu(&ctx->bld_base);
1336
1337         ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1338         ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1339         ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1340         ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1341         ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1342         ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1343         ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1344         ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
1345         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1346         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1347         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1348         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1349
1350         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1351         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1352 }
1353
1354 void si_llvm_create_func(struct si_shader_context *ctx,
1355                          const char *name,
1356                          LLVMTypeRef *return_types, unsigned num_return_elems,
1357                          LLVMTypeRef *ParamTypes, unsigned ParamCount)
1358 {
1359         LLVMTypeRef main_fn_type, ret_type;
1360         LLVMBasicBlockRef main_fn_body;
1361
1362         if (num_return_elems)
1363                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1364                                                    return_types,
1365                                                    num_return_elems, true);
1366         else
1367                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1368
1369         /* Setup the function */
1370         ctx->return_type = ret_type;
1371         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1372         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1373         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1374                         ctx->main_fn, "main_body");
1375         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1376 }
1377
1378 void si_llvm_finalize_module(struct si_shader_context *ctx,
1379                              bool run_verifier)
1380 {
1381         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
1382         const char *triple = LLVMGetTarget(gallivm->module);
1383         LLVMTargetLibraryInfoRef target_library_info;
1384
1385         /* Create the pass manager */
1386         gallivm->passmgr = LLVMCreatePassManager();
1387
1388         target_library_info = gallivm_create_target_library_info(triple);
1389         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1390
1391         if (run_verifier)
1392                 LLVMAddVerifierPass(gallivm->passmgr);
1393
1394         LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1395
1396         /* This pass should eliminate all the load and store instructions */
1397         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1398
1399         /* Add some optimization passes */
1400         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1401         LLVMAddLICMPass(gallivm->passmgr);
1402         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1403         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1404         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1405
1406         /* Run the pass */
1407         LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1408
1409         LLVMDisposeBuilder(gallivm->builder);
1410         LLVMDisposePassManager(gallivm->passmgr);
1411         gallivm_dispose_target_library_info(target_library_info);
1412 }
1413
1414 void si_llvm_dispose(struct si_shader_context *ctx)
1415 {
1416         LLVMDisposeModule(ctx->bld_base.base.gallivm->module);
1417         LLVMContextDispose(ctx->bld_base.base.gallivm->context);
1418         FREE(ctx->temp_arrays);
1419         ctx->temp_arrays = NULL;
1420         FREE(ctx->temp_array_allocas);
1421         ctx->temp_array_allocas = NULL;
1422         FREE(ctx->temps);
1423         ctx->temps = NULL;
1424         ctx->temps_count = 0;
1425         FREE(ctx->imms);
1426         ctx->imms = NULL;
1427         ctx->imms_num = 0;
1428         FREE(ctx->flow);
1429         ctx->flow = NULL;
1430         ctx->flow_depth_max = 0;
1431 }