src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "si_shader_internal.h"
  25 #include "si_pipe.h"
  26
  27 #include "gallivm/lp_bld_const.h"
  28 #include "gallivm/lp_bld_gather.h"
  29 #include "gallivm/lp_bld_flow.h"
  30 #include "gallivm/lp_bld_init.h"
  31 #include "gallivm/lp_bld_intr.h"
  32 #include "gallivm/lp_bld_misc.h"
  33 #include "gallivm/lp_bld_swizzle.h"
  34 #include "tgsi/tgsi_info.h"
  35 #include "tgsi/tgsi_parse.h"
  36 #include "util/u_math.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include <stdio.h>
  41 #include <llvm-c/Transforms/IPO.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43 #include <llvm-c/Support.h>
  44
  45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
  46  */
  47 struct si_llvm_flow {
  48         /* Loop exit or next part of if/else/endif. */
  49         LLVMBasicBlockRef next_block;
  50         LLVMBasicBlockRef loop_entry_block;
  51 };
  52
  53 enum si_llvm_calling_convention {
  54         RADEON_LLVM_AMDGPU_VS = 87,
  55         RADEON_LLVM_AMDGPU_GS = 88,
  56         RADEON_LLVM_AMDGPU_PS = 89,
  57         RADEON_LLVM_AMDGPU_CS = 90,
  58 };
  59
  60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
  61 {
  62         char str[16];
  63
  64         snprintf(str, sizeof(str), "%i", value);
  65         LLVMAddTargetDependentFunctionAttr(F, name, str);
  66 }
  67
  68 /**
  69  * Set the shader type we want to compile
  70  *
  71  * @param type shader type to set
  72  */
  73 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
  74 {
  75         enum si_llvm_calling_convention calling_conv;
  76
  77         switch (type) {
  78         case PIPE_SHADER_VERTEX:
  79         case PIPE_SHADER_TESS_CTRL:
  80         case PIPE_SHADER_TESS_EVAL:
  81                 calling_conv = RADEON_LLVM_AMDGPU_VS;
  82                 break;
  83         case PIPE_SHADER_GEOMETRY:
  84                 calling_conv = RADEON_LLVM_AMDGPU_GS;
  85                 break;
  86         case PIPE_SHADER_FRAGMENT:
  87                 calling_conv = RADEON_LLVM_AMDGPU_PS;
  88                 break;
  89         case PIPE_SHADER_COMPUTE:
  90                 calling_conv = RADEON_LLVM_AMDGPU_CS;
  91                 break;
  92         default:
  93                 unreachable("Unhandle shader type");
  94         }
  95
  96         LLVMSetFunctionCallConv(F, calling_conv);
  97 }
  98
  99 static void init_amdgpu_target()
 100 {
 101         gallivm_init_llvm_targets();
 102         LLVMInitializeAMDGPUTargetInfo();
 103         LLVMInitializeAMDGPUTarget();
 104         LLVMInitializeAMDGPUTargetMC();
 105         LLVMInitializeAMDGPUAsmPrinter();
 106
 107         /* For inline assembly. */
 108         LLVMInitializeAMDGPUAsmParser();
 109
 110         if (HAVE_LLVM >= 0x0400) {
 111                 /*
 112                  * Workaround for bug in llvm 4.0 that causes image intrinsics
 113                  * to disappear.
 114                  * https://reviews.llvm.org/D26348
 115                  */
 116                 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
 117                 LLVMParseCommandLineOptions(2, argv, NULL);
 118         }
 119 }
 120
 121 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
 122
 123 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
 124 {
 125         LLVMTargetRef target = NULL;
 126         char *err_message = NULL;
 127
 128         call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
 129
 130         if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
 131                 fprintf(stderr, "Cannot find target for triple %s ", triple);
 132                 if (err_message) {
 133                         fprintf(stderr, "%s\n", err_message);
 134                 }
 135                 LLVMDisposeMessage(err_message);
 136                 return NULL;
 137         }
 138         return target;
 139 }
 140
 141 struct si_llvm_diagnostics {
 142         struct pipe_debug_callback *debug;
 143         unsigned retval;
 144 };
 145
 146 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 147 {
 148         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
 149         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
 150         char *description = LLVMGetDiagInfoDescription(di);
 151         const char *severity_str = NULL;
 152
 153         switch (severity) {
 154         case LLVMDSError:
 155                 severity_str = "error";
 156                 break;
 157         case LLVMDSWarning:
 158                 severity_str = "warning";
 159                 break;
 160         case LLVMDSRemark:
 161                 severity_str = "remark";
 162                 break;
 163         case LLVMDSNote:
 164                 severity_str = "note";
 165                 break;
 166         default:
 167                 severity_str = "unknown";
 168         }
 169
 170         pipe_debug_message(diag->debug, SHADER_INFO,
 171                            "LLVM diagnostic (%s): %s", severity_str, description);
 172
 173         if (severity == LLVMDSError) {
 174                 diag->retval = 1;
 175                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
 176         }
 177
 178         LLVMDisposeMessage(description);
 179 }
 180
 181 /**
 182  * Compile an LLVM module to machine code.
 183  *
 184  * @returns 0 for success, 1 for failure
 185  */
 186 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 187                          LLVMTargetMachineRef tm,
 188                          struct pipe_debug_callback *debug)
 189 {
 190         struct si_llvm_diagnostics diag;
 191         char *err;
 192         LLVMContextRef llvm_ctx;
 193         LLVMMemoryBufferRef out_buffer;
 194         unsigned buffer_size;
 195         const char *buffer_data;
 196         LLVMBool mem_err;
 197
 198         diag.debug = debug;
 199         diag.retval = 0;
 200
 201         /* Setup Diagnostic Handler*/
 202         llvm_ctx = LLVMGetModuleContext(M);
 203
 204         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 205
 206         /* Compile IR*/
 207         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
 208                                                                  &out_buffer);
 209
 210         /* Process Errors/Warnings */
 211         if (mem_err) {
 212                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
 213                 pipe_debug_message(debug, SHADER_INFO,
 214                                    "LLVM emit error: %s", err);
 215                 FREE(err);
 216                 diag.retval = 1;
 217                 goto out;
 218         }
 219
 220         /* Extract Shader Code*/
 221         buffer_size = LLVMGetBufferSize(out_buffer);
 222         buffer_data = LLVMGetBufferStart(out_buffer);
 223
 224         ac_elf_read(buffer_data, buffer_size, binary);
 225
 226         /* Clean up */
 227         LLVMDisposeMemoryBuffer(out_buffer);
 228
 229 out:
 230         if (diag.retval != 0)
 231                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
 232         return diag.retval;
 233 }
 234
 235 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 236                           enum tgsi_opcode_type type)
 237 {
 238         LLVMContextRef ctx = bld_base->base.gallivm->context;
 239
 240         switch (type) {
 241         case TGSI_TYPE_UNSIGNED:
 242         case TGSI_TYPE_SIGNED:
 243                 return LLVMInt32TypeInContext(ctx);
 244         case TGSI_TYPE_UNSIGNED64:
 245         case TGSI_TYPE_SIGNED64:
 246                 return LLVMInt64TypeInContext(ctx);
 247         case TGSI_TYPE_DOUBLE:
 248                 return LLVMDoubleTypeInContext(ctx);
 249         case TGSI_TYPE_UNTYPED:
 250         case TGSI_TYPE_FLOAT:
 251                 return LLVMFloatTypeInContext(ctx);
 252         default: break;
 253         }
 254         return 0;
 255 }
 256
 257 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 258                      enum tgsi_opcode_type type, LLVMValueRef value)
 259 {
 260         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 261         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 262
 263         if (dst_type)
 264                 return LLVMBuildBitCast(builder, value, dst_type, "");
 265         else
 266                 return value;
 267 }
 268
 269 /**
 270  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 271  * or an undefined value in the same interval otherwise.
 272  */
 273 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 274                                  LLVMValueRef index,
 275                                  unsigned num)
 276 {
 277         struct gallivm_state *gallivm = &ctx->gallivm;
 278         LLVMBuilderRef builder = gallivm->builder;
 279         LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
 280         LLVMValueRef cc;
 281
 282         if (util_is_power_of_two(num)) {
 283                 index = LLVMBuildAnd(builder, index, c_max, "");
 284         } else {
 285                 /* In theory, this MAX pattern should result in code that is
 286                  * as good as the bit-wise AND above.
 287                  *
 288                  * In practice, LLVM generates worse code (at the time of
 289                  * writing), because its value tracking is not strong enough.
 290                  */
 291                 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 292                 index = LLVMBuildSelect(builder, cc, index, c_max, "");
 293         }
 294
 295         return index;
 296 }
 297
 298 static struct si_llvm_flow *
 299 get_current_flow(struct si_shader_context *ctx)
 300 {
 301         if (ctx->flow_depth > 0)
 302                 return &ctx->flow[ctx->flow_depth - 1];
 303         return NULL;
 304 }
 305
 306 static struct si_llvm_flow *
 307 get_innermost_loop(struct si_shader_context *ctx)
 308 {
 309         for (unsigned i = ctx->flow_depth; i > 0; --i) {
 310                 if (ctx->flow[i - 1].loop_entry_block)
 311                         return &ctx->flow[i - 1];
 312         }
 313         return NULL;
 314 }
 315
 316 static struct si_llvm_flow *
 317 push_flow(struct si_shader_context *ctx)
 318 {
 319         struct si_llvm_flow *flow;
 320
 321         if (ctx->flow_depth >= ctx->flow_depth_max) {
 322                 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
 323                 ctx->flow = REALLOC(ctx->flow,
 324                                     ctx->flow_depth_max * sizeof(*ctx->flow),
 325                                     new_max * sizeof(*ctx->flow));
 326                 ctx->flow_depth_max = new_max;
 327         }
 328
 329         flow = &ctx->flow[ctx->flow_depth];
 330         ctx->flow_depth++;
 331
 332         flow->next_block = NULL;
 333         flow->loop_entry_block = NULL;
 334         return flow;
 335 }
 336
 337 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 338                                  LLVMValueRef value,
 339                                  unsigned swizzle_x,
 340                                  unsigned swizzle_y,
 341                                  unsigned swizzle_z,
 342                                  unsigned swizzle_w)
 343 {
 344         LLVMValueRef swizzles[4];
 345         LLVMTypeRef i32t =
 346                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 347
 348         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 349         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 350         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 351         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 352
 353         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 354                                       value,
 355                                       LLVMGetUndef(LLVMTypeOf(value)),
 356                                       LLVMConstVector(swizzles, 4), "");
 357 }
 358
 359 /**
 360  * Return the description of the array covering the given temporary register
 361  * index.
 362  */
 363 static unsigned
 364 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
 365                   unsigned reg_index,
 366                   const struct tgsi_ind_register *reg)
 367 {
 368         struct si_shader_context *ctx = si_shader_context(bld_base);
 369         unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
 370         unsigned i;
 371
 372         if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
 373                 return reg->ArrayID;
 374
 375         for (i = 0; i < num_arrays; i++) {
 376                 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 377
 378                 if (reg_index >= array->range.First && reg_index <= array->range.Last)
 379                         return i + 1;
 380         }
 381
 382         return 0;
 383 }
 384
 385 static struct tgsi_declaration_range
 386 get_array_range(struct lp_build_tgsi_context *bld_base,
 387                 unsigned File, unsigned reg_index,
 388                 const struct tgsi_ind_register *reg)
 389 {
 390         struct si_shader_context *ctx = si_shader_context(bld_base);
 391         struct tgsi_declaration_range range;
 392
 393         if (File == TGSI_FILE_TEMPORARY) {
 394                 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
 395                 if (array_id)
 396                         return ctx->temp_arrays[array_id - 1].range;
 397         }
 398
 399         range.First = 0;
 400         range.Last = bld_base->info->file_max[File];
 401         return range;
 402 }
 403
 404 static LLVMValueRef
 405 emit_array_index(struct si_shader_context *ctx,
 406                  const struct tgsi_ind_register *reg,
 407                  unsigned offset)
 408 {
 409         struct gallivm_state *gallivm = &ctx->gallivm;
 410
 411         if (!reg) {
 412                 return LLVMConstInt(ctx->i32, offset, 0);
 413         }
 414         LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
 415         return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
 416 }
 417
 418 /**
 419  * For indirect registers, construct a pointer directly to the requested
 420  * element using getelementptr if possible.
 421  *
 422  * Returns NULL if the insertelement/extractelement fallback for array access
 423  * must be used.
 424  */
 425 static LLVMValueRef
 426 get_pointer_into_array(struct si_shader_context *ctx,
 427                        unsigned file,
 428                        unsigned swizzle,
 429                        unsigned reg_index,
 430                        const struct tgsi_ind_register *reg_indirect)
 431 {
 432         unsigned array_id;
 433         struct tgsi_array_info *array;
 434         struct gallivm_state *gallivm = &ctx->gallivm;
 435         LLVMBuilderRef builder = gallivm->builder;
 436         LLVMValueRef idxs[2];
 437         LLVMValueRef index;
 438         LLVMValueRef alloca;
 439
 440         if (file != TGSI_FILE_TEMPORARY)
 441                 return NULL;
 442
 443         array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
 444         if (!array_id)
 445                 return NULL;
 446
 447         alloca = ctx->temp_array_allocas[array_id - 1];
 448         if (!alloca)
 449                 return NULL;
 450
 451         array = &ctx->temp_arrays[array_id - 1];
 452
 453         if (!(array->writemask & (1 << swizzle)))
 454                 return ctx->undef_alloca;
 455
 456         index = emit_array_index(ctx, reg_indirect,
 457                                  reg_index - ctx->temp_arrays[array_id - 1].range.First);
 458
 459         /* Ensure that the index is within a valid range, to guard against
 460          * VM faults and overwriting critical data (e.g. spilled resource
 461          * descriptors).
 462          *
 463          * TODO It should be possible to avoid the additional instructions
 464          * if LLVM is changed so that it guarantuees:
 465          * 1. the scratch space descriptor isolates the current wave (this
 466          *    could even save the scratch offset SGPR at the cost of an
 467          *    additional SALU instruction)
 468          * 2. the memory for allocas must be allocated at the _end_ of the
 469          *    scratch space (after spilled registers)
 470          */
 471         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
 472
 473         index = LLVMBuildMul(
 474                 builder, index,
 475                 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
 476                 "");
 477         index = LLVMBuildAdd(
 478                 builder, index,
 479                 LLVMConstInt(ctx->i32,
 480                              util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
 481                 "");
 482         idxs[0] = ctx->i32_0;
 483         idxs[1] = index;
 484         return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 485 }
 486
 487 LLVMValueRef
 488 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 489                          enum tgsi_opcode_type type,
 490                          LLVMValueRef ptr,
 491                          LLVMValueRef ptr2)
 492 {
 493         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 494         LLVMValueRef result;
 495
 496         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 497
 498         result = LLVMBuildInsertElement(builder,
 499                                         result,
 500                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
 501                                         bld_base->int_bld.zero, "");
 502         result = LLVMBuildInsertElement(builder,
 503                                         result,
 504                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
 505                                         bld_base->int_bld.one, "");
 506         return bitcast(bld_base, type, result);
 507 }
 508
 509 static LLVMValueRef
 510 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 511                  unsigned File, enum tgsi_opcode_type type,
 512                  struct tgsi_declaration_range range,
 513                  unsigned swizzle)
 514 {
 515         struct si_shader_context *ctx = si_shader_context(bld_base);
 516
 517         LLVMBuilderRef builder = ctx->gallivm.builder;
 518
 519         unsigned i, size = range.Last - range.First + 1;
 520         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 521         LLVMValueRef result = LLVMGetUndef(vec);
 522
 523         struct tgsi_full_src_register tmp_reg = {};
 524         tmp_reg.Register.File = File;
 525
 526         for (i = 0; i < size; ++i) {
 527                 tmp_reg.Register.Index = i + range.First;
 528                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 529                 result = LLVMBuildInsertElement(builder, result, temp,
 530                         LLVMConstInt(ctx->i32, i, 0), "array_vector");
 531         }
 532         return result;
 533 }
 534
 535 static LLVMValueRef
 536 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 537                       unsigned file,
 538                       enum tgsi_opcode_type type,
 539                       unsigned swizzle,
 540                       unsigned reg_index,
 541                       const struct tgsi_ind_register *reg_indirect)
 542 {
 543         struct si_shader_context *ctx = si_shader_context(bld_base);
 544         struct gallivm_state *gallivm = &ctx->gallivm;
 545         LLVMBuilderRef builder = gallivm->builder;
 546         LLVMValueRef ptr;
 547
 548         ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
 549         if (ptr) {
 550                 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
 551                 if (tgsi_type_is_64bit(type)) {
 552                         LLVMValueRef ptr_hi, val_hi;
 553                         ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
 554                         val_hi = LLVMBuildLoad(builder, ptr_hi, "");
 555                         val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
 556                 }
 557
 558                 return val;
 559         } else {
 560                 struct tgsi_declaration_range range =
 561                         get_array_range(bld_base, file, reg_index, reg_indirect);
 562                 LLVMValueRef index =
 563                         emit_array_index(ctx, reg_indirect, reg_index - range.First);
 564                 LLVMValueRef array =
 565                         emit_array_fetch(bld_base, file, type, range, swizzle);
 566                 return LLVMBuildExtractElement(builder, array, index, "");
 567         }
 568 }
 569
 570 static void
 571 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 572                      LLVMValueRef value,
 573                      unsigned file,
 574                      unsigned chan_index,
 575                      unsigned reg_index,
 576                      const struct tgsi_ind_register *reg_indirect)
 577 {
 578         struct si_shader_context *ctx = si_shader_context(bld_base);
 579         struct gallivm_state *gallivm = &ctx->gallivm;
 580         LLVMBuilderRef builder = gallivm->builder;
 581         LLVMValueRef ptr;
 582
 583         ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
 584         if (ptr) {
 585                 LLVMBuildStore(builder, value, ptr);
 586         } else {
 587                 unsigned i, size;
 588                 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
 589                 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
 590                 LLVMValueRef array =
 591                         emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
 592                 LLVMValueRef temp_ptr;
 593
 594                 array = LLVMBuildInsertElement(builder, array, value, index, "");
 595
 596                 size = range.Last - range.First + 1;
 597                 for (i = 0; i < size; ++i) {
 598                         switch(file) {
 599                         case TGSI_FILE_OUTPUT:
 600                                 temp_ptr = ctx->outputs[i + range.First][chan_index];
 601                                 break;
 602
 603                         case TGSI_FILE_TEMPORARY:
 604                                 if (range.First + i >= ctx->temps_count)
 605                                         continue;
 606                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 607                                 break;
 608
 609                         default:
 610                                 continue;
 611                         }
 612                         value = LLVMBuildExtractElement(builder, array,
 613                                 LLVMConstInt(ctx->i32, i, 0), "");
 614                         LLVMBuildStore(builder, value, temp_ptr);
 615                 }
 616         }
 617 }
 618
 619 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 620  * reload them at each use. This must be true if the shader is using
 621  * derivatives and KILL, because KILL can leave the WQM and then a lazy
 622  * input load isn't in the WQM anymore.
 623  */
 624 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
 625 {
 626         struct si_shader_selector *sel = ctx->shader->selector;
 627
 628         return sel->info.uses_derivatives &&
 629                sel->info.uses_kill;
 630 }
 631
 632 static LLVMValueRef
 633 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 634                unsigned chan)
 635 {
 636         struct si_shader_context *ctx = si_shader_context(bld_base);
 637
 638         assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
 639         return ctx->outputs[index][chan];
 640 }
 641
 642 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 643                                 const struct tgsi_full_src_register *reg,
 644                                 enum tgsi_opcode_type type,
 645                                 unsigned swizzle)
 646 {
 647         struct si_shader_context *ctx = si_shader_context(bld_base);
 648         LLVMBuilderRef builder = ctx->gallivm.builder;
 649         LLVMValueRef result = NULL, ptr, ptr2;
 650
 651         if (swizzle == ~0) {
 652                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 653                 unsigned chan;
 654                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 655                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
 656                 }
 657                 return lp_build_gather_values(&ctx->gallivm, values,
 658                                               TGSI_NUM_CHANNELS);
 659         }
 660
 661         if (reg->Register.Indirect) {
 662                 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
 663                                 swizzle, reg->Register.Index, &reg->Indirect);
 664                 return bitcast(bld_base, type, load);
 665         }
 666
 667         switch(reg->Register.File) {
 668         case TGSI_FILE_IMMEDIATE: {
 669                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 670                 if (tgsi_type_is_64bit(type)) {
 671                         result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
 672                         result = LLVMConstInsertElement(result,
 673                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 674                                                         ctx->i32_0);
 675                         result = LLVMConstInsertElement(result,
 676                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 677                                                         ctx->i32_1);
 678                         return LLVMConstBitCast(result, ctype);
 679                 } else {
 680                         return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 681                 }
 682         }
 683
 684         case TGSI_FILE_INPUT: {
 685                 unsigned index = reg->Register.Index;
 686                 LLVMValueRef input[4];
 687
 688                 /* I don't think doing this for vertex shaders is beneficial.
 689                  * For those, we want to make sure the VMEM loads are executed
 690                  * only once. Fragment shaders don't care much, because
 691                  * v_interp instructions are much cheaper than VMEM loads.
 692                  */
 693                 if (!si_preload_fs_inputs(ctx) &&
 694                     ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
 695                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
 696                 else
 697                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
 698
 699                 result = input[swizzle];
 700
 701                 if (tgsi_type_is_64bit(type)) {
 702                         ptr = result;
 703                         ptr2 = input[swizzle + 1];
 704                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 705                 }
 706                 break;
 707         }
 708
 709         case TGSI_FILE_TEMPORARY:
 710                 if (reg->Register.Index >= ctx->temps_count)
 711                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 712                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 713                 if (tgsi_type_is_64bit(type)) {
 714                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 715                         return si_llvm_emit_fetch_64bit(bld_base, type,
 716                                                         LLVMBuildLoad(builder, ptr, ""),
 717                                                         LLVMBuildLoad(builder, ptr2, ""));
 718                 }
 719                 result = LLVMBuildLoad(builder, ptr, "");
 720                 break;
 721
 722         case TGSI_FILE_OUTPUT:
 723                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
 724                 if (tgsi_type_is_64bit(type)) {
 725                         ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
 726                         return si_llvm_emit_fetch_64bit(bld_base, type,
 727                                                         LLVMBuildLoad(builder, ptr, ""),
 728                                                         LLVMBuildLoad(builder, ptr2, ""));
 729                 }
 730                 result = LLVMBuildLoad(builder, ptr, "");
 731                 break;
 732
 733         default:
 734                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 735         }
 736
 737         return bitcast(bld_base, type, result);
 738 }
 739
 740 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 741                                        const struct tgsi_full_src_register *reg,
 742                                        enum tgsi_opcode_type type,
 743                                        unsigned swizzle)
 744 {
 745         struct si_shader_context *ctx = si_shader_context(bld_base);
 746         LLVMBuilderRef builder = ctx->gallivm.builder;
 747         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 748
 749         if (tgsi_type_is_64bit(type)) {
 750                 LLVMValueRef lo, hi;
 751
 752                 assert(swizzle == 0 || swizzle == 2);
 753
 754                 lo = LLVMBuildExtractElement(
 755                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 756                 hi = LLVMBuildExtractElement(
 757                         builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
 758
 759                 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
 760         }
 761
 762         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 763                 cval = LLVMBuildExtractElement(
 764                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 765         } else {
 766                 assert(swizzle == 0);
 767         }
 768
 769         return bitcast(bld_base, type, cval);
 770 }
 771
 772 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 773                              const struct tgsi_full_declaration *decl)
 774 {
 775         struct si_shader_context *ctx = si_shader_context(bld_base);
 776         LLVMBuilderRef builder = ctx->gallivm.builder;
 777         unsigned first, last, i;
 778         switch(decl->Declaration.File) {
 779         case TGSI_FILE_ADDRESS:
 780         {
 781                  unsigned idx;
 782                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 783                         unsigned chan;
 784                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 785                                  ctx->addrs[idx][chan] = lp_build_alloca_undef(
 786                                         &ctx->gallivm,
 787                                         ctx->i32, "");
 788                         }
 789                 }
 790                 break;
 791         }
 792
 793         case TGSI_FILE_TEMPORARY:
 794         {
 795                 char name[16] = "";
 796                 LLVMValueRef array_alloca = NULL;
 797                 unsigned decl_size;
 798                 unsigned writemask = decl->Declaration.UsageMask;
 799                 first = decl->Range.First;
 800                 last = decl->Range.Last;
 801                 decl_size = 4 * ((last - first) + 1);
 802
 803                 if (decl->Declaration.Array) {
 804                         unsigned id = decl->Array.ArrayID - 1;
 805                         unsigned array_size;
 806
 807                         writemask &= ctx->temp_arrays[id].writemask;
 808                         ctx->temp_arrays[id].writemask = writemask;
 809                         array_size = ((last - first) + 1) * util_bitcount(writemask);
 810
 811                         /* If the array has more than 16 elements, store it
 812                          * in memory using an alloca that spans the entire
 813                          * array.
 814                          *
 815                          * Otherwise, store each array element individually.
 816                          * We will then generate vectors (per-channel, up to
 817                          * <16 x float> if the usagemask is a single bit) for
 818                          * indirect addressing.
 819                          *
 820                          * Note that 16 is the number of vector elements that
 821                          * LLVM will store in a register, so theoretically an
 822                          * array with up to 4 * 16 = 64 elements could be
 823                          * handled this way, but whether that's a good idea
 824                          * depends on VGPR register pressure elsewhere.
 825                          *
 826                          * FIXME: We shouldn't need to have the non-alloca
 827                          * code path for arrays. LLVM should be smart enough to
 828                          * promote allocas into registers when profitable.
 829                          */
 830                         if (array_size > 16 ||
 831                             /* TODO: VGPR indexing is buggy on GFX9. */
 832                             ctx->screen->b.chip_class == GFX9) {
 833                                 array_alloca = LLVMBuildAlloca(builder,
 834                                         LLVMArrayType(ctx->f32,
 835                                                       array_size), "array");
 836                                 ctx->temp_array_allocas[id] = array_alloca;
 837                         }
 838                 }
 839
 840                 if (!ctx->temps_count) {
 841                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 842                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 843                 }
 844                 if (!array_alloca) {
 845                         for (i = 0; i < decl_size; ++i) {
 846 #ifdef DEBUG
 847                                 snprintf(name, sizeof(name), "TEMP%d.%c",
 848                                          first + i / 4, "xyzw"[i % 4]);
 849 #endif
 850                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
 851                                         lp_build_alloca_undef(&ctx->gallivm,
 852                                                               ctx->f32,
 853                                                               name);
 854                         }
 855                 } else {
 856                         LLVMValueRef idxs[2] = {
 857                                 ctx->i32_0,
 858                                 NULL
 859                         };
 860                         unsigned j = 0;
 861
 862                         if (writemask != TGSI_WRITEMASK_XYZW &&
 863                             !ctx->undef_alloca) {
 864                                 /* Create a dummy alloca. We use it so that we
 865                                  * have a pointer that is safe to load from if
 866                                  * a shader ever reads from a channel that
 867                                  * it never writes to.
 868                                  */
 869                                 ctx->undef_alloca = lp_build_alloca_undef(
 870                                         &ctx->gallivm,
 871                                         ctx->f32, "undef");
 872                         }
 873
 874                         for (i = 0; i < decl_size; ++i) {
 875                                 LLVMValueRef ptr;
 876                                 if (writemask & (1 << (i % 4))) {
 877 #ifdef DEBUG
 878                                         snprintf(name, sizeof(name), "TEMP%d.%c",
 879                                                  first + i / 4, "xyzw"[i % 4]);
 880 #endif
 881                                         idxs[1] = LLVMConstInt(ctx->i32, j, 0);
 882                                         ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
 883                                         j++;
 884                                 } else {
 885                                         ptr = ctx->undef_alloca;
 886                                 }
 887                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 888                         }
 889                 }
 890                 break;
 891         }
 892         case TGSI_FILE_INPUT:
 893         {
 894                 unsigned idx;
 895                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 896                         if (ctx->load_input &&
 897                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
 898                                 ctx->input_decls[idx] = *decl;
 899                                 ctx->input_decls[idx].Range.First = idx;
 900                                 ctx->input_decls[idx].Range.Last = idx;
 901                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
 902
 903                                 if (si_preload_fs_inputs(ctx) ||
 904                                     bld_base->info->processor != PIPE_SHADER_FRAGMENT)
 905                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
 906                                                         &ctx->inputs[idx * 4]);
 907                         }
 908                 }
 909         }
 910         break;
 911
 912         case TGSI_FILE_SYSTEM_VALUE:
 913         {
 914                 unsigned idx;
 915                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 916                         ctx->load_system_value(ctx, idx, decl);
 917                 }
 918         }
 919         break;
 920
 921         case TGSI_FILE_OUTPUT:
 922         {
 923                 char name[16] = "";
 924                 unsigned idx;
 925                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 926                         unsigned chan;
 927                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 928                         if (ctx->outputs[idx][0])
 929                                 continue;
 930                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 931 #ifdef DEBUG
 932                                 snprintf(name, sizeof(name), "OUT%d.%c",
 933                                          idx, "xyzw"[chan % 4]);
 934 #endif
 935                                 ctx->outputs[idx][chan] = lp_build_alloca_undef(
 936                                         &ctx->gallivm,
 937                                         ctx->f32, name);
 938                         }
 939                 }
 940                 break;
 941         }
 942
 943         case TGSI_FILE_MEMORY:
 944                 ctx->declare_memory_region(ctx, decl);
 945                 break;
 946
 947         default:
 948                 break;
 949         }
 950 }
 951
 952 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 953                         const struct tgsi_full_instruction *inst,
 954                         const struct tgsi_opcode_info *info,
 955                         LLVMValueRef dst[4])
 956 {
 957         struct si_shader_context *ctx = si_shader_context(bld_base);
 958         struct gallivm_state *gallivm = &ctx->gallivm;
 959         const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 960         LLVMBuilderRef builder = ctx->gallivm.builder;
 961         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 962         unsigned chan, chan_index;
 963         bool is_vec_store = false;
 964         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 965
 966         if (dst[0]) {
 967                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 968                 is_vec_store = (k == LLVMVectorTypeKind);
 969         }
 970
 971         if (is_vec_store) {
 972                 LLVMValueRef values[4] = {};
 973                 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
 974                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
 975                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 976                                                         dst[0], index, "");
 977                 }
 978                 bld_base->emit_store(bld_base, inst, info, values);
 979                 return;
 980         }
 981
 982         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
 983                 LLVMValueRef value = dst[chan_index];
 984
 985                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 986                         continue;
 987                 if (inst->Instruction.Saturate)
 988                         value = ac_build_clamp(&ctx->ac, value);
 989
 990                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 991                         temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 992                         LLVMBuildStore(builder, value, temp_ptr);
 993                         continue;
 994                 }
 995
 996                 if (!tgsi_type_is_64bit(dtype))
 997                         value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 998
 999                 if (reg->Register.Indirect) {
1000                         unsigned file = reg->Register.File;
1001                         unsigned reg_index = reg->Register.Index;
1002                         store_value_to_array(bld_base, value, file, chan_index,
1003                                              reg_index, &reg->Indirect);
1004                 } else {
1005                         switch(reg->Register.File) {
1006                         case TGSI_FILE_OUTPUT:
1007                                 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
1008                                 if (tgsi_type_is_64bit(dtype))
1009                                         temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
1010                                 break;
1011
1012                         case TGSI_FILE_TEMPORARY:
1013                         {
1014                                 if (reg->Register.Index >= ctx->temps_count)
1015                                         continue;
1016
1017                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1018                                 if (tgsi_type_is_64bit(dtype))
1019                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1020
1021                                 break;
1022                         }
1023                         default:
1024                                 return;
1025                         }
1026                         if (!tgsi_type_is_64bit(dtype))
1027                                 LLVMBuildStore(builder, value, temp_ptr);
1028                         else {
1029                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1030                                                                     LLVMVectorType(ctx->i32, 2), "");
1031                                 LLVMValueRef val2;
1032                                 value = LLVMBuildExtractElement(builder, ptr,
1033                                                                 ctx->i32_0, "");
1034                                 val2 = LLVMBuildExtractElement(builder, ptr,
1035                                                                ctx->i32_1, "");
1036
1037                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1038                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1039                         }
1040                 }
1041         }
1042 }
1043
1044 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1045 {
1046         char buf[32];
1047         /* Subtract 1 so that the number shown is that of the corresponding
1048          * opcode in the TGSI dump, e.g. an if block has the same suffix as
1049          * the instruction number of the corresponding TGSI IF.
1050          */
1051         snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1052         LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1053 }
1054
1055 /* Append a basic block at the level of the parent flow.
1056  */
1057 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1058                                             const char *name)
1059 {
1060         struct gallivm_state *gallivm = &ctx->gallivm;
1061
1062         assert(ctx->flow_depth >= 1);
1063
1064         if (ctx->flow_depth >= 2) {
1065                 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1066
1067                 return LLVMInsertBasicBlockInContext(gallivm->context,
1068                                                      flow->next_block, name);
1069         }
1070
1071         return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1072 }
1073
1074 /* Emit a branch to the given default target for the current block if
1075  * applicable -- that is, if the current block does not already contain a
1076  * branch from a break or continue.
1077  */
1078 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1079 {
1080         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1081                  LLVMBuildBr(builder, target);
1082 }
1083
1084 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1085                          struct lp_build_tgsi_context *bld_base,
1086                          struct lp_build_emit_data *emit_data)
1087 {
1088         struct si_shader_context *ctx = si_shader_context(bld_base);
1089         struct gallivm_state *gallivm = &ctx->gallivm;
1090         struct si_llvm_flow *flow = push_flow(ctx);
1091         flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1092         flow->next_block = append_basic_block(ctx, "ENDLOOP");
1093         set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1094         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1095         LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1096 }
1097
1098 static void brk_emit(const struct lp_build_tgsi_action *action,
1099                      struct lp_build_tgsi_context *bld_base,
1100                      struct lp_build_emit_data *emit_data)
1101 {
1102         struct si_shader_context *ctx = si_shader_context(bld_base);
1103         struct gallivm_state *gallivm = &ctx->gallivm;
1104         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1105
1106         LLVMBuildBr(gallivm->builder, flow->next_block);
1107 }
1108
1109 static void cont_emit(const struct lp_build_tgsi_action *action,
1110                       struct lp_build_tgsi_context *bld_base,
1111                       struct lp_build_emit_data *emit_data)
1112 {
1113         struct si_shader_context *ctx = si_shader_context(bld_base);
1114         struct gallivm_state *gallivm = &ctx->gallivm;
1115         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1116
1117         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1118 }
1119
1120 static void else_emit(const struct lp_build_tgsi_action *action,
1121                       struct lp_build_tgsi_context *bld_base,
1122                       struct lp_build_emit_data *emit_data)
1123 {
1124         struct si_shader_context *ctx = si_shader_context(bld_base);
1125         struct gallivm_state *gallivm = &ctx->gallivm;
1126         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1127         LLVMBasicBlockRef endif_block;
1128
1129         assert(!current_branch->loop_entry_block);
1130
1131         endif_block = append_basic_block(ctx, "ENDIF");
1132         emit_default_branch(gallivm->builder, endif_block);
1133
1134         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1135         set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1136
1137         current_branch->next_block = endif_block;
1138 }
1139
1140 static void endif_emit(const struct lp_build_tgsi_action *action,
1141                        struct lp_build_tgsi_context *bld_base,
1142                        struct lp_build_emit_data *emit_data)
1143 {
1144         struct si_shader_context *ctx = si_shader_context(bld_base);
1145         struct gallivm_state *gallivm = &ctx->gallivm;
1146         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1147
1148         assert(!current_branch->loop_entry_block);
1149
1150         emit_default_branch(gallivm->builder, current_branch->next_block);
1151         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1152         set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1153
1154         ctx->flow_depth--;
1155 }
1156
1157 static void endloop_emit(const struct lp_build_tgsi_action *action,
1158                          struct lp_build_tgsi_context *bld_base,
1159                          struct lp_build_emit_data *emit_data)
1160 {
1161         struct si_shader_context *ctx = si_shader_context(bld_base);
1162         struct gallivm_state *gallivm = &ctx->gallivm;
1163         struct si_llvm_flow *current_loop = get_current_flow(ctx);
1164
1165         assert(current_loop->loop_entry_block);
1166
1167         emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1168
1169         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1170         set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1171         ctx->flow_depth--;
1172 }
1173
1174 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1175                          struct lp_build_tgsi_context *bld_base,
1176                          struct lp_build_emit_data *emit_data,
1177                          LLVMValueRef cond)
1178 {
1179         struct si_shader_context *ctx = si_shader_context(bld_base);
1180         struct gallivm_state *gallivm = &ctx->gallivm;
1181         struct si_llvm_flow *flow = push_flow(ctx);
1182         LLVMBasicBlockRef if_block;
1183
1184         if_block = append_basic_block(ctx, "IF");
1185         flow->next_block = append_basic_block(ctx, "ELSE");
1186         set_basicblock_name(if_block, "if", bld_base->pc);
1187         LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1188         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1189 }
1190
1191 static void if_emit(const struct lp_build_tgsi_action *action,
1192                     struct lp_build_tgsi_context *bld_base,
1193                     struct lp_build_emit_data *emit_data)
1194 {
1195         struct gallivm_state *gallivm = bld_base->base.gallivm;
1196         LLVMValueRef cond;
1197
1198         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1199                         emit_data->args[0],
1200                         bld_base->base.zero, "");
1201
1202         if_cond_emit(action, bld_base, emit_data, cond);
1203 }
1204
1205 static void uif_emit(const struct lp_build_tgsi_action *action,
1206                      struct lp_build_tgsi_context *bld_base,
1207                      struct lp_build_emit_data *emit_data)
1208 {
1209         struct gallivm_state *gallivm = bld_base->base.gallivm;
1210         LLVMValueRef cond;
1211
1212         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1213                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1214                         bld_base->int_bld.zero, "");
1215
1216         if_cond_emit(action, bld_base, emit_data, cond);
1217 }
1218
1219 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1220                            const struct tgsi_full_immediate *imm)
1221 {
1222         unsigned i;
1223         struct si_shader_context *ctx = si_shader_context(bld_base);
1224
1225         for (i = 0; i < 4; ++i) {
1226                 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1227                                 LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
1228         }
1229
1230         ctx->imms_num++;
1231 }
1232
1233 void si_llvm_context_init(struct si_shader_context *ctx,
1234                           struct si_screen *sscreen,
1235                           LLVMTargetMachineRef tm)
1236 {
1237         struct lp_type type;
1238
1239         /* Initialize the gallivm object:
1240          * We are only using the module, context, and builder fields of this struct.
1241          * This should be enough for us to be able to pass our gallivm struct to the
1242          * helper functions in the gallivm module.
1243          */
1244         memset(ctx, 0, sizeof(*ctx));
1245         ctx->screen = sscreen;
1246         ctx->tm = tm;
1247
1248         ctx->gallivm.context = LLVMContextCreate();
1249         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1250                                                 ctx->gallivm.context);
1251         LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1252
1253         LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1254         char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1255         LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1256         LLVMDisposeTargetData(data_layout);
1257         LLVMDisposeMessage(data_layout_str);
1258
1259         bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1260         enum lp_float_mode float_mode =
1261                 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1262                                 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1263
1264         ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1265                                                  float_mode);
1266
1267         ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1268         ctx->ac.module = ctx->gallivm.module;
1269         ctx->ac.builder = ctx->gallivm.builder;
1270
1271         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1272
1273         type.floating = true;
1274         type.fixed = false;
1275         type.sign = true;
1276         type.norm = false;
1277         type.width = 32;
1278         type.length = 1;
1279
1280         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1281         lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1282         lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1283         type.width *= 2;
1284         lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1285         lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1286         lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1287
1288         bld_base->soa = 1;
1289         bld_base->emit_swizzle = emit_swizzle;
1290         bld_base->emit_declaration = emit_declaration;
1291         bld_base->emit_immediate = emit_immediate;
1292
1293         /* metadata allowing 2.5 ULP */
1294         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1295                                                        "fpmath", 6);
1296         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1297         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1298                                                      &arg, 1);
1299
1300         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1301         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1302         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1303         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1304         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1305         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1306         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1307         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1308
1309         si_shader_context_init_alu(&ctx->bld_base);
1310
1311         ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1312         ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1313         ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1314         ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1315         ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1316         ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1317         ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1318         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1319         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1320         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1321         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1322
1323         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1324         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1325 }
1326
1327 /* Set the context to a certain TGSI shader. Can be called repeatedly
1328  * to change the shader. */
1329 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1330                               struct si_shader *shader)
1331 {
1332         const struct tgsi_shader_info *info = NULL;
1333         const struct tgsi_token *tokens = NULL;
1334
1335         if (shader && shader->selector) {
1336                 info = &shader->selector->info;
1337                 tokens = shader->selector->tokens;
1338         }
1339
1340         ctx->shader = shader;
1341         ctx->type = info ? info->processor : -1;
1342         ctx->bld_base.info = info;
1343
1344         /* Clean up the old contents. */
1345         FREE(ctx->temp_arrays);
1346         ctx->temp_arrays = NULL;
1347         FREE(ctx->temp_array_allocas);
1348         ctx->temp_array_allocas = NULL;
1349
1350         FREE(ctx->imms);
1351         ctx->imms = NULL;
1352         ctx->imms_num = 0;
1353
1354         FREE(ctx->temps);
1355         ctx->temps = NULL;
1356         ctx->temps_count = 0;
1357
1358         if (!info || !tokens)
1359                 return;
1360
1361         if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1362                 int size = info->array_max[TGSI_FILE_TEMPORARY];
1363
1364                 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1365                 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1366
1367                 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1368                                  ctx->temp_arrays);
1369         }
1370         if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1371                 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1372                 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1373         }
1374
1375         /* Re-set these to start with a clean slate. */
1376         ctx->bld_base.num_instructions = 0;
1377         ctx->bld_base.pc = 0;
1378         memset(ctx->outputs, 0, sizeof(ctx->outputs));
1379
1380         ctx->bld_base.emit_store = si_llvm_emit_store;
1381         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1382         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1383         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1384         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1385         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1386 }
1387
1388 void si_llvm_create_func(struct si_shader_context *ctx,
1389                          const char *name,
1390                          LLVMTypeRef *return_types, unsigned num_return_elems,
1391                          LLVMTypeRef *ParamTypes, unsigned ParamCount)
1392 {
1393         LLVMTypeRef main_fn_type, ret_type;
1394         LLVMBasicBlockRef main_fn_body;
1395
1396         if (num_return_elems)
1397                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1398                                                    return_types,
1399                                                    num_return_elems, true);
1400         else
1401                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1402
1403         /* Setup the function */
1404         ctx->return_type = ret_type;
1405         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1406         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1407         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1408                         ctx->main_fn, "main_body");
1409         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1410 }
1411
1412 void si_llvm_optimize_module(struct si_shader_context *ctx)
1413 {
1414         struct gallivm_state *gallivm = &ctx->gallivm;
1415         const char *triple = LLVMGetTarget(gallivm->module);
1416         LLVMTargetLibraryInfoRef target_library_info;
1417
1418         /* Dump LLVM IR before any optimization passes */
1419         if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1420             r600_can_dump_shader(&ctx->screen->b, ctx->type))
1421                 LLVMDumpModule(ctx->gallivm.module);
1422
1423         /* Create the pass manager */
1424         gallivm->passmgr = LLVMCreatePassManager();
1425
1426         target_library_info = gallivm_create_target_library_info(triple);
1427         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1428
1429         if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1430                 LLVMAddVerifierPass(gallivm->passmgr);
1431
1432         LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1433
1434         /* This pass should eliminate all the load and store instructions */
1435         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1436
1437         /* Add some optimization passes */
1438         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1439         LLVMAddLICMPass(gallivm->passmgr);
1440         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1441         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1442         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1443
1444         /* Run the pass */
1445         LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1446
1447         LLVMDisposeBuilder(gallivm->builder);
1448         LLVMDisposePassManager(gallivm->passmgr);
1449         gallivm_dispose_target_library_info(target_library_info);
1450 }
1451
1452 void si_llvm_dispose(struct si_shader_context *ctx)
1453 {
1454         LLVMDisposeModule(ctx->gallivm.module);
1455         LLVMContextDispose(ctx->gallivm.context);
1456         FREE(ctx->temp_arrays);
1457         ctx->temp_arrays = NULL;
1458         FREE(ctx->temp_array_allocas);
1459         ctx->temp_array_allocas = NULL;
1460         FREE(ctx->temps);
1461         ctx->temps = NULL;
1462         ctx->temps_count = 0;
1463         FREE(ctx->imms);
1464         ctx->imms = NULL;
1465         ctx->imms_num = 0;
1466         FREE(ctx->flow);
1467         ctx->flow = NULL;
1468         ctx->flow_depth_max = 0;
1469 }