src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "si_shader_internal.h"
  25 #include "si_pipe.h"
  26
  27 #include "gallivm/lp_bld_const.h"
  28 #include "gallivm/lp_bld_gather.h"
  29 #include "gallivm/lp_bld_flow.h"
  30 #include "gallivm/lp_bld_init.h"
  31 #include "gallivm/lp_bld_intr.h"
  32 #include "gallivm/lp_bld_misc.h"
  33 #include "gallivm/lp_bld_swizzle.h"
  34 #include "tgsi/tgsi_info.h"
  35 #include "tgsi/tgsi_parse.h"
  36 #include "util/u_math.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include <stdio.h>
  41 #include <llvm-c/Transforms/IPO.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43
  44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
  45  */
  46 struct si_llvm_flow {
  47         /* Loop exit or next part of if/else/endif. */
  48         LLVMBasicBlockRef next_block;
  49         LLVMBasicBlockRef loop_entry_block;
  50 };
  51
  52 enum si_llvm_calling_convention {
  53         RADEON_LLVM_AMDGPU_VS = 87,
  54         RADEON_LLVM_AMDGPU_GS = 88,
  55         RADEON_LLVM_AMDGPU_PS = 89,
  56         RADEON_LLVM_AMDGPU_CS = 90,
  57         RADEON_LLVM_AMDGPU_HS = 93,
  58 };
  59
  60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
  61 {
  62         char str[16];
  63
  64         snprintf(str, sizeof(str), "%i", value);
  65         LLVMAddTargetDependentFunctionAttr(F, name, str);
  66 }
  67
  68 struct si_llvm_diagnostics {
  69         struct pipe_debug_callback *debug;
  70         unsigned retval;
  71 };
  72
  73 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
  74 {
  75         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
  76         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
  77         char *description = LLVMGetDiagInfoDescription(di);
  78         const char *severity_str = NULL;
  79
  80         switch (severity) {
  81         case LLVMDSError:
  82                 severity_str = "error";
  83                 break;
  84         case LLVMDSWarning:
  85                 severity_str = "warning";
  86                 break;
  87         case LLVMDSRemark:
  88                 severity_str = "remark";
  89                 break;
  90         case LLVMDSNote:
  91                 severity_str = "note";
  92                 break;
  93         default:
  94                 severity_str = "unknown";
  95         }
  96
  97         pipe_debug_message(diag->debug, SHADER_INFO,
  98                            "LLVM diagnostic (%s): %s", severity_str, description);
  99
 100         if (severity == LLVMDSError) {
 101                 diag->retval = 1;
 102                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
 103         }
 104
 105         LLVMDisposeMessage(description);
 106 }
 107
 108 /**
 109  * Compile an LLVM module to machine code.
 110  *
 111  * @returns 0 for success, 1 for failure
 112  */
 113 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 114                          LLVMTargetMachineRef tm,
 115                          struct pipe_debug_callback *debug)
 116 {
 117         struct si_llvm_diagnostics diag;
 118         char *err;
 119         LLVMContextRef llvm_ctx;
 120         LLVMMemoryBufferRef out_buffer;
 121         unsigned buffer_size;
 122         const char *buffer_data;
 123         LLVMBool mem_err;
 124
 125         diag.debug = debug;
 126         diag.retval = 0;
 127
 128         /* Setup Diagnostic Handler*/
 129         llvm_ctx = LLVMGetModuleContext(M);
 130
 131         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 132
 133         /* Compile IR*/
 134         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
 135                                                                  &out_buffer);
 136
 137         /* Process Errors/Warnings */
 138         if (mem_err) {
 139                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
 140                 pipe_debug_message(debug, SHADER_INFO,
 141                                    "LLVM emit error: %s", err);
 142                 FREE(err);
 143                 diag.retval = 1;
 144                 goto out;
 145         }
 146
 147         /* Extract Shader Code*/
 148         buffer_size = LLVMGetBufferSize(out_buffer);
 149         buffer_data = LLVMGetBufferStart(out_buffer);
 150
 151         if (!ac_elf_read(buffer_data, buffer_size, binary)) {
 152                 fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
 153                 diag.retval = 1;
 154         }
 155
 156         /* Clean up */
 157         LLVMDisposeMemoryBuffer(out_buffer);
 158
 159 out:
 160         if (diag.retval != 0)
 161                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
 162         return diag.retval;
 163 }
 164
 165 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 166                           enum tgsi_opcode_type type)
 167 {
 168         LLVMContextRef ctx = bld_base->base.gallivm->context;
 169
 170         switch (type) {
 171         case TGSI_TYPE_UNSIGNED:
 172         case TGSI_TYPE_SIGNED:
 173                 return LLVMInt32TypeInContext(ctx);
 174         case TGSI_TYPE_UNSIGNED64:
 175         case TGSI_TYPE_SIGNED64:
 176                 return LLVMInt64TypeInContext(ctx);
 177         case TGSI_TYPE_DOUBLE:
 178                 return LLVMDoubleTypeInContext(ctx);
 179         case TGSI_TYPE_UNTYPED:
 180         case TGSI_TYPE_FLOAT:
 181                 return LLVMFloatTypeInContext(ctx);
 182         default: break;
 183         }
 184         return 0;
 185 }
 186
 187 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 188                      enum tgsi_opcode_type type, LLVMValueRef value)
 189 {
 190         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 191         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 192
 193         if (dst_type)
 194                 return LLVMBuildBitCast(builder, value, dst_type, "");
 195         else
 196                 return value;
 197 }
 198
 199 /**
 200  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 201  * or an undefined value in the same interval otherwise.
 202  */
 203 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 204                                  LLVMValueRef index,
 205                                  unsigned num)
 206 {
 207         struct gallivm_state *gallivm = &ctx->gallivm;
 208         LLVMBuilderRef builder = gallivm->builder;
 209         LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
 210         LLVMValueRef cc;
 211
 212         if (util_is_power_of_two(num)) {
 213                 index = LLVMBuildAnd(builder, index, c_max, "");
 214         } else {
 215                 /* In theory, this MAX pattern should result in code that is
 216                  * as good as the bit-wise AND above.
 217                  *
 218                  * In practice, LLVM generates worse code (at the time of
 219                  * writing), because its value tracking is not strong enough.
 220                  */
 221                 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 222                 index = LLVMBuildSelect(builder, cc, index, c_max, "");
 223         }
 224
 225         return index;
 226 }
 227
 228 static struct si_llvm_flow *
 229 get_current_flow(struct si_shader_context *ctx)
 230 {
 231         if (ctx->flow_depth > 0)
 232                 return &ctx->flow[ctx->flow_depth - 1];
 233         return NULL;
 234 }
 235
 236 static struct si_llvm_flow *
 237 get_innermost_loop(struct si_shader_context *ctx)
 238 {
 239         for (unsigned i = ctx->flow_depth; i > 0; --i) {
 240                 if (ctx->flow[i - 1].loop_entry_block)
 241                         return &ctx->flow[i - 1];
 242         }
 243         return NULL;
 244 }
 245
 246 static struct si_llvm_flow *
 247 push_flow(struct si_shader_context *ctx)
 248 {
 249         struct si_llvm_flow *flow;
 250
 251         if (ctx->flow_depth >= ctx->flow_depth_max) {
 252                 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
 253                 ctx->flow = REALLOC(ctx->flow,
 254                                     ctx->flow_depth_max * sizeof(*ctx->flow),
 255                                     new_max * sizeof(*ctx->flow));
 256                 ctx->flow_depth_max = new_max;
 257         }
 258
 259         flow = &ctx->flow[ctx->flow_depth];
 260         ctx->flow_depth++;
 261
 262         flow->next_block = NULL;
 263         flow->loop_entry_block = NULL;
 264         return flow;
 265 }
 266
 267 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 268                                  LLVMValueRef value,
 269                                  unsigned swizzle_x,
 270                                  unsigned swizzle_y,
 271                                  unsigned swizzle_z,
 272                                  unsigned swizzle_w)
 273 {
 274         LLVMValueRef swizzles[4];
 275         LLVMTypeRef i32t =
 276                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 277
 278         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 279         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 280         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 281         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 282
 283         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 284                                       value,
 285                                       LLVMGetUndef(LLVMTypeOf(value)),
 286                                       LLVMConstVector(swizzles, 4), "");
 287 }
 288
 289 /**
 290  * Return the description of the array covering the given temporary register
 291  * index.
 292  */
 293 static unsigned
 294 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
 295                   unsigned reg_index,
 296                   const struct tgsi_ind_register *reg)
 297 {
 298         struct si_shader_context *ctx = si_shader_context(bld_base);
 299         unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
 300         unsigned i;
 301
 302         if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
 303                 return reg->ArrayID;
 304
 305         for (i = 0; i < num_arrays; i++) {
 306                 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 307
 308                 if (reg_index >= array->range.First && reg_index <= array->range.Last)
 309                         return i + 1;
 310         }
 311
 312         return 0;
 313 }
 314
 315 static struct tgsi_declaration_range
 316 get_array_range(struct lp_build_tgsi_context *bld_base,
 317                 unsigned File, unsigned reg_index,
 318                 const struct tgsi_ind_register *reg)
 319 {
 320         struct si_shader_context *ctx = si_shader_context(bld_base);
 321         struct tgsi_declaration_range range;
 322
 323         if (File == TGSI_FILE_TEMPORARY) {
 324                 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
 325                 if (array_id)
 326                         return ctx->temp_arrays[array_id - 1].range;
 327         }
 328
 329         range.First = 0;
 330         range.Last = bld_base->info->file_max[File];
 331         return range;
 332 }
 333
 334 /**
 335  * For indirect registers, construct a pointer directly to the requested
 336  * element using getelementptr if possible.
 337  *
 338  * Returns NULL if the insertelement/extractelement fallback for array access
 339  * must be used.
 340  */
 341 static LLVMValueRef
 342 get_pointer_into_array(struct si_shader_context *ctx,
 343                        unsigned file,
 344                        unsigned swizzle,
 345                        unsigned reg_index,
 346                        const struct tgsi_ind_register *reg_indirect)
 347 {
 348         unsigned array_id;
 349         struct tgsi_array_info *array;
 350         struct gallivm_state *gallivm = &ctx->gallivm;
 351         LLVMBuilderRef builder = gallivm->builder;
 352         LLVMValueRef idxs[2];
 353         LLVMValueRef index;
 354         LLVMValueRef alloca;
 355
 356         if (file != TGSI_FILE_TEMPORARY)
 357                 return NULL;
 358
 359         array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
 360         if (!array_id)
 361                 return NULL;
 362
 363         alloca = ctx->temp_array_allocas[array_id - 1];
 364         if (!alloca)
 365                 return NULL;
 366
 367         array = &ctx->temp_arrays[array_id - 1];
 368
 369         if (!(array->writemask & (1 << swizzle)))
 370                 return ctx->undef_alloca;
 371
 372         index = si_get_indirect_index(ctx, reg_indirect, 1,
 373                                       reg_index - ctx->temp_arrays[array_id - 1].range.First);
 374
 375         /* Ensure that the index is within a valid range, to guard against
 376          * VM faults and overwriting critical data (e.g. spilled resource
 377          * descriptors).
 378          *
 379          * TODO It should be possible to avoid the additional instructions
 380          * if LLVM is changed so that it guarantuees:
 381          * 1. the scratch space descriptor isolates the current wave (this
 382          *    could even save the scratch offset SGPR at the cost of an
 383          *    additional SALU instruction)
 384          * 2. the memory for allocas must be allocated at the _end_ of the
 385          *    scratch space (after spilled registers)
 386          */
 387         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
 388
 389         index = LLVMBuildMul(
 390                 builder, index,
 391                 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
 392                 "");
 393         index = LLVMBuildAdd(
 394                 builder, index,
 395                 LLVMConstInt(ctx->i32,
 396                              util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
 397                 "");
 398         idxs[0] = ctx->i32_0;
 399         idxs[1] = index;
 400         return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 401 }
 402
 403 LLVMValueRef
 404 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 405                          enum tgsi_opcode_type type,
 406                          LLVMValueRef ptr,
 407                          LLVMValueRef ptr2)
 408 {
 409         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 410         LLVMValueRef result;
 411
 412         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 413
 414         result = LLVMBuildInsertElement(builder,
 415                                         result,
 416                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
 417                                         bld_base->int_bld.zero, "");
 418         result = LLVMBuildInsertElement(builder,
 419                                         result,
 420                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
 421                                         bld_base->int_bld.one, "");
 422         return bitcast(bld_base, type, result);
 423 }
 424
 425 static LLVMValueRef
 426 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 427                  unsigned File, enum tgsi_opcode_type type,
 428                  struct tgsi_declaration_range range,
 429                  unsigned swizzle)
 430 {
 431         struct si_shader_context *ctx = si_shader_context(bld_base);
 432
 433         LLVMBuilderRef builder = ctx->gallivm.builder;
 434
 435         unsigned i, size = range.Last - range.First + 1;
 436         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 437         LLVMValueRef result = LLVMGetUndef(vec);
 438
 439         struct tgsi_full_src_register tmp_reg = {};
 440         tmp_reg.Register.File = File;
 441
 442         for (i = 0; i < size; ++i) {
 443                 tmp_reg.Register.Index = i + range.First;
 444                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 445                 result = LLVMBuildInsertElement(builder, result, temp,
 446                         LLVMConstInt(ctx->i32, i, 0), "array_vector");
 447         }
 448         return result;
 449 }
 450
 451 static LLVMValueRef
 452 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 453                       unsigned file,
 454                       enum tgsi_opcode_type type,
 455                       unsigned swizzle,
 456                       unsigned reg_index,
 457                       const struct tgsi_ind_register *reg_indirect)
 458 {
 459         struct si_shader_context *ctx = si_shader_context(bld_base);
 460         struct gallivm_state *gallivm = &ctx->gallivm;
 461         LLVMBuilderRef builder = gallivm->builder;
 462         LLVMValueRef ptr;
 463
 464         ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
 465         if (ptr) {
 466                 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
 467                 if (tgsi_type_is_64bit(type)) {
 468                         LLVMValueRef ptr_hi, val_hi;
 469                         ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
 470                         val_hi = LLVMBuildLoad(builder, ptr_hi, "");
 471                         val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
 472                 }
 473
 474                 return val;
 475         } else {
 476                 struct tgsi_declaration_range range =
 477                         get_array_range(bld_base, file, reg_index, reg_indirect);
 478                 LLVMValueRef index =
 479                         si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
 480                 LLVMValueRef array =
 481                         emit_array_fetch(bld_base, file, type, range, swizzle);
 482                 return LLVMBuildExtractElement(builder, array, index, "");
 483         }
 484 }
 485
 486 static void
 487 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 488                      LLVMValueRef value,
 489                      unsigned file,
 490                      unsigned chan_index,
 491                      unsigned reg_index,
 492                      const struct tgsi_ind_register *reg_indirect)
 493 {
 494         struct si_shader_context *ctx = si_shader_context(bld_base);
 495         struct gallivm_state *gallivm = &ctx->gallivm;
 496         LLVMBuilderRef builder = gallivm->builder;
 497         LLVMValueRef ptr;
 498
 499         ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
 500         if (ptr) {
 501                 LLVMBuildStore(builder, value, ptr);
 502         } else {
 503                 unsigned i, size;
 504                 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
 505                 LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
 506                 LLVMValueRef array =
 507                         emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
 508                 LLVMValueRef temp_ptr;
 509
 510                 array = LLVMBuildInsertElement(builder, array, value, index, "");
 511
 512                 size = range.Last - range.First + 1;
 513                 for (i = 0; i < size; ++i) {
 514                         switch(file) {
 515                         case TGSI_FILE_OUTPUT:
 516                                 temp_ptr = ctx->outputs[i + range.First][chan_index];
 517                                 break;
 518
 519                         case TGSI_FILE_TEMPORARY:
 520                                 if (range.First + i >= ctx->temps_count)
 521                                         continue;
 522                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 523                                 break;
 524
 525                         default:
 526                                 continue;
 527                         }
 528                         value = LLVMBuildExtractElement(builder, array,
 529                                 LLVMConstInt(ctx->i32, i, 0), "");
 530                         LLVMBuildStore(builder, value, temp_ptr);
 531                 }
 532         }
 533 }
 534
 535 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 536  * reload them at each use. This must be true if the shader is using
 537  * derivatives and KILL, because KILL can leave the WQM and then a lazy
 538  * input load isn't in the WQM anymore.
 539  */
 540 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
 541 {
 542         struct si_shader_selector *sel = ctx->shader->selector;
 543
 544         return sel->info.uses_derivatives &&
 545                sel->info.uses_kill;
 546 }
 547
 548 static LLVMValueRef
 549 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 550                unsigned chan)
 551 {
 552         struct si_shader_context *ctx = si_shader_context(bld_base);
 553
 554         assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
 555         return ctx->outputs[index][chan];
 556 }
 557
 558 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 559                                 const struct tgsi_full_src_register *reg,
 560                                 enum tgsi_opcode_type type,
 561                                 unsigned swizzle)
 562 {
 563         struct si_shader_context *ctx = si_shader_context(bld_base);
 564         LLVMBuilderRef builder = ctx->gallivm.builder;
 565         LLVMValueRef result = NULL, ptr, ptr2;
 566
 567         if (swizzle == ~0) {
 568                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 569                 unsigned chan;
 570                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 571                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
 572                 }
 573                 return lp_build_gather_values(&ctx->gallivm, values,
 574                                               TGSI_NUM_CHANNELS);
 575         }
 576
 577         if (reg->Register.Indirect) {
 578                 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
 579                                 swizzle, reg->Register.Index, &reg->Indirect);
 580                 return bitcast(bld_base, type, load);
 581         }
 582
 583         switch(reg->Register.File) {
 584         case TGSI_FILE_IMMEDIATE: {
 585                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 586                 if (tgsi_type_is_64bit(type)) {
 587                         result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
 588                         result = LLVMConstInsertElement(result,
 589                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 590                                                         ctx->i32_0);
 591                         result = LLVMConstInsertElement(result,
 592                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 593                                                         ctx->i32_1);
 594                         return LLVMConstBitCast(result, ctype);
 595                 } else {
 596                         return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 597                 }
 598         }
 599
 600         case TGSI_FILE_INPUT: {
 601                 unsigned index = reg->Register.Index;
 602                 LLVMValueRef input[4];
 603
 604                 /* I don't think doing this for vertex shaders is beneficial.
 605                  * For those, we want to make sure the VMEM loads are executed
 606                  * only once. Fragment shaders don't care much, because
 607                  * v_interp instructions are much cheaper than VMEM loads.
 608                  */
 609                 if (!si_preload_fs_inputs(ctx) &&
 610                     ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
 611                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
 612                 else
 613                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
 614
 615                 result = input[swizzle];
 616
 617                 if (tgsi_type_is_64bit(type)) {
 618                         ptr = result;
 619                         ptr2 = input[swizzle + 1];
 620                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 621                 }
 622                 break;
 623         }
 624
 625         case TGSI_FILE_TEMPORARY:
 626                 if (reg->Register.Index >= ctx->temps_count)
 627                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 628                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 629                 if (tgsi_type_is_64bit(type)) {
 630                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 631                         return si_llvm_emit_fetch_64bit(bld_base, type,
 632                                                         LLVMBuildLoad(builder, ptr, ""),
 633                                                         LLVMBuildLoad(builder, ptr2, ""));
 634                 }
 635                 result = LLVMBuildLoad(builder, ptr, "");
 636                 break;
 637
 638         case TGSI_FILE_OUTPUT:
 639                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
 640                 if (tgsi_type_is_64bit(type)) {
 641                         ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
 642                         return si_llvm_emit_fetch_64bit(bld_base, type,
 643                                                         LLVMBuildLoad(builder, ptr, ""),
 644                                                         LLVMBuildLoad(builder, ptr2, ""));
 645                 }
 646                 result = LLVMBuildLoad(builder, ptr, "");
 647                 break;
 648
 649         default:
 650                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 651         }
 652
 653         return bitcast(bld_base, type, result);
 654 }
 655
 656 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 657                                        const struct tgsi_full_src_register *reg,
 658                                        enum tgsi_opcode_type type,
 659                                        unsigned swizzle)
 660 {
 661         struct si_shader_context *ctx = si_shader_context(bld_base);
 662         LLVMBuilderRef builder = ctx->gallivm.builder;
 663         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 664
 665         if (tgsi_type_is_64bit(type)) {
 666                 LLVMValueRef lo, hi;
 667
 668                 assert(swizzle == 0 || swizzle == 2);
 669
 670                 lo = LLVMBuildExtractElement(
 671                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 672                 hi = LLVMBuildExtractElement(
 673                         builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
 674
 675                 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
 676         }
 677
 678         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 679                 cval = LLVMBuildExtractElement(
 680                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 681         } else {
 682                 assert(swizzle == 0);
 683         }
 684
 685         return bitcast(bld_base, type, cval);
 686 }
 687
 688 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 689                              const struct tgsi_full_declaration *decl)
 690 {
 691         struct si_shader_context *ctx = si_shader_context(bld_base);
 692         LLVMBuilderRef builder = ctx->gallivm.builder;
 693         unsigned first, last, i;
 694         switch(decl->Declaration.File) {
 695         case TGSI_FILE_ADDRESS:
 696         {
 697                  unsigned idx;
 698                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 699                         unsigned chan;
 700                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 701                                  ctx->addrs[idx][chan] = lp_build_alloca_undef(
 702                                         &ctx->gallivm,
 703                                         ctx->i32, "");
 704                         }
 705                 }
 706                 break;
 707         }
 708
 709         case TGSI_FILE_TEMPORARY:
 710         {
 711                 char name[16] = "";
 712                 LLVMValueRef array_alloca = NULL;
 713                 unsigned decl_size;
 714                 unsigned writemask = decl->Declaration.UsageMask;
 715                 first = decl->Range.First;
 716                 last = decl->Range.Last;
 717                 decl_size = 4 * ((last - first) + 1);
 718
 719                 if (decl->Declaration.Array) {
 720                         unsigned id = decl->Array.ArrayID - 1;
 721                         unsigned array_size;
 722
 723                         writemask &= ctx->temp_arrays[id].writemask;
 724                         ctx->temp_arrays[id].writemask = writemask;
 725                         array_size = ((last - first) + 1) * util_bitcount(writemask);
 726
 727                         /* If the array has more than 16 elements, store it
 728                          * in memory using an alloca that spans the entire
 729                          * array.
 730                          *
 731                          * Otherwise, store each array element individually.
 732                          * We will then generate vectors (per-channel, up to
 733                          * <16 x float> if the usagemask is a single bit) for
 734                          * indirect addressing.
 735                          *
 736                          * Note that 16 is the number of vector elements that
 737                          * LLVM will store in a register, so theoretically an
 738                          * array with up to 4 * 16 = 64 elements could be
 739                          * handled this way, but whether that's a good idea
 740                          * depends on VGPR register pressure elsewhere.
 741                          *
 742                          * FIXME: We shouldn't need to have the non-alloca
 743                          * code path for arrays. LLVM should be smart enough to
 744                          * promote allocas into registers when profitable.
 745                          */
 746                         if (array_size > 16 ||
 747                             !ctx->screen->llvm_has_working_vgpr_indexing) {
 748                                 array_alloca = lp_build_alloca_undef(&ctx->gallivm,
 749                                         LLVMArrayType(ctx->f32,
 750                                                       array_size), "array");
 751                                 ctx->temp_array_allocas[id] = array_alloca;
 752                         }
 753                 }
 754
 755                 if (!ctx->temps_count) {
 756                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 757                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 758                 }
 759                 if (!array_alloca) {
 760                         for (i = 0; i < decl_size; ++i) {
 761 #ifdef DEBUG
 762                                 snprintf(name, sizeof(name), "TEMP%d.%c",
 763                                          first + i / 4, "xyzw"[i % 4]);
 764 #endif
 765                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
 766                                         lp_build_alloca_undef(&ctx->gallivm,
 767                                                               ctx->f32,
 768                                                               name);
 769                         }
 770                 } else {
 771                         LLVMValueRef idxs[2] = {
 772                                 ctx->i32_0,
 773                                 NULL
 774                         };
 775                         unsigned j = 0;
 776
 777                         if (writemask != TGSI_WRITEMASK_XYZW &&
 778                             !ctx->undef_alloca) {
 779                                 /* Create a dummy alloca. We use it so that we
 780                                  * have a pointer that is safe to load from if
 781                                  * a shader ever reads from a channel that
 782                                  * it never writes to.
 783                                  */
 784                                 ctx->undef_alloca = lp_build_alloca_undef(
 785                                         &ctx->gallivm,
 786                                         ctx->f32, "undef");
 787                         }
 788
 789                         for (i = 0; i < decl_size; ++i) {
 790                                 LLVMValueRef ptr;
 791                                 if (writemask & (1 << (i % 4))) {
 792 #ifdef DEBUG
 793                                         snprintf(name, sizeof(name), "TEMP%d.%c",
 794                                                  first + i / 4, "xyzw"[i % 4]);
 795 #endif
 796                                         idxs[1] = LLVMConstInt(ctx->i32, j, 0);
 797                                         ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
 798                                         j++;
 799                                 } else {
 800                                         ptr = ctx->undef_alloca;
 801                                 }
 802                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 803                         }
 804                 }
 805                 break;
 806         }
 807         case TGSI_FILE_INPUT:
 808         {
 809                 unsigned idx;
 810                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 811                         if (ctx->load_input &&
 812                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
 813                                 ctx->input_decls[idx] = *decl;
 814                                 ctx->input_decls[idx].Range.First = idx;
 815                                 ctx->input_decls[idx].Range.Last = idx;
 816                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
 817
 818                                 if (si_preload_fs_inputs(ctx) ||
 819                                     bld_base->info->processor != PIPE_SHADER_FRAGMENT)
 820                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
 821                                                         &ctx->inputs[idx * 4]);
 822                         }
 823                 }
 824         }
 825         break;
 826
 827         case TGSI_FILE_SYSTEM_VALUE:
 828         {
 829                 unsigned idx;
 830                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 831                         si_load_system_value(ctx, idx, decl);
 832                 }
 833         }
 834         break;
 835
 836         case TGSI_FILE_OUTPUT:
 837         {
 838                 char name[16] = "";
 839                 unsigned idx;
 840                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 841                         unsigned chan;
 842                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 843                         if (ctx->outputs[idx][0])
 844                                 continue;
 845                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 846 #ifdef DEBUG
 847                                 snprintf(name, sizeof(name), "OUT%d.%c",
 848                                          idx, "xyzw"[chan % 4]);
 849 #endif
 850                                 ctx->outputs[idx][chan] = lp_build_alloca_undef(
 851                                         &ctx->gallivm,
 852                                         ctx->f32, name);
 853                         }
 854                 }
 855                 break;
 856         }
 857
 858         case TGSI_FILE_MEMORY:
 859                 si_declare_compute_memory(ctx, decl);
 860                 break;
 861
 862         default:
 863                 break;
 864         }
 865 }
 866
 867 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 868                         const struct tgsi_full_instruction *inst,
 869                         const struct tgsi_opcode_info *info,
 870                         unsigned index,
 871                         LLVMValueRef dst[4])
 872 {
 873         struct si_shader_context *ctx = si_shader_context(bld_base);
 874         struct gallivm_state *gallivm = &ctx->gallivm;
 875         const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 876         LLVMBuilderRef builder = ctx->gallivm.builder;
 877         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 878         bool is_vec_store = false;
 879         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
 880
 881         if (dst[0]) {
 882                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 883                 is_vec_store = (k == LLVMVectorTypeKind);
 884         }
 885
 886         if (is_vec_store) {
 887                 LLVMValueRef values[4] = {};
 888                 uint32_t writemask = reg->Register.WriteMask;
 889                 while (writemask) {
 890                         unsigned chan = u_bit_scan(&writemask);
 891                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
 892                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 893                                                         dst[0], index, "");
 894                 }
 895                 bld_base->emit_store(bld_base, inst, info, index, values);
 896                 return;
 897         }
 898
 899         uint32_t writemask = reg->Register.WriteMask;
 900         while (writemask) {
 901                 unsigned chan_index = u_bit_scan(&writemask);
 902                 LLVMValueRef value = dst[chan_index];
 903
 904                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 905                         continue;
 906                 if (inst->Instruction.Saturate)
 907                         value = ac_build_clamp(&ctx->ac, value);
 908
 909                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 910                         temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 911                         LLVMBuildStore(builder, value, temp_ptr);
 912                         continue;
 913                 }
 914
 915                 if (!tgsi_type_is_64bit(dtype))
 916                         value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 917
 918                 if (reg->Register.Indirect) {
 919                         unsigned file = reg->Register.File;
 920                         unsigned reg_index = reg->Register.Index;
 921                         store_value_to_array(bld_base, value, file, chan_index,
 922                                              reg_index, &reg->Indirect);
 923                 } else {
 924                         switch(reg->Register.File) {
 925                         case TGSI_FILE_OUTPUT:
 926                                 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
 927                                 if (tgsi_type_is_64bit(dtype))
 928                                         temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
 929                                 break;
 930
 931                         case TGSI_FILE_TEMPORARY:
 932                         {
 933                                 if (reg->Register.Index >= ctx->temps_count)
 934                                         continue;
 935
 936                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
 937                                 if (tgsi_type_is_64bit(dtype))
 938                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
 939
 940                                 break;
 941                         }
 942                         default:
 943                                 return;
 944                         }
 945                         if (!tgsi_type_is_64bit(dtype))
 946                                 LLVMBuildStore(builder, value, temp_ptr);
 947                         else {
 948                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
 949                                                                     LLVMVectorType(ctx->i32, 2), "");
 950                                 LLVMValueRef val2;
 951                                 value = LLVMBuildExtractElement(builder, ptr,
 952                                                                 ctx->i32_0, "");
 953                                 val2 = LLVMBuildExtractElement(builder, ptr,
 954                                                                ctx->i32_1, "");
 955
 956                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
 957                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
 958                         }
 959                 }
 960         }
 961 }
 962
 963 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
 964 {
 965         char buf[32];
 966         /* Subtract 1 so that the number shown is that of the corresponding
 967          * opcode in the TGSI dump, e.g. an if block has the same suffix as
 968          * the instruction number of the corresponding TGSI IF.
 969          */
 970         snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
 971         LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
 972 }
 973
 974 /* Append a basic block at the level of the parent flow.
 975  */
 976 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
 977                                             const char *name)
 978 {
 979         struct gallivm_state *gallivm = &ctx->gallivm;
 980
 981         assert(ctx->flow_depth >= 1);
 982
 983         if (ctx->flow_depth >= 2) {
 984                 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
 985
 986                 return LLVMInsertBasicBlockInContext(gallivm->context,
 987                                                      flow->next_block, name);
 988         }
 989
 990         return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
 991 }
 992
 993 /* Emit a branch to the given default target for the current block if
 994  * applicable -- that is, if the current block does not already contain a
 995  * branch from a break or continue.
 996  */
 997 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
 998 {
 999         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1000                  LLVMBuildBr(builder, target);
1001 }
1002
1003 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1004                          struct lp_build_tgsi_context *bld_base,
1005                          struct lp_build_emit_data *emit_data)
1006 {
1007         struct si_shader_context *ctx = si_shader_context(bld_base);
1008         struct gallivm_state *gallivm = &ctx->gallivm;
1009         struct si_llvm_flow *flow = push_flow(ctx);
1010         flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1011         flow->next_block = append_basic_block(ctx, "ENDLOOP");
1012         set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1013         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1014         LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1015 }
1016
1017 static void brk_emit(const struct lp_build_tgsi_action *action,
1018                      struct lp_build_tgsi_context *bld_base,
1019                      struct lp_build_emit_data *emit_data)
1020 {
1021         struct si_shader_context *ctx = si_shader_context(bld_base);
1022         struct gallivm_state *gallivm = &ctx->gallivm;
1023         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1024
1025         LLVMBuildBr(gallivm->builder, flow->next_block);
1026 }
1027
1028 static void cont_emit(const struct lp_build_tgsi_action *action,
1029                       struct lp_build_tgsi_context *bld_base,
1030                       struct lp_build_emit_data *emit_data)
1031 {
1032         struct si_shader_context *ctx = si_shader_context(bld_base);
1033         struct gallivm_state *gallivm = &ctx->gallivm;
1034         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1035
1036         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1037 }
1038
1039 static void else_emit(const struct lp_build_tgsi_action *action,
1040                       struct lp_build_tgsi_context *bld_base,
1041                       struct lp_build_emit_data *emit_data)
1042 {
1043         struct si_shader_context *ctx = si_shader_context(bld_base);
1044         struct gallivm_state *gallivm = &ctx->gallivm;
1045         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1046         LLVMBasicBlockRef endif_block;
1047
1048         assert(!current_branch->loop_entry_block);
1049
1050         endif_block = append_basic_block(ctx, "ENDIF");
1051         emit_default_branch(gallivm->builder, endif_block);
1052
1053         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1054         set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1055
1056         current_branch->next_block = endif_block;
1057 }
1058
1059 static void endif_emit(const struct lp_build_tgsi_action *action,
1060                        struct lp_build_tgsi_context *bld_base,
1061                        struct lp_build_emit_data *emit_data)
1062 {
1063         struct si_shader_context *ctx = si_shader_context(bld_base);
1064         struct gallivm_state *gallivm = &ctx->gallivm;
1065         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1066
1067         assert(!current_branch->loop_entry_block);
1068
1069         emit_default_branch(gallivm->builder, current_branch->next_block);
1070         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1071         set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1072
1073         ctx->flow_depth--;
1074 }
1075
1076 static void endloop_emit(const struct lp_build_tgsi_action *action,
1077                          struct lp_build_tgsi_context *bld_base,
1078                          struct lp_build_emit_data *emit_data)
1079 {
1080         struct si_shader_context *ctx = si_shader_context(bld_base);
1081         struct gallivm_state *gallivm = &ctx->gallivm;
1082         struct si_llvm_flow *current_loop = get_current_flow(ctx);
1083
1084         assert(current_loop->loop_entry_block);
1085
1086         emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1087
1088         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1089         set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1090         ctx->flow_depth--;
1091 }
1092
1093 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1094                          struct lp_build_tgsi_context *bld_base,
1095                          struct lp_build_emit_data *emit_data,
1096                          LLVMValueRef cond)
1097 {
1098         struct si_shader_context *ctx = si_shader_context(bld_base);
1099         struct gallivm_state *gallivm = &ctx->gallivm;
1100         struct si_llvm_flow *flow = push_flow(ctx);
1101         LLVMBasicBlockRef if_block;
1102
1103         if_block = append_basic_block(ctx, "IF");
1104         flow->next_block = append_basic_block(ctx, "ELSE");
1105         set_basicblock_name(if_block, "if", bld_base->pc);
1106         LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1107         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1108 }
1109
1110 static void if_emit(const struct lp_build_tgsi_action *action,
1111                     struct lp_build_tgsi_context *bld_base,
1112                     struct lp_build_emit_data *emit_data)
1113 {
1114         struct gallivm_state *gallivm = bld_base->base.gallivm;
1115         LLVMValueRef cond;
1116
1117         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1118                         emit_data->args[0],
1119                         bld_base->base.zero, "");
1120
1121         if_cond_emit(action, bld_base, emit_data, cond);
1122 }
1123
1124 static void uif_emit(const struct lp_build_tgsi_action *action,
1125                      struct lp_build_tgsi_context *bld_base,
1126                      struct lp_build_emit_data *emit_data)
1127 {
1128         struct gallivm_state *gallivm = bld_base->base.gallivm;
1129         LLVMValueRef cond;
1130
1131         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1132                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1133                         bld_base->int_bld.zero, "");
1134
1135         if_cond_emit(action, bld_base, emit_data, cond);
1136 }
1137
1138 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1139                            const struct tgsi_full_immediate *imm)
1140 {
1141         unsigned i;
1142         struct si_shader_context *ctx = si_shader_context(bld_base);
1143
1144         for (i = 0; i < 4; ++i) {
1145                 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1146                                 LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
1147         }
1148
1149         ctx->imms_num++;
1150 }
1151
1152 void si_llvm_context_init(struct si_shader_context *ctx,
1153                           struct si_screen *sscreen,
1154                           LLVMTargetMachineRef tm)
1155 {
1156         struct lp_type type;
1157
1158         /* Initialize the gallivm object:
1159          * We are only using the module, context, and builder fields of this struct.
1160          * This should be enough for us to be able to pass our gallivm struct to the
1161          * helper functions in the gallivm module.
1162          */
1163         memset(ctx, 0, sizeof(*ctx));
1164         ctx->screen = sscreen;
1165         ctx->tm = tm;
1166
1167         ctx->gallivm.context = LLVMContextCreate();
1168         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1169                                                 ctx->gallivm.context);
1170         LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1171
1172         LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1173         char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1174         LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1175         LLVMDisposeTargetData(data_layout);
1176         LLVMDisposeMessage(data_layout_str);
1177
1178         bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1179         enum lp_float_mode float_mode =
1180                 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1181                                 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1182
1183         ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1184                                                  float_mode);
1185
1186         ac_llvm_context_init(&ctx->ac, ctx->gallivm.context, sscreen->b.chip_class);
1187         ctx->ac.module = ctx->gallivm.module;
1188         ctx->ac.builder = ctx->gallivm.builder;
1189
1190         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1191
1192         type.floating = true;
1193         type.fixed = false;
1194         type.sign = true;
1195         type.norm = false;
1196         type.width = 32;
1197         type.length = 1;
1198
1199         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1200         lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1201         lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1202         type.width *= 2;
1203         lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1204         lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1205         lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1206
1207         bld_base->soa = 1;
1208         bld_base->emit_swizzle = emit_swizzle;
1209         bld_base->emit_declaration = emit_declaration;
1210         bld_base->emit_immediate = emit_immediate;
1211
1212         /* metadata allowing 2.5 ULP */
1213         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1214                                                        "fpmath", 6);
1215         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1216         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1217                                                      &arg, 1);
1218
1219         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1220         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1221         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1222         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1223         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1224         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1225         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1226         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1227
1228         si_shader_context_init_alu(&ctx->bld_base);
1229         si_shader_context_init_mem(ctx);
1230
1231         ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1232         ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1233         ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1234         ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1235         ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1236         ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1237         ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1238         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1239         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1240         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1241         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1242
1243         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1244         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1245 }
1246
1247 /* Set the context to a certain TGSI shader. Can be called repeatedly
1248  * to change the shader. */
1249 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1250                               struct si_shader *shader)
1251 {
1252         const struct tgsi_shader_info *info = NULL;
1253         const struct tgsi_token *tokens = NULL;
1254
1255         if (shader && shader->selector) {
1256                 info = &shader->selector->info;
1257                 tokens = shader->selector->tokens;
1258         }
1259
1260         ctx->shader = shader;
1261         ctx->type = info ? info->processor : -1;
1262         ctx->bld_base.info = info;
1263
1264         /* Clean up the old contents. */
1265         FREE(ctx->temp_arrays);
1266         ctx->temp_arrays = NULL;
1267         FREE(ctx->temp_array_allocas);
1268         ctx->temp_array_allocas = NULL;
1269
1270         FREE(ctx->imms);
1271         ctx->imms = NULL;
1272         ctx->imms_num = 0;
1273
1274         FREE(ctx->temps);
1275         ctx->temps = NULL;
1276         ctx->temps_count = 0;
1277
1278         if (!info || !tokens)
1279                 return;
1280
1281         if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1282                 int size = info->array_max[TGSI_FILE_TEMPORARY];
1283
1284                 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1285                 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1286
1287                 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1288                                  ctx->temp_arrays);
1289         }
1290         if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1291                 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1292                 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1293         }
1294
1295         /* Re-set these to start with a clean slate. */
1296         ctx->bld_base.num_instructions = 0;
1297         ctx->bld_base.pc = 0;
1298         memset(ctx->outputs, 0, sizeof(ctx->outputs));
1299
1300         ctx->bld_base.emit_store = si_llvm_emit_store;
1301         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1302         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1303         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1304         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1305         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1306
1307         ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1308         ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1309         ctx->num_samplers = util_last_bit(info->samplers_declared);
1310         ctx->num_images = util_last_bit(info->images_declared);
1311 }
1312
1313 void si_llvm_create_func(struct si_shader_context *ctx,
1314                          const char *name,
1315                          LLVMTypeRef *return_types, unsigned num_return_elems,
1316                          LLVMTypeRef *ParamTypes, unsigned ParamCount)
1317 {
1318         LLVMTypeRef main_fn_type, ret_type;
1319         LLVMBasicBlockRef main_fn_body;
1320         enum si_llvm_calling_convention call_conv;
1321         unsigned real_shader_type;
1322
1323         if (num_return_elems)
1324                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1325                                                    return_types,
1326                                                    num_return_elems, true);
1327         else
1328                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1329
1330         /* Setup the function */
1331         ctx->return_type = ret_type;
1332         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1333         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1334         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1335                         ctx->main_fn, "main_body");
1336         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1337
1338         real_shader_type = ctx->type;
1339
1340         /* LS is merged into HS (TCS), and ES is merged into GS. */
1341         if (ctx->screen->b.chip_class >= GFX9) {
1342                 if (ctx->shader->key.as_ls)
1343                         real_shader_type = PIPE_SHADER_TESS_CTRL;
1344                 else if (ctx->shader->key.as_es)
1345                         real_shader_type = PIPE_SHADER_GEOMETRY;
1346         }
1347
1348         switch (real_shader_type) {
1349         case PIPE_SHADER_VERTEX:
1350         case PIPE_SHADER_TESS_EVAL:
1351                 call_conv = RADEON_LLVM_AMDGPU_VS;
1352                 break;
1353         case PIPE_SHADER_TESS_CTRL:
1354                 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1355                                                   RADEON_LLVM_AMDGPU_VS;
1356                 break;
1357         case PIPE_SHADER_GEOMETRY:
1358                 call_conv = RADEON_LLVM_AMDGPU_GS;
1359                 break;
1360         case PIPE_SHADER_FRAGMENT:
1361                 call_conv = RADEON_LLVM_AMDGPU_PS;
1362                 break;
1363         case PIPE_SHADER_COMPUTE:
1364                 call_conv = RADEON_LLVM_AMDGPU_CS;
1365                 break;
1366         default:
1367                 unreachable("Unhandle shader type");
1368         }
1369
1370         LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1371 }
1372
1373 void si_llvm_optimize_module(struct si_shader_context *ctx)
1374 {
1375         struct gallivm_state *gallivm = &ctx->gallivm;
1376         const char *triple = LLVMGetTarget(gallivm->module);
1377         LLVMTargetLibraryInfoRef target_library_info;
1378
1379         /* Dump LLVM IR before any optimization passes */
1380         if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1381             si_can_dump_shader(&ctx->screen->b, ctx->type))
1382                 LLVMDumpModule(ctx->gallivm.module);
1383
1384         /* Create the pass manager */
1385         gallivm->passmgr = LLVMCreatePassManager();
1386
1387         target_library_info = gallivm_create_target_library_info(triple);
1388         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1389
1390         if (si_extra_shader_checks(&ctx->screen->b, ctx->type))
1391                 LLVMAddVerifierPass(gallivm->passmgr);
1392
1393         LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1394
1395         /* This pass should eliminate all the load and store instructions */
1396         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1397
1398         /* Add some optimization passes */
1399         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1400         LLVMAddLICMPass(gallivm->passmgr);
1401         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1402         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1403 #if HAVE_LLVM >= 0x0400
1404         /* This is recommended by the instruction combining pass. */
1405         LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
1406 #endif
1407         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1408
1409         /* Run the pass */
1410         LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1411
1412         LLVMDisposeBuilder(gallivm->builder);
1413         LLVMDisposePassManager(gallivm->passmgr);
1414         gallivm_dispose_target_library_info(target_library_info);
1415 }
1416
1417 void si_llvm_dispose(struct si_shader_context *ctx)
1418 {
1419         LLVMDisposeModule(ctx->gallivm.module);
1420         LLVMContextDispose(ctx->gallivm.context);
1421         FREE(ctx->temp_arrays);
1422         ctx->temp_arrays = NULL;
1423         FREE(ctx->temp_array_allocas);
1424         ctx->temp_array_allocas = NULL;
1425         FREE(ctx->temps);
1426         ctx->temps = NULL;
1427         ctx->temps_count = 0;
1428         FREE(ctx->imms);
1429         ctx->imms = NULL;
1430         ctx->imms_num = 0;
1431         FREE(ctx->flow);
1432         ctx->flow = NULL;
1433         ctx->flow_depth_max = 0;
1434 }