src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "si_shader_internal.h"
  25 #include "si_pipe.h"
  26
  27 #include "gallivm/lp_bld_const.h"
  28 #include "gallivm/lp_bld_gather.h"
  29 #include "gallivm/lp_bld_flow.h"
  30 #include "gallivm/lp_bld_init.h"
  31 #include "gallivm/lp_bld_intr.h"
  32 #include "gallivm/lp_bld_misc.h"
  33 #include "gallivm/lp_bld_swizzle.h"
  34 #include "tgsi/tgsi_info.h"
  35 #include "tgsi/tgsi_parse.h"
  36 #include "util/u_math.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include <stdio.h>
  41 #include <llvm-c/Transforms/IPO.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43
  44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
  45  */
  46 struct si_llvm_flow {
  47         /* Loop exit or next part of if/else/endif. */
  48         LLVMBasicBlockRef next_block;
  49         LLVMBasicBlockRef loop_entry_block;
  50 };
  51
  52 enum si_llvm_calling_convention {
  53         RADEON_LLVM_AMDGPU_VS = 87,
  54         RADEON_LLVM_AMDGPU_GS = 88,
  55         RADEON_LLVM_AMDGPU_PS = 89,
  56         RADEON_LLVM_AMDGPU_CS = 90,
  57         RADEON_LLVM_AMDGPU_HS = 93,
  58 };
  59
  60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
  61 {
  62         char str[16];
  63
  64         snprintf(str, sizeof(str), "%i", value);
  65         LLVMAddTargetDependentFunctionAttr(F, name, str);
  66 }
  67
  68 struct si_llvm_diagnostics {
  69         struct pipe_debug_callback *debug;
  70         unsigned retval;
  71 };
  72
  73 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
  74 {
  75         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
  76         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
  77         char *description = LLVMGetDiagInfoDescription(di);
  78         const char *severity_str = NULL;
  79
  80         switch (severity) {
  81         case LLVMDSError:
  82                 severity_str = "error";
  83                 break;
  84         case LLVMDSWarning:
  85                 severity_str = "warning";
  86                 break;
  87         case LLVMDSRemark:
  88                 severity_str = "remark";
  89                 break;
  90         case LLVMDSNote:
  91                 severity_str = "note";
  92                 break;
  93         default:
  94                 severity_str = "unknown";
  95         }
  96
  97         pipe_debug_message(diag->debug, SHADER_INFO,
  98                            "LLVM diagnostic (%s): %s", severity_str, description);
  99
 100         if (severity == LLVMDSError) {
 101                 diag->retval = 1;
 102                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
 103         }
 104
 105         LLVMDisposeMessage(description);
 106 }
 107
 108 /**
 109  * Compile an LLVM module to machine code.
 110  *
 111  * @returns 0 for success, 1 for failure
 112  */
 113 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 114                          LLVMTargetMachineRef tm,
 115                          struct pipe_debug_callback *debug)
 116 {
 117         struct si_llvm_diagnostics diag;
 118         char *err;
 119         LLVMContextRef llvm_ctx;
 120         LLVMMemoryBufferRef out_buffer;
 121         unsigned buffer_size;
 122         const char *buffer_data;
 123         LLVMBool mem_err;
 124
 125         diag.debug = debug;
 126         diag.retval = 0;
 127
 128         /* Setup Diagnostic Handler*/
 129         llvm_ctx = LLVMGetModuleContext(M);
 130
 131         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 132
 133         /* Compile IR*/
 134         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
 135                                                                  &out_buffer);
 136
 137         /* Process Errors/Warnings */
 138         if (mem_err) {
 139                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
 140                 pipe_debug_message(debug, SHADER_INFO,
 141                                    "LLVM emit error: %s", err);
 142                 FREE(err);
 143                 diag.retval = 1;
 144                 goto out;
 145         }
 146
 147         /* Extract Shader Code*/
 148         buffer_size = LLVMGetBufferSize(out_buffer);
 149         buffer_data = LLVMGetBufferStart(out_buffer);
 150
 151         if (!ac_elf_read(buffer_data, buffer_size, binary)) {
 152                 fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
 153                 diag.retval = 1;
 154         }
 155
 156         /* Clean up */
 157         LLVMDisposeMemoryBuffer(out_buffer);
 158
 159 out:
 160         if (diag.retval != 0)
 161                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
 162         return diag.retval;
 163 }
 164
 165 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 166                           enum tgsi_opcode_type type)
 167 {
 168         LLVMContextRef ctx = bld_base->base.gallivm->context;
 169
 170         switch (type) {
 171         case TGSI_TYPE_UNSIGNED:
 172         case TGSI_TYPE_SIGNED:
 173                 return LLVMInt32TypeInContext(ctx);
 174         case TGSI_TYPE_UNSIGNED64:
 175         case TGSI_TYPE_SIGNED64:
 176                 return LLVMInt64TypeInContext(ctx);
 177         case TGSI_TYPE_DOUBLE:
 178                 return LLVMDoubleTypeInContext(ctx);
 179         case TGSI_TYPE_UNTYPED:
 180         case TGSI_TYPE_FLOAT:
 181                 return LLVMFloatTypeInContext(ctx);
 182         default: break;
 183         }
 184         return 0;
 185 }
 186
 187 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 188                      enum tgsi_opcode_type type, LLVMValueRef value)
 189 {
 190         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 191         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 192
 193         if (dst_type)
 194                 return LLVMBuildBitCast(builder, value, dst_type, "");
 195         else
 196                 return value;
 197 }
 198
 199 /**
 200  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 201  * or an undefined value in the same interval otherwise.
 202  */
 203 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 204                                  LLVMValueRef index,
 205                                  unsigned num)
 206 {
 207         struct gallivm_state *gallivm = &ctx->gallivm;
 208         LLVMBuilderRef builder = gallivm->builder;
 209         LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
 210         LLVMValueRef cc;
 211
 212         if (util_is_power_of_two(num)) {
 213                 index = LLVMBuildAnd(builder, index, c_max, "");
 214         } else {
 215                 /* In theory, this MAX pattern should result in code that is
 216                  * as good as the bit-wise AND above.
 217                  *
 218                  * In practice, LLVM generates worse code (at the time of
 219                  * writing), because its value tracking is not strong enough.
 220                  */
 221                 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 222                 index = LLVMBuildSelect(builder, cc, index, c_max, "");
 223         }
 224
 225         return index;
 226 }
 227
 228 static struct si_llvm_flow *
 229 get_current_flow(struct si_shader_context *ctx)
 230 {
 231         if (ctx->flow_depth > 0)
 232                 return &ctx->flow[ctx->flow_depth - 1];
 233         return NULL;
 234 }
 235
 236 static struct si_llvm_flow *
 237 get_innermost_loop(struct si_shader_context *ctx)
 238 {
 239         for (unsigned i = ctx->flow_depth; i > 0; --i) {
 240                 if (ctx->flow[i - 1].loop_entry_block)
 241                         return &ctx->flow[i - 1];
 242         }
 243         return NULL;
 244 }
 245
 246 static struct si_llvm_flow *
 247 push_flow(struct si_shader_context *ctx)
 248 {
 249         struct si_llvm_flow *flow;
 250
 251         if (ctx->flow_depth >= ctx->flow_depth_max) {
 252                 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
 253                 ctx->flow = REALLOC(ctx->flow,
 254                                     ctx->flow_depth_max * sizeof(*ctx->flow),
 255                                     new_max * sizeof(*ctx->flow));
 256                 ctx->flow_depth_max = new_max;
 257         }
 258
 259         flow = &ctx->flow[ctx->flow_depth];
 260         ctx->flow_depth++;
 261
 262         flow->next_block = NULL;
 263         flow->loop_entry_block = NULL;
 264         return flow;
 265 }
 266
 267 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 268                                  LLVMValueRef value,
 269                                  unsigned swizzle_x,
 270                                  unsigned swizzle_y,
 271                                  unsigned swizzle_z,
 272                                  unsigned swizzle_w)
 273 {
 274         LLVMValueRef swizzles[4];
 275         LLVMTypeRef i32t =
 276                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 277
 278         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 279         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 280         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 281         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 282
 283         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 284                                       value,
 285                                       LLVMGetUndef(LLVMTypeOf(value)),
 286                                       LLVMConstVector(swizzles, 4), "");
 287 }
 288
 289 /**
 290  * Return the description of the array covering the given temporary register
 291  * index.
 292  */
 293 static unsigned
 294 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
 295                   unsigned reg_index,
 296                   const struct tgsi_ind_register *reg)
 297 {
 298         struct si_shader_context *ctx = si_shader_context(bld_base);
 299         unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
 300         unsigned i;
 301
 302         if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
 303                 return reg->ArrayID;
 304
 305         for (i = 0; i < num_arrays; i++) {
 306                 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 307
 308                 if (reg_index >= array->range.First && reg_index <= array->range.Last)
 309                         return i + 1;
 310         }
 311
 312         return 0;
 313 }
 314
 315 static struct tgsi_declaration_range
 316 get_array_range(struct lp_build_tgsi_context *bld_base,
 317                 unsigned File, unsigned reg_index,
 318                 const struct tgsi_ind_register *reg)
 319 {
 320         struct si_shader_context *ctx = si_shader_context(bld_base);
 321         struct tgsi_declaration_range range;
 322
 323         if (File == TGSI_FILE_TEMPORARY) {
 324                 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
 325                 if (array_id)
 326                         return ctx->temp_arrays[array_id - 1].range;
 327         }
 328
 329         range.First = 0;
 330         range.Last = bld_base->info->file_max[File];
 331         return range;
 332 }
 333
 334 /**
 335  * For indirect registers, construct a pointer directly to the requested
 336  * element using getelementptr if possible.
 337  *
 338  * Returns NULL if the insertelement/extractelement fallback for array access
 339  * must be used.
 340  */
 341 static LLVMValueRef
 342 get_pointer_into_array(struct si_shader_context *ctx,
 343                        unsigned file,
 344                        unsigned swizzle,
 345                        unsigned reg_index,
 346                        const struct tgsi_ind_register *reg_indirect)
 347 {
 348         unsigned array_id;
 349         struct tgsi_array_info *array;
 350         struct gallivm_state *gallivm = &ctx->gallivm;
 351         LLVMBuilderRef builder = gallivm->builder;
 352         LLVMValueRef idxs[2];
 353         LLVMValueRef index;
 354         LLVMValueRef alloca;
 355
 356         if (file != TGSI_FILE_TEMPORARY)
 357                 return NULL;
 358
 359         array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
 360         if (!array_id)
 361                 return NULL;
 362
 363         alloca = ctx->temp_array_allocas[array_id - 1];
 364         if (!alloca)
 365                 return NULL;
 366
 367         array = &ctx->temp_arrays[array_id - 1];
 368
 369         if (!(array->writemask & (1 << swizzle)))
 370                 return ctx->undef_alloca;
 371
 372         index = si_get_indirect_index(ctx, reg_indirect, 1,
 373                                       reg_index - ctx->temp_arrays[array_id - 1].range.First);
 374
 375         /* Ensure that the index is within a valid range, to guard against
 376          * VM faults and overwriting critical data (e.g. spilled resource
 377          * descriptors).
 378          *
 379          * TODO It should be possible to avoid the additional instructions
 380          * if LLVM is changed so that it guarantuees:
 381          * 1. the scratch space descriptor isolates the current wave (this
 382          *    could even save the scratch offset SGPR at the cost of an
 383          *    additional SALU instruction)
 384          * 2. the memory for allocas must be allocated at the _end_ of the
 385          *    scratch space (after spilled registers)
 386          */
 387         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
 388
 389         index = LLVMBuildMul(
 390                 builder, index,
 391                 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
 392                 "");
 393         index = LLVMBuildAdd(
 394                 builder, index,
 395                 LLVMConstInt(ctx->i32,
 396                              util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
 397                 "");
 398         idxs[0] = ctx->i32_0;
 399         idxs[1] = index;
 400         return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 401 }
 402
 403 LLVMValueRef
 404 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 405                          enum tgsi_opcode_type type,
 406                          LLVMValueRef ptr,
 407                          LLVMValueRef ptr2)
 408 {
 409         struct si_shader_context *ctx = si_shader_context(bld_base);
 410         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 411         LLVMValueRef result;
 412
 413         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 414
 415         result = LLVMBuildInsertElement(builder,
 416                                         result,
 417                                         ac_to_integer(&ctx->ac, ptr),
 418                                         bld_base->int_bld.zero, "");
 419         result = LLVMBuildInsertElement(builder,
 420                                         result,
 421                                         ac_to_integer(&ctx->ac, ptr2),
 422                                         bld_base->int_bld.one, "");
 423         return bitcast(bld_base, type, result);
 424 }
 425
 426 static LLVMValueRef
 427 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 428                  unsigned File, enum tgsi_opcode_type type,
 429                  struct tgsi_declaration_range range,
 430                  unsigned swizzle)
 431 {
 432         struct si_shader_context *ctx = si_shader_context(bld_base);
 433
 434         LLVMBuilderRef builder = ctx->gallivm.builder;
 435
 436         unsigned i, size = range.Last - range.First + 1;
 437         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 438         LLVMValueRef result = LLVMGetUndef(vec);
 439
 440         struct tgsi_full_src_register tmp_reg = {};
 441         tmp_reg.Register.File = File;
 442
 443         for (i = 0; i < size; ++i) {
 444                 tmp_reg.Register.Index = i + range.First;
 445                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 446                 result = LLVMBuildInsertElement(builder, result, temp,
 447                         LLVMConstInt(ctx->i32, i, 0), "array_vector");
 448         }
 449         return result;
 450 }
 451
 452 static LLVMValueRef
 453 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 454                       unsigned file,
 455                       enum tgsi_opcode_type type,
 456                       unsigned swizzle,
 457                       unsigned reg_index,
 458                       const struct tgsi_ind_register *reg_indirect)
 459 {
 460         struct si_shader_context *ctx = si_shader_context(bld_base);
 461         struct gallivm_state *gallivm = &ctx->gallivm;
 462         LLVMBuilderRef builder = gallivm->builder;
 463         LLVMValueRef ptr;
 464
 465         ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
 466         if (ptr) {
 467                 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
 468                 if (tgsi_type_is_64bit(type)) {
 469                         LLVMValueRef ptr_hi, val_hi;
 470                         ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
 471                         val_hi = LLVMBuildLoad(builder, ptr_hi, "");
 472                         val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
 473                 }
 474
 475                 return val;
 476         } else {
 477                 struct tgsi_declaration_range range =
 478                         get_array_range(bld_base, file, reg_index, reg_indirect);
 479                 LLVMValueRef index =
 480                         si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
 481                 LLVMValueRef array =
 482                         emit_array_fetch(bld_base, file, type, range, swizzle);
 483                 return LLVMBuildExtractElement(builder, array, index, "");
 484         }
 485 }
 486
 487 static void
 488 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 489                      LLVMValueRef value,
 490                      unsigned file,
 491                      unsigned chan_index,
 492                      unsigned reg_index,
 493                      const struct tgsi_ind_register *reg_indirect)
 494 {
 495         struct si_shader_context *ctx = si_shader_context(bld_base);
 496         struct gallivm_state *gallivm = &ctx->gallivm;
 497         LLVMBuilderRef builder = gallivm->builder;
 498         LLVMValueRef ptr;
 499
 500         ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
 501         if (ptr) {
 502                 LLVMBuildStore(builder, value, ptr);
 503         } else {
 504                 unsigned i, size;
 505                 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
 506                 LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
 507                 LLVMValueRef array =
 508                         emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
 509                 LLVMValueRef temp_ptr;
 510
 511                 array = LLVMBuildInsertElement(builder, array, value, index, "");
 512
 513                 size = range.Last - range.First + 1;
 514                 for (i = 0; i < size; ++i) {
 515                         switch(file) {
 516                         case TGSI_FILE_OUTPUT:
 517                                 temp_ptr = ctx->outputs[i + range.First][chan_index];
 518                                 break;
 519
 520                         case TGSI_FILE_TEMPORARY:
 521                                 if (range.First + i >= ctx->temps_count)
 522                                         continue;
 523                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 524                                 break;
 525
 526                         default:
 527                                 continue;
 528                         }
 529                         value = LLVMBuildExtractElement(builder, array,
 530                                 LLVMConstInt(ctx->i32, i, 0), "");
 531                         LLVMBuildStore(builder, value, temp_ptr);
 532                 }
 533         }
 534 }
 535
 536 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 537  * reload them at each use. This must be true if the shader is using
 538  * derivatives and KILL, because KILL can leave the WQM and then a lazy
 539  * input load isn't in the WQM anymore.
 540  */
 541 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
 542 {
 543         struct si_shader_selector *sel = ctx->shader->selector;
 544
 545         return sel->info.uses_derivatives &&
 546                sel->info.uses_kill;
 547 }
 548
 549 static LLVMValueRef
 550 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 551                unsigned chan)
 552 {
 553         struct si_shader_context *ctx = si_shader_context(bld_base);
 554
 555         assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
 556         return ctx->outputs[index][chan];
 557 }
 558
 559 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 560                                 const struct tgsi_full_src_register *reg,
 561                                 enum tgsi_opcode_type type,
 562                                 unsigned swizzle)
 563 {
 564         struct si_shader_context *ctx = si_shader_context(bld_base);
 565         LLVMBuilderRef builder = ctx->gallivm.builder;
 566         LLVMValueRef result = NULL, ptr, ptr2;
 567
 568         if (swizzle == ~0) {
 569                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 570                 unsigned chan;
 571                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 572                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
 573                 }
 574                 return lp_build_gather_values(&ctx->gallivm, values,
 575                                               TGSI_NUM_CHANNELS);
 576         }
 577
 578         if (reg->Register.Indirect) {
 579                 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
 580                                 swizzle, reg->Register.Index, &reg->Indirect);
 581                 return bitcast(bld_base, type, load);
 582         }
 583
 584         switch(reg->Register.File) {
 585         case TGSI_FILE_IMMEDIATE: {
 586                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 587                 if (tgsi_type_is_64bit(type)) {
 588                         result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
 589                         result = LLVMConstInsertElement(result,
 590                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 591                                                         ctx->i32_0);
 592                         result = LLVMConstInsertElement(result,
 593                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 594                                                         ctx->i32_1);
 595                         return LLVMConstBitCast(result, ctype);
 596                 } else {
 597                         return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 598                 }
 599         }
 600
 601         case TGSI_FILE_INPUT: {
 602                 unsigned index = reg->Register.Index;
 603                 LLVMValueRef input[4];
 604
 605                 /* I don't think doing this for vertex shaders is beneficial.
 606                  * For those, we want to make sure the VMEM loads are executed
 607                  * only once. Fragment shaders don't care much, because
 608                  * v_interp instructions are much cheaper than VMEM loads.
 609                  */
 610                 if (!si_preload_fs_inputs(ctx) &&
 611                     ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
 612                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
 613                 else
 614                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
 615
 616                 result = input[swizzle];
 617
 618                 if (tgsi_type_is_64bit(type)) {
 619                         ptr = result;
 620                         ptr2 = input[swizzle + 1];
 621                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 622                 }
 623                 break;
 624         }
 625
 626         case TGSI_FILE_TEMPORARY:
 627                 if (reg->Register.Index >= ctx->temps_count)
 628                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 629                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 630                 if (tgsi_type_is_64bit(type)) {
 631                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 632                         return si_llvm_emit_fetch_64bit(bld_base, type,
 633                                                         LLVMBuildLoad(builder, ptr, ""),
 634                                                         LLVMBuildLoad(builder, ptr2, ""));
 635                 }
 636                 result = LLVMBuildLoad(builder, ptr, "");
 637                 break;
 638
 639         case TGSI_FILE_OUTPUT:
 640                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
 641                 if (tgsi_type_is_64bit(type)) {
 642                         ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
 643                         return si_llvm_emit_fetch_64bit(bld_base, type,
 644                                                         LLVMBuildLoad(builder, ptr, ""),
 645                                                         LLVMBuildLoad(builder, ptr2, ""));
 646                 }
 647                 result = LLVMBuildLoad(builder, ptr, "");
 648                 break;
 649
 650         default:
 651                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 652         }
 653
 654         return bitcast(bld_base, type, result);
 655 }
 656
 657 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 658                                        const struct tgsi_full_src_register *reg,
 659                                        enum tgsi_opcode_type type,
 660                                        unsigned swizzle)
 661 {
 662         struct si_shader_context *ctx = si_shader_context(bld_base);
 663         LLVMBuilderRef builder = ctx->gallivm.builder;
 664         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 665
 666         if (tgsi_type_is_64bit(type)) {
 667                 LLVMValueRef lo, hi;
 668
 669                 assert(swizzle == 0 || swizzle == 2);
 670
 671                 lo = LLVMBuildExtractElement(
 672                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 673                 hi = LLVMBuildExtractElement(
 674                         builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
 675
 676                 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
 677         }
 678
 679         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 680                 cval = LLVMBuildExtractElement(
 681                         builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
 682         } else {
 683                 assert(swizzle == 0);
 684         }
 685
 686         return bitcast(bld_base, type, cval);
 687 }
 688
 689 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 690                              const struct tgsi_full_declaration *decl)
 691 {
 692         struct si_shader_context *ctx = si_shader_context(bld_base);
 693         LLVMBuilderRef builder = ctx->gallivm.builder;
 694         unsigned first, last, i;
 695         switch(decl->Declaration.File) {
 696         case TGSI_FILE_ADDRESS:
 697         {
 698                  unsigned idx;
 699                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 700                         unsigned chan;
 701                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 702                                  ctx->addrs[idx][chan] = lp_build_alloca_undef(
 703                                         &ctx->gallivm,
 704                                         ctx->i32, "");
 705                         }
 706                 }
 707                 break;
 708         }
 709
 710         case TGSI_FILE_TEMPORARY:
 711         {
 712                 char name[16] = "";
 713                 LLVMValueRef array_alloca = NULL;
 714                 unsigned decl_size;
 715                 unsigned writemask = decl->Declaration.UsageMask;
 716                 first = decl->Range.First;
 717                 last = decl->Range.Last;
 718                 decl_size = 4 * ((last - first) + 1);
 719
 720                 if (decl->Declaration.Array) {
 721                         unsigned id = decl->Array.ArrayID - 1;
 722                         unsigned array_size;
 723
 724                         writemask &= ctx->temp_arrays[id].writemask;
 725                         ctx->temp_arrays[id].writemask = writemask;
 726                         array_size = ((last - first) + 1) * util_bitcount(writemask);
 727
 728                         /* If the array has more than 16 elements, store it
 729                          * in memory using an alloca that spans the entire
 730                          * array.
 731                          *
 732                          * Otherwise, store each array element individually.
 733                          * We will then generate vectors (per-channel, up to
 734                          * <16 x float> if the usagemask is a single bit) for
 735                          * indirect addressing.
 736                          *
 737                          * Note that 16 is the number of vector elements that
 738                          * LLVM will store in a register, so theoretically an
 739                          * array with up to 4 * 16 = 64 elements could be
 740                          * handled this way, but whether that's a good idea
 741                          * depends on VGPR register pressure elsewhere.
 742                          *
 743                          * FIXME: We shouldn't need to have the non-alloca
 744                          * code path for arrays. LLVM should be smart enough to
 745                          * promote allocas into registers when profitable.
 746                          */
 747                         if (array_size > 16 ||
 748                             !ctx->screen->llvm_has_working_vgpr_indexing) {
 749                                 array_alloca = lp_build_alloca_undef(&ctx->gallivm,
 750                                         LLVMArrayType(ctx->f32,
 751                                                       array_size), "array");
 752                                 ctx->temp_array_allocas[id] = array_alloca;
 753                         }
 754                 }
 755
 756                 if (!ctx->temps_count) {
 757                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 758                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 759                 }
 760                 if (!array_alloca) {
 761                         for (i = 0; i < decl_size; ++i) {
 762 #ifdef DEBUG
 763                                 snprintf(name, sizeof(name), "TEMP%d.%c",
 764                                          first + i / 4, "xyzw"[i % 4]);
 765 #endif
 766                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
 767                                         lp_build_alloca_undef(&ctx->gallivm,
 768                                                               ctx->f32,
 769                                                               name);
 770                         }
 771                 } else {
 772                         LLVMValueRef idxs[2] = {
 773                                 ctx->i32_0,
 774                                 NULL
 775                         };
 776                         unsigned j = 0;
 777
 778                         if (writemask != TGSI_WRITEMASK_XYZW &&
 779                             !ctx->undef_alloca) {
 780                                 /* Create a dummy alloca. We use it so that we
 781                                  * have a pointer that is safe to load from if
 782                                  * a shader ever reads from a channel that
 783                                  * it never writes to.
 784                                  */
 785                                 ctx->undef_alloca = lp_build_alloca_undef(
 786                                         &ctx->gallivm,
 787                                         ctx->f32, "undef");
 788                         }
 789
 790                         for (i = 0; i < decl_size; ++i) {
 791                                 LLVMValueRef ptr;
 792                                 if (writemask & (1 << (i % 4))) {
 793 #ifdef DEBUG
 794                                         snprintf(name, sizeof(name), "TEMP%d.%c",
 795                                                  first + i / 4, "xyzw"[i % 4]);
 796 #endif
 797                                         idxs[1] = LLVMConstInt(ctx->i32, j, 0);
 798                                         ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
 799                                         j++;
 800                                 } else {
 801                                         ptr = ctx->undef_alloca;
 802                                 }
 803                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 804                         }
 805                 }
 806                 break;
 807         }
 808         case TGSI_FILE_INPUT:
 809         {
 810                 unsigned idx;
 811                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 812                         if (ctx->load_input &&
 813                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
 814                                 ctx->input_decls[idx] = *decl;
 815                                 ctx->input_decls[idx].Range.First = idx;
 816                                 ctx->input_decls[idx].Range.Last = idx;
 817                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
 818
 819                                 if (si_preload_fs_inputs(ctx) ||
 820                                     bld_base->info->processor != PIPE_SHADER_FRAGMENT)
 821                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
 822                                                         &ctx->inputs[idx * 4]);
 823                         }
 824                 }
 825         }
 826         break;
 827
 828         case TGSI_FILE_SYSTEM_VALUE:
 829         {
 830                 unsigned idx;
 831                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 832                         si_load_system_value(ctx, idx, decl);
 833                 }
 834         }
 835         break;
 836
 837         case TGSI_FILE_OUTPUT:
 838         {
 839                 char name[16] = "";
 840                 unsigned idx;
 841                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 842                         unsigned chan;
 843                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 844                         if (ctx->outputs[idx][0])
 845                                 continue;
 846                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 847 #ifdef DEBUG
 848                                 snprintf(name, sizeof(name), "OUT%d.%c",
 849                                          idx, "xyzw"[chan % 4]);
 850 #endif
 851                                 ctx->outputs[idx][chan] = lp_build_alloca_undef(
 852                                         &ctx->gallivm,
 853                                         ctx->f32, name);
 854                         }
 855                 }
 856                 break;
 857         }
 858
 859         case TGSI_FILE_MEMORY:
 860                 si_declare_compute_memory(ctx, decl);
 861                 break;
 862
 863         default:
 864                 break;
 865         }
 866 }
 867
 868 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 869                         const struct tgsi_full_instruction *inst,
 870                         const struct tgsi_opcode_info *info,
 871                         unsigned index,
 872                         LLVMValueRef dst[4])
 873 {
 874         struct si_shader_context *ctx = si_shader_context(bld_base);
 875         struct gallivm_state *gallivm = &ctx->gallivm;
 876         const struct tgsi_full_dst_register *reg = &inst->Dst[index];
 877         LLVMBuilderRef builder = ctx->gallivm.builder;
 878         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 879         bool is_vec_store = false;
 880         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
 881
 882         if (dst[0]) {
 883                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 884                 is_vec_store = (k == LLVMVectorTypeKind);
 885         }
 886
 887         if (is_vec_store) {
 888                 LLVMValueRef values[4] = {};
 889                 uint32_t writemask = reg->Register.WriteMask;
 890                 while (writemask) {
 891                         unsigned chan = u_bit_scan(&writemask);
 892                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
 893                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 894                                                         dst[0], index, "");
 895                 }
 896                 bld_base->emit_store(bld_base, inst, info, index, values);
 897                 return;
 898         }
 899
 900         uint32_t writemask = reg->Register.WriteMask;
 901         while (writemask) {
 902                 unsigned chan_index = u_bit_scan(&writemask);
 903                 LLVMValueRef value = dst[chan_index];
 904
 905                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 906                         continue;
 907                 if (inst->Instruction.Saturate)
 908                         value = ac_build_clamp(&ctx->ac, value);
 909
 910                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 911                         temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 912                         LLVMBuildStore(builder, value, temp_ptr);
 913                         continue;
 914                 }
 915
 916                 if (!tgsi_type_is_64bit(dtype))
 917                         value = ac_to_float(&ctx->ac, value);
 918
 919                 if (reg->Register.Indirect) {
 920                         unsigned file = reg->Register.File;
 921                         unsigned reg_index = reg->Register.Index;
 922                         store_value_to_array(bld_base, value, file, chan_index,
 923                                              reg_index, &reg->Indirect);
 924                 } else {
 925                         switch(reg->Register.File) {
 926                         case TGSI_FILE_OUTPUT:
 927                                 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
 928                                 if (tgsi_type_is_64bit(dtype))
 929                                         temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
 930                                 break;
 931
 932                         case TGSI_FILE_TEMPORARY:
 933                         {
 934                                 if (reg->Register.Index >= ctx->temps_count)
 935                                         continue;
 936
 937                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
 938                                 if (tgsi_type_is_64bit(dtype))
 939                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
 940
 941                                 break;
 942                         }
 943                         default:
 944                                 return;
 945                         }
 946                         if (!tgsi_type_is_64bit(dtype))
 947                                 LLVMBuildStore(builder, value, temp_ptr);
 948                         else {
 949                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
 950                                                                     LLVMVectorType(ctx->i32, 2), "");
 951                                 LLVMValueRef val2;
 952                                 value = LLVMBuildExtractElement(builder, ptr,
 953                                                                 ctx->i32_0, "");
 954                                 val2 = LLVMBuildExtractElement(builder, ptr,
 955                                                                ctx->i32_1, "");
 956
 957                                 LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
 958                                 LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
 959                         }
 960                 }
 961         }
 962 }
 963
 964 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
 965 {
 966         char buf[32];
 967         /* Subtract 1 so that the number shown is that of the corresponding
 968          * opcode in the TGSI dump, e.g. an if block has the same suffix as
 969          * the instruction number of the corresponding TGSI IF.
 970          */
 971         snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
 972         LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
 973 }
 974
 975 /* Append a basic block at the level of the parent flow.
 976  */
 977 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
 978                                             const char *name)
 979 {
 980         struct gallivm_state *gallivm = &ctx->gallivm;
 981
 982         assert(ctx->flow_depth >= 1);
 983
 984         if (ctx->flow_depth >= 2) {
 985                 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
 986
 987                 return LLVMInsertBasicBlockInContext(gallivm->context,
 988                                                      flow->next_block, name);
 989         }
 990
 991         return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
 992 }
 993
 994 /* Emit a branch to the given default target for the current block if
 995  * applicable -- that is, if the current block does not already contain a
 996  * branch from a break or continue.
 997  */
 998 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
 999 {
1000         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1001                  LLVMBuildBr(builder, target);
1002 }
1003
1004 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1005                          struct lp_build_tgsi_context *bld_base,
1006                          struct lp_build_emit_data *emit_data)
1007 {
1008         struct si_shader_context *ctx = si_shader_context(bld_base);
1009         struct gallivm_state *gallivm = &ctx->gallivm;
1010         struct si_llvm_flow *flow = push_flow(ctx);
1011         flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1012         flow->next_block = append_basic_block(ctx, "ENDLOOP");
1013         set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1014         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1015         LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1016 }
1017
1018 static void brk_emit(const struct lp_build_tgsi_action *action,
1019                      struct lp_build_tgsi_context *bld_base,
1020                      struct lp_build_emit_data *emit_data)
1021 {
1022         struct si_shader_context *ctx = si_shader_context(bld_base);
1023         struct gallivm_state *gallivm = &ctx->gallivm;
1024         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1025
1026         LLVMBuildBr(gallivm->builder, flow->next_block);
1027 }
1028
1029 static void cont_emit(const struct lp_build_tgsi_action *action,
1030                       struct lp_build_tgsi_context *bld_base,
1031                       struct lp_build_emit_data *emit_data)
1032 {
1033         struct si_shader_context *ctx = si_shader_context(bld_base);
1034         struct gallivm_state *gallivm = &ctx->gallivm;
1035         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1036
1037         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1038 }
1039
1040 static void else_emit(const struct lp_build_tgsi_action *action,
1041                       struct lp_build_tgsi_context *bld_base,
1042                       struct lp_build_emit_data *emit_data)
1043 {
1044         struct si_shader_context *ctx = si_shader_context(bld_base);
1045         struct gallivm_state *gallivm = &ctx->gallivm;
1046         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1047         LLVMBasicBlockRef endif_block;
1048
1049         assert(!current_branch->loop_entry_block);
1050
1051         endif_block = append_basic_block(ctx, "ENDIF");
1052         emit_default_branch(gallivm->builder, endif_block);
1053
1054         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1055         set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1056
1057         current_branch->next_block = endif_block;
1058 }
1059
1060 static void endif_emit(const struct lp_build_tgsi_action *action,
1061                        struct lp_build_tgsi_context *bld_base,
1062                        struct lp_build_emit_data *emit_data)
1063 {
1064         struct si_shader_context *ctx = si_shader_context(bld_base);
1065         struct gallivm_state *gallivm = &ctx->gallivm;
1066         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1067
1068         assert(!current_branch->loop_entry_block);
1069
1070         emit_default_branch(gallivm->builder, current_branch->next_block);
1071         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1072         set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1073
1074         ctx->flow_depth--;
1075 }
1076
1077 static void endloop_emit(const struct lp_build_tgsi_action *action,
1078                          struct lp_build_tgsi_context *bld_base,
1079                          struct lp_build_emit_data *emit_data)
1080 {
1081         struct si_shader_context *ctx = si_shader_context(bld_base);
1082         struct gallivm_state *gallivm = &ctx->gallivm;
1083         struct si_llvm_flow *current_loop = get_current_flow(ctx);
1084
1085         assert(current_loop->loop_entry_block);
1086
1087         emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1088
1089         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1090         set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1091         ctx->flow_depth--;
1092 }
1093
1094 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1095                          struct lp_build_tgsi_context *bld_base,
1096                          struct lp_build_emit_data *emit_data,
1097                          LLVMValueRef cond)
1098 {
1099         struct si_shader_context *ctx = si_shader_context(bld_base);
1100         struct gallivm_state *gallivm = &ctx->gallivm;
1101         struct si_llvm_flow *flow = push_flow(ctx);
1102         LLVMBasicBlockRef if_block;
1103
1104         if_block = append_basic_block(ctx, "IF");
1105         flow->next_block = append_basic_block(ctx, "ELSE");
1106         set_basicblock_name(if_block, "if", bld_base->pc);
1107         LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1108         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1109 }
1110
1111 static void if_emit(const struct lp_build_tgsi_action *action,
1112                     struct lp_build_tgsi_context *bld_base,
1113                     struct lp_build_emit_data *emit_data)
1114 {
1115         struct gallivm_state *gallivm = bld_base->base.gallivm;
1116         LLVMValueRef cond;
1117
1118         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1119                         emit_data->args[0],
1120                         bld_base->base.zero, "");
1121
1122         if_cond_emit(action, bld_base, emit_data, cond);
1123 }
1124
1125 static void uif_emit(const struct lp_build_tgsi_action *action,
1126                      struct lp_build_tgsi_context *bld_base,
1127                      struct lp_build_emit_data *emit_data)
1128 {
1129         struct si_shader_context *ctx = si_shader_context(bld_base);
1130         struct gallivm_state *gallivm = bld_base->base.gallivm;
1131         LLVMValueRef cond;
1132
1133         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1134                 ac_to_integer(&ctx->ac, emit_data->args[0]),
1135                         bld_base->int_bld.zero, "");
1136
1137         if_cond_emit(action, bld_base, emit_data, cond);
1138 }
1139
1140 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1141                            const struct tgsi_full_immediate *imm)
1142 {
1143         unsigned i;
1144         struct si_shader_context *ctx = si_shader_context(bld_base);
1145
1146         for (i = 0; i < 4; ++i) {
1147                 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1148                                 LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
1149         }
1150
1151         ctx->imms_num++;
1152 }
1153
1154 void si_llvm_context_init(struct si_shader_context *ctx,
1155                           struct si_screen *sscreen,
1156                           LLVMTargetMachineRef tm)
1157 {
1158         struct lp_type type;
1159
1160         /* Initialize the gallivm object:
1161          * We are only using the module, context, and builder fields of this struct.
1162          * This should be enough for us to be able to pass our gallivm struct to the
1163          * helper functions in the gallivm module.
1164          */
1165         memset(ctx, 0, sizeof(*ctx));
1166         ctx->screen = sscreen;
1167         ctx->tm = tm;
1168
1169         ctx->gallivm.context = LLVMContextCreate();
1170         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1171                                                 ctx->gallivm.context);
1172         LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1173
1174         LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1175         char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1176         LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1177         LLVMDisposeTargetData(data_layout);
1178         LLVMDisposeMessage(data_layout_str);
1179
1180         bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1181         enum lp_float_mode float_mode =
1182                 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1183                                 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1184
1185         ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1186                                                  float_mode);
1187
1188         ac_llvm_context_init(&ctx->ac, ctx->gallivm.context, sscreen->b.chip_class);
1189         ctx->ac.module = ctx->gallivm.module;
1190         ctx->ac.builder = ctx->gallivm.builder;
1191
1192         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1193
1194         type.floating = true;
1195         type.fixed = false;
1196         type.sign = true;
1197         type.norm = false;
1198         type.width = 32;
1199         type.length = 1;
1200
1201         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1202         lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1203         lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1204         type.width *= 2;
1205         lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1206         lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1207         lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1208
1209         bld_base->soa = 1;
1210         bld_base->emit_swizzle = emit_swizzle;
1211         bld_base->emit_declaration = emit_declaration;
1212         bld_base->emit_immediate = emit_immediate;
1213
1214         /* metadata allowing 2.5 ULP */
1215         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1216                                                        "fpmath", 6);
1217         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1218         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1219                                                      &arg, 1);
1220
1221         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1222         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1223         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1224         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1225         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1226         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1227         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1228         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1229
1230         si_shader_context_init_alu(&ctx->bld_base);
1231         si_shader_context_init_mem(ctx);
1232
1233         ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1234         ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1235         ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1236         ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1237         ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1238         ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1239         ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1240         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1241         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1242         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1243         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1244
1245         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1246         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1247 }
1248
1249 /* Set the context to a certain TGSI shader. Can be called repeatedly
1250  * to change the shader. */
1251 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1252                               struct si_shader *shader)
1253 {
1254         const struct tgsi_shader_info *info = NULL;
1255         const struct tgsi_token *tokens = NULL;
1256
1257         if (shader && shader->selector) {
1258                 info = &shader->selector->info;
1259                 tokens = shader->selector->tokens;
1260         }
1261
1262         ctx->shader = shader;
1263         ctx->type = info ? info->processor : -1;
1264         ctx->bld_base.info = info;
1265
1266         /* Clean up the old contents. */
1267         FREE(ctx->temp_arrays);
1268         ctx->temp_arrays = NULL;
1269         FREE(ctx->temp_array_allocas);
1270         ctx->temp_array_allocas = NULL;
1271
1272         FREE(ctx->imms);
1273         ctx->imms = NULL;
1274         ctx->imms_num = 0;
1275
1276         FREE(ctx->temps);
1277         ctx->temps = NULL;
1278         ctx->temps_count = 0;
1279
1280         if (!info || !tokens)
1281                 return;
1282
1283         if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1284                 int size = info->array_max[TGSI_FILE_TEMPORARY];
1285
1286                 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1287                 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1288
1289                 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1290                                  ctx->temp_arrays);
1291         }
1292         if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1293                 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1294                 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1295         }
1296
1297         /* Re-set these to start with a clean slate. */
1298         ctx->bld_base.num_instructions = 0;
1299         ctx->bld_base.pc = 0;
1300         memset(ctx->outputs, 0, sizeof(ctx->outputs));
1301
1302         ctx->bld_base.emit_store = si_llvm_emit_store;
1303         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1304         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1305         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1306         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1307         ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1308
1309         ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1310         ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1311         ctx->num_samplers = util_last_bit(info->samplers_declared);
1312         ctx->num_images = util_last_bit(info->images_declared);
1313 }
1314
1315 void si_llvm_create_func(struct si_shader_context *ctx,
1316                          const char *name,
1317                          LLVMTypeRef *return_types, unsigned num_return_elems,
1318                          LLVMTypeRef *ParamTypes, unsigned ParamCount)
1319 {
1320         LLVMTypeRef main_fn_type, ret_type;
1321         LLVMBasicBlockRef main_fn_body;
1322         enum si_llvm_calling_convention call_conv;
1323         unsigned real_shader_type;
1324
1325         if (num_return_elems)
1326                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1327                                                    return_types,
1328                                                    num_return_elems, true);
1329         else
1330                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1331
1332         /* Setup the function */
1333         ctx->return_type = ret_type;
1334         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1335         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1336         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1337                         ctx->main_fn, "main_body");
1338         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1339
1340         real_shader_type = ctx->type;
1341
1342         /* LS is merged into HS (TCS), and ES is merged into GS. */
1343         if (ctx->screen->b.chip_class >= GFX9) {
1344                 if (ctx->shader->key.as_ls)
1345                         real_shader_type = PIPE_SHADER_TESS_CTRL;
1346                 else if (ctx->shader->key.as_es)
1347                         real_shader_type = PIPE_SHADER_GEOMETRY;
1348         }
1349
1350         switch (real_shader_type) {
1351         case PIPE_SHADER_VERTEX:
1352         case PIPE_SHADER_TESS_EVAL:
1353                 call_conv = RADEON_LLVM_AMDGPU_VS;
1354                 break;
1355         case PIPE_SHADER_TESS_CTRL:
1356                 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1357                                                   RADEON_LLVM_AMDGPU_VS;
1358                 break;
1359         case PIPE_SHADER_GEOMETRY:
1360                 call_conv = RADEON_LLVM_AMDGPU_GS;
1361                 break;
1362         case PIPE_SHADER_FRAGMENT:
1363                 call_conv = RADEON_LLVM_AMDGPU_PS;
1364                 break;
1365         case PIPE_SHADER_COMPUTE:
1366                 call_conv = RADEON_LLVM_AMDGPU_CS;
1367                 break;
1368         default:
1369                 unreachable("Unhandle shader type");
1370         }
1371
1372         LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1373 }
1374
1375 void si_llvm_optimize_module(struct si_shader_context *ctx)
1376 {
1377         struct gallivm_state *gallivm = &ctx->gallivm;
1378         const char *triple = LLVMGetTarget(gallivm->module);
1379         LLVMTargetLibraryInfoRef target_library_info;
1380
1381         /* Dump LLVM IR before any optimization passes */
1382         if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1383             si_can_dump_shader(&ctx->screen->b, ctx->type))
1384                 LLVMDumpModule(ctx->gallivm.module);
1385
1386         /* Create the pass manager */
1387         gallivm->passmgr = LLVMCreatePassManager();
1388
1389         target_library_info = gallivm_create_target_library_info(triple);
1390         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1391
1392         if (si_extra_shader_checks(&ctx->screen->b, ctx->type))
1393                 LLVMAddVerifierPass(gallivm->passmgr);
1394
1395         LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1396
1397         /* This pass should eliminate all the load and store instructions */
1398         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1399
1400         /* Add some optimization passes */
1401         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1402         LLVMAddLICMPass(gallivm->passmgr);
1403         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1404         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1405 #if HAVE_LLVM >= 0x0400
1406         /* This is recommended by the instruction combining pass. */
1407         LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
1408 #endif
1409         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1410
1411         /* Run the pass */
1412         LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1413
1414         LLVMDisposeBuilder(gallivm->builder);
1415         LLVMDisposePassManager(gallivm->passmgr);
1416         gallivm_dispose_target_library_info(target_library_info);
1417 }
1418
1419 void si_llvm_dispose(struct si_shader_context *ctx)
1420 {
1421         LLVMDisposeModule(ctx->gallivm.module);
1422         LLVMContextDispose(ctx->gallivm.context);
1423         FREE(ctx->temp_arrays);
1424         ctx->temp_arrays = NULL;
1425         FREE(ctx->temp_array_allocas);
1426         ctx->temp_array_allocas = NULL;
1427         FREE(ctx->temps);
1428         ctx->temps = NULL;
1429         ctx->temps_count = 0;
1430         FREE(ctx->imms);
1431         ctx->imms = NULL;
1432         ctx->imms_num = 0;
1433         FREE(ctx->flow);
1434         ctx->flow = NULL;
1435         ctx->flow_depth_max = 0;
1436 }