ac: normalize build helper names
[mesa.git] src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
45 */
46 struct si_llvm_flow {
47 /* Loop exit or next part of if/else/endif. */
48 LLVMBasicBlockRef next_block;
49 LLVMBasicBlockRef loop_entry_block;
50 };
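/* How these fields are used by the emitters below: for an IF, next_block
 * initially points at the ELSE block and is redirected to ENDIF by
 * else_emit(), while loop_entry_block stays NULL; for a loop,
 * loop_entry_block is the LOOP header and next_block is the ENDLOOP block
 * that BRK branches to.
 */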
51
52 #define CPU_STRING_LEN 30
53 #define FS_STRING_LEN 30
54 #define TRIPLE_STRING_LEN 7
55
56 /**
57 * Shader types for the LLVM backend.
58 */
59 enum si_llvm_shader_type {
60 RADEON_LLVM_SHADER_PS = 0,
61 RADEON_LLVM_SHADER_VS = 1,
62 RADEON_LLVM_SHADER_GS = 2,
63 RADEON_LLVM_SHADER_CS = 3,
64 };
65
66 enum si_llvm_calling_convention {
67 RADEON_LLVM_AMDGPU_VS = 87,
68 RADEON_LLVM_AMDGPU_GS = 88,
69 RADEON_LLVM_AMDGPU_PS = 89,
70 RADEON_LLVM_AMDGPU_CS = 90,
71 };
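/* These values match LLVM's CallingConv::AMDGPU_VS/GS/PS/CS IDs, which is
 * why they can be passed directly to LLVMSetFunctionCallConv() below.
 */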
72
73 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
74 {
75 char str[16];
76
77 snprintf(str, sizeof(str), "%i", value);
78 LLVMAddTargetDependentFunctionAttr(F, name, str);
79 }
80
81 /**
82 * Set the shader type we want to compile
83 *
84 * @param type shader type to set
85 */
86 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
87 {
88 enum si_llvm_shader_type llvm_type;
89 enum si_llvm_calling_convention calling_conv;
90
91 switch (type) {
92 case PIPE_SHADER_VERTEX:
93 case PIPE_SHADER_TESS_CTRL:
94 case PIPE_SHADER_TESS_EVAL:
95 llvm_type = RADEON_LLVM_SHADER_VS;
96 calling_conv = RADEON_LLVM_AMDGPU_VS;
97 break;
98 case PIPE_SHADER_GEOMETRY:
99 llvm_type = RADEON_LLVM_SHADER_GS;
100 calling_conv = RADEON_LLVM_AMDGPU_GS;
101 break;
102 case PIPE_SHADER_FRAGMENT:
103 llvm_type = RADEON_LLVM_SHADER_PS;
104 calling_conv = RADEON_LLVM_AMDGPU_PS;
105 break;
106 case PIPE_SHADER_COMPUTE:
107 llvm_type = RADEON_LLVM_SHADER_CS;
108 calling_conv = RADEON_LLVM_AMDGPU_CS;
109 break;
110 default:
111 		unreachable("Unhandled shader type");
112 }
113
114 if (HAVE_LLVM >= 0x309)
115 LLVMSetFunctionCallConv(F, calling_conv);
116 else
117 si_llvm_add_attribute(F, "ShaderType", llvm_type);
118 }
119
120 static void init_amdgpu_target(void)
121 {
122 gallivm_init_llvm_targets();
123 #if HAVE_LLVM < 0x0307
124 LLVMInitializeR600TargetInfo();
125 LLVMInitializeR600Target();
126 LLVMInitializeR600TargetMC();
127 LLVMInitializeR600AsmPrinter();
128 #else
129 LLVMInitializeAMDGPUTargetInfo();
130 LLVMInitializeAMDGPUTarget();
131 LLVMInitializeAMDGPUTargetMC();
132 LLVMInitializeAMDGPUAsmPrinter();
133
134 #endif
135 }
136
137 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
138
139 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
140 {
141 LLVMTargetRef target = NULL;
142 char *err_message = NULL;
143
144 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
145
146 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
147 fprintf(stderr, "Cannot find target for triple %s ", triple);
148 if (err_message) {
149 fprintf(stderr, "%s\n", err_message);
150 }
151 LLVMDisposeMessage(err_message);
152 return NULL;
153 }
154 return target;
155 }
156
157 struct si_llvm_diagnostics {
158 struct pipe_debug_callback *debug;
159 unsigned retval;
160 };
161
162 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
163 {
164 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
165 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
166 char *description = LLVMGetDiagInfoDescription(di);
167 const char *severity_str = NULL;
168
169 switch (severity) {
170 case LLVMDSError:
171 severity_str = "error";
172 break;
173 case LLVMDSWarning:
174 severity_str = "warning";
175 break;
176 case LLVMDSRemark:
177 severity_str = "remark";
178 break;
179 case LLVMDSNote:
180 severity_str = "note";
181 break;
182 default:
183 severity_str = "unknown";
184 }
185
186 pipe_debug_message(diag->debug, SHADER_INFO,
187 "LLVM diagnostic (%s): %s", severity_str, description);
188
189 if (severity == LLVMDSError) {
190 diag->retval = 1;
191 		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
192 }
193
194 LLVMDisposeMessage(description);
195 }
196
197 /**
198 * Compile an LLVM module to machine code.
199 *
200 * @returns 0 for success, 1 for failure
201 */
202 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
203 LLVMTargetMachineRef tm,
204 struct pipe_debug_callback *debug)
205 {
206 struct si_llvm_diagnostics diag;
207 char *err;
208 LLVMContextRef llvm_ctx;
209 LLVMMemoryBufferRef out_buffer;
210 unsigned buffer_size;
211 const char *buffer_data;
212 LLVMBool mem_err;
213
214 diag.debug = debug;
215 diag.retval = 0;
216
217 	/* Set up the diagnostic handler */
218 llvm_ctx = LLVMGetModuleContext(M);
219
220 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
221
222 	/* Compile IR */
223 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
224 &out_buffer);
225
226 /* Process Errors/Warnings */
227 if (mem_err) {
228 fprintf(stderr, "%s: %s", __FUNCTION__, err);
229 pipe_debug_message(debug, SHADER_INFO,
230 "LLVM emit error: %s", err);
231 FREE(err);
232 diag.retval = 1;
233 goto out;
234 }
235
236 	/* Extract shader code */
237 buffer_size = LLVMGetBufferSize(out_buffer);
238 buffer_data = LLVMGetBufferStart(out_buffer);
239
240 ac_elf_read(buffer_data, buffer_size, binary);
241
242 /* Clean up */
243 LLVMDisposeMemoryBuffer(out_buffer);
244
245 out:
246 if (diag.retval != 0)
247 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
248 return diag.retval;
249 }
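/* A minimal usage sketch for si_llvm_compile() above (hypothetical caller,
 * variable names assumed):
 *
 *   struct ac_shader_binary binary = {};
 *   if (si_llvm_compile(ctx->gallivm.module, &binary, ctx->tm, debug))
 *       return -1; // emit or diagnostic error, already reported above
 *
 * On success, the sections of the in-memory ELF object have been parsed
 * into binary by ac_elf_read().
 */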
250
251 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
252 enum tgsi_opcode_type type)
253 {
254 LLVMContextRef ctx = bld_base->base.gallivm->context;
255
256 switch (type) {
257 case TGSI_TYPE_UNSIGNED:
258 case TGSI_TYPE_SIGNED:
259 return LLVMInt32TypeInContext(ctx);
260 case TGSI_TYPE_UNSIGNED64:
261 case TGSI_TYPE_SIGNED64:
262 return LLVMInt64TypeInContext(ctx);
263 case TGSI_TYPE_DOUBLE:
264 return LLVMDoubleTypeInContext(ctx);
265 case TGSI_TYPE_UNTYPED:
266 case TGSI_TYPE_FLOAT:
267 return LLVMFloatTypeInContext(ctx);
268 default: break;
269 }
270 return 0;
271 }
272
273 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
274 enum tgsi_opcode_type type, LLVMValueRef value)
275 {
276 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
277 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
278
279 if (dst_type)
280 return LLVMBuildBitCast(builder, value, dst_type, "");
281 else
282 return value;
283 }
284
285 /**
286  * Return a value equal to the given i32 \p index if it lies in [0,num), or
287  * an unspecified value that still lies in the same interval otherwise.
288 */
289 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
290 LLVMValueRef index,
291 unsigned num)
292 {
293 struct gallivm_state *gallivm = &ctx->gallivm;
294 LLVMBuilderRef builder = gallivm->builder;
295 LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
296 LLVMValueRef cc;
297
298 if (util_is_power_of_two(num)) {
299 index = LLVMBuildAnd(builder, index, c_max, "");
300 } else {
301 /* In theory, this MAX pattern should result in code that is
302 * as good as the bit-wise AND above.
303 *
304 * In practice, LLVM generates worse code (at the time of
305 * writing), because its value tracking is not strong enough.
306 */
307 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
308 index = LLVMBuildSelect(builder, cc, index, c_max, "");
309 }
310
311 return index;
312 }
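/* For example: with num = 8 (a power of two) an out-of-range index of 9 is
 * masked to 9 & 7 = 1, while with num = 6 it is clamped to 5. Either way the
 * access stays inside the array; which in-range element an invalid index
 * maps to is deliberately left unspecified.
 */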
313
314 static struct si_llvm_flow *
315 get_current_flow(struct si_shader_context *ctx)
316 {
317 if (ctx->flow_depth > 0)
318 return &ctx->flow[ctx->flow_depth - 1];
319 return NULL;
320 }
321
322 static struct si_llvm_flow *
323 get_innermost_loop(struct si_shader_context *ctx)
324 {
325 for (unsigned i = ctx->flow_depth; i > 0; --i) {
326 if (ctx->flow[i - 1].loop_entry_block)
327 return &ctx->flow[i - 1];
328 }
329 return NULL;
330 }
331
332 static struct si_llvm_flow *
333 push_flow(struct si_shader_context *ctx)
334 {
335 struct si_llvm_flow *flow;
336
337 if (ctx->flow_depth >= ctx->flow_depth_max) {
338 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
339 ctx->flow = REALLOC(ctx->flow,
340 ctx->flow_depth_max * sizeof(*ctx->flow),
341 new_max * sizeof(*ctx->flow));
342 ctx->flow_depth_max = new_max;
343 }
344
345 flow = &ctx->flow[ctx->flow_depth];
346 ctx->flow_depth++;
347
348 flow->next_block = NULL;
349 flow->loop_entry_block = NULL;
350 return flow;
351 }
352
353 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
354 LLVMValueRef value,
355 unsigned swizzle_x,
356 unsigned swizzle_y,
357 unsigned swizzle_z,
358 unsigned swizzle_w)
359 {
360 LLVMValueRef swizzles[4];
361 LLVMTypeRef i32t =
362 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
363
364 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
365 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
366 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
367 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
368
369 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
370 value,
371 LLVMGetUndef(LLVMTypeOf(value)),
372 LLVMConstVector(swizzles, 4), "");
373 }
374
375 /**
376  * Return the 1-based ID of the array covering the given temporary register
377  * index, or 0 if the register is not part of a declared array.
378 */
379 static unsigned
380 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
381 unsigned reg_index,
382 const struct tgsi_ind_register *reg)
383 {
384 struct si_shader_context *ctx = si_shader_context(bld_base);
385 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
386 unsigned i;
387
388 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
389 return reg->ArrayID;
390
391 for (i = 0; i < num_arrays; i++) {
392 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
393
394 if (reg_index >= array->range.First && reg_index <= array->range.Last)
395 return i + 1;
396 }
397
398 return 0;
399 }
400
401 static struct tgsi_declaration_range
402 get_array_range(struct lp_build_tgsi_context *bld_base,
403 unsigned File, unsigned reg_index,
404 const struct tgsi_ind_register *reg)
405 {
406 struct si_shader_context *ctx = si_shader_context(bld_base);
407 struct tgsi_declaration_range range;
408
409 if (File == TGSI_FILE_TEMPORARY) {
410 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
411 if (array_id)
412 return ctx->temp_arrays[array_id - 1].range;
413 }
414
415 range.First = 0;
416 range.Last = bld_base->info->file_max[File];
417 return range;
418 }
419
420 static LLVMValueRef
421 emit_array_index(struct si_shader_context *ctx,
422 const struct tgsi_ind_register *reg,
423 unsigned offset)
424 {
425 struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
426
427 if (!reg) {
428 return lp_build_const_int32(gallivm, offset);
429 }
430 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
431 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
432 }
433
434 /**
435 * For indirect registers, construct a pointer directly to the requested
436 * element using getelementptr if possible.
437 *
438 * Returns NULL if the insertelement/extractelement fallback for array access
439 * must be used.
440 */
441 static LLVMValueRef
442 get_pointer_into_array(struct si_shader_context *ctx,
443 unsigned file,
444 unsigned swizzle,
445 unsigned reg_index,
446 const struct tgsi_ind_register *reg_indirect)
447 {
448 unsigned array_id;
449 struct tgsi_array_info *array;
450 struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
451 LLVMBuilderRef builder = gallivm->builder;
452 LLVMValueRef idxs[2];
453 LLVMValueRef index;
454 LLVMValueRef alloca;
455
456 if (file != TGSI_FILE_TEMPORARY)
457 return NULL;
458
459 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
460 if (!array_id)
461 return NULL;
462
463 alloca = ctx->temp_array_allocas[array_id - 1];
464 if (!alloca)
465 return NULL;
466
467 array = &ctx->temp_arrays[array_id - 1];
468
469 if (!(array->writemask & (1 << swizzle)))
470 return ctx->undef_alloca;
471
472 index = emit_array_index(ctx, reg_indirect,
473 reg_index - ctx->temp_arrays[array_id - 1].range.First);
474
475 /* Ensure that the index is within a valid range, to guard against
476 * VM faults and overwriting critical data (e.g. spilled resource
477 * descriptors).
478 *
479 * TODO It should be possible to avoid the additional instructions
480 	 * if LLVM is changed so that it guarantees:
481 * 1. the scratch space descriptor isolates the current wave (this
482 * could even save the scratch offset SGPR at the cost of an
483 * additional SALU instruction)
484 * 2. the memory for allocas must be allocated at the _end_ of the
485 * scratch space (after spilled registers)
486 */
487 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
488
489 index = LLVMBuildMul(
490 builder, index,
491 lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
492 "");
493 index = LLVMBuildAdd(
494 builder, index,
495 lp_build_const_int32(
496 gallivm,
497 util_bitcount(array->writemask & ((1 << swizzle) - 1))),
498 "");
499 idxs[0] = ctx->bld_base.uint_bld.zero;
500 idxs[1] = index;
501 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
502 }
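/* A worked example of the index packing above: if the array's writemask is
 * .xz (channels 0 and 2), only two floats are stored per array element, so
 * element i, channel z lands in linear slot i * 2 + 1, i.e. i times
 * util_bitcount(writemask) plus the number of written channels below z.
 */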
503
504 LLVMValueRef
505 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
506 enum tgsi_opcode_type type,
507 LLVMValueRef ptr,
508 LLVMValueRef ptr2)
509 {
510 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
511 LLVMValueRef result;
512
513 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
514
515 result = LLVMBuildInsertElement(builder,
516 result,
517 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
518 bld_base->int_bld.zero, "");
519 result = LLVMBuildInsertElement(builder,
520 result,
521 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
522 bld_base->int_bld.one, "");
523 return bitcast(bld_base, type, result);
524 }
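/* In other words: both 32-bit halves are bitcast to i32, packed into a
 * <2 x i32> vector, and the final bitcast reinterprets that pair as a single
 * 64-bit value of the requested type (i64 or double).
 */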
525
526 static LLVMValueRef
527 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
528 unsigned File, enum tgsi_opcode_type type,
529 struct tgsi_declaration_range range,
530 unsigned swizzle)
531 {
532 struct si_shader_context *ctx = si_shader_context(bld_base);
533 struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
534
535 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
536
537 unsigned i, size = range.Last - range.First + 1;
538 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
539 LLVMValueRef result = LLVMGetUndef(vec);
540
541 struct tgsi_full_src_register tmp_reg = {};
542 tmp_reg.Register.File = File;
543
544 for (i = 0; i < size; ++i) {
545 tmp_reg.Register.Index = i + range.First;
546 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
547 result = LLVMBuildInsertElement(builder, result, temp,
548 lp_build_const_int32(gallivm, i), "array_vector");
549 }
550 return result;
551 }
552
553 static LLVMValueRef
554 load_value_from_array(struct lp_build_tgsi_context *bld_base,
555 unsigned file,
556 enum tgsi_opcode_type type,
557 unsigned swizzle,
558 unsigned reg_index,
559 const struct tgsi_ind_register *reg_indirect)
560 {
561 struct si_shader_context *ctx = si_shader_context(bld_base);
562 struct gallivm_state *gallivm = bld_base->base.gallivm;
563 LLVMBuilderRef builder = gallivm->builder;
564 LLVMValueRef ptr;
565
566 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
567 if (ptr) {
568 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
569 if (tgsi_type_is_64bit(type)) {
570 LLVMValueRef ptr_hi, val_hi;
571 ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
572 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
573 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
574 }
575
576 return val;
577 } else {
578 struct tgsi_declaration_range range =
579 get_array_range(bld_base, file, reg_index, reg_indirect);
580 LLVMValueRef index =
581 emit_array_index(ctx, reg_indirect, reg_index - range.First);
582 LLVMValueRef array =
583 emit_array_fetch(bld_base, file, type, range, swizzle);
584 return LLVMBuildExtractElement(builder, array, index, "");
585 }
586 }
587
588 static void
589 store_value_to_array(struct lp_build_tgsi_context *bld_base,
590 LLVMValueRef value,
591 unsigned file,
592 unsigned chan_index,
593 unsigned reg_index,
594 const struct tgsi_ind_register *reg_indirect)
595 {
596 struct si_shader_context *ctx = si_shader_context(bld_base);
597 struct gallivm_state *gallivm = bld_base->base.gallivm;
598 LLVMBuilderRef builder = gallivm->builder;
599 LLVMValueRef ptr;
600
601 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
602 if (ptr) {
603 LLVMBuildStore(builder, value, ptr);
604 } else {
605 unsigned i, size;
606 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
607 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
608 LLVMValueRef array =
609 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
610 LLVMValueRef temp_ptr;
611
612 array = LLVMBuildInsertElement(builder, array, value, index, "");
613
614 size = range.Last - range.First + 1;
615 for (i = 0; i < size; ++i) {
616 switch(file) {
617 case TGSI_FILE_OUTPUT:
618 temp_ptr = ctx->outputs[i + range.First][chan_index];
619 break;
620
621 case TGSI_FILE_TEMPORARY:
622 if (range.First + i >= ctx->temps_count)
623 continue;
624 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
625 break;
626
627 default:
628 continue;
629 }
630 value = LLVMBuildExtractElement(builder, array,
631 lp_build_const_int32(gallivm, i), "");
632 LLVMBuildStore(builder, value, temp_ptr);
633 }
634 }
635 }
636
637 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
638 * reload them at each use. This must be true if the shader is using
639 * derivatives and KILL, because KILL can leave the WQM and then a lazy
640 * input load isn't in the WQM anymore.
641 */
642 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
643 {
644 struct si_shader_selector *sel = ctx->shader->selector;
645
646 return sel->info.uses_derivatives &&
647 sel->info.uses_kill;
648 }
649
650 static LLVMValueRef
651 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
652 unsigned chan)
653 {
654 struct si_shader_context *ctx = si_shader_context(bld_base);
655
656 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
657 return ctx->outputs[index][chan];
658 }
659
660 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
661 const struct tgsi_full_src_register *reg,
662 enum tgsi_opcode_type type,
663 unsigned swizzle)
664 {
665 struct si_shader_context *ctx = si_shader_context(bld_base);
666 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
667 LLVMValueRef result = NULL, ptr, ptr2;
668
669 if (swizzle == ~0) {
670 LLVMValueRef values[TGSI_NUM_CHANNELS];
671 unsigned chan;
672 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
673 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
674 }
675 return lp_build_gather_values(bld_base->base.gallivm, values,
676 TGSI_NUM_CHANNELS);
677 }
678
679 if (reg->Register.Indirect) {
680 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
681 swizzle, reg->Register.Index, &reg->Indirect);
682 return bitcast(bld_base, type, load);
683 }
684
685 switch(reg->Register.File) {
686 case TGSI_FILE_IMMEDIATE: {
687 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
688 if (tgsi_type_is_64bit(type)) {
689 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
690 result = LLVMConstInsertElement(result,
691 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
692 bld_base->int_bld.zero);
693 result = LLVMConstInsertElement(result,
694 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
695 bld_base->int_bld.one);
696 return LLVMConstBitCast(result, ctype);
697 } else {
698 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
699 }
700 }
701
702 case TGSI_FILE_INPUT: {
703 unsigned index = reg->Register.Index;
704 LLVMValueRef input[4];
705
706 /* I don't think doing this for vertex shaders is beneficial.
707 * For those, we want to make sure the VMEM loads are executed
708 * only once. Fragment shaders don't care much, because
709 * v_interp instructions are much cheaper than VMEM loads.
710 */
711 if (!si_preload_fs_inputs(ctx) &&
712 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
713 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
714 else
715 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
716
717 result = input[swizzle];
718
719 if (tgsi_type_is_64bit(type)) {
720 ptr = result;
721 ptr2 = input[swizzle + 1];
722 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
723 }
724 break;
725 }
726
727 case TGSI_FILE_TEMPORARY:
728 if (reg->Register.Index >= ctx->temps_count)
729 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
730 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
731 if (tgsi_type_is_64bit(type)) {
732 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
733 return si_llvm_emit_fetch_64bit(bld_base, type,
734 LLVMBuildLoad(builder, ptr, ""),
735 LLVMBuildLoad(builder, ptr2, ""));
736 }
737 result = LLVMBuildLoad(builder, ptr, "");
738 break;
739
740 case TGSI_FILE_OUTPUT:
741 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
742 if (tgsi_type_is_64bit(type)) {
743 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
744 return si_llvm_emit_fetch_64bit(bld_base, type,
745 LLVMBuildLoad(builder, ptr, ""),
746 LLVMBuildLoad(builder, ptr2, ""));
747 }
748 result = LLVMBuildLoad(builder, ptr, "");
749 break;
750
751 default:
752 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
753 }
754
755 return bitcast(bld_base, type, result);
756 }
757
758 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
759 const struct tgsi_full_src_register *reg,
760 enum tgsi_opcode_type type,
761 unsigned swizzle)
762 {
763 struct si_shader_context *ctx = si_shader_context(bld_base);
764 struct gallivm_state *gallivm = bld_base->base.gallivm;
765
766 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
767 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
768 cval = LLVMBuildExtractElement(gallivm->builder, cval,
769 lp_build_const_int32(gallivm, swizzle), "");
770 }
771 return bitcast(bld_base, type, cval);
772 }
773
774 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
775 const struct tgsi_full_declaration *decl)
776 {
777 struct si_shader_context *ctx = si_shader_context(bld_base);
778 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
779 unsigned first, last, i;
780 switch(decl->Declaration.File) {
781 case TGSI_FILE_ADDRESS:
782 {
783 unsigned idx;
784 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
785 unsigned chan;
786 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
787 ctx->addrs[idx][chan] = lp_build_alloca_undef(
788 &ctx->gallivm,
789 ctx->bld_base.uint_bld.elem_type, "");
790 }
791 }
792 break;
793 }
794
795 case TGSI_FILE_TEMPORARY:
796 {
797 char name[16] = "";
798 LLVMValueRef array_alloca = NULL;
799 unsigned decl_size;
800 unsigned writemask = decl->Declaration.UsageMask;
801 first = decl->Range.First;
802 last = decl->Range.Last;
803 decl_size = 4 * ((last - first) + 1);
804
805 if (decl->Declaration.Array) {
806 unsigned id = decl->Array.ArrayID - 1;
807 unsigned array_size;
808
809 writemask &= ctx->temp_arrays[id].writemask;
810 ctx->temp_arrays[id].writemask = writemask;
811 array_size = ((last - first) + 1) * util_bitcount(writemask);
812
813 /* If the array has more than 16 elements, store it
814 * in memory using an alloca that spans the entire
815 * array.
816 *
817 * Otherwise, store each array element individually.
818 * We will then generate vectors (per-channel, up to
819 * <16 x float> if the usagemask is a single bit) for
820 * indirect addressing.
821 *
822 * Note that 16 is the number of vector elements that
823 * LLVM will store in a register, so theoretically an
824 * array with up to 4 * 16 = 64 elements could be
825 * handled this way, but whether that's a good idea
826 * depends on VGPR register pressure elsewhere.
827 *
828 * FIXME: We shouldn't need to have the non-alloca
829 * code path for arrays. LLVM should be smart enough to
830 * promote allocas into registers when profitable.
831 *
832 * LLVM 3.8 crashes with this.
833 */
834 if (HAVE_LLVM >= 0x0309 && array_size > 16) {
835 array_alloca = LLVMBuildAlloca(builder,
836 LLVMArrayType(bld_base->base.vec_type,
837 array_size), "array");
838 ctx->temp_array_allocas[id] = array_alloca;
839 }
840 }
841
842 if (!ctx->temps_count) {
843 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
844 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
845 }
846 if (!array_alloca) {
847 for (i = 0; i < decl_size; ++i) {
848 #ifdef DEBUG
849 snprintf(name, sizeof(name), "TEMP%d.%c",
850 first + i / 4, "xyzw"[i % 4]);
851 #endif
852 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
853 lp_build_alloca_undef(bld_base->base.gallivm,
854 bld_base->base.vec_type,
855 name);
856 }
857 } else {
858 LLVMValueRef idxs[2] = {
859 bld_base->uint_bld.zero,
860 NULL
861 };
862 unsigned j = 0;
863
864 if (writemask != TGSI_WRITEMASK_XYZW &&
865 !ctx->undef_alloca) {
866 /* Create a dummy alloca. We use it so that we
867 * have a pointer that is safe to load from if
868 * a shader ever reads from a channel that
869 * it never writes to.
870 */
871 ctx->undef_alloca = lp_build_alloca_undef(
872 bld_base->base.gallivm,
873 bld_base->base.vec_type, "undef");
874 }
875
876 for (i = 0; i < decl_size; ++i) {
877 LLVMValueRef ptr;
878 if (writemask & (1 << (i % 4))) {
879 #ifdef DEBUG
880 snprintf(name, sizeof(name), "TEMP%d.%c",
881 first + i / 4, "xyzw"[i % 4]);
882 #endif
883 idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
884 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
885 j++;
886 } else {
887 ptr = ctx->undef_alloca;
888 }
889 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
890 }
891 }
892 break;
893 }
894 case TGSI_FILE_INPUT:
895 {
896 unsigned idx;
897 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
898 if (ctx->load_input &&
899 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
900 ctx->input_decls[idx] = *decl;
901 ctx->input_decls[idx].Range.First = idx;
902 ctx->input_decls[idx].Range.Last = idx;
903 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
904
905 if (si_preload_fs_inputs(ctx) ||
906 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
907 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
908 &ctx->inputs[idx * 4]);
909 }
910 }
911 }
912 break;
913
914 case TGSI_FILE_SYSTEM_VALUE:
915 {
916 unsigned idx;
917 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
918 ctx->load_system_value(ctx, idx, decl);
919 }
920 }
921 break;
922
923 case TGSI_FILE_OUTPUT:
924 {
925 char name[16] = "";
926 unsigned idx;
927 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
928 unsigned chan;
929 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
930 if (ctx->outputs[idx][0])
931 continue;
932 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
933 #ifdef DEBUG
934 snprintf(name, sizeof(name), "OUT%d.%c",
935 idx, "xyzw"[chan % 4]);
936 #endif
937 ctx->outputs[idx][chan] = lp_build_alloca_undef(
938 &ctx->gallivm,
939 ctx->bld_base.base.elem_type, name);
940 }
941 }
942 break;
943 }
944
945 case TGSI_FILE_MEMORY:
946 ctx->declare_memory_region(ctx, decl);
947 break;
948
949 default:
950 break;
951 }
952 }
953
954 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
955 const struct tgsi_full_instruction *inst,
956 const struct tgsi_opcode_info *info,
957 LLVMValueRef dst[4])
958 {
959 struct si_shader_context *ctx = si_shader_context(bld_base);
960 struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
961 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
962 LLVMBuilderRef builder = ctx->bld_base.base.gallivm->builder;
963 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
964 unsigned chan, chan_index;
965 bool is_vec_store = false;
966 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
967
968 if (dst[0]) {
969 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
970 is_vec_store = (k == LLVMVectorTypeKind);
971 }
972
973 if (is_vec_store) {
974 LLVMValueRef values[4] = {};
975 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
976 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
977 values[chan] = LLVMBuildExtractElement(gallivm->builder,
978 dst[0], index, "");
979 }
980 bld_base->emit_store(bld_base, inst, info, values);
981 return;
982 }
983
984 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
985 LLVMValueRef value = dst[chan_index];
986
987 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
988 continue;
989 if (inst->Instruction.Saturate)
990 value = ac_build_clamp(&ctx->ac, value);
991
992 if (reg->Register.File == TGSI_FILE_ADDRESS) {
993 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
994 LLVMBuildStore(builder, value, temp_ptr);
995 continue;
996 }
997
998 if (!tgsi_type_is_64bit(dtype))
999 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
1000
1001 if (reg->Register.Indirect) {
1002 unsigned file = reg->Register.File;
1003 unsigned reg_index = reg->Register.Index;
1004 store_value_to_array(bld_base, value, file, chan_index,
1005 reg_index, &reg->Indirect);
1006 } else {
1007 switch(reg->Register.File) {
1008 case TGSI_FILE_OUTPUT:
1009 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
1010 if (tgsi_type_is_64bit(dtype))
1011 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
1012 break;
1013
1014 case TGSI_FILE_TEMPORARY:
1015 {
1016 if (reg->Register.Index >= ctx->temps_count)
1017 continue;
1018
1019 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1020 if (tgsi_type_is_64bit(dtype))
1021 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1022
1023 break;
1024 }
1025 default:
1026 return;
1027 }
1028 if (!tgsi_type_is_64bit(dtype))
1029 LLVMBuildStore(builder, value, temp_ptr);
1030 else {
1031 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1032 LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
1033 LLVMValueRef val2;
1034 value = LLVMBuildExtractElement(builder, ptr,
1035 bld_base->uint_bld.zero, "");
1036 val2 = LLVMBuildExtractElement(builder, ptr,
1037 bld_base->uint_bld.one, "");
1038
1039 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1040 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1041 }
1042 }
1043 }
1044 }
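/* Note on the 64-bit path above: a 64-bit value occupies two consecutive
 * channels, so it is bitcast to <2 x i32> and the low/high halves are stored
 * to chan_index and chan_index + 1; that is also why odd channel indices are
 * skipped for 64-bit types at the top of the loop.
 */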
1045
1046 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1047 {
1048 char buf[32];
1049 /* Subtract 1 so that the number shown is that of the corresponding
1050 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1051 * the instruction number of the corresponding TGSI IF.
1052 */
1053 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1054 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1055 }
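/* For example, an IF at TGSI instruction 12 yields a block named "if12";
 * the matching ELSE and ENDIF blocks are numbered after their own
 * instructions in the same way.
 */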
1056
1057 /* Append a basic block at the level of the parent flow.
1058 */
1059 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1060 const char *name)
1061 {
1062 struct gallivm_state *gallivm = &ctx->gallivm;
1063
1064 assert(ctx->flow_depth >= 1);
1065
1066 if (ctx->flow_depth >= 2) {
1067 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1068
1069 return LLVMInsertBasicBlockInContext(gallivm->context,
1070 flow->next_block, name);
1071 }
1072
1073 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1074 }
1075
1076 /* Emit a branch to the given default target for the current block if
1077 * applicable -- that is, if the current block does not already contain a
1078 * branch from a break or continue.
1079 */
1080 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1081 {
1082 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1083 LLVMBuildBr(builder, target);
1084 }
1085
1086 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1087 struct lp_build_tgsi_context *bld_base,
1088 struct lp_build_emit_data *emit_data)
1089 {
1090 struct si_shader_context *ctx = si_shader_context(bld_base);
1091 struct gallivm_state *gallivm = bld_base->base.gallivm;
1092 struct si_llvm_flow *flow = push_flow(ctx);
1093 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1094 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1095 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1096 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1097 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1098 }
1099
1100 static void brk_emit(const struct lp_build_tgsi_action *action,
1101 struct lp_build_tgsi_context *bld_base,
1102 struct lp_build_emit_data *emit_data)
1103 {
1104 struct si_shader_context *ctx = si_shader_context(bld_base);
1105 struct gallivm_state *gallivm = bld_base->base.gallivm;
1106 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1107
1108 LLVMBuildBr(gallivm->builder, flow->next_block);
1109 }
1110
1111 static void cont_emit(const struct lp_build_tgsi_action *action,
1112 struct lp_build_tgsi_context *bld_base,
1113 struct lp_build_emit_data *emit_data)
1114 {
1115 struct si_shader_context *ctx = si_shader_context(bld_base);
1116 struct gallivm_state *gallivm = bld_base->base.gallivm;
1117 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1118
1119 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1120 }
1121
1122 static void else_emit(const struct lp_build_tgsi_action *action,
1123 struct lp_build_tgsi_context *bld_base,
1124 struct lp_build_emit_data *emit_data)
1125 {
1126 struct si_shader_context *ctx = si_shader_context(bld_base);
1127 struct gallivm_state *gallivm = bld_base->base.gallivm;
1128 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1129 LLVMBasicBlockRef endif_block;
1130
1131 assert(!current_branch->loop_entry_block);
1132
1133 endif_block = append_basic_block(ctx, "ENDIF");
1134 emit_default_branch(gallivm->builder, endif_block);
1135
1136 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1137 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1138
1139 current_branch->next_block = endif_block;
1140 }
1141
1142 static void endif_emit(const struct lp_build_tgsi_action *action,
1143 struct lp_build_tgsi_context *bld_base,
1144 struct lp_build_emit_data *emit_data)
1145 {
1146 struct si_shader_context *ctx = si_shader_context(bld_base);
1147 struct gallivm_state *gallivm = bld_base->base.gallivm;
1148 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1149
1150 assert(!current_branch->loop_entry_block);
1151
1152 emit_default_branch(gallivm->builder, current_branch->next_block);
1153 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1154 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1155
1156 ctx->flow_depth--;
1157 }
1158
1159 static void endloop_emit(const struct lp_build_tgsi_action *action,
1160 struct lp_build_tgsi_context *bld_base,
1161 struct lp_build_emit_data *emit_data)
1162 {
1163 struct si_shader_context *ctx = si_shader_context(bld_base);
1164 struct gallivm_state *gallivm = bld_base->base.gallivm;
1165 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1166
1167 assert(current_loop->loop_entry_block);
1168
1169 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1170
1171 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1172 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1173 ctx->flow_depth--;
1174 }
1175
1176 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1177 struct lp_build_tgsi_context *bld_base,
1178 struct lp_build_emit_data *emit_data,
1179 LLVMValueRef cond)
1180 {
1181 struct si_shader_context *ctx = si_shader_context(bld_base);
1182 struct gallivm_state *gallivm = bld_base->base.gallivm;
1183 struct si_llvm_flow *flow = push_flow(ctx);
1184 LLVMBasicBlockRef if_block;
1185
1186 if_block = append_basic_block(ctx, "IF");
1187 flow->next_block = append_basic_block(ctx, "ELSE");
1188 set_basicblock_name(if_block, "if", bld_base->pc);
1189 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1190 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1191 }
1192
1193 static void if_emit(const struct lp_build_tgsi_action *action,
1194 struct lp_build_tgsi_context *bld_base,
1195 struct lp_build_emit_data *emit_data)
1196 {
1197 struct gallivm_state *gallivm = bld_base->base.gallivm;
1198 LLVMValueRef cond;
1199
1200 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1201 emit_data->args[0],
1202 bld_base->base.zero, "");
1203
1204 if_cond_emit(action, bld_base, emit_data, cond);
1205 }
1206
1207 static void uif_emit(const struct lp_build_tgsi_action *action,
1208 struct lp_build_tgsi_context *bld_base,
1209 struct lp_build_emit_data *emit_data)
1210 {
1211 struct gallivm_state *gallivm = bld_base->base.gallivm;
1212 LLVMValueRef cond;
1213
1214 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1215 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1216 bld_base->int_bld.zero, "");
1217
1218 if_cond_emit(action, bld_base, emit_data, cond);
1219 }
1220
1221 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1222 const struct tgsi_full_immediate *imm)
1223 {
1224 unsigned i;
1225 struct si_shader_context *ctx = si_shader_context(bld_base);
1226
1227 for (i = 0; i < 4; ++i) {
1228 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1229 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
1230 }
1231
1232 ctx->imms_num++;
1233 }
1234
1235 void si_llvm_context_init(struct si_shader_context *ctx,
1236 struct si_screen *sscreen,
1237 struct si_shader *shader,
1238 LLVMTargetMachineRef tm,
1239 const struct tgsi_shader_info *info,
1240 const struct tgsi_token *tokens)
1241 {
1242 struct lp_type type;
1243
1244 /* Initialize the gallivm object:
1245 * We are only using the module, context, and builder fields of this struct.
1246 	 * This should be enough to pass our gallivm struct to the
1247 * helper functions in the gallivm module.
1248 */
1249 memset(ctx, 0, sizeof(*ctx));
1250 ctx->shader = shader;
1251 ctx->screen = sscreen;
1252 ctx->tm = tm;
1253 ctx->type = info ? info->processor : -1;
1254
1255 ctx->gallivm.context = LLVMContextCreate();
1256 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1257 ctx->gallivm.context);
1258 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1259
1260 #if HAVE_LLVM >= 0x0309
1261 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1262 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1263 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1264 LLVMDisposeTargetData(data_layout);
1265 LLVMDisposeMessage(data_layout_str);
1266 #endif
1267
1268 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1269 enum lp_float_mode float_mode =
1270 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1271 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1272
1273 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1274 float_mode);
1275
1276 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1277 ctx->ac.module = ctx->gallivm.module;
1278 ctx->ac.builder = ctx->gallivm.builder;
1279
1280 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1281
1282 bld_base->info = info;
1283
1284 if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1285 int size = info->array_max[TGSI_FILE_TEMPORARY];
1286
1287 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1288 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1289
1290 if (tokens)
1291 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1292 ctx->temp_arrays);
1293 }
1294
1295 if (info && info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1296 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1297 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1298 }
1299
1300 type.floating = true;
1301 type.fixed = false;
1302 type.sign = true;
1303 type.norm = false;
1304 type.width = 32;
1305 type.length = 1;
1306
1307 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1308 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1309 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1310 type.width *= 2;
1311 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1312 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1313 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1314
1315 bld_base->soa = 1;
1316 bld_base->emit_store = si_llvm_emit_store;
1317 bld_base->emit_swizzle = emit_swizzle;
1318 bld_base->emit_declaration = emit_declaration;
1319 bld_base->emit_immediate = emit_immediate;
1320
1321 bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1322 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1323 bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1324 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1325 bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1326
1327 /* metadata allowing 2.5 ULP */
1328 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1329 "fpmath", 6);
1330 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1331 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1332 &arg, 1);
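	/* This node is presumably attached elsewhere to selected FP
	 * instructions (e.g. fdiv) to tell LLVM that results accurate to
	 * 2.5 ULP are acceptable, allowing a faster lowering.
	 */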
1333
1334 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1335 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1336 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1337 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1338 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1339 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1340 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1341 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1342
1343 si_shader_context_init_alu(&ctx->bld_base);
1344
1345 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1346 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1347 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1348 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1349 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1350 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1351 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1352 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
1353 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1354 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1355 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1356 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1357
1358 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1359 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1360 }
1361
1362 void si_llvm_create_func(struct si_shader_context *ctx,
1363 const char *name,
1364 LLVMTypeRef *return_types, unsigned num_return_elems,
1365 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1366 {
1367 LLVMTypeRef main_fn_type, ret_type;
1368 LLVMBasicBlockRef main_fn_body;
1369
1370 if (num_return_elems)
1371 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1372 return_types,
1373 num_return_elems, true);
1374 else
1375 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1376
1377 /* Setup the function */
1378 ctx->return_type = ret_type;
1379 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1380 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1381 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1382 ctx->main_fn, "main_body");
1383 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1384 }
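/* Note: the 'true' passed to LLVMStructTypeInContext() above makes the
 * aggregate return type a packed struct, i.e. the returned values are laid
 * out without padding.
 */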
1385
1386 void si_llvm_finalize_module(struct si_shader_context *ctx,
1387 bool run_verifier)
1388 {
1389 struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
1390 const char *triple = LLVMGetTarget(gallivm->module);
1391 LLVMTargetLibraryInfoRef target_library_info;
1392
1393 /* Create the pass manager */
1394 gallivm->passmgr = LLVMCreatePassManager();
1395
1396 target_library_info = gallivm_create_target_library_info(triple);
1397 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1398
1399 if (run_verifier)
1400 LLVMAddVerifierPass(gallivm->passmgr);
1401
1402 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1403
1404 /* This pass should eliminate all the load and store instructions */
1405 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1406
1407 /* Add some optimization passes */
1408 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1409 LLVMAddLICMPass(gallivm->passmgr);
1410 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1411 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1412 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1413
1414 /* Run the pass */
1415 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1416
1417 LLVMDisposeBuilder(gallivm->builder);
1418 LLVMDisposePassManager(gallivm->passmgr);
1419 gallivm_dispose_target_library_info(target_library_info);
1420 }
1421
1422 void si_llvm_dispose(struct si_shader_context *ctx)
1423 {
1424 LLVMDisposeModule(ctx->bld_base.base.gallivm->module);
1425 LLVMContextDispose(ctx->bld_base.base.gallivm->context);
1426 FREE(ctx->temp_arrays);
1427 ctx->temp_arrays = NULL;
1428 FREE(ctx->temp_array_allocas);
1429 ctx->temp_array_allocas = NULL;
1430 FREE(ctx->temps);
1431 ctx->temps = NULL;
1432 ctx->temps_count = 0;
1433 FREE(ctx->imms);
1434 ctx->imms = NULL;
1435 ctx->imms_num = 0;
1436 FREE(ctx->flow);
1437 ctx->flow = NULL;
1438 ctx->flow_depth_max = 0;
1439 }