src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c

   1 /*
   2  * Copyright 2016 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23
  24 #include "si_shader_internal.h"
  25 #include "si_pipe.h"
  26
  27 #include "gallivm/lp_bld_const.h"
  28 #include "gallivm/lp_bld_gather.h"
  29 #include "gallivm/lp_bld_flow.h"
  30 #include "gallivm/lp_bld_init.h"
  31 #include "gallivm/lp_bld_intr.h"
  32 #include "gallivm/lp_bld_misc.h"
  33 #include "gallivm/lp_bld_swizzle.h"
  34 #include "tgsi/tgsi_info.h"
  35 #include "tgsi/tgsi_parse.h"
  36 #include "util/u_math.h"
  37 #include "util/u_memory.h"
  38 #include "util/u_debug.h"
  39
  40 #include <stdio.h>
  41 #include <llvm-c/Transforms/IPO.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43 #include <llvm-c/Support.h>
  44
  45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
  46  */
  47 struct si_llvm_flow {
  48         /* Loop exit or next part of if/else/endif. */
  49         LLVMBasicBlockRef next_block;
  50         LLVMBasicBlockRef loop_entry_block;
  51 };
  52
  53 #define CPU_STRING_LEN 30
  54 #define FS_STRING_LEN 30
  55 #define TRIPLE_STRING_LEN 7
  56
  57 /**
  58  * Shader types for the LLVM backend.
  59  */
  60 enum si_llvm_shader_type {
  61         RADEON_LLVM_SHADER_PS = 0,
  62         RADEON_LLVM_SHADER_VS = 1,
  63         RADEON_LLVM_SHADER_GS = 2,
  64         RADEON_LLVM_SHADER_CS = 3,
  65 };
  66
  67 enum si_llvm_calling_convention {
  68         RADEON_LLVM_AMDGPU_VS = 87,
  69         RADEON_LLVM_AMDGPU_GS = 88,
  70         RADEON_LLVM_AMDGPU_PS = 89,
  71         RADEON_LLVM_AMDGPU_CS = 90,
  72 };
  73
  74 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
  75 {
  76         char str[16];
  77
  78         snprintf(str, sizeof(str), "%i", value);
  79         LLVMAddTargetDependentFunctionAttr(F, name, str);
  80 }
  81
  82 /**
  83  * Set the shader type we want to compile
  84  *
  85  * @param type shader type to set
  86  */
  87 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
  88 {
  89         enum si_llvm_shader_type llvm_type;
  90         enum si_llvm_calling_convention calling_conv;
  91
  92         switch (type) {
  93         case PIPE_SHADER_VERTEX:
  94         case PIPE_SHADER_TESS_CTRL:
  95         case PIPE_SHADER_TESS_EVAL:
  96                 llvm_type = RADEON_LLVM_SHADER_VS;
  97                 calling_conv = RADEON_LLVM_AMDGPU_VS;
  98                 break;
  99         case PIPE_SHADER_GEOMETRY:
 100                 llvm_type = RADEON_LLVM_SHADER_GS;
 101                 calling_conv = RADEON_LLVM_AMDGPU_GS;
 102                 break;
 103         case PIPE_SHADER_FRAGMENT:
 104                 llvm_type = RADEON_LLVM_SHADER_PS;
 105                 calling_conv = RADEON_LLVM_AMDGPU_PS;
 106                 break;
 107         case PIPE_SHADER_COMPUTE:
 108                 llvm_type = RADEON_LLVM_SHADER_CS;
 109                 calling_conv = RADEON_LLVM_AMDGPU_CS;
 110                 break;
 111         default:
 112                 unreachable("Unhandle shader type");
 113         }
 114
 115         if (HAVE_LLVM >= 0x309)
 116                 LLVMSetFunctionCallConv(F, calling_conv);
 117         else
 118                 si_llvm_add_attribute(F, "ShaderType", llvm_type);
 119 }
 120
 121 static void init_amdgpu_target()
 122 {
 123         gallivm_init_llvm_targets();
 124         LLVMInitializeAMDGPUTargetInfo();
 125         LLVMInitializeAMDGPUTarget();
 126         LLVMInitializeAMDGPUTargetMC();
 127         LLVMInitializeAMDGPUAsmPrinter();
 128
 129         if (HAVE_LLVM >= 0x0400) {
 130                 /*
 131                  * Workaround for bug in llvm 4.0 that causes image intrinsics
 132                  * to disappear.
 133                  * https://reviews.llvm.org/D26348
 134                  */
 135                 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
 136                 LLVMParseCommandLineOptions(2, argv, NULL);
 137         }
 138 }
 139
 140 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
 141
 142 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
 143 {
 144         LLVMTargetRef target = NULL;
 145         char *err_message = NULL;
 146
 147         call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
 148
 149         if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
 150                 fprintf(stderr, "Cannot find target for triple %s ", triple);
 151                 if (err_message) {
 152                         fprintf(stderr, "%s\n", err_message);
 153                 }
 154                 LLVMDisposeMessage(err_message);
 155                 return NULL;
 156         }
 157         return target;
 158 }
 159
 160 struct si_llvm_diagnostics {
 161         struct pipe_debug_callback *debug;
 162         unsigned retval;
 163 };
 164
 165 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
 166 {
 167         struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
 168         LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
 169         char *description = LLVMGetDiagInfoDescription(di);
 170         const char *severity_str = NULL;
 171
 172         switch (severity) {
 173         case LLVMDSError:
 174                 severity_str = "error";
 175                 break;
 176         case LLVMDSWarning:
 177                 severity_str = "warning";
 178                 break;
 179         case LLVMDSRemark:
 180                 severity_str = "remark";
 181                 break;
 182         case LLVMDSNote:
 183                 severity_str = "note";
 184                 break;
 185         default:
 186                 severity_str = "unknown";
 187         }
 188
 189         pipe_debug_message(diag->debug, SHADER_INFO,
 190                            "LLVM diagnostic (%s): %s", severity_str, description);
 191
 192         if (severity == LLVMDSError) {
 193                 diag->retval = 1;
 194                 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
 195         }
 196
 197         LLVMDisposeMessage(description);
 198 }
 199
 200 /**
 201  * Compile an LLVM module to machine code.
 202  *
 203  * @returns 0 for success, 1 for failure
 204  */
 205 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
 206                          LLVMTargetMachineRef tm,
 207                          struct pipe_debug_callback *debug)
 208 {
 209         struct si_llvm_diagnostics diag;
 210         char *err;
 211         LLVMContextRef llvm_ctx;
 212         LLVMMemoryBufferRef out_buffer;
 213         unsigned buffer_size;
 214         const char *buffer_data;
 215         LLVMBool mem_err;
 216
 217         diag.debug = debug;
 218         diag.retval = 0;
 219
 220         /* Setup Diagnostic Handler*/
 221         llvm_ctx = LLVMGetModuleContext(M);
 222
 223         LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
 224
 225         /* Compile IR*/
 226         mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
 227                                                                  &out_buffer);
 228
 229         /* Process Errors/Warnings */
 230         if (mem_err) {
 231                 fprintf(stderr, "%s: %s", __FUNCTION__, err);
 232                 pipe_debug_message(debug, SHADER_INFO,
 233                                    "LLVM emit error: %s", err);
 234                 FREE(err);
 235                 diag.retval = 1;
 236                 goto out;
 237         }
 238
 239         /* Extract Shader Code*/
 240         buffer_size = LLVMGetBufferSize(out_buffer);
 241         buffer_data = LLVMGetBufferStart(out_buffer);
 242
 243         ac_elf_read(buffer_data, buffer_size, binary);
 244
 245         /* Clean up */
 246         LLVMDisposeMemoryBuffer(out_buffer);
 247
 248 out:
 249         if (diag.retval != 0)
 250                 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
 251         return diag.retval;
 252 }
 253
 254 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
 255                           enum tgsi_opcode_type type)
 256 {
 257         LLVMContextRef ctx = bld_base->base.gallivm->context;
 258
 259         switch (type) {
 260         case TGSI_TYPE_UNSIGNED:
 261         case TGSI_TYPE_SIGNED:
 262                 return LLVMInt32TypeInContext(ctx);
 263         case TGSI_TYPE_UNSIGNED64:
 264         case TGSI_TYPE_SIGNED64:
 265                 return LLVMInt64TypeInContext(ctx);
 266         case TGSI_TYPE_DOUBLE:
 267                 return LLVMDoubleTypeInContext(ctx);
 268         case TGSI_TYPE_UNTYPED:
 269         case TGSI_TYPE_FLOAT:
 270                 return LLVMFloatTypeInContext(ctx);
 271         default: break;
 272         }
 273         return 0;
 274 }
 275
 276 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
 277                      enum tgsi_opcode_type type, LLVMValueRef value)
 278 {
 279         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 280         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
 281
 282         if (dst_type)
 283                 return LLVMBuildBitCast(builder, value, dst_type, "");
 284         else
 285                 return value;
 286 }
 287
 288 /**
 289  * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 290  * or an undefined value in the same interval otherwise.
 291  */
 292 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
 293                                  LLVMValueRef index,
 294                                  unsigned num)
 295 {
 296         struct gallivm_state *gallivm = &ctx->gallivm;
 297         LLVMBuilderRef builder = gallivm->builder;
 298         LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
 299         LLVMValueRef cc;
 300
 301         if (util_is_power_of_two(num)) {
 302                 index = LLVMBuildAnd(builder, index, c_max, "");
 303         } else {
 304                 /* In theory, this MAX pattern should result in code that is
 305                  * as good as the bit-wise AND above.
 306                  *
 307                  * In practice, LLVM generates worse code (at the time of
 308                  * writing), because its value tracking is not strong enough.
 309                  */
 310                 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
 311                 index = LLVMBuildSelect(builder, cc, index, c_max, "");
 312         }
 313
 314         return index;
 315 }
 316
 317 static struct si_llvm_flow *
 318 get_current_flow(struct si_shader_context *ctx)
 319 {
 320         if (ctx->flow_depth > 0)
 321                 return &ctx->flow[ctx->flow_depth - 1];
 322         return NULL;
 323 }
 324
 325 static struct si_llvm_flow *
 326 get_innermost_loop(struct si_shader_context *ctx)
 327 {
 328         for (unsigned i = ctx->flow_depth; i > 0; --i) {
 329                 if (ctx->flow[i - 1].loop_entry_block)
 330                         return &ctx->flow[i - 1];
 331         }
 332         return NULL;
 333 }
 334
 335 static struct si_llvm_flow *
 336 push_flow(struct si_shader_context *ctx)
 337 {
 338         struct si_llvm_flow *flow;
 339
 340         if (ctx->flow_depth >= ctx->flow_depth_max) {
 341                 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
 342                 ctx->flow = REALLOC(ctx->flow,
 343                                     ctx->flow_depth_max * sizeof(*ctx->flow),
 344                                     new_max * sizeof(*ctx->flow));
 345                 ctx->flow_depth_max = new_max;
 346         }
 347
 348         flow = &ctx->flow[ctx->flow_depth];
 349         ctx->flow_depth++;
 350
 351         flow->next_block = NULL;
 352         flow->loop_entry_block = NULL;
 353         return flow;
 354 }
 355
 356 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
 357                                  LLVMValueRef value,
 358                                  unsigned swizzle_x,
 359                                  unsigned swizzle_y,
 360                                  unsigned swizzle_z,
 361                                  unsigned swizzle_w)
 362 {
 363         LLVMValueRef swizzles[4];
 364         LLVMTypeRef i32t =
 365                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 366
 367         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 368         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 369         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 370         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 371
 372         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 373                                       value,
 374                                       LLVMGetUndef(LLVMTypeOf(value)),
 375                                       LLVMConstVector(swizzles, 4), "");
 376 }
 377
 378 /**
 379  * Return the description of the array covering the given temporary register
 380  * index.
 381  */
 382 static unsigned
 383 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
 384                   unsigned reg_index,
 385                   const struct tgsi_ind_register *reg)
 386 {
 387         struct si_shader_context *ctx = si_shader_context(bld_base);
 388         unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
 389         unsigned i;
 390
 391         if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
 392                 return reg->ArrayID;
 393
 394         for (i = 0; i < num_arrays; i++) {
 395                 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
 396
 397                 if (reg_index >= array->range.First && reg_index <= array->range.Last)
 398                         return i + 1;
 399         }
 400
 401         return 0;
 402 }
 403
 404 static struct tgsi_declaration_range
 405 get_array_range(struct lp_build_tgsi_context *bld_base,
 406                 unsigned File, unsigned reg_index,
 407                 const struct tgsi_ind_register *reg)
 408 {
 409         struct si_shader_context *ctx = si_shader_context(bld_base);
 410         struct tgsi_declaration_range range;
 411
 412         if (File == TGSI_FILE_TEMPORARY) {
 413                 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
 414                 if (array_id)
 415                         return ctx->temp_arrays[array_id - 1].range;
 416         }
 417
 418         range.First = 0;
 419         range.Last = bld_base->info->file_max[File];
 420         return range;
 421 }
 422
 423 static LLVMValueRef
 424 emit_array_index(struct si_shader_context *ctx,
 425                  const struct tgsi_ind_register *reg,
 426                  unsigned offset)
 427 {
 428         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 429
 430         if (!reg) {
 431                 return LLVMConstInt(ctx->i32, offset, 0);
 432         }
 433         LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
 434         return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
 435 }
 436
 437 /**
 438  * For indirect registers, construct a pointer directly to the requested
 439  * element using getelementptr if possible.
 440  *
 441  * Returns NULL if the insertelement/extractelement fallback for array access
 442  * must be used.
 443  */
 444 static LLVMValueRef
 445 get_pointer_into_array(struct si_shader_context *ctx,
 446                        unsigned file,
 447                        unsigned swizzle,
 448                        unsigned reg_index,
 449                        const struct tgsi_ind_register *reg_indirect)
 450 {
 451         unsigned array_id;
 452         struct tgsi_array_info *array;
 453         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 454         LLVMBuilderRef builder = gallivm->builder;
 455         LLVMValueRef idxs[2];
 456         LLVMValueRef index;
 457         LLVMValueRef alloca;
 458
 459         if (file != TGSI_FILE_TEMPORARY)
 460                 return NULL;
 461
 462         array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
 463         if (!array_id)
 464                 return NULL;
 465
 466         alloca = ctx->temp_array_allocas[array_id - 1];
 467         if (!alloca)
 468                 return NULL;
 469
 470         array = &ctx->temp_arrays[array_id - 1];
 471
 472         if (!(array->writemask & (1 << swizzle)))
 473                 return ctx->undef_alloca;
 474
 475         index = emit_array_index(ctx, reg_indirect,
 476                                  reg_index - ctx->temp_arrays[array_id - 1].range.First);
 477
 478         /* Ensure that the index is within a valid range, to guard against
 479          * VM faults and overwriting critical data (e.g. spilled resource
 480          * descriptors).
 481          *
 482          * TODO It should be possible to avoid the additional instructions
 483          * if LLVM is changed so that it guarantuees:
 484          * 1. the scratch space descriptor isolates the current wave (this
 485          *    could even save the scratch offset SGPR at the cost of an
 486          *    additional SALU instruction)
 487          * 2. the memory for allocas must be allocated at the _end_ of the
 488          *    scratch space (after spilled registers)
 489          */
 490         index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
 491
 492         index = LLVMBuildMul(
 493                 builder, index,
 494                 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
 495                 "");
 496         index = LLVMBuildAdd(
 497                 builder, index,
 498                 LLVMConstInt(ctx->i32,
 499                              util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
 500                 "");
 501         idxs[0] = ctx->i32_0;
 502         idxs[1] = index;
 503         return LLVMBuildGEP(builder, alloca, idxs, 2, "");
 504 }
 505
 506 LLVMValueRef
 507 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 508                          enum tgsi_opcode_type type,
 509                          LLVMValueRef ptr,
 510                          LLVMValueRef ptr2)
 511 {
 512         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 513         LLVMValueRef result;
 514
 515         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 516
 517         result = LLVMBuildInsertElement(builder,
 518                                         result,
 519                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
 520                                         bld_base->int_bld.zero, "");
 521         result = LLVMBuildInsertElement(builder,
 522                                         result,
 523                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
 524                                         bld_base->int_bld.one, "");
 525         return bitcast(bld_base, type, result);
 526 }
 527
 528 static LLVMValueRef
 529 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 530                  unsigned File, enum tgsi_opcode_type type,
 531                  struct tgsi_declaration_range range,
 532                  unsigned swizzle)
 533 {
 534         struct si_shader_context *ctx = si_shader_context(bld_base);
 535
 536         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 537
 538         unsigned i, size = range.Last - range.First + 1;
 539         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 540         LLVMValueRef result = LLVMGetUndef(vec);
 541
 542         struct tgsi_full_src_register tmp_reg = {};
 543         tmp_reg.Register.File = File;
 544
 545         for (i = 0; i < size; ++i) {
 546                 tmp_reg.Register.Index = i + range.First;
 547                 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 548                 result = LLVMBuildInsertElement(builder, result, temp,
 549                         LLVMConstInt(ctx->i32, i, 0), "array_vector");
 550         }
 551         return result;
 552 }
 553
 554 static LLVMValueRef
 555 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 556                       unsigned file,
 557                       enum tgsi_opcode_type type,
 558                       unsigned swizzle,
 559                       unsigned reg_index,
 560                       const struct tgsi_ind_register *reg_indirect)
 561 {
 562         struct si_shader_context *ctx = si_shader_context(bld_base);
 563         struct gallivm_state *gallivm = bld_base->base.gallivm;
 564         LLVMBuilderRef builder = gallivm->builder;
 565         LLVMValueRef ptr;
 566
 567         ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
 568         if (ptr) {
 569                 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
 570                 if (tgsi_type_is_64bit(type)) {
 571                         LLVMValueRef ptr_hi, val_hi;
 572                         ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
 573                         val_hi = LLVMBuildLoad(builder, ptr_hi, "");
 574                         val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
 575                 }
 576
 577                 return val;
 578         } else {
 579                 struct tgsi_declaration_range range =
 580                         get_array_range(bld_base, file, reg_index, reg_indirect);
 581                 LLVMValueRef index =
 582                         emit_array_index(ctx, reg_indirect, reg_index - range.First);
 583                 LLVMValueRef array =
 584                         emit_array_fetch(bld_base, file, type, range, swizzle);
 585                 return LLVMBuildExtractElement(builder, array, index, "");
 586         }
 587 }
 588
 589 static void
 590 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 591                      LLVMValueRef value,
 592                      unsigned file,
 593                      unsigned chan_index,
 594                      unsigned reg_index,
 595                      const struct tgsi_ind_register *reg_indirect)
 596 {
 597         struct si_shader_context *ctx = si_shader_context(bld_base);
 598         struct gallivm_state *gallivm = bld_base->base.gallivm;
 599         LLVMBuilderRef builder = gallivm->builder;
 600         LLVMValueRef ptr;
 601
 602         ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
 603         if (ptr) {
 604                 LLVMBuildStore(builder, value, ptr);
 605         } else {
 606                 unsigned i, size;
 607                 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
 608                 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
 609                 LLVMValueRef array =
 610                         emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
 611                 LLVMValueRef temp_ptr;
 612
 613                 array = LLVMBuildInsertElement(builder, array, value, index, "");
 614
 615                 size = range.Last - range.First + 1;
 616                 for (i = 0; i < size; ++i) {
 617                         switch(file) {
 618                         case TGSI_FILE_OUTPUT:
 619                                 temp_ptr = ctx->outputs[i + range.First][chan_index];
 620                                 break;
 621
 622                         case TGSI_FILE_TEMPORARY:
 623                                 if (range.First + i >= ctx->temps_count)
 624                                         continue;
 625                                 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 626                                 break;
 627
 628                         default:
 629                                 continue;
 630                         }
 631                         value = LLVMBuildExtractElement(builder, array,
 632                                 LLVMConstInt(ctx->i32, i, 0), "");
 633                         LLVMBuildStore(builder, value, temp_ptr);
 634                 }
 635         }
 636 }
 637
 638 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 639  * reload them at each use. This must be true if the shader is using
 640  * derivatives and KILL, because KILL can leave the WQM and then a lazy
 641  * input load isn't in the WQM anymore.
 642  */
 643 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
 644 {
 645         struct si_shader_selector *sel = ctx->shader->selector;
 646
 647         return sel->info.uses_derivatives &&
 648                sel->info.uses_kill;
 649 }
 650
 651 static LLVMValueRef
 652 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
 653                unsigned chan)
 654 {
 655         struct si_shader_context *ctx = si_shader_context(bld_base);
 656
 657         assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
 658         return ctx->outputs[index][chan];
 659 }
 660
 661 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 662                                 const struct tgsi_full_src_register *reg,
 663                                 enum tgsi_opcode_type type,
 664                                 unsigned swizzle)
 665 {
 666         struct si_shader_context *ctx = si_shader_context(bld_base);
 667         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 668         LLVMValueRef result = NULL, ptr, ptr2;
 669
 670         if (swizzle == ~0) {
 671                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 672                 unsigned chan;
 673                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 674                         values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
 675                 }
 676                 return lp_build_gather_values(bld_base->base.gallivm, values,
 677                                               TGSI_NUM_CHANNELS);
 678         }
 679
 680         if (reg->Register.Indirect) {
 681                 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
 682                                 swizzle, reg->Register.Index, &reg->Indirect);
 683                 return bitcast(bld_base, type, load);
 684         }
 685
 686         switch(reg->Register.File) {
 687         case TGSI_FILE_IMMEDIATE: {
 688                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 689                 if (tgsi_type_is_64bit(type)) {
 690                         result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
 691                         result = LLVMConstInsertElement(result,
 692                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
 693                                                         ctx->i32_0);
 694                         result = LLVMConstInsertElement(result,
 695                                                         ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
 696                                                         ctx->i32_1);
 697                         return LLVMConstBitCast(result, ctype);
 698                 } else {
 699                         return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
 700                 }
 701         }
 702
 703         case TGSI_FILE_INPUT: {
 704                 unsigned index = reg->Register.Index;
 705                 LLVMValueRef input[4];
 706
 707                 /* I don't think doing this for vertex shaders is beneficial.
 708                  * For those, we want to make sure the VMEM loads are executed
 709                  * only once. Fragment shaders don't care much, because
 710                  * v_interp instructions are much cheaper than VMEM loads.
 711                  */
 712                 if (!si_preload_fs_inputs(ctx) &&
 713                     ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
 714                         ctx->load_input(ctx, index, &ctx->input_decls[index], input);
 715                 else
 716                         memcpy(input, &ctx->inputs[index * 4], sizeof(input));
 717
 718                 result = input[swizzle];
 719
 720                 if (tgsi_type_is_64bit(type)) {
 721                         ptr = result;
 722                         ptr2 = input[swizzle + 1];
 723                         return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 724                 }
 725                 break;
 726         }
 727
 728         case TGSI_FILE_TEMPORARY:
 729                 if (reg->Register.Index >= ctx->temps_count)
 730                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 731                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 732                 if (tgsi_type_is_64bit(type)) {
 733                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 734                         return si_llvm_emit_fetch_64bit(bld_base, type,
 735                                                         LLVMBuildLoad(builder, ptr, ""),
 736                                                         LLVMBuildLoad(builder, ptr2, ""));
 737                 }
 738                 result = LLVMBuildLoad(builder, ptr, "");
 739                 break;
 740
 741         case TGSI_FILE_OUTPUT:
 742                 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
 743                 if (tgsi_type_is_64bit(type)) {
 744                         ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
 745                         return si_llvm_emit_fetch_64bit(bld_base, type,
 746                                                         LLVMBuildLoad(builder, ptr, ""),
 747                                                         LLVMBuildLoad(builder, ptr2, ""));
 748                 }
 749                 result = LLVMBuildLoad(builder, ptr, "");
 750                 break;
 751
 752         default:
 753                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 754         }
 755
 756         return bitcast(bld_base, type, result);
 757 }
 758
 759 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 760                                        const struct tgsi_full_src_register *reg,
 761                                        enum tgsi_opcode_type type,
 762                                        unsigned swizzle)
 763 {
 764         struct si_shader_context *ctx = si_shader_context(bld_base);
 765         struct gallivm_state *gallivm = bld_base->base.gallivm;
 766
 767         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 768         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 769                 cval = LLVMBuildExtractElement(gallivm->builder, cval,
 770                                                LLVMConstInt(ctx->i32, swizzle, 0), "");
 771         }
 772         return bitcast(bld_base, type, cval);
 773 }
 774
 775 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 776                              const struct tgsi_full_declaration *decl)
 777 {
 778         struct si_shader_context *ctx = si_shader_context(bld_base);
 779         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 780         unsigned first, last, i;
 781         switch(decl->Declaration.File) {
 782         case TGSI_FILE_ADDRESS:
 783         {
 784                  unsigned idx;
 785                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 786                         unsigned chan;
 787                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 788                                  ctx->addrs[idx][chan] = lp_build_alloca_undef(
 789                                         &ctx->gallivm,
 790                                         ctx->i32, "");
 791                         }
 792                 }
 793                 break;
 794         }
 795
 796         case TGSI_FILE_TEMPORARY:
 797         {
 798                 char name[16] = "";
 799                 LLVMValueRef array_alloca = NULL;
 800                 unsigned decl_size;
 801                 unsigned writemask = decl->Declaration.UsageMask;
 802                 first = decl->Range.First;
 803                 last = decl->Range.Last;
 804                 decl_size = 4 * ((last - first) + 1);
 805
 806                 if (decl->Declaration.Array) {
 807                         unsigned id = decl->Array.ArrayID - 1;
 808                         unsigned array_size;
 809
 810                         writemask &= ctx->temp_arrays[id].writemask;
 811                         ctx->temp_arrays[id].writemask = writemask;
 812                         array_size = ((last - first) + 1) * util_bitcount(writemask);
 813
 814                         /* If the array has more than 16 elements, store it
 815                          * in memory using an alloca that spans the entire
 816                          * array.
 817                          *
 818                          * Otherwise, store each array element individually.
 819                          * We will then generate vectors (per-channel, up to
 820                          * <16 x float> if the usagemask is a single bit) for
 821                          * indirect addressing.
 822                          *
 823                          * Note that 16 is the number of vector elements that
 824                          * LLVM will store in a register, so theoretically an
 825                          * array with up to 4 * 16 = 64 elements could be
 826                          * handled this way, but whether that's a good idea
 827                          * depends on VGPR register pressure elsewhere.
 828                          *
 829                          * FIXME: We shouldn't need to have the non-alloca
 830                          * code path for arrays. LLVM should be smart enough to
 831                          * promote allocas into registers when profitable.
 832                          *
 833                          * LLVM 3.8 crashes with this.
 834                          */
 835                         if ((HAVE_LLVM >= 0x0309 && array_size > 16) ||
 836                             /* TODO: VGPR indexing is buggy on GFX9. */
 837                             ctx->screen->b.chip_class == GFX9) {
 838                                 array_alloca = LLVMBuildAlloca(builder,
 839                                         LLVMArrayType(ctx->f32,
 840                                                       array_size), "array");
 841                                 ctx->temp_array_allocas[id] = array_alloca;
 842                         }
 843                 }
 844
 845                 if (!ctx->temps_count) {
 846                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 847                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 848                 }
 849                 if (!array_alloca) {
 850                         for (i = 0; i < decl_size; ++i) {
 851 #ifdef DEBUG
 852                                 snprintf(name, sizeof(name), "TEMP%d.%c",
 853                                          first + i / 4, "xyzw"[i % 4]);
 854 #endif
 855                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
 856                                         lp_build_alloca_undef(bld_base->base.gallivm,
 857                                                               ctx->f32,
 858                                                               name);
 859                         }
 860                 } else {
 861                         LLVMValueRef idxs[2] = {
 862                                 ctx->i32_0,
 863                                 NULL
 864                         };
 865                         unsigned j = 0;
 866
 867                         if (writemask != TGSI_WRITEMASK_XYZW &&
 868                             !ctx->undef_alloca) {
 869                                 /* Create a dummy alloca. We use it so that we
 870                                  * have a pointer that is safe to load from if
 871                                  * a shader ever reads from a channel that
 872                                  * it never writes to.
 873                                  */
 874                                 ctx->undef_alloca = lp_build_alloca_undef(
 875                                         bld_base->base.gallivm,
 876                                         ctx->f32, "undef");
 877                         }
 878
 879                         for (i = 0; i < decl_size; ++i) {
 880                                 LLVMValueRef ptr;
 881                                 if (writemask & (1 << (i % 4))) {
 882 #ifdef DEBUG
 883                                         snprintf(name, sizeof(name), "TEMP%d.%c",
 884                                                  first + i / 4, "xyzw"[i % 4]);
 885 #endif
 886                                         idxs[1] = LLVMConstInt(ctx->i32, j, 0);
 887                                         ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
 888                                         j++;
 889                                 } else {
 890                                         ptr = ctx->undef_alloca;
 891                                 }
 892                                 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
 893                         }
 894                 }
 895                 break;
 896         }
 897         case TGSI_FILE_INPUT:
 898         {
 899                 unsigned idx;
 900                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 901                         if (ctx->load_input &&
 902                             ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
 903                                 ctx->input_decls[idx] = *decl;
 904                                 ctx->input_decls[idx].Range.First = idx;
 905                                 ctx->input_decls[idx].Range.Last = idx;
 906                                 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
 907
 908                                 if (si_preload_fs_inputs(ctx) ||
 909                                     bld_base->info->processor != PIPE_SHADER_FRAGMENT)
 910                                         ctx->load_input(ctx, idx, &ctx->input_decls[idx],
 911                                                         &ctx->inputs[idx * 4]);
 912                         }
 913                 }
 914         }
 915         break;
 916
 917         case TGSI_FILE_SYSTEM_VALUE:
 918         {
 919                 unsigned idx;
 920                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 921                         ctx->load_system_value(ctx, idx, decl);
 922                 }
 923         }
 924         break;
 925
 926         case TGSI_FILE_OUTPUT:
 927         {
 928                 char name[16] = "";
 929                 unsigned idx;
 930                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 931                         unsigned chan;
 932                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 933                         if (ctx->outputs[idx][0])
 934                                 continue;
 935                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 936 #ifdef DEBUG
 937                                 snprintf(name, sizeof(name), "OUT%d.%c",
 938                                          idx, "xyzw"[chan % 4]);
 939 #endif
 940                                 ctx->outputs[idx][chan] = lp_build_alloca_undef(
 941                                         &ctx->gallivm,
 942                                         ctx->f32, name);
 943                         }
 944                 }
 945                 break;
 946         }
 947
 948         case TGSI_FILE_MEMORY:
 949                 ctx->declare_memory_region(ctx, decl);
 950                 break;
 951
 952         default:
 953                 break;
 954         }
 955 }
 956
 957 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 958                         const struct tgsi_full_instruction *inst,
 959                         const struct tgsi_opcode_info *info,
 960                         LLVMValueRef dst[4])
 961 {
 962         struct si_shader_context *ctx = si_shader_context(bld_base);
 963         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
 964         const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 965         LLVMBuilderRef builder = ctx->bld_base.base.gallivm->builder;
 966         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 967         unsigned chan, chan_index;
 968         bool is_vec_store = false;
 969         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 970
 971         if (dst[0]) {
 972                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 973                 is_vec_store = (k == LLVMVectorTypeKind);
 974         }
 975
 976         if (is_vec_store) {
 977                 LLVMValueRef values[4] = {};
 978                 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
 979                         LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
 980                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 981                                                         dst[0], index, "");
 982                 }
 983                 bld_base->emit_store(bld_base, inst, info, values);
 984                 return;
 985         }
 986
 987         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
 988                 LLVMValueRef value = dst[chan_index];
 989
 990                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 991                         continue;
 992                 if (inst->Instruction.Saturate)
 993                         value = ac_build_clamp(&ctx->ac, value);
 994
 995                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 996                         temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
 997                         LLVMBuildStore(builder, value, temp_ptr);
 998                         continue;
 999                 }
1000
1001                 if (!tgsi_type_is_64bit(dtype))
1002                         value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
1003
1004                 if (reg->Register.Indirect) {
1005                         unsigned file = reg->Register.File;
1006                         unsigned reg_index = reg->Register.Index;
1007                         store_value_to_array(bld_base, value, file, chan_index,
1008                                              reg_index, &reg->Indirect);
1009                 } else {
1010                         switch(reg->Register.File) {
1011                         case TGSI_FILE_OUTPUT:
1012                                 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
1013                                 if (tgsi_type_is_64bit(dtype))
1014                                         temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
1015                                 break;
1016
1017                         case TGSI_FILE_TEMPORARY:
1018                         {
1019                                 if (reg->Register.Index >= ctx->temps_count)
1020                                         continue;
1021
1022                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1023                                 if (tgsi_type_is_64bit(dtype))
1024                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1025
1026                                 break;
1027                         }
1028                         default:
1029                                 return;
1030                         }
1031                         if (!tgsi_type_is_64bit(dtype))
1032                                 LLVMBuildStore(builder, value, temp_ptr);
1033                         else {
1034                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1035                                                                     LLVMVectorType(ctx->i32, 2), "");
1036                                 LLVMValueRef val2;
1037                                 value = LLVMBuildExtractElement(builder, ptr,
1038                                                                 ctx->i32_0, "");
1039                                 val2 = LLVMBuildExtractElement(builder, ptr,
1040                                                                ctx->i32_1, "");
1041
1042                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1043                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1044                         }
1045                 }
1046         }
1047 }
1048
1049 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1050 {
1051         char buf[32];
1052         /* Subtract 1 so that the number shown is that of the corresponding
1053          * opcode in the TGSI dump, e.g. an if block has the same suffix as
1054          * the instruction number of the corresponding TGSI IF.
1055          */
1056         snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1057         LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1058 }
1059
1060 /* Append a basic block at the level of the parent flow.
1061  */
1062 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1063                                             const char *name)
1064 {
1065         struct gallivm_state *gallivm = &ctx->gallivm;
1066
1067         assert(ctx->flow_depth >= 1);
1068
1069         if (ctx->flow_depth >= 2) {
1070                 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1071
1072                 return LLVMInsertBasicBlockInContext(gallivm->context,
1073                                                      flow->next_block, name);
1074         }
1075
1076         return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1077 }
1078
1079 /* Emit a branch to the given default target for the current block if
1080  * applicable -- that is, if the current block does not already contain a
1081  * branch from a break or continue.
1082  */
1083 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1084 {
1085         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1086                  LLVMBuildBr(builder, target);
1087 }
1088
1089 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1090                          struct lp_build_tgsi_context *bld_base,
1091                          struct lp_build_emit_data *emit_data)
1092 {
1093         struct si_shader_context *ctx = si_shader_context(bld_base);
1094         struct gallivm_state *gallivm = bld_base->base.gallivm;
1095         struct si_llvm_flow *flow = push_flow(ctx);
1096         flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1097         flow->next_block = append_basic_block(ctx, "ENDLOOP");
1098         set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1099         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1100         LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1101 }
1102
1103 static void brk_emit(const struct lp_build_tgsi_action *action,
1104                      struct lp_build_tgsi_context *bld_base,
1105                      struct lp_build_emit_data *emit_data)
1106 {
1107         struct si_shader_context *ctx = si_shader_context(bld_base);
1108         struct gallivm_state *gallivm = bld_base->base.gallivm;
1109         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1110
1111         LLVMBuildBr(gallivm->builder, flow->next_block);
1112 }
1113
1114 static void cont_emit(const struct lp_build_tgsi_action *action,
1115                       struct lp_build_tgsi_context *bld_base,
1116                       struct lp_build_emit_data *emit_data)
1117 {
1118         struct si_shader_context *ctx = si_shader_context(bld_base);
1119         struct gallivm_state *gallivm = bld_base->base.gallivm;
1120         struct si_llvm_flow *flow = get_innermost_loop(ctx);
1121
1122         LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1123 }
1124
1125 static void else_emit(const struct lp_build_tgsi_action *action,
1126                       struct lp_build_tgsi_context *bld_base,
1127                       struct lp_build_emit_data *emit_data)
1128 {
1129         struct si_shader_context *ctx = si_shader_context(bld_base);
1130         struct gallivm_state *gallivm = bld_base->base.gallivm;
1131         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1132         LLVMBasicBlockRef endif_block;
1133
1134         assert(!current_branch->loop_entry_block);
1135
1136         endif_block = append_basic_block(ctx, "ENDIF");
1137         emit_default_branch(gallivm->builder, endif_block);
1138
1139         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1140         set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1141
1142         current_branch->next_block = endif_block;
1143 }
1144
1145 static void endif_emit(const struct lp_build_tgsi_action *action,
1146                        struct lp_build_tgsi_context *bld_base,
1147                        struct lp_build_emit_data *emit_data)
1148 {
1149         struct si_shader_context *ctx = si_shader_context(bld_base);
1150         struct gallivm_state *gallivm = bld_base->base.gallivm;
1151         struct si_llvm_flow *current_branch = get_current_flow(ctx);
1152
1153         assert(!current_branch->loop_entry_block);
1154
1155         emit_default_branch(gallivm->builder, current_branch->next_block);
1156         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1157         set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1158
1159         ctx->flow_depth--;
1160 }
1161
1162 static void endloop_emit(const struct lp_build_tgsi_action *action,
1163                          struct lp_build_tgsi_context *bld_base,
1164                          struct lp_build_emit_data *emit_data)
1165 {
1166         struct si_shader_context *ctx = si_shader_context(bld_base);
1167         struct gallivm_state *gallivm = bld_base->base.gallivm;
1168         struct si_llvm_flow *current_loop = get_current_flow(ctx);
1169
1170         assert(current_loop->loop_entry_block);
1171
1172         emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1173
1174         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1175         set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1176         ctx->flow_depth--;
1177 }
1178
1179 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1180                          struct lp_build_tgsi_context *bld_base,
1181                          struct lp_build_emit_data *emit_data,
1182                          LLVMValueRef cond)
1183 {
1184         struct si_shader_context *ctx = si_shader_context(bld_base);
1185         struct gallivm_state *gallivm = bld_base->base.gallivm;
1186         struct si_llvm_flow *flow = push_flow(ctx);
1187         LLVMBasicBlockRef if_block;
1188
1189         if_block = append_basic_block(ctx, "IF");
1190         flow->next_block = append_basic_block(ctx, "ELSE");
1191         set_basicblock_name(if_block, "if", bld_base->pc);
1192         LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1193         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1194 }
1195
1196 static void if_emit(const struct lp_build_tgsi_action *action,
1197                     struct lp_build_tgsi_context *bld_base,
1198                     struct lp_build_emit_data *emit_data)
1199 {
1200         struct gallivm_state *gallivm = bld_base->base.gallivm;
1201         LLVMValueRef cond;
1202
1203         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1204                         emit_data->args[0],
1205                         bld_base->base.zero, "");
1206
1207         if_cond_emit(action, bld_base, emit_data, cond);
1208 }
1209
1210 static void uif_emit(const struct lp_build_tgsi_action *action,
1211                      struct lp_build_tgsi_context *bld_base,
1212                      struct lp_build_emit_data *emit_data)
1213 {
1214         struct gallivm_state *gallivm = bld_base->base.gallivm;
1215         LLVMValueRef cond;
1216
1217         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1218                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1219                         bld_base->int_bld.zero, "");
1220
1221         if_cond_emit(action, bld_base, emit_data, cond);
1222 }
1223
1224 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1225                            const struct tgsi_full_immediate *imm)
1226 {
1227         unsigned i;
1228         struct si_shader_context *ctx = si_shader_context(bld_base);
1229
1230         for (i = 0; i < 4; ++i) {
1231                 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1232                                 LLVMConstInt(ctx->i32, imm->u[i].Uint, false   );
1233         }
1234
1235         ctx->imms_num++;
1236 }
1237
1238 void si_llvm_context_init(struct si_shader_context *ctx,
1239                           struct si_screen *sscreen,
1240                           struct si_shader *shader,
1241                           LLVMTargetMachineRef tm,
1242                           const struct tgsi_shader_info *info,
1243                           const struct tgsi_token *tokens)
1244 {
1245         struct lp_type type;
1246
1247         /* Initialize the gallivm object:
1248          * We are only using the module, context, and builder fields of this struct.
1249          * This should be enough for us to be able to pass our gallivm struct to the
1250          * helper functions in the gallivm module.
1251          */
1252         memset(ctx, 0, sizeof(*ctx));
1253         ctx->shader = shader;
1254         ctx->screen = sscreen;
1255         ctx->tm = tm;
1256         ctx->type = info ? info->processor : -1;
1257
1258         ctx->gallivm.context = LLVMContextCreate();
1259         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1260                                                 ctx->gallivm.context);
1261         LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1262
1263 #if HAVE_LLVM >= 0x0309
1264         LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1265         char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1266         LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1267         LLVMDisposeTargetData(data_layout);
1268         LLVMDisposeMessage(data_layout_str);
1269 #endif
1270
1271         bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1272         enum lp_float_mode float_mode =
1273                 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1274                                 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1275
1276         ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1277                                                  float_mode);
1278
1279         ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1280         ctx->ac.module = ctx->gallivm.module;
1281         ctx->ac.builder = ctx->gallivm.builder;
1282
1283         struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1284
1285         bld_base->info = info;
1286
1287         if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1288                 int size = info->array_max[TGSI_FILE_TEMPORARY];
1289
1290                 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1291                 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1292
1293                 if (tokens)
1294                         tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1295                                          ctx->temp_arrays);
1296         }
1297
1298         if (info && info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1299                 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1300                 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1301         }
1302
1303         type.floating = true;
1304         type.fixed = false;
1305         type.sign = true;
1306         type.norm = false;
1307         type.width = 32;
1308         type.length = 1;
1309
1310         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1311         lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1312         lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1313         type.width *= 2;
1314         lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1315         lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1316         lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1317
1318         bld_base->soa = 1;
1319         bld_base->emit_store = si_llvm_emit_store;
1320         bld_base->emit_swizzle = emit_swizzle;
1321         bld_base->emit_declaration = emit_declaration;
1322         bld_base->emit_immediate = emit_immediate;
1323
1324         bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1325         bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1326         bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1327         bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1328         bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1329
1330         /* metadata allowing 2.5 ULP */
1331         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1332                                                        "fpmath", 6);
1333         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1334         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1335                                                      &arg, 1);
1336
1337         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1338         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1339         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1340         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1341         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1342         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1343         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1344         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1345
1346         si_shader_context_init_alu(&ctx->bld_base);
1347
1348         ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1349         ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1350         ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1351         ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1352         ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1353         ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1354         ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1355         ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
1356         ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1357         ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1358         ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1359         ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1360
1361         ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1362         ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1363 }
1364
1365 void si_llvm_create_func(struct si_shader_context *ctx,
1366                          const char *name,
1367                          LLVMTypeRef *return_types, unsigned num_return_elems,
1368                          LLVMTypeRef *ParamTypes, unsigned ParamCount)
1369 {
1370         LLVMTypeRef main_fn_type, ret_type;
1371         LLVMBasicBlockRef main_fn_body;
1372
1373         if (num_return_elems)
1374                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1375                                                    return_types,
1376                                                    num_return_elems, true);
1377         else
1378                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1379
1380         /* Setup the function */
1381         ctx->return_type = ret_type;
1382         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1383         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1384         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1385                         ctx->main_fn, "main_body");
1386         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1387 }
1388
1389 void si_llvm_finalize_module(struct si_shader_context *ctx,
1390                              bool run_verifier)
1391 {
1392         struct gallivm_state *gallivm = ctx->bld_base.base.gallivm;
1393         const char *triple = LLVMGetTarget(gallivm->module);
1394         LLVMTargetLibraryInfoRef target_library_info;
1395
1396         /* Create the pass manager */
1397         gallivm->passmgr = LLVMCreatePassManager();
1398
1399         target_library_info = gallivm_create_target_library_info(triple);
1400         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1401
1402         if (run_verifier)
1403                 LLVMAddVerifierPass(gallivm->passmgr);
1404
1405         LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1406
1407         /* This pass should eliminate all the load and store instructions */
1408         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1409
1410         /* Add some optimization passes */
1411         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1412         LLVMAddLICMPass(gallivm->passmgr);
1413         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1414         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1415         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1416
1417         /* Run the pass */
1418         LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1419
1420         LLVMDisposeBuilder(gallivm->builder);
1421         LLVMDisposePassManager(gallivm->passmgr);
1422         gallivm_dispose_target_library_info(target_library_info);
1423 }
1424
1425 void si_llvm_dispose(struct si_shader_context *ctx)
1426 {
1427         LLVMDisposeModule(ctx->bld_base.base.gallivm->module);
1428         LLVMContextDispose(ctx->bld_base.base.gallivm->context);
1429         FREE(ctx->temp_arrays);
1430         ctx->temp_arrays = NULL;
1431         FREE(ctx->temp_array_allocas);
1432         ctx->temp_array_allocas = NULL;
1433         FREE(ctx->temps);
1434         ctx->temps = NULL;
1435         ctx->temps_count = 0;
1436         FREE(ctx->imms);
1437         ctx->imms = NULL;
1438         ctx->imms_num = 0;
1439         FREE(ctx->flow);
1440         ctx->flow = NULL;
1441         ctx->flow_depth_max = 0;
1442 }