gallium/radeon: label basic blocks by the corresponding TGSI pc
[mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_misc.h"
34 #include "gallivm/lp_bld_swizzle.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "util/u_math.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40
41 #include <stdio.h>
42 #include <llvm-c/Core.h>
43 #include <llvm-c/Transforms/Scalar.h>
44
45 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
46 enum tgsi_opcode_type type)
47 {
48 LLVMContextRef ctx = bld_base->base.gallivm->context;
49
50 switch (type) {
51 case TGSI_TYPE_UNSIGNED:
52 case TGSI_TYPE_SIGNED:
53 return LLVMInt32TypeInContext(ctx);
54 case TGSI_TYPE_UNSIGNED64:
55 case TGSI_TYPE_SIGNED64:
56 return LLVMInt64TypeInContext(ctx);
57 case TGSI_TYPE_DOUBLE:
58 return LLVMDoubleTypeInContext(ctx);
59 case TGSI_TYPE_UNTYPED:
60 case TGSI_TYPE_FLOAT:
61 return LLVMFloatTypeInContext(ctx);
62 default: break;
63 }
64 return 0;
65 }
66
67 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
68 enum tgsi_opcode_type type, LLVMValueRef value)
69 {
70 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
71 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
72
73 if (dst_type)
74 return LLVMBuildBitCast(builder, value, dst_type, "");
75 else
76 return value;
77 }
78
79 /**
80 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
81 * or an undefined value in the same interval otherwise.
82 */
83 LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
84 LLVMValueRef index,
85 unsigned num)
86 {
87 struct gallivm_state *gallivm = &ctx->gallivm;
88 LLVMBuilderRef builder = gallivm->builder;
89 LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
90 LLVMValueRef cc;
91
92 if (util_is_power_of_two(num)) {
93 index = LLVMBuildAnd(builder, index, c_max, "");
94 } else {
95 /* In theory, this MAX pattern should result in code that is
96 * as good as the bit-wise AND above.
97 *
98 * In practice, LLVM generates worse code (at the time of
99 * writing), because its value tracking is not strong enough.
100 */
101 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
102 index = LLVMBuildSelect(builder, cc, index, c_max, "");
103 }
104
105 return index;
106 }
107
108 static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
109 {
110 return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
111 }
112
113 static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
114 {
115 return ctx->branch_depth > 0 ?
116 ctx->branch + (ctx->branch_depth - 1) : NULL;
117 }
118
/* Flatten a (register, channel) pair into a single SoA slot index.
 * Each register occupies four consecutive channel slots (x, y, z, w).
 */
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return index * 4 + chan;
}
123
124 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
125 LLVMValueRef value,
126 unsigned swizzle_x,
127 unsigned swizzle_y,
128 unsigned swizzle_z,
129 unsigned swizzle_w)
130 {
131 LLVMValueRef swizzles[4];
132 LLVMTypeRef i32t =
133 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
134
135 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
136 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
137 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
138 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
139
140 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
141 value,
142 LLVMGetUndef(LLVMTypeOf(value)),
143 LLVMConstVector(swizzles, 4), "");
144 }
145
/**
 * Return the description of the array covering the given temporary register
 * index.
 *
 * \return the 1-based ArrayID of the covering array, or 0 if the register
 *         does not belong to any declared temporary array.
 */
static unsigned
get_temp_array_id(struct lp_build_tgsi_context *bld_base,
		  unsigned reg_index,
		  const struct tgsi_ind_register *reg)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
	unsigned i;

	/* Trust an explicit, in-range ArrayID on the indirect register. */
	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
		return reg->ArrayID;

	/* Otherwise search for an array whose declared range covers the index. */
	for (i = 0; i < num_arrays; i++) {
		const struct tgsi_array_info *array = &ctx->temp_arrays[i];

		if (reg_index >= array->range.First && reg_index <= array->range.Last)
			return i + 1;
	}

	return 0;
}
171
/**
 * Return the register range covered by the array containing the given
 * register, or the whole register file if the register is not in an array.
 */
static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
		unsigned File, unsigned reg_index,
		const struct tgsi_ind_register *reg)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct tgsi_declaration_range range;

	/* Only temporaries have per-array declarations. */
	if (File == TGSI_FILE_TEMPORARY) {
		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
		if (array_id)
			return ctx->temp_arrays[array_id - 1].range;
	}

	/* Fall back to the entire file: [0, file_max]. */
	range.First = 0;
	range.Last = bld_base->info->file_max[File];
	return range;
}
190
191 static LLVMValueRef
192 emit_array_index(struct lp_build_tgsi_soa_context *bld,
193 const struct tgsi_ind_register *reg,
194 unsigned offset)
195 {
196 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
197
198 if (!reg) {
199 return lp_build_const_int32(gallivm, offset);
200 }
201 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
202 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
203 }
204
/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct radeon_llvm_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only temporaries can be backed by an array alloca. */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* No alloca was created for this array (see emit_declaration). */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Channels that are never written read from the shared undef alloca,
	 * which is always safe to load from.
	 */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = emit_array_index(&ctx->soa, reg_indirect,
				 reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantuees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = radeon_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca stores only the written channels, packed per element:
	 * flat index = element * popcount(writemask)
	 *            + number of written channels below this swizzle.
	 */
	index = LLVMBuildMul(
		builder, index,
		lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
		"");
	index = LLVMBuildAdd(
		builder, index,
		lp_build_const_int32(
			gallivm,
			util_bitcount(array->writemask & ((1 << swizzle) - 1))),
		"");
	idxs[0] = ctx->soa.bld_base.uint_bld.zero;
	idxs[1] = index;
	return LLVMBuildGEP(builder, alloca, idxs, 2, "");
}
274
/**
 * Combine two 32-bit values into one 64-bit value of the given TGSI type:
 * \p ptr is inserted at element 0 (low dword) and \p ptr2 at element 1
 * (high dword) of an i32 vector, which is then bitcast to \p type.
 */
LLVMValueRef
radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
			     enum tgsi_opcode_type type,
			     LLVMValueRef ptr,
			     LLVMValueRef ptr2)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result;

	/* Start from an undef i32 vector twice the base vector length. */
	result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));

	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
					bld_base->int_bld.zero, "");
	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
					bld_base->int_bld.one, "");
	return bitcast(bld_base, type, result);
}
296
/**
 * Fetch every register of the given range (one channel each) into a vector,
 * using the regular per-register fetch path.  This is the fallback used for
 * indirect addressing when no alloca-backed array is available.
 */
static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
		 unsigned File, enum tgsi_opcode_type type,
		 struct tgsi_declaration_range range,
		 unsigned swizzle)
{
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	/* Synthesize a direct source register; only File and Index are used. */
	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(builder, result, temp,
						lp_build_const_int32(gallivm, i), "array_vector");
	}
	return result;
}
322
/**
 * Load one channel of an indirectly addressed register.
 *
 * Prefers a direct load through a pointer into the backing alloca; falls
 * back to fetching the whole range as a vector and extracting dynamically.
 */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			/* The high dword lives in the next 32-bit slot. */
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
		}

		return val;
	} else {
		/* Fallback: gather the whole range and extract the element. */
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			emit_array_index(bld, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}
358
/**
 * Store one channel of an indirectly addressed register.
 *
 * Prefers a direct store through a pointer into the backing alloca; falls
 * back to a read-modify-write of the whole range: fetch it as a vector,
 * insert the value at the dynamic index, then write every element back.
 */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		array = LLVMBuildInsertElement(builder, array, value, index, "");

		/* Write each element of the updated vector back to its slot. */
		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				/* Skip registers outside the allocated temporaries. */
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							lp_build_const_int32(gallivm, i), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
408
/**
 * Fetch one channel (or, for swizzle == ~0, all four channels gathered into
 * a vector) of a TGSI source register, bitcast to the requested type.
 *
 * 64-bit types consume two consecutive 32-bit channels (swizzle and
 * swizzle + 1).  Unknown files and out-of-range temporaries yield undef.
 */
LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				    const struct tgsi_full_src_register *reg,
				    enum tgsi_opcode_type type,
				    unsigned swizzle)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	/* swizzle == ~0 means "fetch all channels": recurse per channel. */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Indirect addressing takes the array-load path. */
	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Pack two 32-bit immediates into a constant vector
			 * and bitcast it to the 64-bit type.
			 */
			result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
			result = LLVMConstInsertElement(result,
							bld->immediates[reg->Register.Index][swizzle],
							bld_base->int_bld.zero);
			result = LLVMConstInsertElement(result,
							bld->immediates[reg->Register.Index][swizzle + 1],
							bld_base->int_bld.one);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return radeon_llvm_emit_fetch_64bit(bld_base, type,
							    LLVMBuildLoad(builder, ptr, ""),
							    LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
			return radeon_llvm_emit_fetch_64bit(bld_base, type,
							    LLVMBuildLoad(builder, ptr, ""),
							    LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
506
507 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
508 const struct tgsi_full_src_register *reg,
509 enum tgsi_opcode_type type,
510 unsigned swizzle)
511 {
512 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
513 struct gallivm_state *gallivm = bld_base->base.gallivm;
514
515 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
516 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
517 cval = LLVMBuildExtractElement(gallivm->builder, cval,
518 lp_build_const_int32(gallivm, swizzle), "");
519 }
520 return bitcast(bld_base, type, cval);
521 }
522
/**
 * Allocate storage for (or forward to driver callbacks) a TGSI declaration.
 *
 * ADDRESS and OUTPUT registers get one alloca per channel.  TEMPORARY
 * registers get per-channel allocas, or a single packed array alloca for
 * large arrays.  INPUT and SYSTEM_VALUE declarations are handed to the
 * driver's load callbacks; MEMORY goes to declare_memory_region.
 */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->soa.addr[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->soa.bld_base.uint_bld.elem_type, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		/* Four channels per register. */
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			/* Intersect with previously recorded writes and keep
			 * only the written channels in the packed alloca.
			 */
			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 *
			 * LLVM 3.8 crashes with this.
			 */
			if (HAVE_LLVM >= 0x0309 && array_size > 16) {
				array_alloca = LLVMBuildAlloca(builder,
					LLVMArrayType(bld_base->base.vec_type,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the flat pointer table for all temps. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* One alloca per channel; named only in debug builds. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(bld_base->base.gallivm,
							      bld_base->base.vec_type,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				bld_base->uint_bld.zero,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					bld_base->base.gallivm,
					bld_base->base.vec_type, "undef");
			}

			/* Point each (register, channel) slot either into the
			 * packed array alloca (written channels, consecutive
			 * index j) or at the shared undef alloca.
			 */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input) {
				ctx->input_decls[idx] = *decl;

				/* Fragment shader inputs are loaded lazily at
				 * fetch time (see radeon_llvm_emit_fetch).
				 */
				if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, decl,
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			ctx->load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->soa.outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->soa.bld_base.base.elem_type, "");
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		ctx->declare_memory_region(ctx, decl);
		break;

	default:
		break;
	}
}
690
691 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
692 LLVMValueRef value)
693 {
694 struct lp_build_emit_data clamp_emit_data;
695
696 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
697 clamp_emit_data.arg_count = 3;
698 clamp_emit_data.args[0] = value;
699 clamp_emit_data.args[2] = bld_base->base.one;
700 clamp_emit_data.args[1] = bld_base->base.zero;
701
702 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
703 &clamp_emit_data);
704 }
705
/**
 * Store the per-channel results \p dst of an instruction into its first
 * destination register, honoring the write mask, saturation, indirect
 * addressing, and 64-bit destination types (which occupy channel pairs).
 *
 * If dst[0] is a vector, it is scattered into scalars first and this
 * function recurses through bld_base->emit_store.
 */
void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			    const struct tgsi_full_instruction *inst,
			    const struct tgsi_opcode_info *info,
			    LLVMValueRef dst[4])
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	unsigned chan, chan_index;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	/* Scatter a vector result into per-channel scalars and retry. */
	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
			LLVMValueRef index = lp_build_const_int32(gallivm, chan);
			values[chan] = LLVMBuildExtractElement(gallivm->builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, values);
		return;
	}

	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values are handled at the even channel of each pair. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = radeon_llvm_saturate(bld_base, value);

		/* The address register has its own (integer) storage. */
		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = bld->addr[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		/* Registers are stored as float vectors. */
		if (!tgsi_type_is_64bit(dtype))
			value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit halves
				 * and store them into consecutive channels.
				 */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								bld_base->uint_bld.zero, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       bld_base->uint_bld.one, "");

				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
			}
		}
	}
}
798
799 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
800 {
801 char buf[32];
802 /* Subtract 1 so that the number shown is that of the corresponding
803 * opcode in the TGSI dump, e.g. an if block has the same suffix as
804 * the instruction number of the corresponding TGSI IF.
805 */
806 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
807 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
808 }
809
810 /* Emit a branch to the given default target for the current block if
811 * applicable -- that is, if the current block does not already contain a
812 * branch from a break or continue.
813 */
814 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
815 {
816 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
817 LLVMBuildBr(builder, target);
818 }
819
/* BGNLOOP: create the LOOP and ENDLOOP basic blocks, branch into the loop,
 * and push the pair onto the (dynamically grown) loop stack so that
 * BRK/CONT/ENDLOOP can find their targets.
 */
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBasicBlockRef loop_block;
	LLVMBasicBlockRef endloop_block;
	/* Insert LOOP immediately before ENDLOOP to keep block order sane. */
	endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
						      ctx->main_fn, "ENDLOOP");
	loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
						   endloop_block, "LOOP");
	/* Label the block with the TGSI pc of the BGNLOOP opcode. */
	set_basicblock_name(loop_block, "loop", bld_base->pc);
	LLVMBuildBr(gallivm->builder, loop_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);

	/* Grow the loop stack geometrically when it overflows. */
	if (++ctx->loop_depth > ctx->loop_depth_max) {
		unsigned new_max = ctx->loop_depth_max << 1;

		if (!new_max)
			new_max = RADEON_LLVM_INITIAL_CF_DEPTH;

		ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
				    sizeof(ctx->loop[0]),
				    new_max * sizeof(ctx->loop[0]));
		ctx->loop_depth_max = new_max;
	}

	ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
	ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
}
851
852 static void brk_emit(const struct lp_build_tgsi_action *action,
853 struct lp_build_tgsi_context *bld_base,
854 struct lp_build_emit_data *emit_data)
855 {
856 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
857 struct gallivm_state *gallivm = bld_base->base.gallivm;
858 struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
859
860 LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
861 }
862
863 static void cont_emit(const struct lp_build_tgsi_action *action,
864 struct lp_build_tgsi_context *bld_base,
865 struct lp_build_emit_data *emit_data)
866 {
867 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
868 struct gallivm_state *gallivm = bld_base->base.gallivm;
869 struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
870
871 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
872 }
873
/* ELSE: terminate the IF arm (unless a break/continue already did) with a
 * branch to ENDIF, record that an else exists, and continue emitting into
 * the ELSE block.
 */
static void else_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_branch *current_branch = get_current_branch(ctx);

	emit_default_branch(gallivm->builder, current_branch->endif_block);
	current_branch->has_else = 1;
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
	/* Label the block with the TGSI pc of the ELSE opcode. */
	set_basicblock_name(current_branch->else_block, "else", bld_base->pc);
}
887
/* ENDIF: close both arms of the current branch, pop the branch stack, and
 * continue emitting into the ENDIF block.
 */
static void endif_emit(const struct lp_build_tgsi_action *action,
		       struct lp_build_tgsi_context *bld_base,
		       struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_branch *current_branch = get_current_branch(ctx);

	emit_default_branch(gallivm->builder, current_branch->endif_block);

	/* Need to fixup an empty else block if there was no ELSE opcode. */
	if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
		LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
		LLVMBuildBr(gallivm->builder, current_branch->endif_block);
		set_basicblock_name(current_branch->else_block, "empty_else", bld_base->pc);
	}

	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
	/* Label the block with the TGSI pc of the ENDIF opcode. */
	set_basicblock_name(current_branch->endif_block, "endif", bld_base->pc);
	ctx->branch_depth--;
}
909
/* ENDLOOP: branch back to the loop header (unless a break/continue already
 * terminated the block), pop the loop stack, and continue emitting into the
 * ENDLOOP block.
 */
static void endloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_loop *current_loop = get_current_loop(ctx);

	emit_default_branch(gallivm->builder, current_loop->loop_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
	/* Label the block with the TGSI pc of the ENDLOOP opcode. */
	set_basicblock_name(current_loop->endloop_block, "endloop", bld_base->pc);
	ctx->loop_depth--;
}
924
/* Common implementation of IF and UIF: create the IF/ELSE/ENDIF basic
 * blocks, emit the conditional branch on \p cond, and push a record onto
 * the (dynamically grown) branch stack.  An ELSE block is always created;
 * endif_emit fixes it up if the shader has no ELSE opcode.
 */
static void if_cond_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data,
			 LLVMValueRef cond)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBasicBlockRef if_block, else_block, endif_block;

	/* Insert IF and ELSE immediately before ENDIF to keep block order. */
	endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
						    ctx->main_fn, "ENDIF");
	if_block = LLVMInsertBasicBlockInContext(gallivm->context,
						 endif_block, "IF");
	else_block = LLVMInsertBasicBlockInContext(gallivm->context,
						   endif_block, "ELSE");
	/* Label the block with the TGSI pc of the IF/UIF opcode. */
	set_basicblock_name(if_block, "if", bld_base->pc);
	LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);

	/* Grow the branch stack geometrically when it overflows. */
	if (++ctx->branch_depth > ctx->branch_depth_max) {
		unsigned new_max = ctx->branch_depth_max << 1;

		if (!new_max)
			new_max = RADEON_LLVM_INITIAL_CF_DEPTH;

		ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
				      sizeof(ctx->branch[0]),
				      new_max * sizeof(ctx->branch[0]));
		ctx->branch_depth_max = new_max;
	}

	ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
	ctx->branch[ctx->branch_depth - 1].if_block = if_block;
	ctx->branch[ctx->branch_depth - 1].else_block = else_block;
	ctx->branch[ctx->branch_depth - 1].has_else = 0;
}
961
962 static void if_emit(const struct lp_build_tgsi_action *action,
963 struct lp_build_tgsi_context *bld_base,
964 struct lp_build_emit_data *emit_data)
965 {
966 struct gallivm_state *gallivm = bld_base->base.gallivm;
967 LLVMValueRef cond;
968
969 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
970 emit_data->args[0],
971 bld_base->base.zero, "");
972
973 if_cond_emit(action, bld_base, emit_data, cond);
974 }
975
976 static void uif_emit(const struct lp_build_tgsi_action *action,
977 struct lp_build_tgsi_context *bld_base,
978 struct lp_build_emit_data *emit_data)
979 {
980 struct gallivm_state *gallivm = bld_base->base.gallivm;
981 LLVMValueRef cond;
982
983 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
984 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
985 bld_base->int_bld.zero, "");
986
987 if_cond_emit(action, bld_base, emit_data, cond);
988 }
989
990 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
991 struct lp_build_emit_data *emit_data)
992 {
993 const struct tgsi_full_instruction *inst = emit_data->inst;
994 struct gallivm_state *gallivm = bld_base->base.gallivm;
995 LLVMBuilderRef builder = gallivm->builder;
996 unsigned i;
997 LLVMValueRef conds[TGSI_NUM_CHANNELS];
998
999 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1000 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
1001 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
1002 bld_base->base.zero, "");
1003 }
1004
1005 /* Or the conditions together */
1006 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
1007 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
1008 }
1009
1010 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
1011 emit_data->arg_count = 1;
1012 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
1013 lp_build_const_float(gallivm, -1.0f),
1014 bld_base->base.zero, "");
1015 }
1016
1017 static void kil_emit(const struct lp_build_tgsi_action *action,
1018 struct lp_build_tgsi_context *bld_base,
1019 struct lp_build_emit_data *emit_data)
1020 {
1021 unsigned i;
1022 for (i = 0; i < emit_data->arg_count; i++) {
1023 emit_data->output[i] = lp_build_intrinsic_unary(
1024 bld_base->base.gallivm->builder,
1025 action->intr_name,
1026 emit_data->dst_type, emit_data->args[i]);
1027 }
1028 }
1029
1030 static LLVMValueRef build_cube_intrinsic(struct gallivm_state *gallivm,
1031 LLVMValueRef in[3])
1032 {
1033 if (HAVE_LLVM >= 0x0309) {
1034 LLVMTypeRef f32 = LLVMTypeOf(in[0]);
1035 LLVMValueRef out[4];
1036
1037 out[0] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubetc",
1038 f32, in, 3, LLVMReadNoneAttribute);
1039 out[1] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubesc",
1040 f32, in, 3, LLVMReadNoneAttribute);
1041 out[2] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubema",
1042 f32, in, 3, LLVMReadNoneAttribute);
1043 out[3] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubeid",
1044 f32, in, 3, LLVMReadNoneAttribute);
1045
1046 return lp_build_gather_values(gallivm, out, 4);
1047 } else {
1048 LLVMValueRef c[4] = {
1049 in[0],
1050 in[1],
1051 in[2],
1052 LLVMGetUndef(LLVMTypeOf(in[0]))
1053 };
1054 LLVMValueRef vec = lp_build_gather_values(gallivm, c, 4);
1055
1056 return lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.cube",
1057 LLVMTypeOf(vec), &vec, 1,
1058 LLVMReadNoneAttribute);
1059 }
1060 }
1061
/* Reduce a 3D cube-map direction vector to 2D face coordinates plus a face
 * index.
 *
 * out[0], out[1] = coordinates on the selected face, each computed as
 * coord * (1/|ma|) + 1.5; out[2] = face id.
 */
static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
					  LLVMValueRef *in, LLVMValueRef *out)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef type = bld_base->base.elem_type;
	LLVMValueRef coords[4];
	LLVMValueRef mad_args[3];
	LLVMValueRef v;
	unsigned i;

	v = build_cube_intrinsic(gallivm, in);

	/* Unpack the components: tc, sc, ma, id. */
	for (i = 0; i < 4; ++i)
		coords[i] = LLVMBuildExtractElement(builder, v,
						lp_build_const_int32(gallivm, i), "");

	/* 1 / |ma| -- reciprocal of the major axis magnitude. */
	coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
			type, &coords[2], 1, LLVMReadNoneAttribute);
	coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);

	/* coord = coord * (1/|ma|) + 1.5 for both face coordinates. */
	mad_args[1] = coords[2];
	mad_args[2] = LLVMConstReal(type, 1.5);

	mad_args[0] = coords[0];
	coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
			mad_args[0], mad_args[1], mad_args[2]);

	mad_args[0] = coords[1];
	coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
			mad_args[0], mad_args[1], mad_args[2]);

	/* apply xyz = yxw swizzle to coords */
	out[0] = coords[1];
	out[1] = coords[0];
	out[2] = coords[3];
}
1099
/* Rewrite cube-map texture coordinates (and, for TXD, derivatives) into the
 * 2D-face form used by the sampler code.
 *
 * coords_arg is rewritten in place: .xy = face coordinates, .z = face id
 * (combined with the array index for cube arrays), .w = preserved
 * compare/lod/bias for the opcodes that carry one.  derivs_arg (TXD only)
 * is rewritten in place to 2D derivatives.
 */
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
					  struct lp_build_emit_data *emit_data,
					  LLVMValueRef *coords_arg,
					  LLVMValueRef *derivs_arg)
{

	unsigned target = emit_data->inst->Texture.Texture;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef coords[4];
	unsigned i;

	radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);

	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];

			/* Shift the cube coordinates by the derivatives to get
			 * the cube coordinates of the "neighboring pixel".
			 */
			for (i = 0; i < 3; i++)
				shifted_cube_coords[i] =
					LLVMBuildFAdd(builder, coords_arg[i],
						      derivs_arg[axis*3+i], "");
			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);

			/* Project the shifted cube coordinates onto the face. */
			radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
						      shifted_coords);

			/* Subtract both sets of 2D coordinates to get 2D derivatives.
			 * This won't work if the shifted coordinates ended up
			 * in a different face.
			 */
			for (i = 0; i < 2; i++)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder, shifted_coords[i],
						      coords[i], "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
						       coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
	}

	/* Preserve compare/lod/bias. Put it in coords.w. */
	if (opcode == TGSI_OPCODE_TEX2 ||
	    opcode == TGSI_OPCODE_TXB2 ||
	    opcode == TGSI_OPCODE_TXL2) {
		/* Two-source opcodes: the extra value is in the 5th slot. */
		coords[3] = coords_arg[4];
	} else if (opcode == TGSI_OPCODE_TXB ||
		   opcode == TGSI_OPCODE_TXL ||
		   target == TGSI_TEXTURE_SHADOWCUBE) {
		coords[3] = coords_arg[3];
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
1170
1171 static void emit_icmp(const struct lp_build_tgsi_action *action,
1172 struct lp_build_tgsi_context *bld_base,
1173 struct lp_build_emit_data *emit_data)
1174 {
1175 unsigned pred;
1176 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1177 LLVMContextRef context = bld_base->base.gallivm->context;
1178
1179 switch (emit_data->inst->Instruction.Opcode) {
1180 case TGSI_OPCODE_USEQ:
1181 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
1182 case TGSI_OPCODE_USNE:
1183 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
1184 case TGSI_OPCODE_USGE:
1185 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
1186 case TGSI_OPCODE_USLT:
1187 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
1188 case TGSI_OPCODE_ISGE:
1189 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
1190 case TGSI_OPCODE_ISLT:
1191 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
1192 default:
1193 assert(!"unknown instruction");
1194 pred = 0;
1195 break;
1196 }
1197
1198 LLVMValueRef v = LLVMBuildICmp(builder, pred,
1199 emit_data->args[0], emit_data->args[1],"");
1200
1201 v = LLVMBuildSExtOrBitCast(builder, v,
1202 LLVMInt32TypeInContext(context), "");
1203
1204 emit_data->output[emit_data->chan] = v;
1205 }
1206
1207 static void emit_ucmp(const struct lp_build_tgsi_action *action,
1208 struct lp_build_tgsi_context *bld_base,
1209 struct lp_build_emit_data *emit_data)
1210 {
1211 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1212
1213 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
1214 bld_base->uint_bld.elem_type, "");
1215
1216 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
1217 bld_base->uint_bld.zero, "");
1218
1219 emit_data->output[emit_data->chan] =
1220 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
1221 }
1222
1223 static void emit_cmp(const struct lp_build_tgsi_action *action,
1224 struct lp_build_tgsi_context *bld_base,
1225 struct lp_build_emit_data *emit_data)
1226 {
1227 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1228 LLVMValueRef cond, *args = emit_data->args;
1229
1230 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
1231 bld_base->base.zero, "");
1232
1233 emit_data->output[emit_data->chan] =
1234 LLVMBuildSelect(builder, cond, args[1], args[2], "");
1235 }
1236
1237 static void emit_set_cond(const struct lp_build_tgsi_action *action,
1238 struct lp_build_tgsi_context *bld_base,
1239 struct lp_build_emit_data *emit_data)
1240 {
1241 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1242 LLVMRealPredicate pred;
1243 LLVMValueRef cond;
1244
1245 /* Use ordered for everything but NE (which is usual for
1246 * float comparisons)
1247 */
1248 switch (emit_data->inst->Instruction.Opcode) {
1249 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
1250 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
1251 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
1252 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
1253 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
1254 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
1255 default: assert(!"unknown instruction"); pred = 0; break;
1256 }
1257
1258 cond = LLVMBuildFCmp(builder,
1259 pred, emit_data->args[0], emit_data->args[1], "");
1260
1261 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
1262 cond, bld_base->base.one, bld_base->base.zero, "");
1263 }
1264
1265 static void emit_fcmp(const struct lp_build_tgsi_action *action,
1266 struct lp_build_tgsi_context *bld_base,
1267 struct lp_build_emit_data *emit_data)
1268 {
1269 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1270 LLVMContextRef context = bld_base->base.gallivm->context;
1271 LLVMRealPredicate pred;
1272
1273 /* Use ordered for everything but NE (which is usual for
1274 * float comparisons)
1275 */
1276 switch (emit_data->inst->Instruction.Opcode) {
1277 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
1278 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
1279 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
1280 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
1281 default: assert(!"unknown instruction"); pred = 0; break;
1282 }
1283
1284 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1285 emit_data->args[0], emit_data->args[1],"");
1286
1287 v = LLVMBuildSExtOrBitCast(builder, v,
1288 LLVMInt32TypeInContext(context), "");
1289
1290 emit_data->output[emit_data->chan] = v;
1291 }
1292
1293 static void emit_dcmp(const struct lp_build_tgsi_action *action,
1294 struct lp_build_tgsi_context *bld_base,
1295 struct lp_build_emit_data *emit_data)
1296 {
1297 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1298 LLVMContextRef context = bld_base->base.gallivm->context;
1299 LLVMRealPredicate pred;
1300
1301 /* Use ordered for everything but NE (which is usual for
1302 * float comparisons)
1303 */
1304 switch (emit_data->inst->Instruction.Opcode) {
1305 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1306 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1307 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1308 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1309 default: assert(!"unknown instruction"); pred = 0; break;
1310 }
1311
1312 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1313 emit_data->args[0], emit_data->args[1],"");
1314
1315 v = LLVMBuildSExtOrBitCast(builder, v,
1316 LLVMInt32TypeInContext(context), "");
1317
1318 emit_data->output[emit_data->chan] = v;
1319 }
1320
1321 static void emit_not(const struct lp_build_tgsi_action *action,
1322 struct lp_build_tgsi_context *bld_base,
1323 struct lp_build_emit_data *emit_data)
1324 {
1325 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1326 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1327 emit_data->args[0]);
1328 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1329 }
1330
1331 static void emit_arl(const struct lp_build_tgsi_action *action,
1332 struct lp_build_tgsi_context *bld_base,
1333 struct lp_build_emit_data *emit_data)
1334 {
1335 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1336 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1337 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1338 floor_index, bld_base->base.int_elem_type , "");
1339 }
1340
1341 static void emit_and(const struct lp_build_tgsi_action *action,
1342 struct lp_build_tgsi_context *bld_base,
1343 struct lp_build_emit_data *emit_data)
1344 {
1345 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1346 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1347 emit_data->args[0], emit_data->args[1], "");
1348 }
1349
1350 static void emit_or(const struct lp_build_tgsi_action *action,
1351 struct lp_build_tgsi_context *bld_base,
1352 struct lp_build_emit_data *emit_data)
1353 {
1354 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1355 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1356 emit_data->args[0], emit_data->args[1], "");
1357 }
1358
1359 static void emit_uadd(const struct lp_build_tgsi_action *action,
1360 struct lp_build_tgsi_context *bld_base,
1361 struct lp_build_emit_data *emit_data)
1362 {
1363 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1364 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1365 emit_data->args[0], emit_data->args[1], "");
1366 }
1367
1368 static void emit_udiv(const struct lp_build_tgsi_action *action,
1369 struct lp_build_tgsi_context *bld_base,
1370 struct lp_build_emit_data *emit_data)
1371 {
1372 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1373 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1374 emit_data->args[0], emit_data->args[1], "");
1375 }
1376
1377 static void emit_idiv(const struct lp_build_tgsi_action *action,
1378 struct lp_build_tgsi_context *bld_base,
1379 struct lp_build_emit_data *emit_data)
1380 {
1381 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1382 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1383 emit_data->args[0], emit_data->args[1], "");
1384 }
1385
1386 static void emit_mod(const struct lp_build_tgsi_action *action,
1387 struct lp_build_tgsi_context *bld_base,
1388 struct lp_build_emit_data *emit_data)
1389 {
1390 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1391 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1392 emit_data->args[0], emit_data->args[1], "");
1393 }
1394
1395 static void emit_umod(const struct lp_build_tgsi_action *action,
1396 struct lp_build_tgsi_context *bld_base,
1397 struct lp_build_emit_data *emit_data)
1398 {
1399 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1400 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1401 emit_data->args[0], emit_data->args[1], "");
1402 }
1403
1404 static void emit_shl(const struct lp_build_tgsi_action *action,
1405 struct lp_build_tgsi_context *bld_base,
1406 struct lp_build_emit_data *emit_data)
1407 {
1408 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1409 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1410 emit_data->args[0], emit_data->args[1], "");
1411 }
1412
1413 static void emit_ushr(const struct lp_build_tgsi_action *action,
1414 struct lp_build_tgsi_context *bld_base,
1415 struct lp_build_emit_data *emit_data)
1416 {
1417 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1418 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1419 emit_data->args[0], emit_data->args[1], "");
1420 }
1421 static void emit_ishr(const struct lp_build_tgsi_action *action,
1422 struct lp_build_tgsi_context *bld_base,
1423 struct lp_build_emit_data *emit_data)
1424 {
1425 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1426 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1427 emit_data->args[0], emit_data->args[1], "");
1428 }
1429
1430 static void emit_xor(const struct lp_build_tgsi_action *action,
1431 struct lp_build_tgsi_context *bld_base,
1432 struct lp_build_emit_data *emit_data)
1433 {
1434 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1435 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1436 emit_data->args[0], emit_data->args[1], "");
1437 }
1438
/* SSG/ISSG/I64SSG: sign of the operand as -1, 0 or +1 (float or integer).
 *
 * Each variant uses two compare+select steps:
 *   val = x > 0  ?  1  : x     (positive inputs collapse to one)
 *   val = val >= 0 ? val : -1  (remaining negatives collapse to minus one;
 *                               zero stays zero)
 */
static void emit_ssg(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	LLVMValueRef cmp, val;

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
		/* 64-bit integer sign. */
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
	} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
		/* 32-bit integer sign. */
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
	} else { // float SSG
		cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
		cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
	}

	emit_data->output[emit_data->chan] = val;
}
1466
1467 static void emit_ineg(const struct lp_build_tgsi_action *action,
1468 struct lp_build_tgsi_context *bld_base,
1469 struct lp_build_emit_data *emit_data)
1470 {
1471 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1472 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1473 emit_data->args[0], "");
1474 }
1475
1476 static void emit_dneg(const struct lp_build_tgsi_action *action,
1477 struct lp_build_tgsi_context *bld_base,
1478 struct lp_build_emit_data *emit_data)
1479 {
1480 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1481 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1482 emit_data->args[0], "");
1483 }
1484
1485 static void emit_frac(const struct lp_build_tgsi_action *action,
1486 struct lp_build_tgsi_context *bld_base,
1487 struct lp_build_emit_data *emit_data)
1488 {
1489 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1490 char *intr;
1491
1492 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1493 intr = "llvm.floor.f32";
1494 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1495 intr = "llvm.floor.f64";
1496 else {
1497 assert(0);
1498 return;
1499 }
1500
1501 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1502 &emit_data->args[0], 1,
1503 LLVMReadNoneAttribute);
1504 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1505 emit_data->args[0], floor, "");
1506 }
1507
1508 static void emit_f2i(const struct lp_build_tgsi_action *action,
1509 struct lp_build_tgsi_context *bld_base,
1510 struct lp_build_emit_data *emit_data)
1511 {
1512 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1513 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1514 emit_data->args[0], bld_base->int_bld.elem_type, "");
1515 }
1516
1517 static void emit_f2u(const struct lp_build_tgsi_action *action,
1518 struct lp_build_tgsi_context *bld_base,
1519 struct lp_build_emit_data *emit_data)
1520 {
1521 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1522 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1523 emit_data->args[0], bld_base->uint_bld.elem_type, "");
1524 }
1525
1526 static void emit_i2f(const struct lp_build_tgsi_action *action,
1527 struct lp_build_tgsi_context *bld_base,
1528 struct lp_build_emit_data *emit_data)
1529 {
1530 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1531 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1532 emit_data->args[0], bld_base->base.elem_type, "");
1533 }
1534
1535 static void emit_u2f(const struct lp_build_tgsi_action *action,
1536 struct lp_build_tgsi_context *bld_base,
1537 struct lp_build_emit_data *emit_data)
1538 {
1539 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1540 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1541 emit_data->args[0], bld_base->base.elem_type, "");
1542 }
1543
1544 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1545 const struct tgsi_full_immediate *imm)
1546 {
1547 unsigned i;
1548 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
1549
1550 for (i = 0; i < 4; ++i) {
1551 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1552 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
1553 }
1554
1555 ctx->soa.num_immediates++;
1556 }
1557
1558 void
1559 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1560 struct lp_build_tgsi_context *bld_base,
1561 struct lp_build_emit_data *emit_data)
1562 {
1563 struct lp_build_context *base = &bld_base->base;
1564 emit_data->output[emit_data->chan] =
1565 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1566 emit_data->dst_type, emit_data->args,
1567 emit_data->arg_count, LLVMReadNoneAttribute);
1568 }
1569
/* BFI: bitfield insert -- insert the low src3 bits of src1 into src0 at bit
 * offset src2. */
static void emit_bfi(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef bfi_args[3];

	// Calculate the bitmask: ((1 << src3) - 1) << src2
	bfi_args[0] = LLVMBuildShl(builder,
				LLVMBuildSub(builder,
					LLVMBuildShl(builder,
						bld_base->int_bld.one,
						emit_data->args[3], ""),
					bld_base->int_bld.one, ""),
				emit_data->args[2], "");

	// The value to insert, shifted into position.
	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
				emit_data->args[2], "");

	bfi_args[2] = emit_data->args[0];

	/* Calculate:
	 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	emit_data->output[emit_data->chan] =
		LLVMBuildXor(builder, bfi_args[2],
			LLVMBuildAnd(builder, bfi_args[0],
				LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
					     ""), ""), "");
}
1602
1603 /* this is ffs in C */
1604 static void emit_lsb(const struct lp_build_tgsi_action *action,
1605 struct lp_build_tgsi_context *bld_base,
1606 struct lp_build_emit_data *emit_data)
1607 {
1608 struct gallivm_state *gallivm = bld_base->base.gallivm;
1609 LLVMValueRef args[2] = {
1610 emit_data->args[0],
1611
1612 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1613 * add special code to check for x=0. The reason is that
1614 * the LLVM behavior for x=0 is different from what we
1615 * need here.
1616 *
1617 * The hardware already implements the correct behavior.
1618 */
1619 lp_build_const_int32(gallivm, 1)
1620 };
1621
1622 emit_data->output[emit_data->chan] =
1623 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
1624 emit_data->dst_type, args, ARRAY_SIZE(args),
1625 LLVMReadNoneAttribute);
1626 }
1627
1628 /* Find the last bit set. */
1629 static void emit_umsb(const struct lp_build_tgsi_action *action,
1630 struct lp_build_tgsi_context *bld_base,
1631 struct lp_build_emit_data *emit_data)
1632 {
1633 struct gallivm_state *gallivm = bld_base->base.gallivm;
1634 LLVMBuilderRef builder = gallivm->builder;
1635 LLVMValueRef args[2] = {
1636 emit_data->args[0],
1637 /* Don't generate code for handling zero: */
1638 lp_build_const_int32(gallivm, 1)
1639 };
1640
1641 LLVMValueRef msb =
1642 lp_build_intrinsic(builder, "llvm.ctlz.i32",
1643 emit_data->dst_type, args, ARRAY_SIZE(args),
1644 LLVMReadNoneAttribute);
1645
1646 /* The HW returns the last bit index from MSB, but TGSI wants
1647 * the index from LSB. Invert it by doing "31 - msb". */
1648 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1649 msb, "");
1650
1651 /* Check for zero: */
1652 emit_data->output[emit_data->chan] =
1653 LLVMBuildSelect(builder,
1654 LLVMBuildICmp(builder, LLVMIntEQ, args[0],
1655 bld_base->uint_bld.zero, ""),
1656 lp_build_const_int32(gallivm, -1), msb, "");
1657 }
1658
1659 /* Find the last bit opposite of the sign bit. */
1660 static void emit_imsb(const struct lp_build_tgsi_action *action,
1661 struct lp_build_tgsi_context *bld_base,
1662 struct lp_build_emit_data *emit_data)
1663 {
1664 struct gallivm_state *gallivm = bld_base->base.gallivm;
1665 LLVMBuilderRef builder = gallivm->builder;
1666 LLVMValueRef arg = emit_data->args[0];
1667
1668 LLVMValueRef msb =
1669 lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
1670 emit_data->dst_type, &arg, 1,
1671 LLVMReadNoneAttribute);
1672
1673 /* The HW returns the last bit index from MSB, but TGSI wants
1674 * the index from LSB. Invert it by doing "31 - msb". */
1675 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1676 msb, "");
1677
1678 /* If arg == 0 || arg == -1 (0xffffffff), return -1. */
1679 LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
1680
1681 LLVMValueRef cond =
1682 LLVMBuildOr(builder,
1683 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1684 bld_base->uint_bld.zero, ""),
1685 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1686 all_ones, ""), "");
1687
1688 emit_data->output[emit_data->chan] =
1689 LLVMBuildSelect(builder, cond, all_ones, msb, "");
1690 }
1691
1692 static void emit_iabs(const struct lp_build_tgsi_action *action,
1693 struct lp_build_tgsi_context *bld_base,
1694 struct lp_build_emit_data *emit_data)
1695 {
1696 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1697
1698 emit_data->output[emit_data->chan] =
1699 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1700 emit_data->args[0],
1701 LLVMBuildNeg(builder,
1702 emit_data->args[0], ""));
1703 }
1704
/* IMIN/IMAX/UMIN/UMAX (32- and 64-bit): integer min/max via compare+select. */
static void emit_minmax_int(const struct lp_build_tgsi_action *action,
			    struct lp_build_tgsi_context *bld_base,
			    struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMIntPredicate op;

	switch (emit_data->info->opcode) {
	default:
		assert(0);
		/* deliberate fallthrough: release builds use IMAX's predicate */
	case TGSI_OPCODE_IMAX:
	case TGSI_OPCODE_I64MAX:
		op = LLVMIntSGT;
		break;
	case TGSI_OPCODE_IMIN:
	case TGSI_OPCODE_I64MIN:
		op = LLVMIntSLT;
		break;
	case TGSI_OPCODE_UMAX:
	case TGSI_OPCODE_U64MAX:
		op = LLVMIntUGT;
		break;
	case TGSI_OPCODE_UMIN:
	case TGSI_OPCODE_U64MIN:
		op = LLVMIntULT;
		break;
	}

	/* Select whichever operand wins the comparison. */
	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder,
				LLVMBuildICmp(builder, op, emit_data->args[0],
					      emit_data->args[1], ""),
				emit_data->args[0],
				emit_data->args[1], "");
}
1740
1741 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1742 struct lp_build_emit_data *emit_data)
1743 {
1744 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1745 0, TGSI_CHAN_X);
1746 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
1747 0, TGSI_CHAN_Y);
1748 }
1749
1750 static void emit_pk2h(const struct lp_build_tgsi_action *action,
1751 struct lp_build_tgsi_context *bld_base,
1752 struct lp_build_emit_data *emit_data)
1753 {
1754 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1755 LLVMContextRef context = bld_base->base.gallivm->context;
1756 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1757 LLVMTypeRef fp16, i16;
1758 LLVMValueRef const16, comp[2];
1759 unsigned i;
1760
1761 fp16 = LLVMHalfTypeInContext(context);
1762 i16 = LLVMInt16TypeInContext(context);
1763 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1764
1765 for (i = 0; i < 2; i++) {
1766 comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
1767 comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
1768 comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
1769 }
1770
1771 comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
1772 comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
1773
1774 emit_data->output[emit_data->chan] = comp[0];
1775 }
1776
1777 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1778 struct lp_build_emit_data *emit_data)
1779 {
1780 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1781 0, TGSI_CHAN_X);
1782 }
1783
1784 static void emit_up2h(const struct lp_build_tgsi_action *action,
1785 struct lp_build_tgsi_context *bld_base,
1786 struct lp_build_emit_data *emit_data)
1787 {
1788 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1789 LLVMContextRef context = bld_base->base.gallivm->context;
1790 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1791 LLVMTypeRef fp16, i16;
1792 LLVMValueRef const16, input, val;
1793 unsigned i;
1794
1795 fp16 = LLVMHalfTypeInContext(context);
1796 i16 = LLVMInt16TypeInContext(context);
1797 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1798 input = emit_data->args[0];
1799
1800 for (i = 0; i < 2; i++) {
1801 val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
1802 val = LLVMBuildTrunc(builder, val, i16, "");
1803 val = LLVMBuildBitCast(builder, val, fp16, "");
1804 emit_data->output[i] =
1805 LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
1806 }
1807 }
1808
/* DIV: floating-point division.
 *
 * On LLVM >= 3.9 the result is tagged with fpmath metadata allowing 2.5 ULP
 * of error, so the backend can lower it to v_rcp_f32 + multiply instead of
 * a precise divide.
 */
static void emit_fdiv(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);

	emit_data->output[emit_data->chan] =
		LLVMBuildFDiv(bld_base->base.gallivm->builder,
			      emit_data->args[0], emit_data->args[1], "");

	/* Use v_rcp_f32 instead of precise division.  Metadata can only be
	 * attached to instructions, so skip constant-folded results. */
	if (HAVE_LLVM >= 0x0309 &&
	    !LLVMIsConstant(emit_data->output[emit_data->chan]))
		LLVMSetMetadata(emit_data->output[emit_data->chan],
				ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
}
1825
1826 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
1827 * the target machine. f64 needs global unsafe math flags to get rsq. */
1828 static void emit_rsq(const struct lp_build_tgsi_action *action,
1829 struct lp_build_tgsi_context *bld_base,
1830 struct lp_build_emit_data *emit_data)
1831 {
1832 LLVMValueRef sqrt =
1833 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
1834 emit_data->args[0]);
1835
1836 emit_data->output[emit_data->chan] =
1837 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
1838 bld_base->base.one, sqrt);
1839 }
1840
/* Initialize the radeon LLVM shader context: create the LLVM context,
 * module and builder, set up gallivm build contexts for every scalar
 * type the translator uses, and register the radeon-specific handlers
 * for TGSI opcodes.
 *
 * \param triple  LLVM target triple the module is compiled for.
 * \param info    Optional TGSI shader info; used to size indirectly
 *                addressed temporary arrays.  May be NULL.
 * \param tokens  Optional TGSI token stream; only needed to scan array
 *                declarations when \p info reports temp arrays.
 */
void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple,
			      const struct tgsi_shader_info *info,
			      const struct tgsi_token *tokens)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
	memset(&ctx->soa, 0, sizeof(ctx->soa));
	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, triple);
	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);

	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;

	bld_base->info = info;

	/* Allocate per-array bookkeeping for indirectly addressed TGSI
	 * temporaries, sized from the shader info. */
	if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
		int size = info->array_max[TGSI_FILE_TEMPORARY];

		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));

		if (tokens)
			tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
					 ctx->temp_arrays);
	}

	/* Scalar (length 1) f32 type; the int/uint and 64-bit variants
	 * below are derived from it. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	{
		/* f64 build context. */
		struct lp_type dbl_type;
		dbl_type = type;
		dbl_type.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
	}
	{
		/* u64 build context. */
		struct lp_type dtype;
		dtype = lp_uint_type(type);
		dtype.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, dtype);
	}
	{
		/* i64 build context. */
		struct lp_type dtype;
		dtype = lp_int_type(type);
		dtype.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, dtype);
	}

	bld_base->soa = 1;
	bld_base->emit_store = radeon_llvm_emit_store;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;

	/* metadata allowing 2.5 ULP */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
						       "fpmath", 6);
	LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
						     &arg, 1);

	/* Allocate outputs */
	ctx->soa.outputs = ctx->outputs;

	/* Install gallivm defaults first, then override opcodes that need
	 * radeon-specific lowering or AMDGPU/AMDIL intrinsics.  Several
	 * intrinsic names depend on the LLVM version (HAVE_LLVM). */
	lp_set_default_actions(bld_base);

	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name =
		HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
	bld_base->op_actions[TGSI_OPCODE_FMA].emit =
		bld_base->op_actions[TGSI_OPCODE_MAD].emit;
	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
	bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
	bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
	bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;

	/* 64-bit integer opcodes reuse the generic emitters, which pick
	 * the operand width from the instruction. */
	bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;

	bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;

	bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;

	bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
}
2073
2074 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
2075 LLVMTypeRef *return_types, unsigned num_return_elems,
2076 LLVMTypeRef *ParamTypes, unsigned ParamCount)
2077 {
2078 LLVMTypeRef main_fn_type, ret_type;
2079 LLVMBasicBlockRef main_fn_body;
2080
2081 if (num_return_elems)
2082 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
2083 return_types,
2084 num_return_elems, true);
2085 else
2086 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
2087
2088 /* Setup the function */
2089 ctx->return_type = ret_type;
2090 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
2091 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
2092 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
2093 ctx->main_fn, "main_body");
2094 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
2095 }
2096
2097 void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
2098 {
2099 struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
2100 const char *triple = LLVMGetTarget(gallivm->module);
2101 LLVMTargetLibraryInfoRef target_library_info;
2102
2103 /* Create the pass manager */
2104 gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
2105 gallivm->module);
2106
2107 target_library_info = gallivm_create_target_library_info(triple);
2108 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
2109
2110 /* This pass should eliminate all the load and store instructions */
2111 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
2112
2113 /* Add some optimization passes */
2114 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
2115 LLVMAddLICMPass(gallivm->passmgr);
2116 LLVMAddAggressiveDCEPass(gallivm->passmgr);
2117 LLVMAddCFGSimplificationPass(gallivm->passmgr);
2118 LLVMAddInstructionCombiningPass(gallivm->passmgr);
2119
2120 /* Run the pass */
2121 LLVMInitializeFunctionPassManager(gallivm->passmgr);
2122 LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
2123 LLVMFinalizeFunctionPassManager(gallivm->passmgr);
2124
2125 LLVMDisposeBuilder(gallivm->builder);
2126 LLVMDisposePassManager(gallivm->passmgr);
2127 gallivm_dispose_target_library_info(target_library_info);
2128 }
2129
2130 void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
2131 {
2132 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
2133 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
2134 FREE(ctx->temp_arrays);
2135 ctx->temp_arrays = NULL;
2136 FREE(ctx->temp_array_allocas);
2137 ctx->temp_array_allocas = NULL;
2138 FREE(ctx->temps);
2139 ctx->temps = NULL;
2140 ctx->temps_count = 0;
2141 FREE(ctx->loop);
2142 ctx->loop = NULL;
2143 ctx->loop_depth_max = 0;
2144 FREE(ctx->branch);
2145 ctx->branch = NULL;
2146 ctx->branch_depth_max = 0;
2147 }