6a10af3ae4444a92fd6b08012ab7f9f4d7dd3c12
[mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_misc.h"
34 #include "gallivm/lp_bld_swizzle.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "util/u_math.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40
41 #include <stdio.h>
42 #include <llvm-c/Core.h>
43 #include <llvm-c/Transforms/Scalar.h>
44
/* Data for if/else/endif and bgnloop/endloop control flow structures.
 */
struct radeon_llvm_flow {
	/* Loop exit or next part of if/else/endif. */
	LLVMBasicBlockRef next_block;
	/* Only set for loops; used by get_innermost_loop() to distinguish
	 * loop entries from if/else entries on the flow stack. */
	LLVMBasicBlockRef loop_entry_block;
};
52
53 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
54 enum tgsi_opcode_type type)
55 {
56 LLVMContextRef ctx = bld_base->base.gallivm->context;
57
58 switch (type) {
59 case TGSI_TYPE_UNSIGNED:
60 case TGSI_TYPE_SIGNED:
61 return LLVMInt32TypeInContext(ctx);
62 case TGSI_TYPE_UNSIGNED64:
63 case TGSI_TYPE_SIGNED64:
64 return LLVMInt64TypeInContext(ctx);
65 case TGSI_TYPE_DOUBLE:
66 return LLVMDoubleTypeInContext(ctx);
67 case TGSI_TYPE_UNTYPED:
68 case TGSI_TYPE_FLOAT:
69 return LLVMFloatTypeInContext(ctx);
70 default: break;
71 }
72 return 0;
73 }
74
75 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
76 enum tgsi_opcode_type type, LLVMValueRef value)
77 {
78 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
79 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
80
81 if (dst_type)
82 return LLVMBuildBitCast(builder, value, dst_type, "");
83 else
84 return value;
85 }
86
87 /**
88 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
89 * or an undefined value in the same interval otherwise.
90 */
91 LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
92 LLVMValueRef index,
93 unsigned num)
94 {
95 struct gallivm_state *gallivm = &ctx->gallivm;
96 LLVMBuilderRef builder = gallivm->builder;
97 LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
98 LLVMValueRef cc;
99
100 if (util_is_power_of_two(num)) {
101 index = LLVMBuildAnd(builder, index, c_max, "");
102 } else {
103 /* In theory, this MAX pattern should result in code that is
104 * as good as the bit-wise AND above.
105 *
106 * In practice, LLVM generates worse code (at the time of
107 * writing), because its value tracking is not strong enough.
108 */
109 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
110 index = LLVMBuildSelect(builder, cc, index, c_max, "");
111 }
112
113 return index;
114 }
115
116 static struct radeon_llvm_flow *
117 get_current_flow(struct radeon_llvm_context *ctx)
118 {
119 if (ctx->flow_depth > 0)
120 return &ctx->flow[ctx->flow_depth - 1];
121 return NULL;
122 }
123
124 static struct radeon_llvm_flow *
125 get_innermost_loop(struct radeon_llvm_context *ctx)
126 {
127 for (unsigned i = ctx->flow_depth; i > 0; --i) {
128 if (ctx->flow[i - 1].loop_entry_block)
129 return &ctx->flow[i - 1];
130 }
131 return NULL;
132 }
133
/* Push a new entry onto the control-flow stack, growing the backing array
 * (doubling, starting at RADEON_LLVM_INITIAL_CF_DEPTH) when full. The
 * returned entry has both block pointers cleared; the caller fills them in.
 * NOTE(review): the REALLOC result is not checked — on allocation failure
 * the NULL pointer would be dereferenced below; confirm intended policy. */
static struct radeon_llvm_flow *
push_flow(struct radeon_llvm_context *ctx)
{
	struct radeon_llvm_flow *flow;

	if (ctx->flow_depth >= ctx->flow_depth_max) {
		unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
		ctx->flow = REALLOC(ctx->flow,
				    ctx->flow_depth_max * sizeof(*ctx->flow),
				    new_max * sizeof(*ctx->flow));
		ctx->flow_depth_max = new_max;
	}

	flow = &ctx->flow[ctx->flow_depth];
	ctx->flow_depth++;

	flow->next_block = NULL;
	flow->loop_entry_block = NULL;
	return flow;
}
154
/* Flatten a (register index, channel) pair into the per-channel SoA
 * register numbering: four channels per register. */
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return chan + index * 4;
}
159
160 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
161 LLVMValueRef value,
162 unsigned swizzle_x,
163 unsigned swizzle_y,
164 unsigned swizzle_z,
165 unsigned swizzle_w)
166 {
167 LLVMValueRef swizzles[4];
168 LLVMTypeRef i32t =
169 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
170
171 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
172 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
173 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
174 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
175
176 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
177 value,
178 LLVMGetUndef(LLVMTypeOf(value)),
179 LLVMConstVector(swizzles, 4), "");
180 }
181
182 /**
183 * Return the description of the array covering the given temporary register
184 * index.
185 */
186 static unsigned
187 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
188 unsigned reg_index,
189 const struct tgsi_ind_register *reg)
190 {
191 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
192 unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
193 unsigned i;
194
195 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
196 return reg->ArrayID;
197
198 for (i = 0; i < num_arrays; i++) {
199 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
200
201 if (reg_index >= array->range.First && reg_index <= array->range.Last)
202 return i + 1;
203 }
204
205 return 0;
206 }
207
208 static struct tgsi_declaration_range
209 get_array_range(struct lp_build_tgsi_context *bld_base,
210 unsigned File, unsigned reg_index,
211 const struct tgsi_ind_register *reg)
212 {
213 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
214 struct tgsi_declaration_range range;
215
216 if (File == TGSI_FILE_TEMPORARY) {
217 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
218 if (array_id)
219 return ctx->temp_arrays[array_id - 1].range;
220 }
221
222 range.First = 0;
223 range.Last = bld_base->info->file_max[File];
224 return range;
225 }
226
227 static LLVMValueRef
228 emit_array_index(struct lp_build_tgsi_soa_context *bld,
229 const struct tgsi_ind_register *reg,
230 unsigned offset)
231 {
232 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
233
234 if (!reg) {
235 return lp_build_const_int32(gallivm, offset);
236 }
237 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
238 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
239 }
240
/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct radeon_llvm_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only temporaries can be backed by a spanning array alloca. */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* Small arrays use per-element allocas instead (see emit_declaration)
	 * and have no spanning alloca here. */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Reads from a channel the shader never writes use the shared dummy
	 * alloca, which is always safe to load from. */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = emit_array_index(&ctx->soa, reg_indirect,
				 reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantuees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = radeon_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca only holds written channels: scale by the number of
	 * written channels and add the rank of this channel among them. */
	index = LLVMBuildMul(
		builder, index,
		lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
		"");
	index = LLVMBuildAdd(
		builder, index,
		lp_build_const_int32(
			gallivm,
			util_bitcount(array->writemask & ((1 << swizzle) - 1))),
		"");
	idxs[0] = ctx->soa.bld_base.uint_bld.zero;
	idxs[1] = index;
	return LLVMBuildGEP(builder, alloca, idxs, 2, "");
}
310
/* Assemble a 64-bit value of the given TGSI \p type from its two 32-bit
 * halves \p ptr (low) and \p ptr2 (high): both are packed as i32 into a
 * double-length integer vector which is then bitcast to the target type. */
LLVMValueRef
radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
			     enum tgsi_opcode_type type,
			     LLVMValueRef ptr,
			     LLVMValueRef ptr2)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result;

	result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));

	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
					bld_base->int_bld.zero, "");
	result = LLVMBuildInsertElement(builder,
					result,
					bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
					bld_base->int_bld.one, "");
	return bitcast(bld_base, type, result);
}
332
/* Fetch an entire register range as one LLVM vector (one element per
 * register) by fetching each register's \p swizzle channel individually. */
static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
		 unsigned File, enum tgsi_opcode_type type,
		 struct tgsi_declaration_range range,
		 unsigned swizzle)
{
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	/* Synthesize a direct (non-indirect) source register per element. */
	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(builder, result, temp,
						lp_build_const_int32(gallivm, i), "array_vector");
	}
	return result;
}
358
/* Load one channel of an indirectly addressed register. Uses a direct
 * pointer into the array alloca when one exists; otherwise falls back to
 * fetching the whole range as a vector and extracting the element. */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			/* A 64-bit value occupies two consecutive slots. */
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
		}

		return val;
	} else {
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			emit_array_index(bld, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}
394
/* Store one channel of an indirectly addressed register. Writes through a
 * direct pointer when an array alloca exists; otherwise read-modify-writes
 * the whole range via a vector insert and per-register stores. */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		/* Insert the new value at the dynamic index, then write every
		 * element of the range back to its register slot. */
		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							lp_build_const_int32(gallivm, i), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
444
/* Fetch one channel (or, with swizzle == ~0, all four channels gathered
 * into a vector) of a TGSI source register as a value of \p type.
 * 64-bit types read two consecutive channels starting at \p swizzle. */
LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				    const struct tgsi_full_src_register *reg,
				    enum tgsi_opcode_type type,
				    unsigned swizzle)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	/* swizzle == ~0: fetch each channel recursively and gather. */
	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(bld_base->base.gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Pack the two 32-bit immediate halves as constants. */
			result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
			result = LLVMConstInsertElement(result,
							bld->immediates[reg->Register.Index][swizzle],
							bld_base->int_bld.zero);
			result = LLVMConstInsertElement(result,
							bld->immediates[reg->Register.Index][swizzle + 1],
							bld_base->int_bld.one);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Out-of-range temporaries read as undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return radeon_llvm_emit_fetch_64bit(bld_base, type,
							    LLVMBuildLoad(builder, ptr, ""),
							    LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
			return radeon_llvm_emit_fetch_64bit(bld_base, type,
							    LLVMBuildLoad(builder, ptr, ""),
							    LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
542
543 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
544 const struct tgsi_full_src_register *reg,
545 enum tgsi_opcode_type type,
546 unsigned swizzle)
547 {
548 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
549 struct gallivm_state *gallivm = bld_base->base.gallivm;
550
551 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
552 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
553 cval = LLVMBuildExtractElement(gallivm->builder, cval,
554 lp_build_const_int32(gallivm, swizzle), "");
555 }
556 return bitcast(bld_base, type, cval);
557 }
558
/* Handle a TGSI declaration: create allocas for address, temporary and
 * output registers, record/preload inputs, load system values, and
 * declare memory regions. */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		/* One alloca per channel of each address register. */
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->soa.addr[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->soa.bld_base.uint_bld.elem_type, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[16] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 *
			 * LLVM 3.8 crashes with this.
			 */
			if (HAVE_LLVM >= 0x0309 && array_size > 16) {
				array_alloca = LLVMBuildAlloca(builder,
					LLVMArrayType(bld_base->base.vec_type,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the flat per-channel temp pointer table. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* Per-element allocas; named only in debug builds. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(bld_base->base.gallivm,
							      bld_base->base.vec_type,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				bld_base->uint_bld.zero,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					bld_base->base.gallivm,
					bld_base->base.vec_type, "undef");
			}

			/* Point written channels into the spanning alloca;
			 * unwritten channels read the shared dummy alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input) {
				ctx->input_decls[idx] = *decl;

				/* Fragment shader inputs are instead loaded
				 * lazily in radeon_llvm_emit_fetch. */
				if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, decl,
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			ctx->load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		/* One alloca per channel of each output register. */
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->soa.outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->soa.bld_base.base.elem_type, "");
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		ctx->declare_memory_region(ctx, decl);
		break;

	default:
		break;
	}
}
726
727 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
728 LLVMValueRef value)
729 {
730 struct lp_build_emit_data clamp_emit_data;
731
732 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
733 clamp_emit_data.arg_count = 3;
734 clamp_emit_data.args[0] = value;
735 clamp_emit_data.args[2] = bld_base->base.one;
736 clamp_emit_data.args[1] = bld_base->base.zero;
737
738 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
739 &clamp_emit_data);
740 }
741
/* Store the per-channel values in \p dst to the instruction's first
 * destination register, applying saturate and handling vector-typed
 * dst[0], 64-bit destination types and indirect addressing. */
void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			    const struct tgsi_full_instruction *inst,
			    const struct tgsi_opcode_info *info,
			    LLVMValueRef dst[4])
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
	struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
	const struct tgsi_full_dst_register *reg = &inst->Dst[0];
	LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	unsigned chan, chan_index;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		/* Scatter the vector into per-channel values and recurse. */
		LLVMValueRef values[4] = {};
		TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
			LLVMValueRef index = lp_build_const_int32(gallivm, chan);
			values[chan] = LLVMBuildExtractElement(gallivm->builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, values);
		return;
	}

	TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values span two channels; the odd channel is
		 * handled together with the preceding even one. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = radeon_llvm_saturate(bld_base, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = bld->addr[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		/* Register slots hold floats; bitcast 32-bit values. */
		if (!tgsi_type_is_64bit(dtype))
			value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = bld->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				/* Silently drop out-of-range temp writes. */
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit
				 * halves and store them separately. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								bld_base->uint_bld.zero, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       bld_base->uint_bld.one, "");

				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
				LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
			}
		}
	}
}
834
835 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
836 {
837 char buf[32];
838 /* Subtract 1 so that the number shown is that of the corresponding
839 * opcode in the TGSI dump, e.g. an if block has the same suffix as
840 * the instruction number of the corresponding TGSI IF.
841 */
842 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
843 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
844 }
845
846 /* Append a basic block at the level of the parent flow.
847 */
848 static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx,
849 const char *name)
850 {
851 struct gallivm_state *gallivm = &ctx->gallivm;
852
853 assert(ctx->flow_depth >= 1);
854
855 if (ctx->flow_depth >= 2) {
856 struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
857
858 return LLVMInsertBasicBlockInContext(gallivm->context,
859 flow->next_block, name);
860 }
861
862 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
863 }
864
865 /* Emit a branch to the given default target for the current block if
866 * applicable -- that is, if the current block does not already contain a
867 * branch from a break or continue.
868 */
869 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
870 {
871 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
872 LLVMBuildBr(builder, target);
873 }
874
/* BGNLOOP: push a loop onto the flow stack, create its header and exit
 * blocks, branch into the header and continue emitting there. */
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = push_flow(ctx);
	flow->loop_entry_block = append_basic_block(ctx, "LOOP");
	flow->next_block = append_basic_block(ctx, "ENDLOOP");
	set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
}
888
/* BRK: unconditionally branch to the exit block of the innermost loop.
 * Assumes valid TGSI, i.e. BRK only appears inside a loop (flow != NULL). */
static void brk_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = get_innermost_loop(ctx);

	LLVMBuildBr(gallivm->builder, flow->next_block);
}
899
/* CONT: unconditionally branch back to the header of the innermost loop.
 * Assumes valid TGSI, i.e. CONT only appears inside a loop (flow != NULL). */
static void cont_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = get_innermost_loop(ctx);

	LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
}
910
/* ELSE: close the if-block with a branch to a fresh ENDIF block, then
 * continue emitting into the else block created by if_cond_emit; the
 * flow entry's next_block is retargeted to the new ENDIF block. */
static void else_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
	LLVMBasicBlockRef endif_block;

	assert(!current_branch->loop_entry_block);

	endif_block = append_basic_block(ctx, "ENDIF");
	emit_default_branch(gallivm->builder, endif_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
	set_basicblock_name(current_branch->next_block, "else", bld_base->pc);

	current_branch->next_block = endif_block;
}
930
/* ENDIF: fall through into the merge block (unless already terminated by
 * a break/continue), continue emitting there, and pop the flow stack. */
static void endif_emit(const struct lp_build_tgsi_action *action,
		       struct lp_build_tgsi_context *bld_base,
		       struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *current_branch = get_current_flow(ctx);

	assert(!current_branch->loop_entry_block);

	emit_default_branch(gallivm->builder, current_branch->next_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
	set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);

	ctx->flow_depth--;
}
947
/* ENDLOOP: branch back to the loop header (unless already terminated by a
 * break/continue), continue emitting in the exit block, and pop the flow
 * stack. */
static void endloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *current_loop = get_current_flow(ctx);

	assert(current_loop->loop_entry_block);

	emit_default_branch(gallivm->builder, current_loop->loop_entry_block);

	LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
	set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
	ctx->flow_depth--;
}
964
/* Common tail for IF/UIF: open a new conditional on a ready-made i1 cond.
 *
 * Pushes a flow-stack entry, creates the IF block (true arm) and an ELSE
 * block that doubles as the pending merge target until else_emit replaces
 * it, then leaves the builder positioned in the IF block.
 */
static void if_cond_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data,
			 LLVMValueRef cond)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	struct radeon_llvm_flow *flow = push_flow(ctx);
	LLVMBasicBlockRef if_block;

	if_block = append_basic_block(ctx, "IF");
	flow->next_block = append_basic_block(ctx, "ELSE");
	set_basicblock_name(if_block, "if", bld_base->pc);
	LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
	LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
}
981
982 static void if_emit(const struct lp_build_tgsi_action *action,
983 struct lp_build_tgsi_context *bld_base,
984 struct lp_build_emit_data *emit_data)
985 {
986 struct gallivm_state *gallivm = bld_base->base.gallivm;
987 LLVMValueRef cond;
988
989 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
990 emit_data->args[0],
991 bld_base->base.zero, "");
992
993 if_cond_emit(action, bld_base, emit_data, cond);
994 }
995
996 static void uif_emit(const struct lp_build_tgsi_action *action,
997 struct lp_build_tgsi_context *bld_base,
998 struct lp_build_emit_data *emit_data)
999 {
1000 struct gallivm_state *gallivm = bld_base->base.gallivm;
1001 LLVMValueRef cond;
1002
1003 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1004 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1005 bld_base->int_bld.zero, "");
1006
1007 if_cond_emit(action, bld_base, emit_data, cond);
1008 }
1009
1010 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
1011 struct lp_build_emit_data *emit_data)
1012 {
1013 const struct tgsi_full_instruction *inst = emit_data->inst;
1014 struct gallivm_state *gallivm = bld_base->base.gallivm;
1015 LLVMBuilderRef builder = gallivm->builder;
1016 unsigned i;
1017 LLVMValueRef conds[TGSI_NUM_CHANNELS];
1018
1019 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
1020 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
1021 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
1022 bld_base->base.zero, "");
1023 }
1024
1025 /* Or the conditions together */
1026 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
1027 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
1028 }
1029
1030 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
1031 emit_data->arg_count = 1;
1032 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
1033 lp_build_const_float(gallivm, -1.0f),
1034 bld_base->base.zero, "");
1035 }
1036
1037 static void kil_emit(const struct lp_build_tgsi_action *action,
1038 struct lp_build_tgsi_context *bld_base,
1039 struct lp_build_emit_data *emit_data)
1040 {
1041 unsigned i;
1042 for (i = 0; i < emit_data->arg_count; i++) {
1043 emit_data->output[i] = lp_build_intrinsic_unary(
1044 bld_base->base.gallivm->builder,
1045 action->intr_name,
1046 emit_data->dst_type, emit_data->args[i]);
1047 }
1048 }
1049
/* Run the hardware v_cubetc/sc/ma/id computation on 3 cube-map direction
 * components and gather the results into a 4-element vector
 * <tc, sc, ma, face_id>.
 *
 * LLVM >= 3.9 exposes the four scalar amdgcn intrinsics separately; older
 * LLVM has a single llvm.AMDGPU.cube taking and returning a v4f32 (the
 * 4th input lane is unused, hence undef).
 */
static LLVMValueRef build_cube_intrinsic(struct gallivm_state *gallivm,
					 LLVMValueRef in[3])
{
	if (HAVE_LLVM >= 0x0309) {
		LLVMTypeRef f32 = LLVMTypeOf(in[0]);
		LLVMValueRef out[4];

		out[0] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubetc",
					    f32, in, 3, LLVMReadNoneAttribute);
		out[1] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubesc",
					    f32, in, 3, LLVMReadNoneAttribute);
		out[2] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubema",
					    f32, in, 3, LLVMReadNoneAttribute);
		out[3] = lp_build_intrinsic(gallivm->builder, "llvm.amdgcn.cubeid",
					    f32, in, 3, LLVMReadNoneAttribute);

		return lp_build_gather_values(gallivm, out, 4);
	} else {
		LLVMValueRef c[4] = {
			in[0],
			in[1],
			in[2],
			LLVMGetUndef(LLVMTypeOf(in[0]))	/* lane 3 unused */
		};
		LLVMValueRef vec = lp_build_gather_values(gallivm, c, 4);

		return lp_build_intrinsic(gallivm->builder, "llvm.AMDGPU.cube",
					  LLVMTypeOf(vec), &vec, 1,
					  LLVMReadNoneAttribute);
	}
}
1081
/* Convert a 3-component cube-map direction (in[0..2]) into the 2D face
 * coordinates + face index expected by the image instructions:
 * out = { s, t, face_id }, where s/t = coord * (1/|ma|) + 1.5.
 */
static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
					  LLVMValueRef *in, LLVMValueRef *out)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMTypeRef type = bld_base->base.elem_type;
	LLVMValueRef coords[4];
	LLVMValueRef mad_args[3];
	LLVMValueRef v;
	unsigned i;

	/* v = <tc, sc, ma, face_id> from the hardware cube intrinsic. */
	v = build_cube_intrinsic(gallivm, in);

	for (i = 0; i < 4; ++i)
		coords[i] = LLVMBuildExtractElement(builder, v,
						    lp_build_const_int32(gallivm, i), "");

	/* coords[2] = 1 / |ma|  (reciprocal of the major-axis magnitude) */
	coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
				       type, &coords[2], 1, LLVMReadNoneAttribute);
	coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);

	/* s/t = coord * (1/|ma|) + 1.5 — the +1.5 bias maps [-0.5, 0.5]
	 * into the face's texel space. */
	mad_args[1] = coords[2];
	mad_args[2] = LLVMConstReal(type, 1.5);

	mad_args[0] = coords[0];
	coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
					       mad_args[0], mad_args[1], mad_args[2]);

	mad_args[0] = coords[1];
	coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
					       mad_args[0], mad_args[1], mad_args[2]);

	/* apply xyz = yxw swizzle to coords */
	out[0] = coords[1];
	out[1] = coords[0];
	out[2] = coords[3];
}
1119
/* Rewrite cube-map texture coordinates (and, for TXD, derivatives) in place
 * for the 2D-face addressing the hardware expects.
 *
 * coords_arg in:  cube direction (xyz), plus array index / compare / lod /
 *                 bias depending on opcode and target.
 * coords_arg out: { s, t, face_or_layer, compare/lod/bias }.
 * derivs_arg:     for TXD, 2 x 3 cube-space derivatives in, 2 x 2 face-space
 *                 derivatives out (approximated by finite differences; see
 *                 the caveat below about face crossings).
 */
void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
					  struct lp_build_emit_data *emit_data,
					  LLVMValueRef *coords_arg,
					  LLVMValueRef *derivs_arg)
{

	unsigned target = emit_data->inst->Texture.Texture;
	unsigned opcode = emit_data->inst->Instruction.Opcode;
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef coords[4];
	unsigned i;

	radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);

	if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef shifted_cube_coords[4], shifted_coords[4];

			/* Shift the cube coordinates by the derivatives to get
			 * the cube coordinates of the "neighboring pixel".
			 */
			for (i = 0; i < 3; i++)
				shifted_cube_coords[i] =
					LLVMBuildFAdd(builder, coords_arg[i],
						      derivs_arg[axis*3+i], "");
			shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);

			/* Project the shifted cube coordinates onto the face. */
			radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
						      shifted_coords);

			/* Subtract both sets of 2D coordinates to get 2D derivatives.
			 * This won't work if the shifted coordinates ended up
			 * in a different face.
			 */
			for (i = 0; i < 2; i++)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder, shifted_coords[i],
						      coords[i], "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	if (target == TGSI_TEXTURE_CUBE_ARRAY ||
	    target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
						       coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
	}

	/* Preserve compare/lod/bias. Put it in coords.w. */
	if (opcode == TGSI_OPCODE_TEX2 ||
	    opcode == TGSI_OPCODE_TXB2 ||
	    opcode == TGSI_OPCODE_TXL2) {
		/* Two-source forms carry the extra value in src1.x == arg 4. */
		coords[3] = coords_arg[4];
	} else if (opcode == TGSI_OPCODE_TXB ||
		   opcode == TGSI_OPCODE_TXL ||
		   target == TGSI_TEXTURE_SHADOWCUBE) {
		coords[3] = coords_arg[3];
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
1190
1191 static void emit_icmp(const struct lp_build_tgsi_action *action,
1192 struct lp_build_tgsi_context *bld_base,
1193 struct lp_build_emit_data *emit_data)
1194 {
1195 unsigned pred;
1196 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1197 LLVMContextRef context = bld_base->base.gallivm->context;
1198
1199 switch (emit_data->inst->Instruction.Opcode) {
1200 case TGSI_OPCODE_USEQ:
1201 case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
1202 case TGSI_OPCODE_USNE:
1203 case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
1204 case TGSI_OPCODE_USGE:
1205 case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
1206 case TGSI_OPCODE_USLT:
1207 case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
1208 case TGSI_OPCODE_ISGE:
1209 case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
1210 case TGSI_OPCODE_ISLT:
1211 case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
1212 default:
1213 assert(!"unknown instruction");
1214 pred = 0;
1215 break;
1216 }
1217
1218 LLVMValueRef v = LLVMBuildICmp(builder, pred,
1219 emit_data->args[0], emit_data->args[1],"");
1220
1221 v = LLVMBuildSExtOrBitCast(builder, v,
1222 LLVMInt32TypeInContext(context), "");
1223
1224 emit_data->output[emit_data->chan] = v;
1225 }
1226
1227 static void emit_ucmp(const struct lp_build_tgsi_action *action,
1228 struct lp_build_tgsi_context *bld_base,
1229 struct lp_build_emit_data *emit_data)
1230 {
1231 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1232
1233 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
1234 bld_base->uint_bld.elem_type, "");
1235
1236 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
1237 bld_base->uint_bld.zero, "");
1238
1239 emit_data->output[emit_data->chan] =
1240 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
1241 }
1242
1243 static void emit_cmp(const struct lp_build_tgsi_action *action,
1244 struct lp_build_tgsi_context *bld_base,
1245 struct lp_build_emit_data *emit_data)
1246 {
1247 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1248 LLVMValueRef cond, *args = emit_data->args;
1249
1250 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
1251 bld_base->base.zero, "");
1252
1253 emit_data->output[emit_data->chan] =
1254 LLVMBuildSelect(builder, cond, args[1], args[2], "");
1255 }
1256
1257 static void emit_set_cond(const struct lp_build_tgsi_action *action,
1258 struct lp_build_tgsi_context *bld_base,
1259 struct lp_build_emit_data *emit_data)
1260 {
1261 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1262 LLVMRealPredicate pred;
1263 LLVMValueRef cond;
1264
1265 /* Use ordered for everything but NE (which is usual for
1266 * float comparisons)
1267 */
1268 switch (emit_data->inst->Instruction.Opcode) {
1269 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
1270 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
1271 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
1272 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
1273 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
1274 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
1275 default: assert(!"unknown instruction"); pred = 0; break;
1276 }
1277
1278 cond = LLVMBuildFCmp(builder,
1279 pred, emit_data->args[0], emit_data->args[1], "");
1280
1281 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
1282 cond, bld_base->base.one, bld_base->base.zero, "");
1283 }
1284
1285 static void emit_fcmp(const struct lp_build_tgsi_action *action,
1286 struct lp_build_tgsi_context *bld_base,
1287 struct lp_build_emit_data *emit_data)
1288 {
1289 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1290 LLVMContextRef context = bld_base->base.gallivm->context;
1291 LLVMRealPredicate pred;
1292
1293 /* Use ordered for everything but NE (which is usual for
1294 * float comparisons)
1295 */
1296 switch (emit_data->inst->Instruction.Opcode) {
1297 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
1298 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
1299 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
1300 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
1301 default: assert(!"unknown instruction"); pred = 0; break;
1302 }
1303
1304 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1305 emit_data->args[0], emit_data->args[1],"");
1306
1307 v = LLVMBuildSExtOrBitCast(builder, v,
1308 LLVMInt32TypeInContext(context), "");
1309
1310 emit_data->output[emit_data->chan] = v;
1311 }
1312
1313 static void emit_dcmp(const struct lp_build_tgsi_action *action,
1314 struct lp_build_tgsi_context *bld_base,
1315 struct lp_build_emit_data *emit_data)
1316 {
1317 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1318 LLVMContextRef context = bld_base->base.gallivm->context;
1319 LLVMRealPredicate pred;
1320
1321 /* Use ordered for everything but NE (which is usual for
1322 * float comparisons)
1323 */
1324 switch (emit_data->inst->Instruction.Opcode) {
1325 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1326 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1327 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1328 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1329 default: assert(!"unknown instruction"); pred = 0; break;
1330 }
1331
1332 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1333 emit_data->args[0], emit_data->args[1],"");
1334
1335 v = LLVMBuildSExtOrBitCast(builder, v,
1336 LLVMInt32TypeInContext(context), "");
1337
1338 emit_data->output[emit_data->chan] = v;
1339 }
1340
1341 static void emit_not(const struct lp_build_tgsi_action *action,
1342 struct lp_build_tgsi_context *bld_base,
1343 struct lp_build_emit_data *emit_data)
1344 {
1345 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1346 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1347 emit_data->args[0]);
1348 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1349 }
1350
1351 static void emit_arl(const struct lp_build_tgsi_action *action,
1352 struct lp_build_tgsi_context *bld_base,
1353 struct lp_build_emit_data *emit_data)
1354 {
1355 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1356 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1357 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1358 floor_index, bld_base->base.int_elem_type , "");
1359 }
1360
1361 static void emit_and(const struct lp_build_tgsi_action *action,
1362 struct lp_build_tgsi_context *bld_base,
1363 struct lp_build_emit_data *emit_data)
1364 {
1365 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1366 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1367 emit_data->args[0], emit_data->args[1], "");
1368 }
1369
1370 static void emit_or(const struct lp_build_tgsi_action *action,
1371 struct lp_build_tgsi_context *bld_base,
1372 struct lp_build_emit_data *emit_data)
1373 {
1374 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1375 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1376 emit_data->args[0], emit_data->args[1], "");
1377 }
1378
1379 static void emit_uadd(const struct lp_build_tgsi_action *action,
1380 struct lp_build_tgsi_context *bld_base,
1381 struct lp_build_emit_data *emit_data)
1382 {
1383 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1384 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1385 emit_data->args[0], emit_data->args[1], "");
1386 }
1387
1388 static void emit_udiv(const struct lp_build_tgsi_action *action,
1389 struct lp_build_tgsi_context *bld_base,
1390 struct lp_build_emit_data *emit_data)
1391 {
1392 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1393 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1394 emit_data->args[0], emit_data->args[1], "");
1395 }
1396
1397 static void emit_idiv(const struct lp_build_tgsi_action *action,
1398 struct lp_build_tgsi_context *bld_base,
1399 struct lp_build_emit_data *emit_data)
1400 {
1401 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1402 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1403 emit_data->args[0], emit_data->args[1], "");
1404 }
1405
1406 static void emit_mod(const struct lp_build_tgsi_action *action,
1407 struct lp_build_tgsi_context *bld_base,
1408 struct lp_build_emit_data *emit_data)
1409 {
1410 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1411 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1412 emit_data->args[0], emit_data->args[1], "");
1413 }
1414
1415 static void emit_umod(const struct lp_build_tgsi_action *action,
1416 struct lp_build_tgsi_context *bld_base,
1417 struct lp_build_emit_data *emit_data)
1418 {
1419 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1420 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1421 emit_data->args[0], emit_data->args[1], "");
1422 }
1423
1424 static void emit_shl(const struct lp_build_tgsi_action *action,
1425 struct lp_build_tgsi_context *bld_base,
1426 struct lp_build_emit_data *emit_data)
1427 {
1428 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1429 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1430 emit_data->args[0], emit_data->args[1], "");
1431 }
1432
1433 static void emit_ushr(const struct lp_build_tgsi_action *action,
1434 struct lp_build_tgsi_context *bld_base,
1435 struct lp_build_emit_data *emit_data)
1436 {
1437 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1438 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1439 emit_data->args[0], emit_data->args[1], "");
1440 }
1441 static void emit_ishr(const struct lp_build_tgsi_action *action,
1442 struct lp_build_tgsi_context *bld_base,
1443 struct lp_build_emit_data *emit_data)
1444 {
1445 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1446 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1447 emit_data->args[0], emit_data->args[1], "");
1448 }
1449
1450 static void emit_xor(const struct lp_build_tgsi_action *action,
1451 struct lp_build_tgsi_context *bld_base,
1452 struct lp_build_emit_data *emit_data)
1453 {
1454 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1455 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1456 emit_data->args[0], emit_data->args[1], "");
1457 }
1458
/* Sign opcodes (SSG / ISSG / I64SSG): output -1, 0 or +1 according to the
 * sign of the operand, in the operand's own type.
 *
 * Each branch uses the same two-step select:
 *   val = (x > 0) ? 1 : x;        -- positives collapse to +1
 *   val = (val >= 0) ? val : -1;  -- negatives collapse to -1, 0 stays 0
 */
static void emit_ssg(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;

	LLVMValueRef cmp, val;

	if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
	} else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
		cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
		cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
	} else { /* float SSG; ordered compares, so NaN falls through to -1 */
		cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
		cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
		val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
	}

	emit_data->output[emit_data->chan] = val;
}
1486
1487 static void emit_ineg(const struct lp_build_tgsi_action *action,
1488 struct lp_build_tgsi_context *bld_base,
1489 struct lp_build_emit_data *emit_data)
1490 {
1491 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1492 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1493 emit_data->args[0], "");
1494 }
1495
1496 static void emit_dneg(const struct lp_build_tgsi_action *action,
1497 struct lp_build_tgsi_context *bld_base,
1498 struct lp_build_emit_data *emit_data)
1499 {
1500 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1501 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1502 emit_data->args[0], "");
1503 }
1504
1505 static void emit_frac(const struct lp_build_tgsi_action *action,
1506 struct lp_build_tgsi_context *bld_base,
1507 struct lp_build_emit_data *emit_data)
1508 {
1509 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1510 char *intr;
1511
1512 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1513 intr = "llvm.floor.f32";
1514 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1515 intr = "llvm.floor.f64";
1516 else {
1517 assert(0);
1518 return;
1519 }
1520
1521 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1522 &emit_data->args[0], 1,
1523 LLVMReadNoneAttribute);
1524 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1525 emit_data->args[0], floor, "");
1526 }
1527
1528 static void emit_f2i(const struct lp_build_tgsi_action *action,
1529 struct lp_build_tgsi_context *bld_base,
1530 struct lp_build_emit_data *emit_data)
1531 {
1532 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1533 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1534 emit_data->args[0], bld_base->int_bld.elem_type, "");
1535 }
1536
1537 static void emit_f2u(const struct lp_build_tgsi_action *action,
1538 struct lp_build_tgsi_context *bld_base,
1539 struct lp_build_emit_data *emit_data)
1540 {
1541 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1542 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1543 emit_data->args[0], bld_base->uint_bld.elem_type, "");
1544 }
1545
1546 static void emit_i2f(const struct lp_build_tgsi_action *action,
1547 struct lp_build_tgsi_context *bld_base,
1548 struct lp_build_emit_data *emit_data)
1549 {
1550 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1551 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1552 emit_data->args[0], bld_base->base.elem_type, "");
1553 }
1554
1555 static void emit_u2f(const struct lp_build_tgsi_action *action,
1556 struct lp_build_tgsi_context *bld_base,
1557 struct lp_build_emit_data *emit_data)
1558 {
1559 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1560 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1561 emit_data->args[0], bld_base->base.elem_type, "");
1562 }
1563
/* Record one TGSI immediate: store each of its four channels as a 32-bit
 * integer constant holding the raw bit pattern (imm->u[i].Uint) in the SOA
 * context's immediate table, then bump the immediate count. */
static void emit_immediate(struct lp_build_tgsi_context *bld_base,
			   const struct tgsi_full_immediate *imm)
{
	unsigned i;
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);

	for (i = 0; i < 4; ++i) {
		ctx->soa.immediates[ctx->soa.num_immediates][i] =
			LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
	}

	ctx->soa.num_immediates++;
}
1577
1578 void
1579 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1580 struct lp_build_tgsi_context *bld_base,
1581 struct lp_build_emit_data *emit_data)
1582 {
1583 struct lp_build_context *base = &bld_base->base;
1584 emit_data->output[emit_data->chan] =
1585 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1586 emit_data->dst_type, emit_data->args,
1587 emit_data->arg_count, LLVMReadNoneAttribute);
1588 }
1589
/* TGSI BFI (bitfield insert): insert the low src3 bits of src1 into src0
 * at bit offset src2, i.e. args = { base, insert, offset, bits }. */
static void emit_bfi(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef bfi_args[3];

	/* Calculate the bitmask: (((1 << src3) - 1) << src2) */
	bfi_args[0] = LLVMBuildShl(builder,
				   LLVMBuildSub(builder,
						LLVMBuildShl(builder,
							     bld_base->int_bld.one,
							     emit_data->args[3], ""),
						bld_base->int_bld.one, ""),
				   emit_data->args[2], "");

	/* Shift the insert value into position. */
	bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
				   emit_data->args[2], "");

	bfi_args[2] = emit_data->args[0];

	/* Calculate:
	 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	emit_data->output[emit_data->chan] =
		LLVMBuildXor(builder, bfi_args[2],
			     LLVMBuildAnd(builder, bfi_args[0],
					  LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
						       ""), ""), "");
}
1622
/* LSB — find the first (least significant) set bit; this is ffs in C.
 * Lowered to llvm.cttz.i32 with is_zero_undef = 1. */
static void emit_lsb(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMValueRef args[2] = {
		emit_data->args[0],

		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
		 * add special code to check for x=0. The reason is that
		 * the LLVM behavior for x=0 is different from what we
		 * need here.
		 *
		 * The hardware already implements the correct behavior.
		 */
		lp_build_const_int32(gallivm, 1)
	};

	emit_data->output[emit_data->chan] =
		lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
				   emit_data->dst_type, args, ARRAY_SIZE(args),
				   LLVMReadNoneAttribute);
}
1647
/* UMSB — find the last (most significant) set bit of an unsigned value.
 * Uses llvm.ctlz.i32 and converts the MSB-relative count to an LSB-relative
 * index; returns -1 when the input is zero (TGSI semantics). */
static void emit_umsb(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef args[2] = {
		emit_data->args[0],
		/* Don't generate code for handling zero (is_zero_undef=1);
		 * the zero case is handled by the select below. */
		lp_build_const_int32(gallivm, 1)
	};

	LLVMValueRef msb =
		lp_build_intrinsic(builder, "llvm.ctlz.i32",
				   emit_data->dst_type, args, ARRAY_SIZE(args),
				   LLVMReadNoneAttribute);

	/* The HW returns the last bit index from MSB, but TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
			   msb, "");

	/* Check for zero: */
	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder,
				LLVMBuildICmp(builder, LLVMIntEQ, args[0],
					      bld_base->uint_bld.zero, ""),
				lp_build_const_int32(gallivm, -1), msb, "");
}
1678
/* IMSB — find the last (most significant) bit that differs from the sign
 * bit, via the AMDGPU flbit intrinsic. Converts the MSB-relative count to
 * an LSB-relative index; returns -1 for 0 and -1 (no such bit). */
static void emit_imsb(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct gallivm_state *gallivm = bld_base->base.gallivm;
	LLVMBuilderRef builder = gallivm->builder;
	LLVMValueRef arg = emit_data->args[0];

	LLVMValueRef msb =
		lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
				   emit_data->dst_type, &arg, 1,
				   LLVMReadNoneAttribute);

	/* The HW returns the last bit index from MSB, but TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
			   msb, "");

	/* If arg == 0 || arg == -1 (0xffffffff), return -1. */
	LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);

	LLVMValueRef cond =
		LLVMBuildOr(builder,
			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
					  bld_base->uint_bld.zero, ""),
			    LLVMBuildICmp(builder, LLVMIntEQ, arg,
					  all_ones, ""), "");

	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder, cond, all_ones, msb, "");
}
1711
1712 static void emit_iabs(const struct lp_build_tgsi_action *action,
1713 struct lp_build_tgsi_context *bld_base,
1714 struct lp_build_emit_data *emit_data)
1715 {
1716 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1717
1718 emit_data->output[emit_data->chan] =
1719 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1720 emit_data->args[0],
1721 LLVMBuildNeg(builder,
1722 emit_data->args[0], ""));
1723 }
1724
/* Integer min/max opcodes (32- and 64-bit, signed and unsigned): pick the
 * compare predicate from the opcode and select the winning operand. */
static void emit_minmax_int(const struct lp_build_tgsi_action *action,
			    struct lp_build_tgsi_context *bld_base,
			    struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMIntPredicate op;

	switch (emit_data->info->opcode) {
	default:
		assert(0);
		/* fallthrough — pick IMAX behavior on release builds */
	case TGSI_OPCODE_IMAX:
	case TGSI_OPCODE_I64MAX:
		op = LLVMIntSGT;
		break;
	case TGSI_OPCODE_IMIN:
	case TGSI_OPCODE_I64MIN:
		op = LLVMIntSLT;
		break;
	case TGSI_OPCODE_UMAX:
	case TGSI_OPCODE_U64MAX:
		op = LLVMIntUGT;
		break;
	case TGSI_OPCODE_UMIN:
	case TGSI_OPCODE_U64MIN:
		op = LLVMIntULT;
		break;
	}

	emit_data->output[emit_data->chan] =
		LLVMBuildSelect(builder,
				LLVMBuildICmp(builder, op, emit_data->args[0],
					      emit_data->args[1], ""),
				emit_data->args[0],
				emit_data->args[1], "");
}
1760
1761 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1762 struct lp_build_emit_data *emit_data)
1763 {
1764 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1765 0, TGSI_CHAN_X);
1766 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
1767 0, TGSI_CHAN_Y);
1768 }
1769
/* TGSI PK2H: pack two f32 values into one u32 as a pair of f16 halves —
 * arg0 in bits [15:0], arg1 in bits [31:16]. */
static void emit_pk2h(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMContextRef context = bld_base->base.gallivm->context;
	struct lp_build_context *uint_bld = &bld_base->uint_bld;
	LLVMTypeRef fp16, i16;
	LLVMValueRef const16, comp[2];
	unsigned i;

	fp16 = LLVMHalfTypeInContext(context);
	i16 = LLVMInt16TypeInContext(context);
	const16 = lp_build_const_int32(uint_bld->gallivm, 16);

	/* f32 -> f16, reinterpret as i16, widen to i32. */
	for (i = 0; i < 2; i++) {
		comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
		comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
		comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
	}

	/* Merge: second half into the high 16 bits. */
	comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
	comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");

	emit_data->output[emit_data->chan] = comp[0];
}
1796
1797 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1798 struct lp_build_emit_data *emit_data)
1799 {
1800 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1801 0, TGSI_CHAN_X);
1802 }
1803
/* TGSI UP2H: unpack a u32 holding two f16 halves into two f32 outputs —
 * output[0] from bits [15:0], output[1] from bits [31:16]. */
static void emit_up2h(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	LLVMBuilderRef builder = bld_base->base.gallivm->builder;
	LLVMContextRef context = bld_base->base.gallivm->context;
	struct lp_build_context *uint_bld = &bld_base->uint_bld;
	LLVMTypeRef fp16, i16;
	LLVMValueRef const16, input, val;
	unsigned i;

	fp16 = LLVMHalfTypeInContext(context);
	i16 = LLVMInt16TypeInContext(context);
	const16 = lp_build_const_int32(uint_bld->gallivm, 16);
	input = emit_data->args[0];

	for (i = 0; i < 2; i++) {
		/* Second iteration takes the high half. */
		val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
		val = LLVMBuildTrunc(builder, val, i16, "");
		val = LLVMBuildBitCast(builder, val, fp16, "");
		emit_data->output[i] =
			LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
	}
}
1828
/* TGSI DIV: plain fdiv, but tagged with the context's !fpmath 2.5ulp
 * metadata on LLVM >= 3.9 so the backend may lower it to v_rcp_f32 instead
 * of a precise divide. Constant-folded results are skipped because
 * LLVMSetMetadata requires an instruction, not a constant. */
static void emit_fdiv(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);

	emit_data->output[emit_data->chan] =
		LLVMBuildFDiv(bld_base->base.gallivm->builder,
			      emit_data->args[0], emit_data->args[1], "");

	/* Use v_rcp_f32 instead of precise division. */
	if (HAVE_LLVM >= 0x0309 &&
	    !LLVMIsConstant(emit_data->output[emit_data->chan]))
		LLVMSetMetadata(emit_data->output[emit_data->chan],
				ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
}
1845
1846 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
1847 * the target machine. f64 needs global unsafe math flags to get rsq. */
1848 static void emit_rsq(const struct lp_build_tgsi_action *action,
1849 struct lp_build_tgsi_context *bld_base,
1850 struct lp_build_emit_data *emit_data)
1851 {
1852 LLVMValueRef sqrt =
1853 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
1854 emit_data->args[0]);
1855
1856 emit_data->output[emit_data->chan] =
1857 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
1858 bld_base->base.one, sqrt);
1859 }
1860
/* Set up a radeon_llvm_context for translating one TGSI shader:
 * creates the LLVM context/module/builder, initializes the scalar
 * lp_build contexts for every value width, and fills in the per-opcode
 * action table used by the TGSI-to-LLVM translator.
 *
 * triple: LLVM target triple stored on the new module.
 * info:   pre-computed tgsi_shader_info; may be NULL.
 * tokens: TGSI token stream, only used to scan indirect temp arrays;
 *         may be NULL.
 */
void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple,
			      const struct tgsi_shader_info *info,
			      const struct tgsi_token *tokens)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
	memset(&ctx->soa, 0, sizeof(ctx->soa));
	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, triple);
	ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);

	struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;

	bld_base->info = info;

	/* Allocate per-array bookkeeping for indirectly addressed temporaries
	 * and scan the token stream to find the array declarations. */
	if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
		int size = info->array_max[TGSI_FILE_TEMPORARY];

		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));

		if (tokens)
			tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
					 ctx->temp_arrays);
	}

	/* Base type: scalar (length 1) 32-bit float; the int/uint and
	 * 64-bit build contexts below are derived from it. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	/* 64-bit float build context. */
	{
		struct lp_type dbl_type;
		dbl_type = type;
		dbl_type.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
	}
	/* 64-bit unsigned integer build context. */
	{
		struct lp_type dtype;
		dtype = lp_uint_type(type);
		dtype.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, dtype);
	}
	/* 64-bit signed integer build context. */
	{
		struct lp_type dtype;
		dtype = lp_int_type(type);
		dtype.width *= 2;
		lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, dtype);
	}

	/* Hook up our fetch/store/declaration callbacks. */
	bld_base->soa = 1;
	bld_base->emit_store = radeon_llvm_emit_store;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
	bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;

	/* metadata allowing 2.5 ULP */
	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
						       "fpmath", 6);
	LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
						     &arg, 1);

	/* Allocate outputs */
	ctx->soa.outputs = ctx->outputs;

	/* Start from gallivm's defaults, then override per opcode.
	 * Several intrinsic names are selected at compile time based on
	 * the LLVM version (HAVE_LLVM). */
	lp_set_default_actions(bld_base);

	bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
	bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
	bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
	bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
	bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
	bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
	bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
	bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
	bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
	bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
	bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name =
		HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
	bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
	bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
		HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
	bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
	/* FMA reuses whatever gallivm installed for MAD. */
	bld_base->op_actions[TGSI_OPCODE_FMA].emit =
		bld_base->op_actions[TGSI_OPCODE_MAD].emit;
	bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
	bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
	bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
	bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
	bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
	bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
	bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
	bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
	bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
	bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
	bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
	bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
	bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
	bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
	bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
	bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
	bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
	bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
	bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
	bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
	bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
	bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
	bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
	bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
	bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
	bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
	bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
	bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
	bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
	bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
	bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
	bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
	bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;

	/* 64-bit integer opcodes; many handlers are shared with the
	 * 32-bit variants and dispatch on the operand type. */
	bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
	bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
	bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
	bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;

	bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
	bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;

	bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
	bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
	bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
	bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;

	bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
	bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
	bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
	bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
}
2093
2094 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
2095 LLVMTypeRef *return_types, unsigned num_return_elems,
2096 LLVMTypeRef *ParamTypes, unsigned ParamCount)
2097 {
2098 LLVMTypeRef main_fn_type, ret_type;
2099 LLVMBasicBlockRef main_fn_body;
2100
2101 if (num_return_elems)
2102 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
2103 return_types,
2104 num_return_elems, true);
2105 else
2106 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
2107
2108 /* Setup the function */
2109 ctx->return_type = ret_type;
2110 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
2111 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
2112 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
2113 ctx->main_fn, "main_body");
2114 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
2115 }
2116
/* Run a fixed set of function-level optimization passes over the shader's
 * main function, then release the builder and pass manager. The module
 * itself stays alive until radeon_llvm_dispose(). */
void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
{
	struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
	const char *triple = LLVMGetTarget(gallivm->module);
	LLVMTargetLibraryInfoRef target_library_info;

	/* Create the pass manager */
	gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
							gallivm->module);

	/* Library info must match the module's target triple so libcall
	 * optimizations are legal for the target. */
	target_library_info = gallivm_create_target_library_info(triple);
	LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);

	/* This pass should eliminate all the load and store instructions */
	LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);

	/* Add some optimization passes */
	LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
	LLVMAddLICMPass(gallivm->passmgr);
	LLVMAddAggressiveDCEPass(gallivm->passmgr);
	LLVMAddCFGSimplificationPass(gallivm->passmgr);
	LLVMAddInstructionCombiningPass(gallivm->passmgr);

	/* Run the pass */
	LLVMInitializeFunctionPassManager(gallivm->passmgr);
	LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
	LLVMFinalizeFunctionPassManager(gallivm->passmgr);

	/* The IR is final now; the builder and pass manager are no
	 * longer needed. */
	LLVMDisposeBuilder(gallivm->builder);
	LLVMDisposePassManager(gallivm->passmgr);
	gallivm_dispose_target_library_info(target_library_info);
}
2149
2150 void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
2151 {
2152 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
2153 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
2154 FREE(ctx->temp_arrays);
2155 ctx->temp_arrays = NULL;
2156 FREE(ctx->temp_array_allocas);
2157 ctx->temp_array_allocas = NULL;
2158 FREE(ctx->temps);
2159 ctx->temps = NULL;
2160 ctx->temps_count = 0;
2161 FREE(ctx->flow);
2162 ctx->flow = NULL;
2163 ctx->flow_depth_max = 0;
2164 }