src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

   1 /*
   2  * Copyright 2011 Advanced Micro Devices, Inc.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
   8  * and/or sell copies of the Software, and to permit persons to whom the
   9  * Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21  * SOFTWARE.
  22  *
  23  * Authors: Tom Stellard <thomas.stellard@amd.com>
  24  *
  25  */
  26 #include "radeon_llvm.h"
  27
  28 #include "gallivm/lp_bld_const.h"
  29 #include "gallivm/lp_bld_gather.h"
  30 #include "gallivm/lp_bld_flow.h"
  31 #include "gallivm/lp_bld_init.h"
  32 #include "gallivm/lp_bld_intr.h"
  33 #include "gallivm/lp_bld_misc.h"
  34 #include "gallivm/lp_bld_swizzle.h"
  35 #include "tgsi/tgsi_info.h"
  36 #include "tgsi/tgsi_parse.h"
  37 #include "util/u_math.h"
  38 #include "util/u_memory.h"
  39 #include "util/u_debug.h"
  40
  41 #include <llvm-c/Core.h>
  42 #include <llvm-c/Transforms/Scalar.h>
  43
  44 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
  45                           enum tgsi_opcode_type type)
  46 {
  47         LLVMContextRef ctx = bld_base->base.gallivm->context;
  48
  49         switch (type) {
  50         case TGSI_TYPE_UNSIGNED:
  51         case TGSI_TYPE_SIGNED:
  52                 return LLVMInt32TypeInContext(ctx);
  53         case TGSI_TYPE_DOUBLE:
  54                 return LLVMDoubleTypeInContext(ctx);
  55         case TGSI_TYPE_UNTYPED:
  56         case TGSI_TYPE_FLOAT:
  57                 return LLVMFloatTypeInContext(ctx);
  58         default: break;
  59         }
  60         return 0;
  61 }
  62
  63 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
  64                      enum tgsi_opcode_type type, LLVMValueRef value)
  65 {
  66         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
  67         LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
  68
  69         if (dst_type)
  70                 return LLVMBuildBitCast(builder, value, dst_type, "");
  71         else
  72                 return value;
  73 }
  74
  75 static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
  76 {
  77         return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
  78 }
  79
  80 static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
  81 {
  82         return ctx->branch_depth > 0 ?
  83                         ctx->branch + (ctx->branch_depth - 1) : NULL;
  84 }
  85
  86 unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
  87 {
  88         return (index * 4) + chan;
  89 }
  90
  91 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
  92                                  LLVMValueRef value,
  93                                  unsigned swizzle_x,
  94                                  unsigned swizzle_y,
  95                                  unsigned swizzle_z,
  96                                  unsigned swizzle_w)
  97 {
  98         LLVMValueRef swizzles[4];
  99         LLVMTypeRef i32t =
 100                 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
 101
 102         swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
 103         swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
 104         swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
 105         swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
 106
 107         return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
 108                                       value,
 109                                       LLVMGetUndef(LLVMTypeOf(value)),
 110                                       LLVMConstVector(swizzles, 4), "");
 111 }
 112
 113 static struct tgsi_declaration_range
 114 get_array_range(struct lp_build_tgsi_context *bld_base,
 115                 unsigned File, const struct tgsi_ind_register *reg)
 116 {
 117         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 118
 119         if (File != TGSI_FILE_TEMPORARY || reg->ArrayID == 0 ||
 120             reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
 121                 struct tgsi_declaration_range range;
 122                 range.First = 0;
 123                 range.Last = bld_base->info->file_max[File];
 124                 return range;
 125         }
 126
 127         return ctx->arrays[reg->ArrayID - 1];
 128 }
 129
 130 static LLVMValueRef
 131 emit_array_index(struct lp_build_tgsi_soa_context *bld,
 132                  const struct tgsi_ind_register *reg,
 133                  unsigned offset)
 134 {
 135         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
 136
 137         LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
 138         return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
 139 }
 140
 141 LLVMValueRef
 142 radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
 143                              enum tgsi_opcode_type type,
 144                              LLVMValueRef ptr,
 145                              LLVMValueRef ptr2)
 146 {
 147         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 148         LLVMValueRef result;
 149
 150         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 151
 152         result = LLVMBuildInsertElement(builder,
 153                                         result,
 154                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
 155                                         bld_base->int_bld.zero, "");
 156         result = LLVMBuildInsertElement(builder,
 157                                         result,
 158                                         bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
 159                                         bld_base->int_bld.one, "");
 160         return bitcast(bld_base, type, result);
 161 }
 162
 163 static LLVMValueRef
 164 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
 165                  unsigned File, enum tgsi_opcode_type type,
 166                  struct tgsi_declaration_range range,
 167                  unsigned swizzle)
 168 {
 169         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 170         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
 171         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 172
 173         unsigned i, size = range.Last - range.First + 1;
 174         LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
 175         LLVMValueRef result = LLVMGetUndef(vec);
 176
 177         struct tgsi_full_src_register tmp_reg = {};
 178         tmp_reg.Register.File = File;
 179
 180         for (i = 0; i < size; ++i) {
 181                 tmp_reg.Register.Index = i + range.First;
 182                 LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
 183                 result = LLVMBuildInsertElement(builder, result, temp,
 184                         lp_build_const_int32(gallivm, i), "");
 185         }
 186         return result;
 187 }
 188
 189 static LLVMValueRef
 190 load_value_from_array(struct lp_build_tgsi_context *bld_base,
 191                       unsigned file,
 192                       enum tgsi_opcode_type type,
 193                       unsigned swizzle,
 194                       unsigned reg_index,
 195                       const struct tgsi_ind_register *reg_indirect)
 196 {
 197         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 198         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 199         struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_indirect);
 200
 201         return LLVMBuildExtractElement(builder,
 202                         emit_array_fetch(bld_base, file, type, range, swizzle),
 203                         emit_array_index(bld, reg_indirect, reg_index - range.First), "");
 204
 205 }
 206
 207 static LLVMValueRef
 208 store_value_to_array(struct lp_build_tgsi_context *bld_base,
 209                      LLVMValueRef value,
 210                      unsigned file,
 211                      unsigned chan_index,
 212                      unsigned reg_index,
 213                      const struct tgsi_ind_register *reg_indirect)
 214 {
 215         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 216         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 217         struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_indirect);
 218
 219         return LLVMBuildInsertElement(builder,
 220                                 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index),
 221                                 value,  emit_array_index(bld, reg_indirect, reg_index - range.First), "");
 222         return NULL;
 223 }
 224
 225 LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
 226                                     const struct tgsi_full_src_register *reg,
 227                                     enum tgsi_opcode_type type,
 228                                     unsigned swizzle)
 229 {
 230         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 231         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 232         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 233         LLVMValueRef result = NULL, ptr, ptr2;
 234
 235         if (swizzle == ~0) {
 236                 LLVMValueRef values[TGSI_NUM_CHANNELS];
 237                 unsigned chan;
 238                 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 239                         values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
 240                 }
 241                 return lp_build_gather_values(bld_base->base.gallivm, values,
 242                                               TGSI_NUM_CHANNELS);
 243         }
 244
 245         if (reg->Register.Indirect) {
 246                 return load_value_from_array(bld_base, reg->Register.File, type,
 247                                 swizzle, reg->Register.Index, &reg->Indirect);
 248         }
 249
 250         switch(reg->Register.File) {
 251         case TGSI_FILE_IMMEDIATE: {
 252                 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
 253                 if (tgsi_type_is_64bit(type)) {
 254                         result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
 255                         result = LLVMConstInsertElement(result,
 256                                                         bld->immediates[reg->Register.Index][swizzle],
 257                                                         bld_base->int_bld.zero);
 258                         result = LLVMConstInsertElement(result,
 259                                                         bld->immediates[reg->Register.Index][swizzle + 1],
 260                                                         bld_base->int_bld.one);
 261                         return LLVMConstBitCast(result, ctype);
 262                 } else {
 263                         return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
 264                 }
 265         }
 266
 267         case TGSI_FILE_INPUT:
 268                 result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
 269                 if (tgsi_type_is_64bit(type)) {
 270                         ptr = result;
 271                         ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
 272                         return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
 273                 }
 274                 break;
 275
 276         case TGSI_FILE_TEMPORARY:
 277                 if (reg->Register.Index >= ctx->temps_count)
 278                         return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 279                 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
 280                 if (tgsi_type_is_64bit(type)) {
 281                         ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
 282                         return radeon_llvm_emit_fetch_64bit(bld_base, type,
 283                                                  LLVMBuildLoad(builder, ptr, ""),
 284                                                  LLVMBuildLoad(builder, ptr2, ""));
 285                 }
 286                 result = LLVMBuildLoad(builder, ptr, "");
 287                 break;
 288
 289         case TGSI_FILE_OUTPUT:
 290                 ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
 291                 if (tgsi_type_is_64bit(type)) {
 292                         ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
 293                         return radeon_llvm_emit_fetch_64bit(bld_base, type,
 294                                                  LLVMBuildLoad(builder, ptr, ""),
 295                                                  LLVMBuildLoad(builder, ptr2, ""));
 296                 }
 297                 result = LLVMBuildLoad(builder, ptr, "");
 298                 break;
 299
 300         default:
 301                 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
 302         }
 303
 304         return bitcast(bld_base, type, result);
 305 }
 306
 307 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
 308                                        const struct tgsi_full_src_register *reg,
 309                                        enum tgsi_opcode_type type,
 310                                        unsigned swizzle)
 311 {
 312         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 313         struct gallivm_state *gallivm = bld_base->base.gallivm;
 314
 315         LLVMValueRef cval = ctx->system_values[reg->Register.Index];
 316         if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
 317                 cval = LLVMBuildExtractElement(gallivm->builder, cval,
 318                                                lp_build_const_int32(gallivm, swizzle), "");
 319         }
 320         return bitcast(bld_base, type, cval);
 321 }
 322
 323 static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
 324                                           LLVMTypeRef type,
 325                                           const char *name)
 326 {
 327         LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
 328         LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
 329         return ptr;
 330 }
 331
 332 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
 333                              const struct tgsi_full_declaration *decl)
 334 {
 335         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 336         unsigned first, last, i, idx;
 337         switch(decl->Declaration.File) {
 338         case TGSI_FILE_ADDRESS:
 339         {
 340                  unsigned idx;
 341                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 342                         unsigned chan;
 343                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 344                                  ctx->soa.addr[idx][chan] = si_build_alloca_undef(
 345                                         &ctx->gallivm,
 346                                         ctx->soa.bld_base.uint_bld.elem_type, "");
 347                         }
 348                 }
 349                 break;
 350         }
 351
 352         case TGSI_FILE_TEMPORARY:
 353                 if (decl->Declaration.Array) {
 354                         if (!ctx->arrays) {
 355                                 int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
 356                                 ctx->arrays = MALLOC(sizeof(ctx->arrays[0]) * size);
 357                         }
 358
 359                         ctx->arrays[decl->Array.ArrayID - 1] = decl->Range;
 360                 }
 361                 first = decl->Range.First;
 362                 last = decl->Range.Last;
 363                 if (!ctx->temps_count) {
 364                         ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
 365                         ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
 366                 }
 367                 for (idx = first; idx <= last; idx++) {
 368                         for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
 369                                 ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
 370                                         si_build_alloca_undef(bld_base->base.gallivm,
 371                                                               bld_base->base.vec_type,
 372                                                               "temp");
 373                         }
 374                 }
 375                 break;
 376
 377         case TGSI_FILE_INPUT:
 378         {
 379                 unsigned idx;
 380                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 381                         if (ctx->load_input)
 382                                 ctx->load_input(ctx, idx, decl);
 383                 }
 384         }
 385         break;
 386
 387         case TGSI_FILE_SYSTEM_VALUE:
 388         {
 389                 unsigned idx;
 390                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 391                         ctx->load_system_value(ctx, idx, decl);
 392                 }
 393         }
 394         break;
 395
 396         case TGSI_FILE_OUTPUT:
 397         {
 398                 unsigned idx;
 399                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
 400                         unsigned chan;
 401                         assert(idx < RADEON_LLVM_MAX_OUTPUTS);
 402                         for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
 403                                 ctx->soa.outputs[idx][chan] = si_build_alloca_undef(
 404                                         &ctx->gallivm,
 405                                         ctx->soa.bld_base.base.elem_type, "");
 406                         }
 407                 }
 408                 break;
 409         }
 410
 411         case TGSI_FILE_MEMORY:
 412                 ctx->declare_memory_region(ctx, decl);
 413                 break;
 414
 415         default:
 416                 break;
 417         }
 418 }
 419
 420 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
 421                                   LLVMValueRef value)
 422 {
 423         struct lp_build_emit_data clamp_emit_data;
 424
 425         memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
 426         clamp_emit_data.arg_count = 3;
 427         clamp_emit_data.args[0] = value;
 428         clamp_emit_data.args[2] = bld_base->base.one;
 429         clamp_emit_data.args[1] = bld_base->base.zero;
 430
 431         return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
 432                                   &clamp_emit_data);
 433 }
 434
 435 void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
 436                             const struct tgsi_full_instruction *inst,
 437                             const struct tgsi_opcode_info *info,
 438                             LLVMValueRef dst[4])
 439 {
 440         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 441         struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
 442         struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
 443         const struct tgsi_full_dst_register *reg = &inst->Dst[0];
 444         LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
 445         LLVMValueRef temp_ptr, temp_ptr2 = NULL;
 446         unsigned chan, chan_index;
 447         bool is_vec_store = false;
 448         enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
 449
 450         if (dst[0]) {
 451                 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
 452                 is_vec_store = (k == LLVMVectorTypeKind);
 453         }
 454
 455         if (is_vec_store) {
 456                 LLVMValueRef values[4] = {};
 457                 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
 458                         LLVMValueRef index = lp_build_const_int32(gallivm, chan);
 459                         values[chan]  = LLVMBuildExtractElement(gallivm->builder,
 460                                                         dst[0], index, "");
 461                 }
 462                 bld_base->emit_store(bld_base, inst, info, values);
 463                 return;
 464         }
 465
 466         TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
 467                 LLVMValueRef value = dst[chan_index];
 468
 469                 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
 470                         continue;
 471                 if (inst->Instruction.Saturate)
 472                         value = radeon_llvm_saturate(bld_base, value);
 473
 474                 if (reg->Register.File == TGSI_FILE_ADDRESS) {
 475                         temp_ptr = bld->addr[reg->Register.Index][chan_index];
 476                         LLVMBuildStore(builder, value, temp_ptr);
 477                         continue;
 478                 }
 479
 480                 if (!tgsi_type_is_64bit(dtype))
 481                         value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
 482
 483                 if (reg->Register.Indirect) {
 484                         struct tgsi_declaration_range range = get_array_range(bld_base,
 485                                 reg->Register.File, &reg->Indirect);
 486
 487                         unsigned i, size = range.Last - range.First + 1;
 488                         LLVMValueRef array = store_value_to_array(bld_base, value, reg->Register.File, chan_index,
 489                                                                   reg->Register.Index, &reg->Indirect);
 490                         for (i = 0; i < size; ++i) {
 491                                 switch(reg->Register.File) {
 492                                 case TGSI_FILE_OUTPUT:
 493                                         temp_ptr = bld->outputs[i + range.First][chan_index];
 494                                         break;
 495
 496                                 case TGSI_FILE_TEMPORARY:
 497                                         if (range.First + i >= ctx->temps_count)
 498                                                 continue;
 499                                         temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
 500                                         break;
 501
 502                                 default:
 503                                         return;
 504                                 }
 505                                 value = LLVMBuildExtractElement(builder, array,
 506                                         lp_build_const_int32(gallivm, i), "");
 507                                 LLVMBuildStore(builder, value, temp_ptr);
 508                         }
 509
 510                 } else {
 511                         switch(reg->Register.File) {
 512                         case TGSI_FILE_OUTPUT:
 513                                 temp_ptr = bld->outputs[reg->Register.Index][chan_index];
 514                                 if (tgsi_type_is_64bit(dtype))
 515                                         temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
 516                                 break;
 517
 518                         case TGSI_FILE_TEMPORARY:
 519                                 if (reg->Register.Index >= ctx->temps_count)
 520                                         continue;
 521                                 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
 522                                 if (tgsi_type_is_64bit(dtype))
 523                                         temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
 524
 525                                 break;
 526
 527                         default:
 528                                 return;
 529                         }
 530                         if (!tgsi_type_is_64bit(dtype))
 531                                 LLVMBuildStore(builder, value, temp_ptr);
 532                         else {
 533                                 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
 534                                                                     LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
 535                                 LLVMValueRef val2;
 536                                 value = LLVMBuildExtractElement(builder, ptr,
 537                                                                 bld_base->uint_bld.zero, "");
 538                                 val2 = LLVMBuildExtractElement(builder, ptr,
 539                                                                 bld_base->uint_bld.one, "");
 540
 541                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
 542                                 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
 543                         }
 544                 }
 545         }
 546 }
 547
 548 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
 549                          struct lp_build_tgsi_context *bld_base,
 550                          struct lp_build_emit_data *emit_data)
 551 {
 552         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 553         struct gallivm_state *gallivm = bld_base->base.gallivm;
 554         LLVMBasicBlockRef loop_block;
 555         LLVMBasicBlockRef endloop_block;
 556         endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
 557                                                 ctx->main_fn, "ENDLOOP");
 558         loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
 559                                                 endloop_block, "LOOP");
 560         LLVMBuildBr(gallivm->builder, loop_block);
 561         LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
 562
 563         if (++ctx->loop_depth > ctx->loop_depth_max) {
 564                 unsigned new_max = ctx->loop_depth_max << 1;
 565
 566                 if (!new_max)
 567                         new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
 568
 569                 ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
 570                                     sizeof(ctx->loop[0]),
 571                                     new_max * sizeof(ctx->loop[0]));
 572                 ctx->loop_depth_max = new_max;
 573         }
 574
 575         ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
 576         ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
 577 }
 578
 579 static void brk_emit(const struct lp_build_tgsi_action *action,
 580                      struct lp_build_tgsi_context *bld_base,
 581                      struct lp_build_emit_data *emit_data)
 582 {
 583         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 584         struct gallivm_state *gallivm = bld_base->base.gallivm;
 585         struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
 586
 587         LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
 588 }
 589
 590 static void cont_emit(const struct lp_build_tgsi_action *action,
 591                       struct lp_build_tgsi_context *bld_base,
 592                       struct lp_build_emit_data *emit_data)
 593 {
 594         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 595         struct gallivm_state *gallivm = bld_base->base.gallivm;
 596         struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
 597
 598         LLVMBuildBr(gallivm->builder, current_loop->loop_block);
 599 }
 600
 601 static void else_emit(const struct lp_build_tgsi_action *action,
 602                       struct lp_build_tgsi_context *bld_base,
 603                       struct lp_build_emit_data *emit_data)
 604 {
 605         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 606         struct gallivm_state *gallivm = bld_base->base.gallivm;
 607         struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
 608         LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
 609
 610         /* We need to add a terminator to the current block if the previous
 611          * instruction was an ENDIF.Example:
 612          * IF
 613          *   [code]
 614          *   IF
 615          *     [code]
 616          *   ELSE
 617          *    [code]
 618          *   ENDIF <--
 619          * ELSE<--
 620          *   [code]
 621          * ENDIF
 622          */
 623
 624         if (current_block != current_branch->if_block) {
 625                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
 626         }
 627         if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
 628                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
 629         }
 630         current_branch->has_else = 1;
 631         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
 632 }
 633
 634 static void endif_emit(const struct lp_build_tgsi_action *action,
 635                        struct lp_build_tgsi_context *bld_base,
 636                        struct lp_build_emit_data *emit_data)
 637 {
 638         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 639         struct gallivm_state *gallivm = bld_base->base.gallivm;
 640         struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
 641         LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
 642
 643         /* If we have consecutive ENDIF instructions, then the first ENDIF
 644          * will not have a terminator, so we need to add one. */
 645         if (current_block != current_branch->if_block
 646                         && current_block != current_branch->else_block
 647                         && !LLVMGetBasicBlockTerminator(current_block)) {
 648
 649                  LLVMBuildBr(gallivm->builder, current_branch->endif_block);
 650         }
 651         if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
 652                 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
 653                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
 654         }
 655
 656         if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
 657                 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
 658                 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
 659         }
 660
 661         LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
 662         ctx->branch_depth--;
 663 }
 664
 665 static void endloop_emit(const struct lp_build_tgsi_action *action,
 666                          struct lp_build_tgsi_context *bld_base,
 667                          struct lp_build_emit_data *emit_data)
 668 {
 669         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 670         struct gallivm_state *gallivm = bld_base->base.gallivm;
 671         struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
 672
 673         if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
 674                  LLVMBuildBr(gallivm->builder, current_loop->loop_block);
 675         }
 676
 677         LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
 678         ctx->loop_depth--;
 679 }
 680
 681 static void if_cond_emit(const struct lp_build_tgsi_action *action,
 682                          struct lp_build_tgsi_context *bld_base,
 683                          struct lp_build_emit_data *emit_data,
 684                          LLVMValueRef cond)
 685 {
 686         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
 687         struct gallivm_state *gallivm = bld_base->base.gallivm;
 688         LLVMBasicBlockRef if_block, else_block, endif_block;
 689
 690         endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
 691                                                 ctx->main_fn, "ENDIF");
 692         if_block = LLVMInsertBasicBlockInContext(gallivm->context,
 693                                                 endif_block, "IF");
 694         else_block = LLVMInsertBasicBlockInContext(gallivm->context,
 695                                                 endif_block, "ELSE");
 696         LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
 697         LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
 698
 699         if (++ctx->branch_depth > ctx->branch_depth_max) {
 700                 unsigned new_max = ctx->branch_depth_max << 1;
 701
 702                 if (!new_max)
 703                         new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
 704
 705                 ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
 706                                       sizeof(ctx->branch[0]),
 707                                       new_max * sizeof(ctx->branch[0]));
 708                 ctx->branch_depth_max = new_max;
 709         }
 710
 711         ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
 712         ctx->branch[ctx->branch_depth - 1].if_block = if_block;
 713         ctx->branch[ctx->branch_depth - 1].else_block = else_block;
 714         ctx->branch[ctx->branch_depth - 1].has_else = 0;
 715 }
 716
 717 static void if_emit(const struct lp_build_tgsi_action *action,
 718                     struct lp_build_tgsi_context *bld_base,
 719                     struct lp_build_emit_data *emit_data)
 720 {
 721         struct gallivm_state *gallivm = bld_base->base.gallivm;
 722         LLVMValueRef cond;
 723
 724         cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
 725                         emit_data->args[0],
 726                         bld_base->base.zero, "");
 727
 728         if_cond_emit(action, bld_base, emit_data, cond);
 729 }
 730
 731 static void uif_emit(const struct lp_build_tgsi_action *action,
 732                      struct lp_build_tgsi_context *bld_base,
 733                      struct lp_build_emit_data *emit_data)
 734 {
 735         struct gallivm_state *gallivm = bld_base->base.gallivm;
 736         LLVMValueRef cond;
 737
 738         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
 739                 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
 740                         bld_base->int_bld.zero, "");
 741
 742         if_cond_emit(action, bld_base, emit_data, cond);
 743 }
 744
 745 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
 746                                struct lp_build_emit_data *emit_data)
 747 {
 748         const struct tgsi_full_instruction *inst = emit_data->inst;
 749         struct gallivm_state *gallivm = bld_base->base.gallivm;
 750         LLVMBuilderRef builder = gallivm->builder;
 751         unsigned i;
 752         LLVMValueRef conds[TGSI_NUM_CHANNELS];
 753
 754         for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
 755                 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
 756                 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
 757                                         bld_base->base.zero, "");
 758         }
 759
 760         /* Or the conditions together */
 761         for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
 762                 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
 763         }
 764
 765         emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
 766         emit_data->arg_count = 1;
 767         emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
 768                                         lp_build_const_float(gallivm, -1.0f),
 769                                         bld_base->base.zero, "");
 770 }
 771
 772 static void kil_emit(const struct lp_build_tgsi_action *action,
 773                      struct lp_build_tgsi_context *bld_base,
 774                      struct lp_build_emit_data *emit_data)
 775 {
 776         unsigned i;
 777         for (i = 0; i < emit_data->arg_count; i++) {
 778                 emit_data->output[i] = lp_build_intrinsic_unary(
 779                         bld_base->base.gallivm->builder,
 780                         action->intr_name,
 781                         emit_data->dst_type, emit_data->args[i]);
 782         }
 783 }
 784
 785 static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
 786                                           LLVMValueRef *in, LLVMValueRef *out)
 787 {
 788         struct gallivm_state *gallivm = bld_base->base.gallivm;
 789         LLVMBuilderRef builder = gallivm->builder;
 790         LLVMTypeRef type = bld_base->base.elem_type;
 791         LLVMValueRef coords[4];
 792         LLVMValueRef mad_args[3];
 793         LLVMValueRef v, cube_vec;
 794         unsigned i;
 795
 796         cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
 797         v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
 798                             &cube_vec, 1, LLVMReadNoneAttribute);
 799
 800         for (i = 0; i < 4; ++i)
 801                 coords[i] = LLVMBuildExtractElement(builder, v,
 802                                                     lp_build_const_int32(gallivm, i), "");
 803
 804         coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
 805                         type, &coords[2], 1, LLVMReadNoneAttribute);
 806         coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
 807
 808         mad_args[1] = coords[2];
 809         mad_args[2] = LLVMConstReal(type, 1.5);
 810
 811         mad_args[0] = coords[0];
 812         coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
 813                         mad_args[0], mad_args[1], mad_args[2]);
 814
 815         mad_args[0] = coords[1];
 816         coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
 817                         mad_args[0], mad_args[1], mad_args[2]);
 818
 819         /* apply xyz = yxw swizzle to cooords */
 820         out[0] = coords[1];
 821         out[1] = coords[0];
 822         out[2] = coords[3];
 823 }
 824
 825 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
 826                                           struct lp_build_emit_data *emit_data,
 827                                           LLVMValueRef *coords_arg,
 828                                           LLVMValueRef *derivs_arg)
 829 {
 830
 831         unsigned target = emit_data->inst->Texture.Texture;
 832         unsigned opcode = emit_data->inst->Instruction.Opcode;
 833         struct gallivm_state *gallivm = bld_base->base.gallivm;
 834         LLVMBuilderRef builder = gallivm->builder;
 835         LLVMValueRef coords[4];
 836         unsigned i;
 837
 838         radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
 839
 840         if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
 841                 LLVMValueRef derivs[4];
 842                 int axis;
 843
 844                 /* Convert cube derivatives to 2D derivatives. */
 845                 for (axis = 0; axis < 2; axis++) {
 846                         LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
 847
 848                         /* Shift the cube coordinates by the derivatives to get
 849                          * the cube coordinates of the "neighboring pixel".
 850                          */
 851                         for (i = 0; i < 3; i++)
 852                                 shifted_cube_coords[i] =
 853                                         LLVMBuildFAdd(builder, coords_arg[i],
 854                                                       derivs_arg[axis*3+i], "");
 855                         shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
 856
 857                         /* Project the shifted cube coordinates onto the face. */
 858                         radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
 859                                                       shifted_coords);
 860
 861                         /* Subtract both sets of 2D coordinates to get 2D derivatives.
 862                          * This won't work if the shifted coordinates ended up
 863                          * in a different face.
 864                          */
 865                         for (i = 0; i < 2; i++)
 866                                 derivs[axis * 2 + i] =
 867                                         LLVMBuildFSub(builder, shifted_coords[i],
 868                                                       coords[i], "");
 869                 }
 870
 871                 memcpy(derivs_arg, derivs, sizeof(derivs));
 872         }
 873
 874         if (target == TGSI_TEXTURE_CUBE_ARRAY ||
 875             target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
 876                 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
 877                 /* coords_arg.w component - array_index for cube arrays */
 878                 coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
 879                                                        coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
 880         }
 881
 882         /* Preserve compare/lod/bias. Put it in coords.w. */
 883         if (opcode == TGSI_OPCODE_TEX2 ||
 884             opcode == TGSI_OPCODE_TXB2 ||
 885             opcode == TGSI_OPCODE_TXL2) {
 886                 coords[3] = coords_arg[4];
 887         } else if (opcode == TGSI_OPCODE_TXB ||
 888                    opcode == TGSI_OPCODE_TXL ||
 889                    target == TGSI_TEXTURE_SHADOWCUBE) {
 890                 coords[3] = coords_arg[3];
 891         }
 892
 893         memcpy(coords_arg, coords, sizeof(coords));
 894 }
 895
 896 static void emit_icmp(const struct lp_build_tgsi_action *action,
 897                       struct lp_build_tgsi_context *bld_base,
 898                       struct lp_build_emit_data *emit_data)
 899 {
 900         unsigned pred;
 901         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 902         LLVMContextRef context = bld_base->base.gallivm->context;
 903
 904         switch (emit_data->inst->Instruction.Opcode) {
 905         case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
 906         case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
 907         case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
 908         case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
 909         case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
 910         case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
 911         default:
 912                 assert(!"unknown instruction");
 913                 pred = 0;
 914                 break;
 915         }
 916
 917         LLVMValueRef v = LLVMBuildICmp(builder, pred,
 918                         emit_data->args[0], emit_data->args[1],"");
 919
 920         v = LLVMBuildSExtOrBitCast(builder, v,
 921                         LLVMInt32TypeInContext(context), "");
 922
 923         emit_data->output[emit_data->chan] = v;
 924 }
 925
 926 static void emit_ucmp(const struct lp_build_tgsi_action *action,
 927                       struct lp_build_tgsi_context *bld_base,
 928                       struct lp_build_emit_data *emit_data)
 929 {
 930         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 931
 932         LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
 933                                              bld_base->uint_bld.elem_type, "");
 934
 935         LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
 936                                        bld_base->uint_bld.zero, "");
 937
 938         emit_data->output[emit_data->chan] =
 939                 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
 940 }
 941
 942 static void emit_cmp(const struct lp_build_tgsi_action *action,
 943                      struct lp_build_tgsi_context *bld_base,
 944                      struct lp_build_emit_data *emit_data)
 945 {
 946         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 947         LLVMValueRef cond, *args = emit_data->args;
 948
 949         cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
 950                              bld_base->base.zero, "");
 951
 952         emit_data->output[emit_data->chan] =
 953                 LLVMBuildSelect(builder, cond, args[1], args[2], "");
 954 }
 955
 956 static void emit_set_cond(const struct lp_build_tgsi_action *action,
 957                           struct lp_build_tgsi_context *bld_base,
 958                           struct lp_build_emit_data *emit_data)
 959 {
 960         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 961         LLVMRealPredicate pred;
 962         LLVMValueRef cond;
 963
 964         /* Use ordered for everything but NE (which is usual for
 965          * float comparisons)
 966          */
 967         switch (emit_data->inst->Instruction.Opcode) {
 968         case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
 969         case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
 970         case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
 971         case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
 972         case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
 973         case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
 974         default: assert(!"unknown instruction"); pred = 0; break;
 975         }
 976
 977         cond = LLVMBuildFCmp(builder,
 978                 pred, emit_data->args[0], emit_data->args[1], "");
 979
 980         emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
 981                 cond, bld_base->base.one, bld_base->base.zero, "");
 982 }
 983
 984 static void emit_fcmp(const struct lp_build_tgsi_action *action,
 985                       struct lp_build_tgsi_context *bld_base,
 986                       struct lp_build_emit_data *emit_data)
 987 {
 988         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
 989         LLVMContextRef context = bld_base->base.gallivm->context;
 990         LLVMRealPredicate pred;
 991
 992         /* Use ordered for everything but NE (which is usual for
 993          * float comparisons)
 994          */
 995         switch (emit_data->inst->Instruction.Opcode) {
 996         case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
 997         case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
 998         case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
 999         case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
1000         default: assert(!"unknown instruction"); pred = 0; break;
1001         }
1002
1003         LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1004                         emit_data->args[0], emit_data->args[1],"");
1005
1006         v = LLVMBuildSExtOrBitCast(builder, v,
1007                         LLVMInt32TypeInContext(context), "");
1008
1009         emit_data->output[emit_data->chan] = v;
1010 }
1011
1012 static void emit_dcmp(const struct lp_build_tgsi_action *action,
1013                       struct lp_build_tgsi_context *bld_base,
1014                       struct lp_build_emit_data *emit_data)
1015 {
1016         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1017         LLVMContextRef context = bld_base->base.gallivm->context;
1018         LLVMRealPredicate pred;
1019
1020         /* Use ordered for everything but NE (which is usual for
1021          * float comparisons)
1022          */
1023         switch (emit_data->inst->Instruction.Opcode) {
1024         case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1025         case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1026         case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1027         case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1028         default: assert(!"unknown instruction"); pred = 0; break;
1029         }
1030
1031         LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1032                         emit_data->args[0], emit_data->args[1],"");
1033
1034         v = LLVMBuildSExtOrBitCast(builder, v,
1035                         LLVMInt32TypeInContext(context), "");
1036
1037         emit_data->output[emit_data->chan] = v;
1038 }
1039
1040 static void emit_not(const struct lp_build_tgsi_action *action,
1041                      struct lp_build_tgsi_context *bld_base,
1042                      struct lp_build_emit_data *emit_data)
1043 {
1044         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1045         LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1046                         emit_data->args[0]);
1047         emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1048 }
1049
1050 static void emit_arl(const struct lp_build_tgsi_action *action,
1051                      struct lp_build_tgsi_context *bld_base,
1052                      struct lp_build_emit_data *emit_data)
1053 {
1054         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1055         LLVMValueRef floor_index =  lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1056         emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1057                         floor_index, bld_base->base.int_elem_type , "");
1058 }
1059
1060 static void emit_and(const struct lp_build_tgsi_action *action,
1061                      struct lp_build_tgsi_context *bld_base,
1062                      struct lp_build_emit_data *emit_data)
1063 {
1064         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1065         emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1066                         emit_data->args[0], emit_data->args[1], "");
1067 }
1068
1069 static void emit_or(const struct lp_build_tgsi_action *action,
1070                     struct lp_build_tgsi_context *bld_base,
1071                     struct lp_build_emit_data *emit_data)
1072 {
1073         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1074         emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1075                         emit_data->args[0], emit_data->args[1], "");
1076 }
1077
1078 static void emit_uadd(const struct lp_build_tgsi_action *action,
1079                       struct lp_build_tgsi_context *bld_base,
1080                       struct lp_build_emit_data *emit_data)
1081 {
1082         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1083         emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1084                         emit_data->args[0], emit_data->args[1], "");
1085 }
1086
1087 static void emit_udiv(const struct lp_build_tgsi_action *action,
1088                       struct lp_build_tgsi_context *bld_base,
1089                       struct lp_build_emit_data *emit_data)
1090 {
1091         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1092         emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1093                         emit_data->args[0], emit_data->args[1], "");
1094 }
1095
1096 static void emit_idiv(const struct lp_build_tgsi_action *action,
1097                       struct lp_build_tgsi_context *bld_base,
1098                       struct lp_build_emit_data *emit_data)
1099 {
1100         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1101         emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1102                         emit_data->args[0], emit_data->args[1], "");
1103 }
1104
1105 static void emit_mod(const struct lp_build_tgsi_action *action,
1106                      struct lp_build_tgsi_context *bld_base,
1107                      struct lp_build_emit_data *emit_data)
1108 {
1109         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1110         emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1111                         emit_data->args[0], emit_data->args[1], "");
1112 }
1113
1114 static void emit_umod(const struct lp_build_tgsi_action *action,
1115                       struct lp_build_tgsi_context *bld_base,
1116                       struct lp_build_emit_data *emit_data)
1117 {
1118         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1119         emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1120                         emit_data->args[0], emit_data->args[1], "");
1121 }
1122
1123 static void emit_shl(const struct lp_build_tgsi_action *action,
1124                      struct lp_build_tgsi_context *bld_base,
1125                      struct lp_build_emit_data *emit_data)
1126 {
1127         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1128         emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1129                         emit_data->args[0], emit_data->args[1], "");
1130 }
1131
1132 static void emit_ushr(const struct lp_build_tgsi_action *action,
1133                       struct lp_build_tgsi_context *bld_base,
1134                       struct lp_build_emit_data *emit_data)
1135 {
1136         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1137         emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1138                         emit_data->args[0], emit_data->args[1], "");
1139 }
1140 static void emit_ishr(const struct lp_build_tgsi_action *action,
1141                       struct lp_build_tgsi_context *bld_base,
1142                       struct lp_build_emit_data *emit_data)
1143 {
1144         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1145         emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1146                         emit_data->args[0], emit_data->args[1], "");
1147 }
1148
1149 static void emit_xor(const struct lp_build_tgsi_action *action,
1150                      struct lp_build_tgsi_context *bld_base,
1151                      struct lp_build_emit_data *emit_data)
1152 {
1153         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1154         emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1155                         emit_data->args[0], emit_data->args[1], "");
1156 }
1157
1158 static void emit_ssg(const struct lp_build_tgsi_action *action,
1159                      struct lp_build_tgsi_context *bld_base,
1160                      struct lp_build_emit_data *emit_data)
1161 {
1162         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1163
1164         LLVMValueRef cmp, val;
1165
1166         if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
1167                 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
1168                 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
1169                 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
1170                 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
1171         } else { // float SSG
1172                 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
1173                 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
1174                 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
1175                 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
1176         }
1177
1178         emit_data->output[emit_data->chan] = val;
1179 }
1180
1181 static void emit_ineg(const struct lp_build_tgsi_action *action,
1182                       struct lp_build_tgsi_context *bld_base,
1183                       struct lp_build_emit_data *emit_data)
1184 {
1185         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1186         emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1187                         emit_data->args[0], "");
1188 }
1189
1190 static void emit_dneg(const struct lp_build_tgsi_action *action,
1191                       struct lp_build_tgsi_context *bld_base,
1192                       struct lp_build_emit_data *emit_data)
1193 {
1194         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1195         emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1196                         emit_data->args[0], "");
1197 }
1198
1199 static void emit_frac(const struct lp_build_tgsi_action *action,
1200                       struct lp_build_tgsi_context *bld_base,
1201                       struct lp_build_emit_data *emit_data)
1202 {
1203         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1204         char *intr;
1205
1206         if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1207                 intr = "llvm.floor.f32";
1208         else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1209                 intr = "llvm.floor.f64";
1210         else {
1211                 assert(0);
1212                 return;
1213         }
1214
1215         LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1216                                                 &emit_data->args[0], 1,
1217                                                 LLVMReadNoneAttribute);
1218         emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1219                         emit_data->args[0], floor, "");
1220 }
1221
1222 static void emit_f2i(const struct lp_build_tgsi_action *action,
1223                      struct lp_build_tgsi_context *bld_base,
1224                      struct lp_build_emit_data *emit_data)
1225 {
1226         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1227         emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1228                         emit_data->args[0], bld_base->int_bld.elem_type, "");
1229 }
1230
1231 static void emit_f2u(const struct lp_build_tgsi_action *action,
1232                      struct lp_build_tgsi_context *bld_base,
1233                      struct lp_build_emit_data *emit_data)
1234 {
1235         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1236         emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1237                         emit_data->args[0], bld_base->uint_bld.elem_type, "");
1238 }
1239
1240 static void emit_i2f(const struct lp_build_tgsi_action *action,
1241                      struct lp_build_tgsi_context *bld_base,
1242                      struct lp_build_emit_data *emit_data)
1243 {
1244         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1245         emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1246                         emit_data->args[0], bld_base->base.elem_type, "");
1247 }
1248
1249 static void emit_u2f(const struct lp_build_tgsi_action *action,
1250                      struct lp_build_tgsi_context *bld_base,
1251                      struct lp_build_emit_data *emit_data)
1252 {
1253         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1254         emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1255                         emit_data->args[0], bld_base->base.elem_type, "");
1256 }
1257
1258 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1259                            const struct tgsi_full_immediate *imm)
1260 {
1261         unsigned i;
1262         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
1263
1264         for (i = 0; i < 4; ++i) {
1265                 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1266                                 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false   );
1267         }
1268
1269         ctx->soa.num_immediates++;
1270 }
1271
1272 void
1273 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1274                            struct lp_build_tgsi_context *bld_base,
1275                            struct lp_build_emit_data *emit_data)
1276 {
1277         struct lp_build_context *base = &bld_base->base;
1278         emit_data->output[emit_data->chan] =
1279                 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1280                                    emit_data->dst_type, emit_data->args,
1281                                    emit_data->arg_count, LLVMReadNoneAttribute);
1282 }
1283
1284 static void emit_bfi(const struct lp_build_tgsi_action *action,
1285                      struct lp_build_tgsi_context *bld_base,
1286                      struct lp_build_emit_data *emit_data)
1287 {
1288         struct gallivm_state *gallivm = bld_base->base.gallivm;
1289         LLVMBuilderRef builder = gallivm->builder;
1290         LLVMValueRef bfi_args[3];
1291
1292         // Calculate the bitmask: (((1 << src3) - 1) << src2
1293         bfi_args[0] = LLVMBuildShl(builder,
1294                                    LLVMBuildSub(builder,
1295                                                 LLVMBuildShl(builder,
1296                                                              bld_base->int_bld.one,
1297                                                              emit_data->args[3], ""),
1298                                                 bld_base->int_bld.one, ""),
1299                                    emit_data->args[2], "");
1300
1301         bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
1302                                    emit_data->args[2], "");
1303
1304         bfi_args[2] = emit_data->args[0];
1305
1306         /* Calculate:
1307          *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1308          * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1309          */
1310         emit_data->output[emit_data->chan] =
1311                 LLVMBuildXor(builder, bfi_args[2],
1312                         LLVMBuildAnd(builder, bfi_args[0],
1313                                 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
1314                                              ""), ""), "");
1315 }
1316
1317 /* this is ffs in C */
1318 static void emit_lsb(const struct lp_build_tgsi_action *action,
1319                      struct lp_build_tgsi_context *bld_base,
1320                      struct lp_build_emit_data *emit_data)
1321 {
1322         struct gallivm_state *gallivm = bld_base->base.gallivm;
1323         LLVMValueRef args[2] = {
1324                 emit_data->args[0],
1325
1326                 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1327                  * add special code to check for x=0. The reason is that
1328                  * the LLVM behavior for x=0 is different from what we
1329                  * need here.
1330                  *
1331                  * The hardware already implements the correct behavior.
1332                  */
1333                 lp_build_const_int32(gallivm, 1)
1334         };
1335
1336         emit_data->output[emit_data->chan] =
1337                 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
1338                                 emit_data->dst_type, args, ARRAY_SIZE(args),
1339                                 LLVMReadNoneAttribute);
1340 }
1341
1342 /* Find the last bit set. */
1343 static void emit_umsb(const struct lp_build_tgsi_action *action,
1344                       struct lp_build_tgsi_context *bld_base,
1345                       struct lp_build_emit_data *emit_data)
1346 {
1347         struct gallivm_state *gallivm = bld_base->base.gallivm;
1348         LLVMBuilderRef builder = gallivm->builder;
1349         LLVMValueRef args[2] = {
1350                 emit_data->args[0],
1351                 /* Don't generate code for handling zero: */
1352                 lp_build_const_int32(gallivm, 1)
1353         };
1354
1355         LLVMValueRef msb =
1356                 lp_build_intrinsic(builder, "llvm.ctlz.i32",
1357                                 emit_data->dst_type, args, ARRAY_SIZE(args),
1358                                 LLVMReadNoneAttribute);
1359
1360         /* The HW returns the last bit index from MSB, but TGSI wants
1361          * the index from LSB. Invert it by doing "31 - msb". */
1362         msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1363                            msb, "");
1364
1365         /* Check for zero: */
1366         emit_data->output[emit_data->chan] =
1367                 LLVMBuildSelect(builder,
1368                                 LLVMBuildICmp(builder, LLVMIntEQ, args[0],
1369                                               bld_base->uint_bld.zero, ""),
1370                                 lp_build_const_int32(gallivm, -1), msb, "");
1371 }
1372
1373 /* Find the last bit opposite of the sign bit. */
1374 static void emit_imsb(const struct lp_build_tgsi_action *action,
1375                       struct lp_build_tgsi_context *bld_base,
1376                       struct lp_build_emit_data *emit_data)
1377 {
1378         struct gallivm_state *gallivm = bld_base->base.gallivm;
1379         LLVMBuilderRef builder = gallivm->builder;
1380         LLVMValueRef arg = emit_data->args[0];
1381
1382         LLVMValueRef msb =
1383                 lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
1384                                 emit_data->dst_type, &arg, 1,
1385                                 LLVMReadNoneAttribute);
1386
1387         /* The HW returns the last bit index from MSB, but TGSI wants
1388          * the index from LSB. Invert it by doing "31 - msb". */
1389         msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1390                            msb, "");
1391
1392         /* If arg == 0 || arg == -1 (0xffffffff), return -1. */
1393         LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
1394
1395         LLVMValueRef cond =
1396                 LLVMBuildOr(builder,
1397                             LLVMBuildICmp(builder, LLVMIntEQ, arg,
1398                                           bld_base->uint_bld.zero, ""),
1399                             LLVMBuildICmp(builder, LLVMIntEQ, arg,
1400                                           all_ones, ""), "");
1401
1402         emit_data->output[emit_data->chan] =
1403                 LLVMBuildSelect(builder, cond, all_ones, msb, "");
1404 }
1405
1406 static void emit_iabs(const struct lp_build_tgsi_action *action,
1407                       struct lp_build_tgsi_context *bld_base,
1408                       struct lp_build_emit_data *emit_data)
1409 {
1410         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1411
1412         emit_data->output[emit_data->chan] =
1413                 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1414                                           emit_data->args[0],
1415                                           LLVMBuildNeg(builder,
1416                                                        emit_data->args[0], ""));
1417 }
1418
1419 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
1420                             struct lp_build_tgsi_context *bld_base,
1421                             struct lp_build_emit_data *emit_data)
1422 {
1423         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1424         LLVMIntPredicate op;
1425
1426         switch (emit_data->info->opcode) {
1427         default:
1428                 assert(0);
1429         case TGSI_OPCODE_IMAX:
1430                 op = LLVMIntSGT;
1431                 break;
1432         case TGSI_OPCODE_IMIN:
1433                 op = LLVMIntSLT;
1434                 break;
1435         case TGSI_OPCODE_UMAX:
1436                 op = LLVMIntUGT;
1437                 break;
1438         case TGSI_OPCODE_UMIN:
1439                 op = LLVMIntULT;
1440                 break;
1441         }
1442
1443         emit_data->output[emit_data->chan] =
1444                 LLVMBuildSelect(builder,
1445                                 LLVMBuildICmp(builder, op, emit_data->args[0],
1446                                               emit_data->args[1], ""),
1447                                 emit_data->args[0],
1448                                 emit_data->args[1], "");
1449 }
1450
1451 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1452                             struct lp_build_emit_data *emit_data)
1453 {
1454         emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1455                                                  0, TGSI_CHAN_X);
1456         emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
1457                                                  0, TGSI_CHAN_Y);
1458 }
1459
1460 static void emit_pk2h(const struct lp_build_tgsi_action *action,
1461                       struct lp_build_tgsi_context *bld_base,
1462                       struct lp_build_emit_data *emit_data)
1463 {
1464         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1465         LLVMContextRef context = bld_base->base.gallivm->context;
1466         struct lp_build_context *uint_bld = &bld_base->uint_bld;
1467         LLVMTypeRef fp16, i16;
1468         LLVMValueRef const16, comp[2];
1469         unsigned i;
1470
1471         fp16 = LLVMHalfTypeInContext(context);
1472         i16 = LLVMInt16TypeInContext(context);
1473         const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1474
1475         for (i = 0; i < 2; i++) {
1476                 comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
1477                 comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
1478                 comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
1479         }
1480
1481         comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
1482         comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
1483
1484         emit_data->output[emit_data->chan] = comp[0];
1485 }
1486
1487 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1488                             struct lp_build_emit_data *emit_data)
1489 {
1490         emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1491                                                  0, TGSI_CHAN_X);
1492 }
1493
1494 static void emit_up2h(const struct lp_build_tgsi_action *action,
1495                       struct lp_build_tgsi_context *bld_base,
1496                       struct lp_build_emit_data *emit_data)
1497 {
1498         LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1499         LLVMContextRef context = bld_base->base.gallivm->context;
1500         struct lp_build_context *uint_bld = &bld_base->uint_bld;
1501         LLVMTypeRef fp16, i16;
1502         LLVMValueRef const16, input, val;
1503         unsigned i;
1504
1505         fp16 = LLVMHalfTypeInContext(context);
1506         i16 = LLVMInt16TypeInContext(context);
1507         const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1508         input = emit_data->args[0];
1509
1510         for (i = 0; i < 2; i++) {
1511                 val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
1512                 val = LLVMBuildTrunc(builder, val, i16, "");
1513                 val = LLVMBuildBitCast(builder, val, fp16, "");
1514                 emit_data->output[i] =
1515                         LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
1516         }
1517 }
1518
1519 static void emit_fdiv(const struct lp_build_tgsi_action *action,
1520                       struct lp_build_tgsi_context *bld_base,
1521                       struct lp_build_emit_data *emit_data)
1522 {
1523         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
1524
1525         emit_data->output[emit_data->chan] =
1526                 LLVMBuildFDiv(bld_base->base.gallivm->builder,
1527                               emit_data->args[0], emit_data->args[1], "");
1528
1529         /* Use v_rcp_f32 instead of precise division. */
1530         if (HAVE_LLVM >= 0x0309 &&
1531             !LLVMIsConstant(emit_data->output[emit_data->chan]))
1532                 LLVMSetMetadata(emit_data->output[emit_data->chan],
1533                                 ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
1534 }
1535
1536 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
1537  * the target machine. f64 needs global unsafe math flags to get rsq. */
1538 static void emit_rsq(const struct lp_build_tgsi_action *action,
1539                      struct lp_build_tgsi_context *bld_base,
1540                      struct lp_build_emit_data *emit_data)
1541 {
1542         LLVMValueRef sqrt =
1543                 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
1544                                          emit_data->args[0]);
1545
1546         emit_data->output[emit_data->chan] =
1547                 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
1548                                           bld_base->base.one, sqrt);
1549 }
1550
1551 void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple)
1552 {
1553         struct lp_type type;
1554
1555         /* Initialize the gallivm object:
1556          * We are only using the module, context, and builder fields of this struct.
1557          * This should be enough for us to be able to pass our gallivm struct to the
1558          * helper functions in the gallivm module.
1559          */
1560         memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
1561         memset(&ctx->soa, 0, sizeof(ctx->soa));
1562         ctx->gallivm.context = LLVMContextCreate();
1563         ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1564                                                 ctx->gallivm.context);
1565         LLVMSetTarget(ctx->gallivm.module, triple);
1566         ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1567
1568         struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
1569
1570         type.floating = true;
1571         type.fixed = false;
1572         type.sign = true;
1573         type.norm = false;
1574         type.width = 32;
1575         type.length = 1;
1576
1577         lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1578         lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1579         lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1580         {
1581                 struct lp_type dbl_type;
1582                 dbl_type = type;
1583                 dbl_type.width *= 2;
1584                 lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
1585         }
1586
1587         bld_base->soa = 1;
1588         bld_base->emit_store = radeon_llvm_emit_store;
1589         bld_base->emit_swizzle = emit_swizzle;
1590         bld_base->emit_declaration = emit_declaration;
1591         bld_base->emit_immediate = emit_immediate;
1592
1593         bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
1594         bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
1595         bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
1596         bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
1597         bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1598
1599         /* metadata allowing 2.5 ULP */
1600         ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1601                                                        "fpmath", 6);
1602         LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1603         ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1604                                                      &arg, 1);
1605
1606         /* Allocate outputs */
1607         ctx->soa.outputs = ctx->outputs;
1608
1609         lp_set_default_actions(bld_base);
1610
1611         bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
1612         bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
1613         bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
1614         bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
1615         bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
1616         bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1617         bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
1618         bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
1619                 HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
1620         bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1621         bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
1622         bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
1623         bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
1624         bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
1625                 HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
1626         bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
1627         bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1628         bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
1629         bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
1630         bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
1631         bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
1632         bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
1633         bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
1634         bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
1635         bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
1636         bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
1637         bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
1638         bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
1639         bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
1640         bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
1641         bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
1642         bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name = "llvm.AMDGPU.rsq.f64";
1643         bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
1644         bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
1645         bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1646         bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1647         bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1648         bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
1649         bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
1650                 HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
1651         bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
1652         bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
1653         bld_base->op_actions[TGSI_OPCODE_FMA].emit = build_tgsi_intrinsic_nomem;
1654         bld_base->op_actions[TGSI_OPCODE_FMA].intr_name = "llvm.fma.f32";
1655         bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
1656         bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
1657         bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
1658         bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
1659         bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
1660         bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
1661         bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
1662         bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
1663         bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
1664         bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
1665         bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
1666         bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1667         bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1668         bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
1669         bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
1670         bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
1671         bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
1672         bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
1673         bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
1674         bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
1675         bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
1676         bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
1677         bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
1678         bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
1679         bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
1680         bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
1681         bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
1682         bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
1683         bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
1684         bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
1685         bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
1686         bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
1687         bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
1688         bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
1689         bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
1690         bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
1691         bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
1692         bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
1693         bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
1694         bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
1695         bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
1696         bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
1697         bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
1698         bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
1699         bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
1700         bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
1701         bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
1702         bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
1703         bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
1704         bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
1705         bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
1706         bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
1707         bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
1708         bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
1709         bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
1710         bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
1711         bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
1712         bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
1713         bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
1714         bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
1715         bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
1716         bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
1717         bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
1718         bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
1719         bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
1720         bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
1721         bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
1722         bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
1723         bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
1724         bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
1725         bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
1726         bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
1727         bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
1728         bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
1729 }
1730
1731 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
1732                              LLVMTypeRef *return_types, unsigned num_return_elems,
1733                              LLVMTypeRef *ParamTypes, unsigned ParamCount)
1734 {
1735         LLVMTypeRef main_fn_type, ret_type;
1736         LLVMBasicBlockRef main_fn_body;
1737
1738         if (num_return_elems)
1739                 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1740                                                    return_types,
1741                                                    num_return_elems, true);
1742         else
1743                 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1744
1745         /* Setup the function */
1746         ctx->return_type = ret_type;
1747         main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1748         ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
1749         main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1750                         ctx->main_fn, "main_body");
1751         LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1752 }
1753
1754 void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
1755 {
1756         struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
1757         const char *triple = LLVMGetTarget(gallivm->module);
1758         LLVMTargetLibraryInfoRef target_library_info;
1759
1760         /* Create the pass manager */
1761         gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
1762                                                         gallivm->module);
1763
1764         target_library_info = gallivm_create_target_library_info(triple);
1765         LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1766
1767         /* This pass should eliminate all the load and store instructions */
1768         LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1769
1770         /* Add some optimization passes */
1771         LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1772         LLVMAddLICMPass(gallivm->passmgr);
1773         LLVMAddAggressiveDCEPass(gallivm->passmgr);
1774         LLVMAddCFGSimplificationPass(gallivm->passmgr);
1775         LLVMAddInstructionCombiningPass(gallivm->passmgr);
1776
1777         /* Run the pass */
1778         LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
1779
1780         LLVMDisposeBuilder(gallivm->builder);
1781         LLVMDisposePassManager(gallivm->passmgr);
1782         gallivm_dispose_target_library_info(target_library_info);
1783 }
1784
1785 void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
1786 {
1787         LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
1788         LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
1789         FREE(ctx->arrays);
1790         ctx->arrays = NULL;
1791         FREE(ctx->temps);
1792         ctx->temps = NULL;
1793         ctx->temps_count = 0;
1794         FREE(ctx->loop);
1795         ctx->loop = NULL;
1796         ctx->loop_depth_max = 0;
1797         FREE(ctx->branch);
1798         ctx->branch = NULL;
1799         ctx->branch_depth_max = 0;
1800 }