radeonsi: use llvm.amdgcn.rsq.f64 if available
[mesa.git] src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
1 /*
2 * Copyright 2011 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors: Tom Stellard <thomas.stellard@amd.com>
24 *
25 */
26 #include "radeon_llvm.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_misc.h"
34 #include "gallivm/lp_bld_swizzle.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "util/u_math.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40
41 #include <llvm-c/Core.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
44 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
45 enum tgsi_opcode_type type)
46 {
47 LLVMContextRef ctx = bld_base->base.gallivm->context;
48
49 switch (type) {
50 case TGSI_TYPE_UNSIGNED:
51 case TGSI_TYPE_SIGNED:
52 return LLVMInt32TypeInContext(ctx);
53 case TGSI_TYPE_DOUBLE:
54 return LLVMDoubleTypeInContext(ctx);
55 case TGSI_TYPE_UNTYPED:
56 case TGSI_TYPE_FLOAT:
57 return LLVMFloatTypeInContext(ctx);
58 default: break;
59 }
60 return 0;
61 }
62
63 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
64 enum tgsi_opcode_type type, LLVMValueRef value)
65 {
66 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
67 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
68
69 if (dst_type)
70 return LLVMBuildBitCast(builder, value, dst_type, "");
71 else
72 return value;
73 }
74
75 static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
76 {
77 return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
78 }
79
80 static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
81 {
82 return ctx->branch_depth > 0 ?
83 ctx->branch + (ctx->branch_depth - 1) : NULL;
84 }
85
86 unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
87 {
88 return (index * 4) + chan;
89 }
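/* Example: TEMP[2].y (index = 2, chan = 1) maps to SoA slot 2 * 4 + 1 = 9;
 * every TGSI register occupies four consecutive slots, one per channel. */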
90
91 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
92 LLVMValueRef value,
93 unsigned swizzle_x,
94 unsigned swizzle_y,
95 unsigned swizzle_z,
96 unsigned swizzle_w)
97 {
98 LLVMValueRef swizzles[4];
99 LLVMTypeRef i32t =
100 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
101
102 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
103 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
104 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
105 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
106
107 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
108 value,
109 LLVMGetUndef(LLVMTypeOf(value)),
110 LLVMConstVector(swizzles, 4), "");
111 }
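/* Example: swizzling a 4-component value by .wzyx yields the shufflevector
 * mask <3, 2, 1, 0>; the second shuffle operand is undef because only lanes
 * of the original value are ever selected. */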
112
113 static struct tgsi_declaration_range
114 get_array_range(struct lp_build_tgsi_context *bld_base,
115 unsigned File, unsigned reg_index,
116 const struct tgsi_ind_register *reg)
117 {
118 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
119
120 if (!reg) {
121 unsigned i;
122 unsigned num_arrays = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
123 for (i = 0; i < num_arrays; i++) {
124 const struct tgsi_declaration_range *range =
125 &ctx->arrays[i].range;
126
127 if (reg_index >= range->First && reg_index <= range->Last) {
128 return ctx->arrays[i].range;
129 }
130 }
131 }
132
133 if (File != TGSI_FILE_TEMPORARY || !reg || reg->ArrayID == 0 ||
134 reg->ArrayID > bld_base->info->array_max[TGSI_FILE_TEMPORARY]) {
135 struct tgsi_declaration_range range;
136 range.First = 0;
137 range.Last = bld_base->info->file_max[File];
138 return range;
139 }
140
141 return ctx->arrays[reg->ArrayID - 1].range;
142 }
143
144 static LLVMValueRef get_alloca_for_array(struct lp_build_tgsi_context *bld_base,
145 unsigned file,
146 unsigned index)
147 {
148 unsigned i;
149 unsigned num_arrays;
150 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
151
152 if (file != TGSI_FILE_TEMPORARY)
153 return NULL;
154
155 num_arrays = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
156 for (i = 0; i < num_arrays; i++) {
157 const struct tgsi_declaration_range *range =
158 &ctx->arrays[i].range;
159
160 if (index >= range->First && index <= range->Last) {
161 return ctx->arrays[i].alloca;
162 }
163 }
164 return NULL;
165 }
166
167 static LLVMValueRef
168 emit_array_index(struct lp_build_tgsi_soa_context *bld,
169 const struct tgsi_ind_register *reg,
170 unsigned offset)
171 {
172 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
173
174 if (!reg) {
175 return lp_build_const_int32(gallivm, offset);
176 }
177 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
178 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
179 }
180
181 LLVMValueRef
182 radeon_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
183 enum tgsi_opcode_type type,
184 LLVMValueRef ptr,
185 LLVMValueRef ptr2)
186 {
187 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
188 LLVMValueRef result;
189
190 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
191
192 result = LLVMBuildInsertElement(builder,
193 result,
194 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
195 bld_base->int_bld.zero, "");
196 result = LLVMBuildInsertElement(builder,
197 result,
198 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
199 bld_base->int_bld.one, "");
200 return bitcast(bld_base, type, result);
201 }
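/* The 64-bit fetch packs two 32-bit channel values into a <2 x i32> vector
 * (first source channel in lane 0, second in lane 1) and then bitcasts the
 * whole vector to the requested 64-bit type, e.g. double for
 * TGSI_TYPE_DOUBLE. */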
202
203 static LLVMValueRef
204 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
205 unsigned File, enum tgsi_opcode_type type,
206 struct tgsi_declaration_range range,
207 unsigned swizzle)
208 {
209 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
210 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
211 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
212
213 unsigned i, size = range.Last - range.First + 1;
214 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
215 LLVMValueRef result = LLVMGetUndef(vec);
216
217 struct tgsi_full_src_register tmp_reg = {};
218 tmp_reg.Register.File = File;
219
220 for (i = 0; i < size; ++i) {
221 tmp_reg.Register.Index = i + range.First;
222 LLVMValueRef temp = radeon_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
223 result = LLVMBuildInsertElement(builder, result, temp,
224 lp_build_const_int32(gallivm, i), "array_vector");
225 }
226 return result;
227 }
228
229 static LLVMValueRef
230 load_value_from_array(struct lp_build_tgsi_context *bld_base,
231 unsigned file,
232 enum tgsi_opcode_type type,
233 unsigned swizzle,
234 unsigned reg_index,
235 const struct tgsi_ind_register *reg_indirect)
236 {
237 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
238 struct gallivm_state *gallivm = bld_base->base.gallivm;
239 LLVMBuilderRef builder = gallivm->builder;
240 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
241 LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
242 LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index);
243 LLVMValueRef ptr, val, indices[2];
244
245 if (!array) {
246 /* Handle the case where the array is stored as a vector. */
247 return LLVMBuildExtractElement(builder,
248 emit_array_fetch(bld_base, file, type, range, swizzle),
249 index, "");
250 }
251
252 index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
253 index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, swizzle), "");
254 indices[0] = bld_base->uint_bld.zero;
255 indices[1] = index;
256 ptr = LLVMBuildGEP(builder, array, indices, 2, "");
257 val = LLVMBuildLoad(builder, ptr, "");
258 if (tgsi_type_is_64bit(type)) {
259 LLVMValueRef ptr_hi, val_hi;
260 indices[0] = lp_build_const_int32(gallivm, 1);
261 ptr_hi = LLVMBuildGEP(builder, ptr, indices, 1, "");
262 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
263 val = radeon_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
264
265 }
266 return val;
267 }
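/* For alloca-backed arrays the element address is flattened as
 * index * TGSI_NUM_CHANNELS + swizzle into the per-channel alloca.
 * Example: element 3, channel Z reads slot 3 * 4 + 2 = 14; 64-bit types
 * additionally load the adjacent slot for the second half. */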
268
269 static LLVMValueRef
270 store_value_to_array(struct lp_build_tgsi_context *bld_base,
271 LLVMValueRef value,
272 unsigned file,
273 unsigned chan_index,
274 unsigned reg_index,
275 const struct tgsi_ind_register *reg_indirect)
276 {
277 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
278 struct gallivm_state *gallivm = bld_base->base.gallivm;
279 LLVMBuilderRef builder = gallivm->builder;
280 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
281 LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
282 LLVMValueRef array = get_alloca_for_array(bld_base, file, reg_index);
283
284 if (array) {
285 LLVMValueRef indices[2];
286 index = LLVMBuildMul(builder, index, lp_build_const_int32(gallivm, TGSI_NUM_CHANNELS), "");
287 index = LLVMBuildAdd(builder, index, lp_build_const_int32(gallivm, chan_index), "");
288 indices[0] = bld_base->uint_bld.zero;
289 indices[1] = index;
290 LLVMValueRef pointer = LLVMBuildGEP(builder, array, indices, 2, "");
291 LLVMBuildStore(builder, value, pointer);
292 return NULL;
293 } else {
294 return LLVMBuildInsertElement(builder,
295 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index),
296 value, index, "");
297 }
298 }
299
300 LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
301 const struct tgsi_full_src_register *reg,
302 enum tgsi_opcode_type type,
303 unsigned swizzle)
304 {
305 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
306 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
307 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
308 LLVMValueRef result = NULL, ptr, ptr2;
309
310 if (swizzle == ~0) {
311 LLVMValueRef values[TGSI_NUM_CHANNELS];
312 unsigned chan;
313 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
314 values[chan] = radeon_llvm_emit_fetch(bld_base, reg, type, chan);
315 }
316 return lp_build_gather_values(bld_base->base.gallivm, values,
317 TGSI_NUM_CHANNELS);
318 }
319
320 if (reg->Register.Indirect) {
321 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
322 swizzle, reg->Register.Index, &reg->Indirect);
323 return bitcast(bld_base, type, load);
324 }
325
326 switch(reg->Register.File) {
327 case TGSI_FILE_IMMEDIATE: {
328 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
329 if (tgsi_type_is_64bit(type)) {
330 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
331 result = LLVMConstInsertElement(result,
332 bld->immediates[reg->Register.Index][swizzle],
333 bld_base->int_bld.zero);
334 result = LLVMConstInsertElement(result,
335 bld->immediates[reg->Register.Index][swizzle + 1],
336 bld_base->int_bld.one);
337 return LLVMConstBitCast(result, ctype);
338 } else {
339 return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
340 }
341 }
342
343 case TGSI_FILE_INPUT:
344 result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
345 if (tgsi_type_is_64bit(type)) {
346 ptr = result;
347 ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
348 return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
349 }
350 break;
351
352 case TGSI_FILE_TEMPORARY:
353 if (reg->Register.Index >= ctx->temps_count)
354 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
355 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
356 if (tgsi_type_is_64bit(type)) {
357 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
358 return radeon_llvm_emit_fetch_64bit(bld_base, type,
359 LLVMBuildLoad(builder, ptr, ""),
360 LLVMBuildLoad(builder, ptr2, ""));
361 }
362 LLVMValueRef array = get_alloca_for_array(bld_base, reg->Register.File, reg->Register.Index);
363 if (array) {
364 return bitcast(bld_base, type, load_value_from_array(bld_base, reg->Register.File, type,
365 swizzle, reg->Register.Index, NULL));
366 }
367 result = LLVMBuildLoad(builder, ptr, "");
368 break;
369
370 case TGSI_FILE_OUTPUT:
371 ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
372 if (tgsi_type_is_64bit(type)) {
373 ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
374 return radeon_llvm_emit_fetch_64bit(bld_base, type,
375 LLVMBuildLoad(builder, ptr, ""),
376 LLVMBuildLoad(builder, ptr2, ""));
377 }
378 result = LLVMBuildLoad(builder, ptr, "");
379 break;
380
381 default:
382 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
383 }
384
385 return bitcast(bld_base, type, result);
386 }
387
388 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
389 const struct tgsi_full_src_register *reg,
390 enum tgsi_opcode_type type,
391 unsigned swizzle)
392 {
393 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
394 struct gallivm_state *gallivm = bld_base->base.gallivm;
395
396 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
397 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
398 cval = LLVMBuildExtractElement(gallivm->builder, cval,
399 lp_build_const_int32(gallivm, swizzle), "");
400 }
401 return bitcast(bld_base, type, cval);
402 }
403
404 static LLVMValueRef si_build_alloca_undef(struct gallivm_state *gallivm,
405 LLVMTypeRef type,
406 const char *name)
407 {
408 LLVMValueRef ptr = lp_build_alloca(gallivm, type, name);
409 LLVMBuildStore(gallivm->builder, LLVMGetUndef(type), ptr);
410 return ptr;
411 }
412
413 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
414 const struct tgsi_full_declaration *decl)
415 {
416 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
417 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
418 unsigned first, last, i, idx;
419 switch(decl->Declaration.File) {
420 case TGSI_FILE_ADDRESS:
421 {
422 unsigned idx;
423 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
424 unsigned chan;
425 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
426 ctx->soa.addr[idx][chan] = si_build_alloca_undef(
427 &ctx->gallivm,
428 ctx->soa.bld_base.uint_bld.elem_type, "");
429 }
430 }
431 break;
432 }
433
434 case TGSI_FILE_TEMPORARY:
435 {
436 unsigned decl_size;
437 first = decl->Range.First;
438 last = decl->Range.Last;
439 decl_size = 4 * ((last - first) + 1);
440 if (decl->Declaration.Array) {
441 unsigned id = decl->Array.ArrayID - 1;
442 if (!ctx->arrays) {
443 int size = bld_base->info->array_max[TGSI_FILE_TEMPORARY];
444 ctx->arrays = CALLOC(size, sizeof(ctx->arrays[0]));
445 for (i = 0; i < size; ++i)
446 assert(!ctx->arrays[i].alloca);
447 }
448
449 ctx->arrays[id].range = decl->Range;
450
451 /* If the array has more than 16 elements (each element
452 * is 32 bits), store it in an alloca so it is accessed
453 * with real indirect addressing. Smaller arrays are left
454 * as individual temps and gathered into a vector for
455 * indirect access; 16 is the number of vector elements
456 * that LLVM will keep in a register.
457 * FIXME: We shouldn't need to do this. LLVM should be
458 * smart enough to promote allocas into registers when
459 * profitable.
460 */
461 if (decl_size > 16) {
462 ctx->arrays[id].alloca = LLVMBuildAlloca(builder,
463 LLVMArrayType(bld_base->base.vec_type, decl_size),"array");
464 }
465 }
466 first = decl->Range.First;
467 last = decl->Range.Last;
468 if (!ctx->temps_count) {
469 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
470 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
471 }
472 for (idx = first; idx <= last; idx++) {
473 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
474 ctx->temps[idx * TGSI_NUM_CHANNELS + i] =
475 si_build_alloca_undef(bld_base->base.gallivm,
476 bld_base->base.vec_type,
477 "temp");
478 }
479 }
480 break;
481 }
482 case TGSI_FILE_INPUT:
483 {
484 unsigned idx;
485 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
486 if (ctx->load_input)
487 ctx->load_input(ctx, idx, decl);
488 }
489 }
490 break;
491
492 case TGSI_FILE_SYSTEM_VALUE:
493 {
494 unsigned idx;
495 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
496 ctx->load_system_value(ctx, idx, decl);
497 }
498 }
499 break;
500
501 case TGSI_FILE_OUTPUT:
502 {
503 unsigned idx;
504 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
505 unsigned chan;
506 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
507 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
508 ctx->soa.outputs[idx][chan] = si_build_alloca_undef(
509 &ctx->gallivm,
510 ctx->soa.bld_base.base.elem_type, "");
511 }
512 }
513 break;
514 }
515
516 case TGSI_FILE_MEMORY:
517 ctx->declare_memory_region(ctx, decl);
518 break;
519
520 default:
521 break;
522 }
523 }
524
525 LLVMValueRef radeon_llvm_saturate(struct lp_build_tgsi_context *bld_base,
526 LLVMValueRef value)
527 {
528 struct lp_build_emit_data clamp_emit_data;
529
530 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
531 clamp_emit_data.arg_count = 3;
532 clamp_emit_data.args[0] = value;
533 clamp_emit_data.args[2] = bld_base->base.one;
534 clamp_emit_data.args[1] = bld_base->base.zero;
535
536 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
537 &clamp_emit_data);
538 }
539
540 void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
541 const struct tgsi_full_instruction *inst,
542 const struct tgsi_opcode_info *info,
543 LLVMValueRef dst[4])
544 {
545 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
546 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
547 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
548 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
549 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
550 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
551 unsigned chan, chan_index;
552 bool is_vec_store = false;
553 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
554
555 if (dst[0]) {
556 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
557 is_vec_store = (k == LLVMVectorTypeKind);
558 }
559
560 if (is_vec_store) {
561 LLVMValueRef values[4] = {};
562 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
563 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
564 values[chan] = LLVMBuildExtractElement(gallivm->builder,
565 dst[0], index, "");
566 }
567 bld_base->emit_store(bld_base, inst, info, values);
568 return;
569 }
570
571 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
572 LLVMValueRef value = dst[chan_index];
573
574 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
575 continue;
576 if (inst->Instruction.Saturate)
577 value = radeon_llvm_saturate(bld_base, value);
578
579 if (reg->Register.File == TGSI_FILE_ADDRESS) {
580 temp_ptr = bld->addr[reg->Register.Index][chan_index];
581 LLVMBuildStore(builder, value, temp_ptr);
582 continue;
583 }
584
585 if (!tgsi_type_is_64bit(dtype))
586 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
587
588 if (reg->Register.Indirect) {
589 struct tgsi_declaration_range range = get_array_range(bld_base,
590 reg->Register.File, reg->Register.Index, &reg->Indirect);
591
592 unsigned i, size = range.Last - range.First + 1;
593 unsigned file = reg->Register.File;
594 unsigned reg_index = reg->Register.Index;
595 LLVMValueRef array = store_value_to_array(bld_base, value, file, chan_index,
596 reg_index, &reg->Indirect);
597 if (get_alloca_for_array(bld_base, file, reg_index)) {
598 continue;
599 }
600 for (i = 0; i < size; ++i) {
601 switch(reg->Register.File) {
602 case TGSI_FILE_OUTPUT:
603 temp_ptr = bld->outputs[i + range.First][chan_index];
604 break;
605
606 case TGSI_FILE_TEMPORARY:
607 if (range.First + i >= ctx->temps_count)
608 continue;
609 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
610 break;
611
612 default:
613 continue;
614 }
615 value = LLVMBuildExtractElement(builder, array,
616 lp_build_const_int32(gallivm, i), "");
617 LLVMBuildStore(builder, value, temp_ptr);
618 }
619
620 } else {
621 switch(reg->Register.File) {
622 case TGSI_FILE_OUTPUT:
623 temp_ptr = bld->outputs[reg->Register.Index][chan_index];
624 if (tgsi_type_is_64bit(dtype))
625 temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
626 break;
627
628 case TGSI_FILE_TEMPORARY:
629 {
630 LLVMValueRef array;
631 if (reg->Register.Index >= ctx->temps_count)
632 continue;
633 array = get_alloca_for_array(bld_base, reg->Register.File, reg->Register.Index);
634
635 if (array) {
636 store_value_to_array(bld_base, value, reg->Register.File, chan_index, reg->Register.Index,
637 NULL);
638 continue;
639 }
640 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
641 if (tgsi_type_is_64bit(dtype))
642 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
643
644 break;
645 }
646 default:
647 return;
648 }
649 if (!tgsi_type_is_64bit(dtype))
650 LLVMBuildStore(builder, value, temp_ptr);
651 else {
652 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
653 LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
654 LLVMValueRef val2;
655 value = LLVMBuildExtractElement(builder, ptr,
656 bld_base->uint_bld.zero, "");
657 val2 = LLVMBuildExtractElement(builder, ptr,
658 bld_base->uint_bld.one, "");
659
660 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
661 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
662 }
663 }
664 }
665 }
666
667 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
668 struct lp_build_tgsi_context *bld_base,
669 struct lp_build_emit_data *emit_data)
670 {
671 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
672 struct gallivm_state *gallivm = bld_base->base.gallivm;
673 LLVMBasicBlockRef loop_block;
674 LLVMBasicBlockRef endloop_block;
675 endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
676 ctx->main_fn, "ENDLOOP");
677 loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
678 endloop_block, "LOOP");
679 LLVMBuildBr(gallivm->builder, loop_block);
680 LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
681
682 if (++ctx->loop_depth > ctx->loop_depth_max) {
683 unsigned new_max = ctx->loop_depth_max << 1;
684
685 if (!new_max)
686 new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
687
688 ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
689 sizeof(ctx->loop[0]),
690 new_max * sizeof(ctx->loop[0]));
691 ctx->loop_depth_max = new_max;
692 }
693
694 ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
695 ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
696 }
697
698 static void brk_emit(const struct lp_build_tgsi_action *action,
699 struct lp_build_tgsi_context *bld_base,
700 struct lp_build_emit_data *emit_data)
701 {
702 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
703 struct gallivm_state *gallivm = bld_base->base.gallivm;
704 struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
705
706 LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
707 }
708
709 static void cont_emit(const struct lp_build_tgsi_action *action,
710 struct lp_build_tgsi_context *bld_base,
711 struct lp_build_emit_data *emit_data)
712 {
713 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
714 struct gallivm_state *gallivm = bld_base->base.gallivm;
715 struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
716
717 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
718 }
719
720 static void else_emit(const struct lp_build_tgsi_action *action,
721 struct lp_build_tgsi_context *bld_base,
722 struct lp_build_emit_data *emit_data)
723 {
724 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
725 struct gallivm_state *gallivm = bld_base->base.gallivm;
726 struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
727 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
728
729 /* We need to add a terminator to the current block if the previous
730 * instruction was an ENDIF. Example:
731 * IF
732 * [code]
733 * IF
734 * [code]
735 * ELSE
736 * [code]
737 * ENDIF <--
738 * ELSE <--
739 * [code]
740 * ENDIF
741 */
742
743 if (current_block != current_branch->if_block) {
744 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
745 }
746 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
747 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
748 }
749 current_branch->has_else = 1;
750 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
751 }
752
753 static void endif_emit(const struct lp_build_tgsi_action *action,
754 struct lp_build_tgsi_context *bld_base,
755 struct lp_build_emit_data *emit_data)
756 {
757 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
758 struct gallivm_state *gallivm = bld_base->base.gallivm;
759 struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
760 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
761
762 /* If we have consecutive ENDIF instructions, then the first ENDIF
763 * will not have a terminator, so we need to add one. */
764 if (current_block != current_branch->if_block
765 && current_block != current_branch->else_block
766 && !LLVMGetBasicBlockTerminator(current_block)) {
767
768 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
769 }
770 if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
771 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
772 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
773 }
774
775 if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
776 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
777 LLVMBuildBr(gallivm->builder, current_branch->endif_block);
778 }
779
780 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
781 ctx->branch_depth--;
782 }
783
784 static void endloop_emit(const struct lp_build_tgsi_action *action,
785 struct lp_build_tgsi_context *bld_base,
786 struct lp_build_emit_data *emit_data)
787 {
788 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
789 struct gallivm_state *gallivm = bld_base->base.gallivm;
790 struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
791
792 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
793 LLVMBuildBr(gallivm->builder, current_loop->loop_block);
794 }
795
796 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
797 ctx->loop_depth--;
798 }
799
800 static void if_cond_emit(const struct lp_build_tgsi_action *action,
801 struct lp_build_tgsi_context *bld_base,
802 struct lp_build_emit_data *emit_data,
803 LLVMValueRef cond)
804 {
805 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
806 struct gallivm_state *gallivm = bld_base->base.gallivm;
807 LLVMBasicBlockRef if_block, else_block, endif_block;
808
809 endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
810 ctx->main_fn, "ENDIF");
811 if_block = LLVMInsertBasicBlockInContext(gallivm->context,
812 endif_block, "IF");
813 else_block = LLVMInsertBasicBlockInContext(gallivm->context,
814 endif_block, "ELSE");
815 LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
816 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
817
818 if (++ctx->branch_depth > ctx->branch_depth_max) {
819 unsigned new_max = ctx->branch_depth_max << 1;
820
821 if (!new_max)
822 new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
823
824 ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
825 sizeof(ctx->branch[0]),
826 new_max * sizeof(ctx->branch[0]));
827 ctx->branch_depth_max = new_max;
828 }
829
830 ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
831 ctx->branch[ctx->branch_depth - 1].if_block = if_block;
832 ctx->branch[ctx->branch_depth - 1].else_block = else_block;
833 ctx->branch[ctx->branch_depth - 1].has_else = 0;
834 }
835
836 static void if_emit(const struct lp_build_tgsi_action *action,
837 struct lp_build_tgsi_context *bld_base,
838 struct lp_build_emit_data *emit_data)
839 {
840 struct gallivm_state *gallivm = bld_base->base.gallivm;
841 LLVMValueRef cond;
842
843 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
844 emit_data->args[0],
845 bld_base->base.zero, "");
846
847 if_cond_emit(action, bld_base, emit_data, cond);
848 }
849
850 static void uif_emit(const struct lp_build_tgsi_action *action,
851 struct lp_build_tgsi_context *bld_base,
852 struct lp_build_emit_data *emit_data)
853 {
854 struct gallivm_state *gallivm = bld_base->base.gallivm;
855 LLVMValueRef cond;
856
857 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
858 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
859 bld_base->int_bld.zero, "");
860
861 if_cond_emit(action, bld_base, emit_data, cond);
862 }
863
864 static void kill_if_fetch_args(struct lp_build_tgsi_context *bld_base,
865 struct lp_build_emit_data *emit_data)
866 {
867 const struct tgsi_full_instruction *inst = emit_data->inst;
868 struct gallivm_state *gallivm = bld_base->base.gallivm;
869 LLVMBuilderRef builder = gallivm->builder;
870 unsigned i;
871 LLVMValueRef conds[TGSI_NUM_CHANNELS];
872
873 for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
874 LLVMValueRef value = lp_build_emit_fetch(bld_base, inst, 0, i);
875 conds[i] = LLVMBuildFCmp(builder, LLVMRealOLT, value,
876 bld_base->base.zero, "");
877 }
878
879 /* Or the conditions together */
880 for (i = TGSI_NUM_CHANNELS - 1; i > 0; i--) {
881 conds[i - 1] = LLVMBuildOr(builder, conds[i], conds[i - 1], "");
882 }
883
884 emit_data->dst_type = LLVMVoidTypeInContext(gallivm->context);
885 emit_data->arg_count = 1;
886 emit_data->args[0] = LLVMBuildSelect(builder, conds[0],
887 lp_build_const_float(gallivm, -1.0f),
888 bld_base->base.zero, "");
889 }
890
891 static void kil_emit(const struct lp_build_tgsi_action *action,
892 struct lp_build_tgsi_context *bld_base,
893 struct lp_build_emit_data *emit_data)
894 {
895 unsigned i;
896 for (i = 0; i < emit_data->arg_count; i++) {
897 emit_data->output[i] = lp_build_intrinsic_unary(
898 bld_base->base.gallivm->builder,
899 action->intr_name,
900 emit_data->dst_type, emit_data->args[i]);
901 }
902 }
903
904 static void radeon_llvm_cube_to_2d_coords(struct lp_build_tgsi_context *bld_base,
905 LLVMValueRef *in, LLVMValueRef *out)
906 {
907 struct gallivm_state *gallivm = bld_base->base.gallivm;
908 LLVMBuilderRef builder = gallivm->builder;
909 LLVMTypeRef type = bld_base->base.elem_type;
910 LLVMValueRef coords[4];
911 LLVMValueRef mad_args[3];
912 LLVMValueRef v, cube_vec;
913 unsigned i;
914
915 cube_vec = lp_build_gather_values(bld_base->base.gallivm, in, 4);
916 v = lp_build_intrinsic(builder, "llvm.AMDGPU.cube", LLVMVectorType(type, 4),
917 &cube_vec, 1, LLVMReadNoneAttribute);
918
919 for (i = 0; i < 4; ++i)
920 coords[i] = LLVMBuildExtractElement(builder, v,
921 lp_build_const_int32(gallivm, i), "");
922
923 coords[2] = lp_build_intrinsic(builder, "llvm.fabs.f32",
924 type, &coords[2], 1, LLVMReadNoneAttribute);
925 coords[2] = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_RCP, coords[2]);
926
927 mad_args[1] = coords[2];
928 mad_args[2] = LLVMConstReal(type, 1.5);
929
930 mad_args[0] = coords[0];
931 coords[0] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
932 mad_args[0], mad_args[1], mad_args[2]);
933
934 mad_args[0] = coords[1];
935 coords[1] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
936 mad_args[0], mad_args[1], mad_args[2]);
937
938 /* apply the xyz = yxw swizzle to the coords */
939 out[0] = coords[1];
940 out[1] = coords[0];
941 out[2] = coords[3];
942 }
943
944 void radeon_llvm_emit_prepare_cube_coords(struct lp_build_tgsi_context *bld_base,
945 struct lp_build_emit_data *emit_data,
946 LLVMValueRef *coords_arg,
947 LLVMValueRef *derivs_arg)
948 {
949
950 unsigned target = emit_data->inst->Texture.Texture;
951 unsigned opcode = emit_data->inst->Instruction.Opcode;
952 struct gallivm_state *gallivm = bld_base->base.gallivm;
953 LLVMBuilderRef builder = gallivm->builder;
954 LLVMValueRef coords[4];
955 unsigned i;
956
957 radeon_llvm_cube_to_2d_coords(bld_base, coords_arg, coords);
958
959 if (opcode == TGSI_OPCODE_TXD && derivs_arg) {
960 LLVMValueRef derivs[4];
961 int axis;
962
963 /* Convert cube derivatives to 2D derivatives. */
964 for (axis = 0; axis < 2; axis++) {
965 LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
966
967 /* Shift the cube coordinates by the derivatives to get
968 * the cube coordinates of the "neighboring pixel".
969 */
970 for (i = 0; i < 3; i++)
971 shifted_cube_coords[i] =
972 LLVMBuildFAdd(builder, coords_arg[i],
973 derivs_arg[axis*3+i], "");
974 shifted_cube_coords[3] = LLVMGetUndef(bld_base->base.elem_type);
975
976 /* Project the shifted cube coordinates onto the face. */
977 radeon_llvm_cube_to_2d_coords(bld_base, shifted_cube_coords,
978 shifted_coords);
979
980 /* Subtract both sets of 2D coordinates to get 2D derivatives.
981 * This won't work if the shifted coordinates ended up
982 * in a different face.
983 */
984 for (i = 0; i < 2; i++)
985 derivs[axis * 2 + i] =
986 LLVMBuildFSub(builder, shifted_coords[i],
987 coords[i], "");
988 }
989
990 memcpy(derivs_arg, derivs, sizeof(derivs));
991 }
992
993 if (target == TGSI_TEXTURE_CUBE_ARRAY ||
994 target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) {
995 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
996 /* coords_arg.w component - array_index for cube arrays */
997 coords[2] = lp_build_emit_llvm_ternary(bld_base, TGSI_OPCODE_MAD,
998 coords_arg[3], lp_build_const_float(gallivm, 8.0), coords[2]);
999 }
1000
1001 /* Preserve compare/lod/bias. Put it in coords.w. */
1002 if (opcode == TGSI_OPCODE_TEX2 ||
1003 opcode == TGSI_OPCODE_TXB2 ||
1004 opcode == TGSI_OPCODE_TXL2) {
1005 coords[3] = coords_arg[4];
1006 } else if (opcode == TGSI_OPCODE_TXB ||
1007 opcode == TGSI_OPCODE_TXL ||
1008 target == TGSI_TEXTURE_SHADOWCUBE) {
1009 coords[3] = coords_arg[3];
1010 }
1011
1012 memcpy(coords_arg, coords, sizeof(coords));
1013 }
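/* Example of the cube-array MAD above: with an array index of 2 in
 * coords_arg.w and face 3 coming out of the cube intrinsic, coord.z becomes
 * 2 * 8 + 3 = 19. */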
1014
1015 static void emit_icmp(const struct lp_build_tgsi_action *action,
1016 struct lp_build_tgsi_context *bld_base,
1017 struct lp_build_emit_data *emit_data)
1018 {
1019 unsigned pred;
1020 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1021 LLVMContextRef context = bld_base->base.gallivm->context;
1022
1023 switch (emit_data->inst->Instruction.Opcode) {
1024 case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
1025 case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
1026 case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
1027 case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
1028 case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
1029 case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
1030 default:
1031 assert(!"unknown instruction");
1032 pred = 0;
1033 break;
1034 }
1035
1036 LLVMValueRef v = LLVMBuildICmp(builder, pred,
1037 emit_data->args[0], emit_data->args[1],"");
1038
1039 v = LLVMBuildSExtOrBitCast(builder, v,
1040 LLVMInt32TypeInContext(context), "");
1041
1042 emit_data->output[emit_data->chan] = v;
1043 }
1044
1045 static void emit_ucmp(const struct lp_build_tgsi_action *action,
1046 struct lp_build_tgsi_context *bld_base,
1047 struct lp_build_emit_data *emit_data)
1048 {
1049 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1050
1051 LLVMValueRef arg0 = LLVMBuildBitCast(builder, emit_data->args[0],
1052 bld_base->uint_bld.elem_type, "");
1053
1054 LLVMValueRef v = LLVMBuildICmp(builder, LLVMIntNE, arg0,
1055 bld_base->uint_bld.zero, "");
1056
1057 emit_data->output[emit_data->chan] =
1058 LLVMBuildSelect(builder, v, emit_data->args[1], emit_data->args[2], "");
1059 }
1060
1061 static void emit_cmp(const struct lp_build_tgsi_action *action,
1062 struct lp_build_tgsi_context *bld_base,
1063 struct lp_build_emit_data *emit_data)
1064 {
1065 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1066 LLVMValueRef cond, *args = emit_data->args;
1067
1068 cond = LLVMBuildFCmp(builder, LLVMRealOLT, args[0],
1069 bld_base->base.zero, "");
1070
1071 emit_data->output[emit_data->chan] =
1072 LLVMBuildSelect(builder, cond, args[1], args[2], "");
1073 }
1074
1075 static void emit_set_cond(const struct lp_build_tgsi_action *action,
1076 struct lp_build_tgsi_context *bld_base,
1077 struct lp_build_emit_data *emit_data)
1078 {
1079 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1080 LLVMRealPredicate pred;
1081 LLVMValueRef cond;
1082
1083 /* Use ordered for everything but NE (which is usual for
1084 * float comparisons)
1085 */
1086 switch (emit_data->inst->Instruction.Opcode) {
1087 case TGSI_OPCODE_SGE: pred = LLVMRealOGE; break;
1088 case TGSI_OPCODE_SEQ: pred = LLVMRealOEQ; break;
1089 case TGSI_OPCODE_SLE: pred = LLVMRealOLE; break;
1090 case TGSI_OPCODE_SLT: pred = LLVMRealOLT; break;
1091 case TGSI_OPCODE_SNE: pred = LLVMRealUNE; break;
1092 case TGSI_OPCODE_SGT: pred = LLVMRealOGT; break;
1093 default: assert(!"unknown instruction"); pred = 0; break;
1094 }
1095
1096 cond = LLVMBuildFCmp(builder,
1097 pred, emit_data->args[0], emit_data->args[1], "");
1098
1099 emit_data->output[emit_data->chan] = LLVMBuildSelect(builder,
1100 cond, bld_base->base.one, bld_base->base.zero, "");
1101 }
1102
1103 static void emit_fcmp(const struct lp_build_tgsi_action *action,
1104 struct lp_build_tgsi_context *bld_base,
1105 struct lp_build_emit_data *emit_data)
1106 {
1107 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1108 LLVMContextRef context = bld_base->base.gallivm->context;
1109 LLVMRealPredicate pred;
1110
1111 /* Use ordered for everything but NE (which is usual for
1112 * float comparisons)
1113 */
1114 switch (emit_data->inst->Instruction.Opcode) {
1115 case TGSI_OPCODE_FSEQ: pred = LLVMRealOEQ; break;
1116 case TGSI_OPCODE_FSGE: pred = LLVMRealOGE; break;
1117 case TGSI_OPCODE_FSLT: pred = LLVMRealOLT; break;
1118 case TGSI_OPCODE_FSNE: pred = LLVMRealUNE; break;
1119 default: assert(!"unknown instruction"); pred = 0; break;
1120 }
1121
1122 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1123 emit_data->args[0], emit_data->args[1],"");
1124
1125 v = LLVMBuildSExtOrBitCast(builder, v,
1126 LLVMInt32TypeInContext(context), "");
1127
1128 emit_data->output[emit_data->chan] = v;
1129 }
1130
1131 static void emit_dcmp(const struct lp_build_tgsi_action *action,
1132 struct lp_build_tgsi_context *bld_base,
1133 struct lp_build_emit_data *emit_data)
1134 {
1135 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1136 LLVMContextRef context = bld_base->base.gallivm->context;
1137 LLVMRealPredicate pred;
1138
1139 /* Use ordered for everything but NE (which is usual for
1140 * float comparisons)
1141 */
1142 switch (emit_data->inst->Instruction.Opcode) {
1143 case TGSI_OPCODE_DSEQ: pred = LLVMRealOEQ; break;
1144 case TGSI_OPCODE_DSGE: pred = LLVMRealOGE; break;
1145 case TGSI_OPCODE_DSLT: pred = LLVMRealOLT; break;
1146 case TGSI_OPCODE_DSNE: pred = LLVMRealUNE; break;
1147 default: assert(!"unknown instruction"); pred = 0; break;
1148 }
1149
1150 LLVMValueRef v = LLVMBuildFCmp(builder, pred,
1151 emit_data->args[0], emit_data->args[1],"");
1152
1153 v = LLVMBuildSExtOrBitCast(builder, v,
1154 LLVMInt32TypeInContext(context), "");
1155
1156 emit_data->output[emit_data->chan] = v;
1157 }
1158
1159 static void emit_not(const struct lp_build_tgsi_action *action,
1160 struct lp_build_tgsi_context *bld_base,
1161 struct lp_build_emit_data *emit_data)
1162 {
1163 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1164 LLVMValueRef v = bitcast(bld_base, TGSI_TYPE_UNSIGNED,
1165 emit_data->args[0]);
1166 emit_data->output[emit_data->chan] = LLVMBuildNot(builder, v, "");
1167 }
1168
1169 static void emit_arl(const struct lp_build_tgsi_action *action,
1170 struct lp_build_tgsi_context *bld_base,
1171 struct lp_build_emit_data *emit_data)
1172 {
1173 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1174 LLVMValueRef floor_index = lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_FLR, emit_data->args[0]);
1175 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1176 floor_index, bld_base->base.int_elem_type , "");
1177 }
1178
1179 static void emit_and(const struct lp_build_tgsi_action *action,
1180 struct lp_build_tgsi_context *bld_base,
1181 struct lp_build_emit_data *emit_data)
1182 {
1183 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1184 emit_data->output[emit_data->chan] = LLVMBuildAnd(builder,
1185 emit_data->args[0], emit_data->args[1], "");
1186 }
1187
1188 static void emit_or(const struct lp_build_tgsi_action *action,
1189 struct lp_build_tgsi_context *bld_base,
1190 struct lp_build_emit_data *emit_data)
1191 {
1192 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1193 emit_data->output[emit_data->chan] = LLVMBuildOr(builder,
1194 emit_data->args[0], emit_data->args[1], "");
1195 }
1196
1197 static void emit_uadd(const struct lp_build_tgsi_action *action,
1198 struct lp_build_tgsi_context *bld_base,
1199 struct lp_build_emit_data *emit_data)
1200 {
1201 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1202 emit_data->output[emit_data->chan] = LLVMBuildAdd(builder,
1203 emit_data->args[0], emit_data->args[1], "");
1204 }
1205
1206 static void emit_udiv(const struct lp_build_tgsi_action *action,
1207 struct lp_build_tgsi_context *bld_base,
1208 struct lp_build_emit_data *emit_data)
1209 {
1210 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1211 emit_data->output[emit_data->chan] = LLVMBuildUDiv(builder,
1212 emit_data->args[0], emit_data->args[1], "");
1213 }
1214
1215 static void emit_idiv(const struct lp_build_tgsi_action *action,
1216 struct lp_build_tgsi_context *bld_base,
1217 struct lp_build_emit_data *emit_data)
1218 {
1219 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1220 emit_data->output[emit_data->chan] = LLVMBuildSDiv(builder,
1221 emit_data->args[0], emit_data->args[1], "");
1222 }
1223
1224 static void emit_mod(const struct lp_build_tgsi_action *action,
1225 struct lp_build_tgsi_context *bld_base,
1226 struct lp_build_emit_data *emit_data)
1227 {
1228 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1229 emit_data->output[emit_data->chan] = LLVMBuildSRem(builder,
1230 emit_data->args[0], emit_data->args[1], "");
1231 }
1232
1233 static void emit_umod(const struct lp_build_tgsi_action *action,
1234 struct lp_build_tgsi_context *bld_base,
1235 struct lp_build_emit_data *emit_data)
1236 {
1237 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1238 emit_data->output[emit_data->chan] = LLVMBuildURem(builder,
1239 emit_data->args[0], emit_data->args[1], "");
1240 }
1241
1242 static void emit_shl(const struct lp_build_tgsi_action *action,
1243 struct lp_build_tgsi_context *bld_base,
1244 struct lp_build_emit_data *emit_data)
1245 {
1246 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1247 emit_data->output[emit_data->chan] = LLVMBuildShl(builder,
1248 emit_data->args[0], emit_data->args[1], "");
1249 }
1250
1251 static void emit_ushr(const struct lp_build_tgsi_action *action,
1252 struct lp_build_tgsi_context *bld_base,
1253 struct lp_build_emit_data *emit_data)
1254 {
1255 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1256 emit_data->output[emit_data->chan] = LLVMBuildLShr(builder,
1257 emit_data->args[0], emit_data->args[1], "");
1258 }
1259 static void emit_ishr(const struct lp_build_tgsi_action *action,
1260 struct lp_build_tgsi_context *bld_base,
1261 struct lp_build_emit_data *emit_data)
1262 {
1263 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1264 emit_data->output[emit_data->chan] = LLVMBuildAShr(builder,
1265 emit_data->args[0], emit_data->args[1], "");
1266 }
1267
1268 static void emit_xor(const struct lp_build_tgsi_action *action,
1269 struct lp_build_tgsi_context *bld_base,
1270 struct lp_build_emit_data *emit_data)
1271 {
1272 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1273 emit_data->output[emit_data->chan] = LLVMBuildXor(builder,
1274 emit_data->args[0], emit_data->args[1], "");
1275 }
1276
1277 static void emit_ssg(const struct lp_build_tgsi_action *action,
1278 struct lp_build_tgsi_context *bld_base,
1279 struct lp_build_emit_data *emit_data)
1280 {
1281 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1282
1283 LLVMValueRef cmp, val;
1284
1285 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
1286 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
1287 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
1288 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
1289 val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int_bld.elem_type, -1, true), "");
1290 } else { // float SSG
1291 cmp = LLVMBuildFCmp(builder, LLVMRealOGT, emit_data->args[0], bld_base->base.zero, "");
1292 val = LLVMBuildSelect(builder, cmp, bld_base->base.one, emit_data->args[0], "");
1293 cmp = LLVMBuildFCmp(builder, LLVMRealOGE, val, bld_base->base.zero, "");
1294 val = LLVMBuildSelect(builder, cmp, val, LLVMConstReal(bld_base->base.elem_type, -1), "");
1295 }
1296
1297 emit_data->output[emit_data->chan] = val;
1298 }
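/* Worked example of the float path: SSG(2.5) -> select(2.5 > 0, 1.0, 2.5)
 * = 1.0, then select(1.0 >= 0, 1.0, -1.0) = 1.0. SSG(0.0) stays 0.0, and
 * SSG(-3.0) keeps -3.0 through the first select and becomes -1.0 in the
 * second. */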
1299
1300 static void emit_ineg(const struct lp_build_tgsi_action *action,
1301 struct lp_build_tgsi_context *bld_base,
1302 struct lp_build_emit_data *emit_data)
1303 {
1304 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1305 emit_data->output[emit_data->chan] = LLVMBuildNeg(builder,
1306 emit_data->args[0], "");
1307 }
1308
1309 static void emit_dneg(const struct lp_build_tgsi_action *action,
1310 struct lp_build_tgsi_context *bld_base,
1311 struct lp_build_emit_data *emit_data)
1312 {
1313 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1314 emit_data->output[emit_data->chan] = LLVMBuildFNeg(builder,
1315 emit_data->args[0], "");
1316 }
1317
1318 static void emit_frac(const struct lp_build_tgsi_action *action,
1319 struct lp_build_tgsi_context *bld_base,
1320 struct lp_build_emit_data *emit_data)
1321 {
1322 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1323 char *intr;
1324
1325 if (emit_data->info->opcode == TGSI_OPCODE_FRC)
1326 intr = "llvm.floor.f32";
1327 else if (emit_data->info->opcode == TGSI_OPCODE_DFRAC)
1328 intr = "llvm.floor.f64";
1329 else {
1330 assert(0);
1331 return;
1332 }
1333
1334 LLVMValueRef floor = lp_build_intrinsic(builder, intr, emit_data->dst_type,
1335 &emit_data->args[0], 1,
1336 LLVMReadNoneAttribute);
1337 emit_data->output[emit_data->chan] = LLVMBuildFSub(builder,
1338 emit_data->args[0], floor, "");
1339 }
1340
1341 static void emit_f2i(const struct lp_build_tgsi_action *action,
1342 struct lp_build_tgsi_context *bld_base,
1343 struct lp_build_emit_data *emit_data)
1344 {
1345 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1346 emit_data->output[emit_data->chan] = LLVMBuildFPToSI(builder,
1347 emit_data->args[0], bld_base->int_bld.elem_type, "");
1348 }
1349
1350 static void emit_f2u(const struct lp_build_tgsi_action *action,
1351 struct lp_build_tgsi_context *bld_base,
1352 struct lp_build_emit_data *emit_data)
1353 {
1354 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1355 emit_data->output[emit_data->chan] = LLVMBuildFPToUI(builder,
1356 emit_data->args[0], bld_base->uint_bld.elem_type, "");
1357 }
1358
1359 static void emit_i2f(const struct lp_build_tgsi_action *action,
1360 struct lp_build_tgsi_context *bld_base,
1361 struct lp_build_emit_data *emit_data)
1362 {
1363 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1364 emit_data->output[emit_data->chan] = LLVMBuildSIToFP(builder,
1365 emit_data->args[0], bld_base->base.elem_type, "");
1366 }
1367
1368 static void emit_u2f(const struct lp_build_tgsi_action *action,
1369 struct lp_build_tgsi_context *bld_base,
1370 struct lp_build_emit_data *emit_data)
1371 {
1372 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1373 emit_data->output[emit_data->chan] = LLVMBuildUIToFP(builder,
1374 emit_data->args[0], bld_base->base.elem_type, "");
1375 }
1376
1377 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1378 const struct tgsi_full_immediate *imm)
1379 {
1380 unsigned i;
1381 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
1382
1383 for (i = 0; i < 4; ++i) {
1384 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1385 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
1386 }
1387
1388 ctx->soa.num_immediates++;
1389 }
1390
1391 void
1392 build_tgsi_intrinsic_nomem(const struct lp_build_tgsi_action *action,
1393 struct lp_build_tgsi_context *bld_base,
1394 struct lp_build_emit_data *emit_data)
1395 {
1396 struct lp_build_context *base = &bld_base->base;
1397 emit_data->output[emit_data->chan] =
1398 lp_build_intrinsic(base->gallivm->builder, action->intr_name,
1399 emit_data->dst_type, emit_data->args,
1400 emit_data->arg_count, LLVMReadNoneAttribute);
1401 }
1402
1403 static void emit_bfi(const struct lp_build_tgsi_action *action,
1404 struct lp_build_tgsi_context *bld_base,
1405 struct lp_build_emit_data *emit_data)
1406 {
1407 struct gallivm_state *gallivm = bld_base->base.gallivm;
1408 LLVMBuilderRef builder = gallivm->builder;
1409 LLVMValueRef bfi_args[3];
1410
1411 /* Calculate the bitmask: ((1 << src3) - 1) << src2 */
1412 bfi_args[0] = LLVMBuildShl(builder,
1413 LLVMBuildSub(builder,
1414 LLVMBuildShl(builder,
1415 bld_base->int_bld.one,
1416 emit_data->args[3], ""),
1417 bld_base->int_bld.one, ""),
1418 emit_data->args[2], "");
1419
1420 bfi_args[1] = LLVMBuildShl(builder, emit_data->args[1],
1421 emit_data->args[2], "");
1422
1423 bfi_args[2] = emit_data->args[0];
1424
1425 /* Calculate:
1426 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
1427 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1428 */
1429 emit_data->output[emit_data->chan] =
1430 LLVMBuildXor(builder, bfi_args[2],
1431 LLVMBuildAnd(builder, bfi_args[0],
1432 LLVMBuildXor(builder, bfi_args[1], bfi_args[2],
1433 ""), ""), "");
1434 }
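/* Sanity check of the identity with 4-bit values: arg0 (mask) = 0b1100,
 * arg1 = 0b1010, arg2 = 0b0110. Left side: (0b1100 & 0b1010) |
 * (0b0011 & 0b0110) = 0b1000 | 0b0010 = 0b1010. Right side:
 * 0b0110 ^ (0b1100 & (0b1010 ^ 0b0110)) = 0b0110 ^ 0b1100 = 0b1010. */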
1435
1436 /* this is ffs in C */
1437 static void emit_lsb(const struct lp_build_tgsi_action *action,
1438 struct lp_build_tgsi_context *bld_base,
1439 struct lp_build_emit_data *emit_data)
1440 {
1441 struct gallivm_state *gallivm = bld_base->base.gallivm;
1442 LLVMValueRef args[2] = {
1443 emit_data->args[0],
1444
1445 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1446 * add special code to check for x=0. The reason is that
1447 * the LLVM behavior for x=0 is different from what we
1448 * need here.
1449 *
1450 * The hardware already implements the correct behavior.
1451 */
1452 lp_build_const_int32(gallivm, 1)
1453 };
1454
1455 emit_data->output[emit_data->chan] =
1456 lp_build_intrinsic(gallivm->builder, "llvm.cttz.i32",
1457 emit_data->dst_type, args, ARRAY_SIZE(args),
1458 LLVMReadNoneAttribute);
1459 }
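/* Example: LSB(0x8) = llvm.cttz.i32(0x8, 1) = 3. The second operand
 * (is_zero_undef = 1) only affects the x = 0 case, which the hardware
 * instruction already handles correctly as noted above. */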
1460
1461 /* Find the last bit set. */
1462 static void emit_umsb(const struct lp_build_tgsi_action *action,
1463 struct lp_build_tgsi_context *bld_base,
1464 struct lp_build_emit_data *emit_data)
1465 {
1466 struct gallivm_state *gallivm = bld_base->base.gallivm;
1467 LLVMBuilderRef builder = gallivm->builder;
1468 LLVMValueRef args[2] = {
1469 emit_data->args[0],
1470 /* Don't generate code for handling zero: */
1471 lp_build_const_int32(gallivm, 1)
1472 };
1473
1474 LLVMValueRef msb =
1475 lp_build_intrinsic(builder, "llvm.ctlz.i32",
1476 emit_data->dst_type, args, ARRAY_SIZE(args),
1477 LLVMReadNoneAttribute);
1478
1479 /* The HW returns the last bit index from MSB, but TGSI wants
1480 * the index from LSB. Invert it by doing "31 - msb". */
1481 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1482 msb, "");
1483
1484 /* Check for zero: */
1485 emit_data->output[emit_data->chan] =
1486 LLVMBuildSelect(builder,
1487 LLVMBuildICmp(builder, LLVMIntEQ, args[0],
1488 bld_base->uint_bld.zero, ""),
1489 lp_build_const_int32(gallivm, -1), msb, "");
1490 }
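/* Example: UMSB(0x00010000): llvm.ctlz.i32 returns 15 leading zeros, so the
 * result is 31 - 15 = 16, i.e. the set bit's index counted from the LSB.
 * UMSB(0) takes the select path and returns -1. */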
1491
1492 /* Find the last bit opposite of the sign bit. */
1493 static void emit_imsb(const struct lp_build_tgsi_action *action,
1494 struct lp_build_tgsi_context *bld_base,
1495 struct lp_build_emit_data *emit_data)
1496 {
1497 struct gallivm_state *gallivm = bld_base->base.gallivm;
1498 LLVMBuilderRef builder = gallivm->builder;
1499 LLVMValueRef arg = emit_data->args[0];
1500
1501 LLVMValueRef msb =
1502 lp_build_intrinsic(builder, "llvm.AMDGPU.flbit.i32",
1503 emit_data->dst_type, &arg, 1,
1504 LLVMReadNoneAttribute);
1505
1506 /* The HW returns the last bit index from MSB, but TGSI wants
1507 * the index from LSB. Invert it by doing "31 - msb". */
1508 msb = LLVMBuildSub(builder, lp_build_const_int32(gallivm, 31),
1509 msb, "");
1510
1511 /* If arg == 0 || arg == -1 (0xffffffff), return -1. */
1512 LLVMValueRef all_ones = lp_build_const_int32(gallivm, -1);
1513
1514 LLVMValueRef cond =
1515 LLVMBuildOr(builder,
1516 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1517 bld_base->uint_bld.zero, ""),
1518 LLVMBuildICmp(builder, LLVMIntEQ, arg,
1519 all_ones, ""), "");
1520
1521 emit_data->output[emit_data->chan] =
1522 LLVMBuildSelect(builder, cond, all_ones, msb, "");
1523 }
1524
1525 static void emit_iabs(const struct lp_build_tgsi_action *action,
1526 struct lp_build_tgsi_context *bld_base,
1527 struct lp_build_emit_data *emit_data)
1528 {
1529 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1530
1531 emit_data->output[emit_data->chan] =
1532 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_IMAX,
1533 emit_data->args[0],
1534 LLVMBuildNeg(builder,
1535 emit_data->args[0], ""));
1536 }
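/* Example: IABS(-5) is computed as IMAX(-5, -(-5)) = IMAX(-5, 5) = 5. */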
1537
1538 static void emit_minmax_int(const struct lp_build_tgsi_action *action,
1539 struct lp_build_tgsi_context *bld_base,
1540 struct lp_build_emit_data *emit_data)
1541 {
1542 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1543 LLVMIntPredicate op;
1544
1545 switch (emit_data->info->opcode) {
1546 default:
1547 assert(0);
1548 case TGSI_OPCODE_IMAX:
1549 op = LLVMIntSGT;
1550 break;
1551 case TGSI_OPCODE_IMIN:
1552 op = LLVMIntSLT;
1553 break;
1554 case TGSI_OPCODE_UMAX:
1555 op = LLVMIntUGT;
1556 break;
1557 case TGSI_OPCODE_UMIN:
1558 op = LLVMIntULT;
1559 break;
1560 }
1561
1562 emit_data->output[emit_data->chan] =
1563 LLVMBuildSelect(builder,
1564 LLVMBuildICmp(builder, op, emit_data->args[0],
1565 emit_data->args[1], ""),
1566 emit_data->args[0],
1567 emit_data->args[1], "");
1568 }
1569
1570 static void pk2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1571 struct lp_build_emit_data *emit_data)
1572 {
1573 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1574 0, TGSI_CHAN_X);
1575 emit_data->args[1] = lp_build_emit_fetch(bld_base, emit_data->inst,
1576 0, TGSI_CHAN_Y);
1577 }
1578
1579 static void emit_pk2h(const struct lp_build_tgsi_action *action,
1580 struct lp_build_tgsi_context *bld_base,
1581 struct lp_build_emit_data *emit_data)
1582 {
1583 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1584 LLVMContextRef context = bld_base->base.gallivm->context;
1585 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1586 LLVMTypeRef fp16, i16;
1587 LLVMValueRef const16, comp[2];
1588 unsigned i;
1589
1590 fp16 = LLVMHalfTypeInContext(context);
1591 i16 = LLVMInt16TypeInContext(context);
1592 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1593
1594 for (i = 0; i < 2; i++) {
1595 comp[i] = LLVMBuildFPTrunc(builder, emit_data->args[i], fp16, "");
1596 comp[i] = LLVMBuildBitCast(builder, comp[i], i16, "");
1597 comp[i] = LLVMBuildZExt(builder, comp[i], uint_bld->elem_type, "");
1598 }
1599
1600 comp[1] = LLVMBuildShl(builder, comp[1], const16, "");
1601 comp[0] = LLVMBuildOr(builder, comp[0], comp[1], "");
1602
1603 emit_data->output[emit_data->chan] = comp[0];
1604 }
1605
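/* UP2H: unpack two half floats from the X source channel into two f32 results. */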
1606 static void up2h_fetch_args(struct lp_build_tgsi_context *bld_base,
1607 struct lp_build_emit_data *emit_data)
1608 {
1609 emit_data->args[0] = lp_build_emit_fetch(bld_base, emit_data->inst,
1610 0, TGSI_CHAN_X);
1611 }
1612
1613 static void emit_up2h(const struct lp_build_tgsi_action *action,
1614 struct lp_build_tgsi_context *bld_base,
1615 struct lp_build_emit_data *emit_data)
1616 {
1617 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
1618 LLVMContextRef context = bld_base->base.gallivm->context;
1619 struct lp_build_context *uint_bld = &bld_base->uint_bld;
1620 LLVMTypeRef fp16, i16;
1621 LLVMValueRef const16, input, val;
1622 unsigned i;
1623
1624 fp16 = LLVMHalfTypeInContext(context);
1625 i16 = LLVMInt16TypeInContext(context);
1626 const16 = lp_build_const_int32(uint_bld->gallivm, 16);
1627 input = emit_data->args[0];
1628
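	/* Take the low and high 16 bits, reinterpret them as f16 and extend to f32. */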
1629 for (i = 0; i < 2; i++) {
1630 val = i == 1 ? LLVMBuildLShr(builder, input, const16, "") : input;
1631 val = LLVMBuildTrunc(builder, val, i16, "");
1632 val = LLVMBuildBitCast(builder, val, fp16, "");
1633 emit_data->output[i] =
1634 LLVMBuildFPExt(builder, val, bld_base->base.elem_type, "");
1635 }
1636 }
1637
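/* DIV: a plain fdiv, tagged (on LLVM 3.9+) with fpmath metadata that relaxes the
 * required precision. */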
1638 static void emit_fdiv(const struct lp_build_tgsi_action *action,
1639 struct lp_build_tgsi_context *bld_base,
1640 struct lp_build_emit_data *emit_data)
1641 {
1642 struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
1643
1644 emit_data->output[emit_data->chan] =
1645 LLVMBuildFDiv(bld_base->base.gallivm->builder,
1646 emit_data->args[0], emit_data->args[1], "");
1647
1648 	/* Allow the backend to use v_rcp_f32 instead of a precise division. */
1649 if (HAVE_LLVM >= 0x0309 &&
1650 !LLVMIsConstant(emit_data->output[emit_data->chan]))
1651 LLVMSetMetadata(emit_data->output[emit_data->chan],
1652 ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
1653 }
1654
1655 /* 1/sqrt is translated to rsq for f32 if fp32 denormals are not enabled in
1656 * the target machine. f64 needs global unsafe math flags to get rsq. */
1657 static void emit_rsq(const struct lp_build_tgsi_action *action,
1658 struct lp_build_tgsi_context *bld_base,
1659 struct lp_build_emit_data *emit_data)
1660 {
1661 LLVMValueRef sqrt =
1662 lp_build_emit_llvm_unary(bld_base, TGSI_OPCODE_SQRT,
1663 emit_data->args[0]);
1664
1665 emit_data->output[emit_data->chan] =
1666 lp_build_emit_llvm_binary(bld_base, TGSI_OPCODE_DIV,
1667 bld_base->base.one, sqrt);
1668 }
1669
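/* Create the LLVM context, module and builder, set up the TGSI build contexts,
 * and register the per-opcode emit callbacks. */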
1670 void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *triple)
1671 {
1672 struct lp_type type;
1673
1674 /* Initialize the gallivm object:
1675 * We are only using the module, context, and builder fields of this struct.
1676 	 * This should be enough to pass our gallivm struct to the helper
1677 	 * functions in the gallivm module.
1678 */
1679 	memset(&ctx->gallivm, 0, sizeof(ctx->gallivm));
1680 memset(&ctx->soa, 0, sizeof(ctx->soa));
1681 ctx->gallivm.context = LLVMContextCreate();
1682 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1683 ctx->gallivm.context);
1684 LLVMSetTarget(ctx->gallivm.module, triple);
1685 ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1686
1687 struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
1688
1689 type.floating = true;
1690 type.fixed = false;
1691 type.sign = true;
1692 type.norm = false;
1693 type.width = 32;
1694 type.length = 1;
1695
1696 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1697 lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1698 lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
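	/* Build context for doubles: the same scalar type at twice the width. */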
1699 {
1700 struct lp_type dbl_type;
1701 dbl_type = type;
1702 dbl_type.width *= 2;
1703 lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
1704 }
1705
1706 bld_base->soa = 1;
1707 bld_base->emit_store = radeon_llvm_emit_store;
1708 bld_base->emit_swizzle = emit_swizzle;
1709 bld_base->emit_declaration = emit_declaration;
1710 bld_base->emit_immediate = emit_immediate;
1711
1712 bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = radeon_llvm_emit_fetch;
1713 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = radeon_llvm_emit_fetch;
1714 bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = radeon_llvm_emit_fetch;
1715 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = radeon_llvm_emit_fetch;
1716 bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1717
1718 	/* fpmath metadata allowing up to 2.5 ULP of error */
1719 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1720 "fpmath", 6);
1721 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1722 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1723 &arg, 1);
1724
1725 	/* Point the SoA outputs at the context's output storage */
1726 ctx->soa.outputs = ctx->outputs;
1727
1728 lp_set_default_actions(bld_base);
1729
1730 bld_base->op_actions[TGSI_OPCODE_ABS].emit = build_tgsi_intrinsic_nomem;
1731 bld_base->op_actions[TGSI_OPCODE_ABS].intr_name = "llvm.fabs.f32";
1732 bld_base->op_actions[TGSI_OPCODE_AND].emit = emit_and;
1733 bld_base->op_actions[TGSI_OPCODE_ARL].emit = emit_arl;
1734 bld_base->op_actions[TGSI_OPCODE_BFI].emit = emit_bfi;
1735 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1736 bld_base->op_actions[TGSI_OPCODE_BREV].emit = build_tgsi_intrinsic_nomem;
1737 bld_base->op_actions[TGSI_OPCODE_BREV].intr_name =
1738 HAVE_LLVM >= 0x0308 ? "llvm.bitreverse.i32" : "llvm.AMDGPU.brev";
1739 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1740 bld_base->op_actions[TGSI_OPCODE_CEIL].emit = build_tgsi_intrinsic_nomem;
1741 bld_base->op_actions[TGSI_OPCODE_CEIL].intr_name = "llvm.ceil.f32";
1742 bld_base->op_actions[TGSI_OPCODE_CLAMP].emit = build_tgsi_intrinsic_nomem;
1743 bld_base->op_actions[TGSI_OPCODE_CLAMP].intr_name =
1744 HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." : "llvm.AMDIL.clamp.";
1745 bld_base->op_actions[TGSI_OPCODE_CMP].emit = emit_cmp;
1746 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1747 bld_base->op_actions[TGSI_OPCODE_COS].emit = build_tgsi_intrinsic_nomem;
1748 bld_base->op_actions[TGSI_OPCODE_COS].intr_name = "llvm.cos.f32";
1749 bld_base->op_actions[TGSI_OPCODE_DABS].emit = build_tgsi_intrinsic_nomem;
1750 bld_base->op_actions[TGSI_OPCODE_DABS].intr_name = "llvm.fabs.f64";
1751 bld_base->op_actions[TGSI_OPCODE_DFMA].emit = build_tgsi_intrinsic_nomem;
1752 bld_base->op_actions[TGSI_OPCODE_DFMA].intr_name = "llvm.fma.f64";
1753 bld_base->op_actions[TGSI_OPCODE_DFRAC].emit = emit_frac;
1754 bld_base->op_actions[TGSI_OPCODE_DIV].emit = emit_fdiv;
1755 bld_base->op_actions[TGSI_OPCODE_DNEG].emit = emit_dneg;
1756 bld_base->op_actions[TGSI_OPCODE_DSEQ].emit = emit_dcmp;
1757 bld_base->op_actions[TGSI_OPCODE_DSGE].emit = emit_dcmp;
1758 bld_base->op_actions[TGSI_OPCODE_DSLT].emit = emit_dcmp;
1759 bld_base->op_actions[TGSI_OPCODE_DSNE].emit = emit_dcmp;
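	/* DRSQ: use llvm.amdgcn.rsq.f64 when the LLVM version (3.9+) provides it. */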
1760 bld_base->op_actions[TGSI_OPCODE_DRSQ].emit = build_tgsi_intrinsic_nomem;
1761 bld_base->op_actions[TGSI_OPCODE_DRSQ].intr_name =
1762 HAVE_LLVM >= 0x0309 ? "llvm.amdgcn.rsq.f64" : "llvm.AMDGPU.rsq.f64";
1763 bld_base->op_actions[TGSI_OPCODE_DSQRT].emit = build_tgsi_intrinsic_nomem;
1764 bld_base->op_actions[TGSI_OPCODE_DSQRT].intr_name = "llvm.sqrt.f64";
1765 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1766 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1767 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1768 bld_base->op_actions[TGSI_OPCODE_EX2].emit = build_tgsi_intrinsic_nomem;
1769 bld_base->op_actions[TGSI_OPCODE_EX2].intr_name =
1770 HAVE_LLVM >= 0x0308 ? "llvm.exp2.f32" : "llvm.AMDIL.exp.";
1771 bld_base->op_actions[TGSI_OPCODE_FLR].emit = build_tgsi_intrinsic_nomem;
1772 bld_base->op_actions[TGSI_OPCODE_FLR].intr_name = "llvm.floor.f32";
1773 bld_base->op_actions[TGSI_OPCODE_FMA].emit =
1774 bld_base->op_actions[TGSI_OPCODE_MAD].emit;
1775 bld_base->op_actions[TGSI_OPCODE_FRC].emit = emit_frac;
1776 bld_base->op_actions[TGSI_OPCODE_F2I].emit = emit_f2i;
1777 bld_base->op_actions[TGSI_OPCODE_F2U].emit = emit_f2u;
1778 bld_base->op_actions[TGSI_OPCODE_FSEQ].emit = emit_fcmp;
1779 bld_base->op_actions[TGSI_OPCODE_FSGE].emit = emit_fcmp;
1780 bld_base->op_actions[TGSI_OPCODE_FSLT].emit = emit_fcmp;
1781 bld_base->op_actions[TGSI_OPCODE_FSNE].emit = emit_fcmp;
1782 bld_base->op_actions[TGSI_OPCODE_IABS].emit = emit_iabs;
1783 bld_base->op_actions[TGSI_OPCODE_IBFE].emit = build_tgsi_intrinsic_nomem;
1784 bld_base->op_actions[TGSI_OPCODE_IBFE].intr_name = "llvm.AMDGPU.bfe.i32";
1785 bld_base->op_actions[TGSI_OPCODE_IDIV].emit = emit_idiv;
1786 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1787 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1788 bld_base->op_actions[TGSI_OPCODE_IMAX].emit = emit_minmax_int;
1789 bld_base->op_actions[TGSI_OPCODE_IMIN].emit = emit_minmax_int;
1790 bld_base->op_actions[TGSI_OPCODE_IMSB].emit = emit_imsb;
1791 bld_base->op_actions[TGSI_OPCODE_INEG].emit = emit_ineg;
1792 bld_base->op_actions[TGSI_OPCODE_ISHR].emit = emit_ishr;
1793 bld_base->op_actions[TGSI_OPCODE_ISGE].emit = emit_icmp;
1794 bld_base->op_actions[TGSI_OPCODE_ISLT].emit = emit_icmp;
1795 bld_base->op_actions[TGSI_OPCODE_ISSG].emit = emit_ssg;
1796 bld_base->op_actions[TGSI_OPCODE_I2F].emit = emit_i2f;
1797 bld_base->op_actions[TGSI_OPCODE_KILL_IF].fetch_args = kill_if_fetch_args;
1798 bld_base->op_actions[TGSI_OPCODE_KILL_IF].emit = kil_emit;
1799 bld_base->op_actions[TGSI_OPCODE_KILL_IF].intr_name = "llvm.AMDGPU.kill";
1800 bld_base->op_actions[TGSI_OPCODE_KILL].emit = lp_build_tgsi_intrinsic;
1801 bld_base->op_actions[TGSI_OPCODE_KILL].intr_name = "llvm.AMDGPU.kilp";
1802 bld_base->op_actions[TGSI_OPCODE_LSB].emit = emit_lsb;
1803 bld_base->op_actions[TGSI_OPCODE_LG2].emit = build_tgsi_intrinsic_nomem;
1804 bld_base->op_actions[TGSI_OPCODE_LG2].intr_name = "llvm.log2.f32";
1805 bld_base->op_actions[TGSI_OPCODE_MOD].emit = emit_mod;
1806 bld_base->op_actions[TGSI_OPCODE_UMSB].emit = emit_umsb;
1807 bld_base->op_actions[TGSI_OPCODE_NOT].emit = emit_not;
1808 bld_base->op_actions[TGSI_OPCODE_OR].emit = emit_or;
1809 bld_base->op_actions[TGSI_OPCODE_PK2H].fetch_args = pk2h_fetch_args;
1810 bld_base->op_actions[TGSI_OPCODE_PK2H].emit = emit_pk2h;
1811 bld_base->op_actions[TGSI_OPCODE_POPC].emit = build_tgsi_intrinsic_nomem;
1812 bld_base->op_actions[TGSI_OPCODE_POPC].intr_name = "llvm.ctpop.i32";
1813 bld_base->op_actions[TGSI_OPCODE_POW].emit = build_tgsi_intrinsic_nomem;
1814 bld_base->op_actions[TGSI_OPCODE_POW].intr_name = "llvm.pow.f32";
1815 bld_base->op_actions[TGSI_OPCODE_ROUND].emit = build_tgsi_intrinsic_nomem;
1816 bld_base->op_actions[TGSI_OPCODE_ROUND].intr_name = "llvm.rint.f32";
1817 bld_base->op_actions[TGSI_OPCODE_RSQ].emit = emit_rsq;
1818 bld_base->op_actions[TGSI_OPCODE_SGE].emit = emit_set_cond;
1819 bld_base->op_actions[TGSI_OPCODE_SEQ].emit = emit_set_cond;
1820 bld_base->op_actions[TGSI_OPCODE_SHL].emit = emit_shl;
1821 bld_base->op_actions[TGSI_OPCODE_SLE].emit = emit_set_cond;
1822 bld_base->op_actions[TGSI_OPCODE_SLT].emit = emit_set_cond;
1823 bld_base->op_actions[TGSI_OPCODE_SNE].emit = emit_set_cond;
1824 bld_base->op_actions[TGSI_OPCODE_SGT].emit = emit_set_cond;
1825 bld_base->op_actions[TGSI_OPCODE_SIN].emit = build_tgsi_intrinsic_nomem;
1826 bld_base->op_actions[TGSI_OPCODE_SIN].intr_name = "llvm.sin.f32";
1827 bld_base->op_actions[TGSI_OPCODE_SQRT].emit = build_tgsi_intrinsic_nomem;
1828 bld_base->op_actions[TGSI_OPCODE_SQRT].intr_name = "llvm.sqrt.f32";
1829 bld_base->op_actions[TGSI_OPCODE_SSG].emit = emit_ssg;
1830 bld_base->op_actions[TGSI_OPCODE_TRUNC].emit = build_tgsi_intrinsic_nomem;
1831 bld_base->op_actions[TGSI_OPCODE_TRUNC].intr_name = "llvm.trunc.f32";
1832 bld_base->op_actions[TGSI_OPCODE_UADD].emit = emit_uadd;
1833 bld_base->op_actions[TGSI_OPCODE_UBFE].emit = build_tgsi_intrinsic_nomem;
1834 bld_base->op_actions[TGSI_OPCODE_UBFE].intr_name = "llvm.AMDGPU.bfe.u32";
1835 bld_base->op_actions[TGSI_OPCODE_UDIV].emit = emit_udiv;
1836 bld_base->op_actions[TGSI_OPCODE_UMAX].emit = emit_minmax_int;
1837 bld_base->op_actions[TGSI_OPCODE_UMIN].emit = emit_minmax_int;
1838 bld_base->op_actions[TGSI_OPCODE_UMOD].emit = emit_umod;
1839 bld_base->op_actions[TGSI_OPCODE_USEQ].emit = emit_icmp;
1840 bld_base->op_actions[TGSI_OPCODE_USGE].emit = emit_icmp;
1841 bld_base->op_actions[TGSI_OPCODE_USHR].emit = emit_ushr;
1842 bld_base->op_actions[TGSI_OPCODE_USLT].emit = emit_icmp;
1843 bld_base->op_actions[TGSI_OPCODE_USNE].emit = emit_icmp;
1844 bld_base->op_actions[TGSI_OPCODE_U2F].emit = emit_u2f;
1845 bld_base->op_actions[TGSI_OPCODE_XOR].emit = emit_xor;
1846 bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
1847 bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
1848 bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
1849 }
1850
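/* Create the shader's "main" function and position the builder at the start of
 * its body. */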
1851 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
1852 LLVMTypeRef *return_types, unsigned num_return_elems,
1853 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1854 {
1855 LLVMTypeRef main_fn_type, ret_type;
1856 LLVMBasicBlockRef main_fn_body;
1857
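	/* Return values are packed into a single struct; no return values means a
	 * void return type. */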
1858 if (num_return_elems)
1859 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1860 return_types,
1861 num_return_elems, true);
1862 else
1863 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1864
1865 	/* Set up the main function and its entry block */
1866 ctx->return_type = ret_type;
1867 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1868 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
1869 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1870 ctx->main_fn, "main_body");
1871 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1872 }
1873
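/* Run a short function-level optimization pipeline (mem2reg, SROA, LICM, DCE,
 * CFG simplification, instcombine) over the main function. */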
1874 void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
1875 {
1876 struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
1877 const char *triple = LLVMGetTarget(gallivm->module);
1878 LLVMTargetLibraryInfoRef target_library_info;
1879
1880 /* Create the pass manager */
1881 gallivm->passmgr = LLVMCreateFunctionPassManagerForModule(
1882 gallivm->module);
1883
1884 target_library_info = gallivm_create_target_library_info(triple);
1885 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1886
1887 /* This pass should eliminate all the load and store instructions */
1888 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1889
1890 /* Add some optimization passes */
1891 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1892 LLVMAddLICMPass(gallivm->passmgr);
1893 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1894 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1895 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1896
1897 	/* Run the passes over the main function */
1898 LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
1899
1900 LLVMDisposeBuilder(gallivm->builder);
1901 LLVMDisposePassManager(gallivm->passmgr);
1902 gallivm_dispose_target_library_info(target_library_info);
1903 }
1904
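/* Destroy the LLVM module and context and free the per-shader arrays owned by
 * the context. */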
1905 void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
1906 {
1907 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
1908 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
1909 FREE(ctx->arrays);
1910 ctx->arrays = NULL;
1911 FREE(ctx->temps);
1912 ctx->temps = NULL;
1913 ctx->temps_count = 0;
1914 FREE(ctx->loop);
1915 ctx->loop = NULL;
1916 ctx->loop_depth_max = 0;
1917 FREE(ctx->branch);
1918 ctx->branch = NULL;
1919 ctx->branch_depth_max = 0;
1920 }