gallium/radeon: fix argument type of llvm.{cttz,ctlz}.i32 intrinsics

[mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
diff --git a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c

index 8076443f081102b96777895ded3feed5f0bb563e..80e9707244380c0a934113edb15f69c6b4047ac3 100644 (file)
--- a/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
+++ b/src/gallium/drivers/radeon/radeon_setup_tgsi_llvm.c
@@ -42,6 +42,14 @@
  #include <llvm-c/Core.h>
  #include <llvm-c/Transforms/Scalar.h>
  
+/* Data for if/else/endif and bgnloop/endloop control flow structures.
+ */
+struct radeon_llvm_flow {
+       /* Loop exit or next part of if/else/endif. */
+       LLVMBasicBlockRef next_block;
+       LLVMBasicBlockRef loop_entry_block;
+};
+
  LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
                           enum tgsi_opcode_type type)
  {
@@ -51,6 +59,9 @@ LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
         case TGSI_TYPE_UNSIGNED:
         case TGSI_TYPE_SIGNED:
                 return LLVMInt32TypeInContext(ctx);
+       case TGSI_TYPE_UNSIGNED64:
+       case TGSI_TYPE_SIGNED64:
+               return LLVMInt64TypeInContext(ctx);
         case TGSI_TYPE_DOUBLE:
                 return LLVMDoubleTypeInContext(ctx);
         case TGSI_TYPE_UNTYPED:
@@ -102,15 +113,43 @@ LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
         return index;
  }
  
-static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_current_flow(struct radeon_llvm_context *ctx)
  {
-       return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
+       if (ctx->flow_depth > 0)
+               return &ctx->flow[ctx->flow_depth - 1];
+       return NULL;
  }
  
-static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_innermost_loop(struct radeon_llvm_context *ctx)
  {
-       return ctx->branch_depth > 0 ?
-                       ctx->branch + (ctx->branch_depth - 1) : NULL;
+       for (unsigned i = ctx->flow_depth; i > 0; --i) {
+               if (ctx->flow[i - 1].loop_entry_block)
+                       return &ctx->flow[i - 1];
+       }
+       return NULL;
+}
+
+static struct radeon_llvm_flow *
+push_flow(struct radeon_llvm_context *ctx)
+{
+       struct radeon_llvm_flow *flow;
+
+       if (ctx->flow_depth >= ctx->flow_depth_max) {
+               unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
+               ctx->flow = REALLOC(ctx->flow,
+                                   ctx->flow_depth_max * sizeof(*ctx->flow),
+                                   new_max * sizeof(*ctx->flow));
+               ctx->flow_depth_max = new_max;
+       }
+
+       flow = &ctx->flow[ctx->flow_depth];
+       ctx->flow_depth++;
+
+       flow->next_block = NULL;
+       flow->loop_entry_block = NULL;
+       return flow;
  }
  
  unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
@@ -446,14 +485,29 @@ LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                 }
         }
  
-       case TGSI_FILE_INPUT:
-               result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+       case TGSI_FILE_INPUT: {
+               unsigned index = reg->Register.Index;
+               LLVMValueRef input[4];
+
+               /* Only fragment shaders re-emit the input load at each fetch.
+                * For vertex shaders that is unlikely to help: we want their
+                * VMEM loads to be executed only once. Fragment shaders don't
+                * care much, because v_interp is much cheaper than VMEM loads.
+                */
+               if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
+                       ctx->load_input(ctx, index, &ctx->input_decls[index], input);
+               else
+                       memcpy(input, &ctx->inputs[index * 4], sizeof(input));
+
+               result = input[swizzle];
+
                 if (tgsi_type_is_64bit(type)) {
                         ptr = result;
-                       ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+                       ptr2 = input[swizzle + 1];
                         return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
                 }
                 break;
+       }
  
         case TGSI_FILE_TEMPORARY:
                 if (reg->Register.Index >= ctx->temps_count)
@@ -559,8 +613,10 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                          * FIXME: We shouldn't need to have the non-alloca
                          * code path for arrays. LLVM should be smart enough to
                          * promote allocas into registers when profitable.
+                        *
+                        * LLVM 3.8 crashes on the alloca path, so require LLVM >= 3.9.
                          */
-                       if (array_size > 16) {
+                       if (HAVE_LLVM >= 0x0309 && array_size > 16) {
                                 array_alloca = LLVMBuildAlloca(builder,
                                         LLVMArrayType(bld_base->base.vec_type,
                                                       array_size), "array");
@@ -624,8 +680,13 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
         {
                 unsigned idx;
                 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       if (ctx->load_input)
-                               ctx->load_input(ctx, idx, decl);
+                       if (ctx->load_input) {
+                               ctx->input_decls[idx] = *decl;
+
+                               if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
+                                       ctx->load_input(ctx, idx, decl,
+                                                       &ctx->inputs[idx * 4]);
+                       }
                 }
         }
         break;
@@ -771,35 +832,58 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
         }
  }
  
+static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
+{
+       char buf[32];
+       /* Subtract 1 so that the number shown is that of the corresponding
+        * opcode in the TGSI dump, e.g. an if block has the same suffix as
+        * the instruction number of the corresponding TGSI IF.
+        */
+       snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
+       LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
+}
+
+/* Add a basic block at the level of the parent flow: before the parent's
+ * next_block, or at the end of the function when there is no parent. */
+static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx,
+                                           const char *name)
+{
+       struct gallivm_state *gallivm = &ctx->gallivm;
+
+       assert(ctx->flow_depth >= 1);
+
+       if (ctx->flow_depth >= 2) {
+               struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
+
+               return LLVMInsertBasicBlockInContext(gallivm->context,
+                                                    flow->next_block, name);
+       }
+
+       return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
+}
+
+/* Emit a branch to the given default target for the current block if
+ * applicable -- that is, if the current block does not already contain a
+ * branch from a break or continue.
+ */
+static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
+{
+       if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
+                LLVMBuildBr(builder, target);
+}
+
  static void bgnloop_emit(const struct lp_build_tgsi_action *action,
                          struct lp_build_tgsi_context *bld_base,
                          struct lp_build_emit_data *emit_data)
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       LLVMBasicBlockRef loop_block;
-       LLVMBasicBlockRef endloop_block;
-       endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
-                                               ctx->main_fn, "ENDLOOP");
-       loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
-                                               endloop_block, "LOOP");
-       LLVMBuildBr(gallivm->builder, loop_block);
-       LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
-
-       if (++ctx->loop_depth > ctx->loop_depth_max) {
-               unsigned new_max = ctx->loop_depth_max << 1;
-
-               if (!new_max)
-                       new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
-               ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
-                                   sizeof(ctx->loop[0]),
-                                   new_max * sizeof(ctx->loop[0]));
-               ctx->loop_depth_max = new_max;
-       }
-
-       ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
-       ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
+       struct radeon_llvm_flow *flow = push_flow(ctx);
+       flow->loop_entry_block = append_basic_block(ctx, "LOOP");
+       flow->next_block = append_basic_block(ctx, "ENDLOOP");
+       set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
+       LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
+       LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
  }
  
  static void brk_emit(const struct lp_build_tgsi_action *action,
@@ -808,9 +892,9 @@ static void brk_emit(const struct lp_build_tgsi_action *action,
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+       struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
  
-       LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
+       LLVMBuildBr(gallivm->builder, flow->next_block);
  }
  
  static void cont_emit(const struct lp_build_tgsi_action *action,
@@ -819,9 +903,9 @@ static void cont_emit(const struct lp_build_tgsi_action *action,
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+       struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
  
-       LLVMBuildBr(gallivm->builder, current_loop->loop_block);
+       LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
  }
  
  static void else_emit(const struct lp_build_tgsi_action *action,
@@ -830,31 +914,18 @@ static void else_emit(const struct lp_build_tgsi_action *action,
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
-       LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
-
-       /* We need to add a terminator to the current block if the previous
-        * instruction was an ENDIF.Example:
-        * IF
-        *   [code]
-        *   IF
-        *     [code]
-        *   ELSE
-        *    [code]
-        *   ENDIF <--
-        * ELSE<--
-        *   [code]
-        * ENDIF
-        */
+       struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
+       LLVMBasicBlockRef endif_block;
  
-       if (current_block != current_branch->if_block) {
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-       if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-       current_branch->has_else = 1;
-       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
+       assert(!current_branch->loop_entry_block);
+
+       endif_block = append_basic_block(ctx, "ENDIF");
+       emit_default_branch(gallivm->builder, endif_block);
+
+       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+       set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
+
+       current_branch->next_block = endif_block;
  }
  
  static void endif_emit(const struct lp_build_tgsi_action *action,
@@ -863,29 +934,15 @@ static void endif_emit(const struct lp_build_tgsi_action *action,
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
-       LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
+       struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
  
-       /* If we have consecutive ENDIF instructions, then the first ENDIF
-        * will not have a terminator, so we need to add one. */
-       if (current_block != current_branch->if_block
-                       && current_block != current_branch->else_block
-                       && !LLVMGetBasicBlockTerminator(current_block)) {
+       assert(!current_branch->loop_entry_block);
  
-                LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-       if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
-               LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-
-       if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
-               LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
+       emit_default_branch(gallivm->builder, current_branch->next_block);
+       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+       set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
  
-       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
-       ctx->branch_depth--;
+       ctx->flow_depth--;
  }
  
  static void endloop_emit(const struct lp_build_tgsi_action *action,
@@ -894,14 +951,15 @@ static void endloop_emit(const struct lp_build_tgsi_action *action,
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+       struct radeon_llvm_flow *current_loop = get_current_flow(ctx);
  
-       if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
-                LLVMBuildBr(gallivm->builder, current_loop->loop_block);
-       }
+       assert(current_loop->loop_entry_block);
+
+       emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
  
-       LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
-       ctx->loop_depth--;
+       LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+       set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
+       ctx->flow_depth--;
  }
  
  static void if_cond_emit(const struct lp_build_tgsi_action *action,
@@ -911,33 +969,14 @@ static void if_cond_emit(const struct lp_build_tgsi_action *action,
  {
         struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
         struct gallivm_state *gallivm = bld_base->base.gallivm;
-       LLVMBasicBlockRef if_block, else_block, endif_block;
-
-       endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
-                                               ctx->main_fn, "ENDIF");
-       if_block = LLVMInsertBasicBlockInContext(gallivm->context,
-                                               endif_block, "IF");
-       else_block = LLVMInsertBasicBlockInContext(gallivm->context,
-                                               endif_block, "ELSE");
-       LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
-       LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
-
-       if (++ctx->branch_depth > ctx->branch_depth_max) {
-               unsigned new_max = ctx->branch_depth_max << 1;
+       struct radeon_llvm_flow *flow = push_flow(ctx);
+       LLVMBasicBlockRef if_block;
  
-               if (!new_max)
-                       new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
-               ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
-                                     sizeof(ctx->branch[0]),
-                                     new_max * sizeof(ctx->branch[0]));
-               ctx->branch_depth_max = new_max;
-       }
-
-       ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
-       ctx->branch[ctx->branch_depth - 1].if_block = if_block;
-       ctx->branch[ctx->branch_depth - 1].else_block = else_block;
-       ctx->branch[ctx->branch_depth - 1].has_else = 0;
+       if_block = append_basic_block(ctx, "IF");
+       flow->next_block = append_basic_block(ctx, "ELSE");
+       set_basicblock_name(if_block, "if", bld_base->pc);
+       LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
+       LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
  }
  
  static void if_emit(const struct lp_build_tgsi_action *action,
@@ -1158,12 +1197,18 @@ static void emit_icmp(const struct lp_build_tgsi_action *action,
         LLVMContextRef context = bld_base->base.gallivm->context;
  
         switch (emit_data->inst->Instruction.Opcode) {
-       case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
-       case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
-       case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
-       case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
-       case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
-       case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
+       case TGSI_OPCODE_USEQ:
+       case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
+       case TGSI_OPCODE_USNE:
+       case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
+       case TGSI_OPCODE_USGE:
+       case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
+       case TGSI_OPCODE_USLT:
+       case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
+       case TGSI_OPCODE_ISGE:
+       case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
+       case TGSI_OPCODE_ISLT:
+       case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
         default:
                 assert(!"unknown instruction");
                 pred = 0;
@@ -1419,7 +1464,12 @@ static void emit_ssg(const struct lp_build_tgsi_action *action,
  
         LLVMValueRef cmp, val;
  
-       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
+       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
+               cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
+               val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
+               cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
+               val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
+       } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
                 cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
                 val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
                 cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
@@ -1586,7 +1636,7 @@ static void emit_lsb(const struct lp_build_tgsi_action *action,
                  *
                  * The hardware already implements the correct behavior.
                  */
-               lp_build_const_int32(gallivm, 1)
+               LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
         };
  
         emit_data->output[emit_data->chan] =
@@ -1605,7 +1655,7 @@ static void emit_umsb(const struct lp_build_tgsi_action *action,
         LLVMValueRef args[2] = {
                 emit_data->args[0],
                 /* Don't generate code for handling zero: */
-               lp_build_const_int32(gallivm, 1)
+               LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
         };
  
         LLVMValueRef msb =
@@ -1683,15 +1733,19 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action,
         default:
                 assert(0);
         case TGSI_OPCODE_IMAX:
+       case TGSI_OPCODE_I64MAX:
                 op = LLVMIntSGT;
                 break;
         case TGSI_OPCODE_IMIN:
+       case TGSI_OPCODE_I64MIN:
                 op = LLVMIntSLT;
                 break;
         case TGSI_OPCODE_UMAX:
+       case TGSI_OPCODE_U64MAX:
                 op = LLVMIntUGT;
                 break;
         case TGSI_OPCODE_UMIN:
+       case TGSI_OPCODE_U64MIN:
                 op = LLVMIntULT;
                 break;
         }
@@ -1854,6 +1908,18 @@ void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *tripl
                 dbl_type.width *= 2;
                 lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
         }
+       {
+               struct lp_type dtype;
+               dtype = lp_uint_type(type);
+               dtype.width *= 2;
+               lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, dtype);
+       }
+       {
+               struct lp_type dtype;
+               dtype = lp_int_type(type);
+               dtype.width *= 2;
+               lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, dtype);
+       }
  
         bld_base->soa = 1;
         bld_base->emit_store = radeon_llvm_emit_store;
@@ -1998,6 +2064,31 @@ void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *tripl
         bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
         bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
         bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
+
+       bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
+       bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
+       bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
+
+       bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
+
+       bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
+       bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
+       bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
+       bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
+
+       bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
+       bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
+       bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
+       bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
  }
  
  void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
@@ -2047,7 +2138,9 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
         LLVMAddInstructionCombiningPass(gallivm->passmgr);
  
         /* Run the pass */
+       LLVMInitializeFunctionPassManager(gallivm->passmgr);
         LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
+       LLVMFinalizeFunctionPassManager(gallivm->passmgr);
  
         LLVMDisposeBuilder(gallivm->builder);
         LLVMDisposePassManager(gallivm->passmgr);
@@ -2065,10 +2158,7 @@ void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
         FREE(ctx->temps);
         ctx->temps = NULL;
         ctx->temps_count = 0;
-       FREE(ctx->loop);
-       ctx->loop = NULL;
-       ctx->loop_depth_max = 0;
-       FREE(ctx->branch);
-       ctx->branch = NULL;
-       ctx->branch_depth_max = 0;
+       FREE(ctx->flow);
+       ctx->flow = NULL;
+       ctx->flow_depth_max = 0;
  }