gallium/radeon: fix argument type of llvm.{cttz,ctlz}.i32 intrinsics
[mesa.git] / src / gallium / drivers / radeon / radeon_setup_tgsi_llvm.c
index 8076443f081102b96777895ded3feed5f0bb563e..80e9707244380c0a934113edb15f69c6b4047ac3 100644 (file)
 #include <llvm-c/Core.h>
 #include <llvm-c/Transforms/Scalar.h>
 
+/* Data for if/else/endif and bgnloop/endloop control flow structures.
+ * Entries form a stack (ctx->flow); loop_entry_block is set only for loops. */
+struct radeon_llvm_flow {
+       /* Loop exit (ENDLOOP block), or the next part of an if/else/endif. */
+       LLVMBasicBlockRef next_block;
+       LLVMBasicBlockRef loop_entry_block; /* Target of CONT; NULL for if/else. */
+};
+
 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
                          enum tgsi_opcode_type type)
 {
@@ -51,6 +59,9 @@ LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
        case TGSI_TYPE_UNSIGNED:
        case TGSI_TYPE_SIGNED:
                return LLVMInt32TypeInContext(ctx);
+       case TGSI_TYPE_UNSIGNED64:
+       case TGSI_TYPE_SIGNED64:
+               return LLVMInt64TypeInContext(ctx);
        case TGSI_TYPE_DOUBLE:
                return LLVMDoubleTypeInContext(ctx);
        case TGSI_TYPE_UNTYPED:
@@ -102,15 +113,43 @@ LLVMValueRef radeon_llvm_bound_index(struct radeon_llvm_context *ctx,
        return index;
 }
 
-static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_current_flow(struct radeon_llvm_context *ctx)
 {
-       return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
+       if (ctx->flow_depth > 0)
+               return &ctx->flow[ctx->flow_depth - 1];
+       return NULL;
 }
 
-static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_innermost_loop(struct radeon_llvm_context *ctx)
 {
-       return ctx->branch_depth > 0 ?
-                       ctx->branch + (ctx->branch_depth - 1) : NULL;
+       for (unsigned i = ctx->flow_depth; i > 0; --i) {
+               if (ctx->flow[i - 1].loop_entry_block)
+                       return &ctx->flow[i - 1];
+       }
+       return NULL;
+}
+
+static struct radeon_llvm_flow *
+push_flow(struct radeon_llvm_context *ctx)
+{
+       struct radeon_llvm_flow *flow;
+
+       if (ctx->flow_depth >= ctx->flow_depth_max) { /* stack is full: grow it */
+               unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
+               ctx->flow = REALLOC(ctx->flow, /* NOTE(review): result is used unchecked */
+                                   ctx->flow_depth_max * sizeof(*ctx->flow),
+                                   new_max * sizeof(*ctx->flow));
+               ctx->flow_depth_max = new_max;
+       }
+
+       flow = &ctx->flow[ctx->flow_depth]; /* the new top-of-stack entry */
+       ctx->flow_depth++;
+
+       flow->next_block = NULL; /* both blocks start unset; callers fill them in */
+       flow->loop_entry_block = NULL;
+       return flow;
 }
 
 unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
@@ -446,14 +485,29 @@ LLVMValueRef radeon_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
                }
        }
 
-       case TGSI_FILE_INPUT:
-               result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+       case TGSI_FILE_INPUT: {
+               unsigned index = reg->Register.Index;
+               LLVMValueRef input[4];
+
+               /* Only fragment shaders reload the input at each fetch. For
+                * vertex shaders, we want to make sure the VMEM loads are
+                * executed only once. Fragment shaders don't care much, because
+                * v_interp instructions are much cheaper than VMEM loads.
+                */
+               if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
+                       ctx->load_input(ctx, index, &ctx->input_decls[index], input);
+               else
+                       memcpy(input, &ctx->inputs[index * 4], sizeof(input));
+
+               result = input[swizzle];
+
                if (tgsi_type_is_64bit(type)) {
                        ptr = result;
-                       ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+                       ptr2 = input[swizzle + 1];
                        return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
                }
                break;
+       }
 
        case TGSI_FILE_TEMPORARY:
                if (reg->Register.Index >= ctx->temps_count)
@@ -559,8 +613,10 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
                         * FIXME: We shouldn't need to have the non-alloca
                         * code path for arrays. LLVM should be smart enough to
                         * promote allocas into registers when profitable.
+                        *
+                        * LLVM 3.8 crashes with the alloca path, so it needs HAVE_LLVM >= 0x0309.
                         */
-                       if (array_size > 16) {
+                       if (HAVE_LLVM >= 0x0309 && array_size > 16) {
                                array_alloca = LLVMBuildAlloca(builder,
                                        LLVMArrayType(bld_base->base.vec_type,
                                                      array_size), "array");
@@ -624,8 +680,13 @@ static void emit_declaration(struct lp_build_tgsi_context *bld_base,
        {
                unsigned idx;
                for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
-                       if (ctx->load_input)
-                               ctx->load_input(ctx, idx, decl);
+                       if (ctx->load_input) {
+                               ctx->input_decls[idx] = *decl;
+
+                               if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
+                                       ctx->load_input(ctx, idx, decl,
+                                                       &ctx->inputs[idx * 4]);
+                       }
                }
        }
        break;
@@ -771,35 +832,58 @@ void radeon_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
        }
 }
 
+static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
+{
+       char buf[32];
+       /* Name bb "<base><pc - 1>" (truncated to fit buf). 1 is subtracted so
+        * the number matches the corresponding opcode in the TGSI dump, e.g.
+        * an if block gets the instruction number of the corresponding TGSI IF.
+        */
+       snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
+       LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
+}
+
+/* Append a basic block at the level of the parent flow: before the parent's
+ * next_block, or at the end of main_fn when there is no parent flow. */
+static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx,
+                                           const char *name)
+{
+       struct gallivm_state *gallivm = &ctx->gallivm;
+
+       assert(ctx->flow_depth >= 1);
+
+       if (ctx->flow_depth >= 2) {
+               struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
+
+               return LLVMInsertBasicBlockInContext(gallivm->context,
+                                                    flow->next_block, name);
+       }
+
+       return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
+}
+
+/* Emit a branch to the given default target for the current block (the
+ * builder's insert block) if applicable -- that is, if the block does not
+ * already end in a terminator, e.g. a branch from a break or continue.
+ */
+static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
+{
+       if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
+                LLVMBuildBr(builder, target);
+}
+
 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
                         struct lp_build_tgsi_context *bld_base,
                         struct lp_build_emit_data *emit_data)
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       LLVMBasicBlockRef loop_block;
-       LLVMBasicBlockRef endloop_block;
-       endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
-                                               ctx->main_fn, "ENDLOOP");
-       loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
-                                               endloop_block, "LOOP");
-       LLVMBuildBr(gallivm->builder, loop_block);
-       LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
-
-       if (++ctx->loop_depth > ctx->loop_depth_max) {
-               unsigned new_max = ctx->loop_depth_max << 1;
-
-               if (!new_max)
-                       new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
-               ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
-                                   sizeof(ctx->loop[0]),
-                                   new_max * sizeof(ctx->loop[0]));
-               ctx->loop_depth_max = new_max;
-       }
-
-       ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
-       ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
+       struct radeon_llvm_flow *flow = push_flow(ctx);
+       flow->loop_entry_block = append_basic_block(ctx, "LOOP");
+       flow->next_block = append_basic_block(ctx, "ENDLOOP");
+       set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
+       LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
+       LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
 }
 
 static void brk_emit(const struct lp_build_tgsi_action *action,
@@ -808,9 +892,9 @@ static void brk_emit(const struct lp_build_tgsi_action *action,
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+       struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
 
-       LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
+       LLVMBuildBr(gallivm->builder, flow->next_block);
 }
 
 static void cont_emit(const struct lp_build_tgsi_action *action,
@@ -819,9 +903,9 @@ static void cont_emit(const struct lp_build_tgsi_action *action,
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+       struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
 
-       LLVMBuildBr(gallivm->builder, current_loop->loop_block);
+       LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
 }
 
 static void else_emit(const struct lp_build_tgsi_action *action,
@@ -830,31 +914,18 @@ static void else_emit(const struct lp_build_tgsi_action *action,
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
-       LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
-
-       /* We need to add a terminator to the current block if the previous
-        * instruction was an ENDIF.Example:
-        * IF
-        *   [code]
-        *   IF
-        *     [code]
-        *   ELSE
-        *    [code]
-        *   ENDIF <--
-        * ELSE<--
-        *   [code]
-        * ENDIF
-        */
+       struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
+       LLVMBasicBlockRef endif_block;
 
-       if (current_block != current_branch->if_block) {
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-       if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-       current_branch->has_else = 1;
-       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
+       assert(!current_branch->loop_entry_block);
+
+       endif_block = append_basic_block(ctx, "ENDIF");
+       emit_default_branch(gallivm->builder, endif_block);
+
+       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+       set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
+
+       current_branch->next_block = endif_block;
 }
 
 static void endif_emit(const struct lp_build_tgsi_action *action,
@@ -863,29 +934,15 @@ static void endif_emit(const struct lp_build_tgsi_action *action,
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
-       LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
+       struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
 
-       /* If we have consecutive ENDIF instructions, then the first ENDIF
-        * will not have a terminator, so we need to add one. */
-       if (current_block != current_branch->if_block
-                       && current_block != current_branch->else_block
-                       && !LLVMGetBasicBlockTerminator(current_block)) {
+       assert(!current_branch->loop_entry_block);
 
-                LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-       if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
-               LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
-
-       if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
-               LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
-               LLVMBuildBr(gallivm->builder, current_branch->endif_block);
-       }
+       emit_default_branch(gallivm->builder, current_branch->next_block);
+       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+       set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
 
-       LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
-       ctx->branch_depth--;
+       ctx->flow_depth--;
 }
 
 static void endloop_emit(const struct lp_build_tgsi_action *action,
@@ -894,14 +951,15 @@ static void endloop_emit(const struct lp_build_tgsi_action *action,
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+       struct radeon_llvm_flow *current_loop = get_current_flow(ctx);
 
-       if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
-                LLVMBuildBr(gallivm->builder, current_loop->loop_block);
-       }
+       assert(current_loop->loop_entry_block);
+
+       emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
 
-       LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
-       ctx->loop_depth--;
+       LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+       set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
+       ctx->flow_depth--;
 }
 
 static void if_cond_emit(const struct lp_build_tgsi_action *action,
@@ -911,33 +969,14 @@ static void if_cond_emit(const struct lp_build_tgsi_action *action,
 {
        struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
        struct gallivm_state *gallivm = bld_base->base.gallivm;
-       LLVMBasicBlockRef if_block, else_block, endif_block;
-
-       endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
-                                               ctx->main_fn, "ENDIF");
-       if_block = LLVMInsertBasicBlockInContext(gallivm->context,
-                                               endif_block, "IF");
-       else_block = LLVMInsertBasicBlockInContext(gallivm->context,
-                                               endif_block, "ELSE");
-       LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
-       LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
-
-       if (++ctx->branch_depth > ctx->branch_depth_max) {
-               unsigned new_max = ctx->branch_depth_max << 1;
+       struct radeon_llvm_flow *flow = push_flow(ctx);
+       LLVMBasicBlockRef if_block;
 
-               if (!new_max)
-                       new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
-               ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
-                                     sizeof(ctx->branch[0]),
-                                     new_max * sizeof(ctx->branch[0]));
-               ctx->branch_depth_max = new_max;
-       }
-
-       ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
-       ctx->branch[ctx->branch_depth - 1].if_block = if_block;
-       ctx->branch[ctx->branch_depth - 1].else_block = else_block;
-       ctx->branch[ctx->branch_depth - 1].has_else = 0;
+       if_block = append_basic_block(ctx, "IF");
+       flow->next_block = append_basic_block(ctx, "ELSE");
+       set_basicblock_name(if_block, "if", bld_base->pc);
+       LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
+       LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
 }
 
 static void if_emit(const struct lp_build_tgsi_action *action,
@@ -1158,12 +1197,18 @@ static void emit_icmp(const struct lp_build_tgsi_action *action,
        LLVMContextRef context = bld_base->base.gallivm->context;
 
        switch (emit_data->inst->Instruction.Opcode) {
-       case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
-       case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
-       case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
-       case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
-       case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
-       case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
+       case TGSI_OPCODE_USEQ:
+       case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
+       case TGSI_OPCODE_USNE:
+       case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
+       case TGSI_OPCODE_USGE:
+       case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
+       case TGSI_OPCODE_USLT:
+       case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
+       case TGSI_OPCODE_ISGE:
+       case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
+       case TGSI_OPCODE_ISLT:
+       case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
        default:
                assert(!"unknown instruction");
                pred = 0;
@@ -1419,7 +1464,12 @@ static void emit_ssg(const struct lp_build_tgsi_action *action,
 
        LLVMValueRef cmp, val;
 
-       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
+       if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
+               cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
+               val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
+               cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
+               val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
+       } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
                cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
                val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
                cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
@@ -1586,7 +1636,7 @@ static void emit_lsb(const struct lp_build_tgsi_action *action,
                 *
                 * The hardware already implements the correct behavior.
                 */
-               lp_build_const_int32(gallivm, 1)
+               LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
        };
 
        emit_data->output[emit_data->chan] =
@@ -1605,7 +1655,7 @@ static void emit_umsb(const struct lp_build_tgsi_action *action,
        LLVMValueRef args[2] = {
                emit_data->args[0],
                /* Don't generate code for handling zero: */
-               lp_build_const_int32(gallivm, 1)
+               LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
        };
 
        LLVMValueRef msb =
@@ -1683,15 +1733,19 @@ static void emit_minmax_int(const struct lp_build_tgsi_action *action,
        default:
                assert(0);
        case TGSI_OPCODE_IMAX:
+       case TGSI_OPCODE_I64MAX:
                op = LLVMIntSGT;
                break;
        case TGSI_OPCODE_IMIN:
+       case TGSI_OPCODE_I64MIN:
                op = LLVMIntSLT;
                break;
        case TGSI_OPCODE_UMAX:
+       case TGSI_OPCODE_U64MAX:
                op = LLVMIntUGT;
                break;
        case TGSI_OPCODE_UMIN:
+       case TGSI_OPCODE_U64MIN:
                op = LLVMIntULT;
                break;
        }
@@ -1854,6 +1908,18 @@ void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *tripl
                dbl_type.width *= 2;
                lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
        }
+       {
+               struct lp_type dtype;
+               dtype = lp_uint_type(type);
+               dtype.width *= 2;
+               lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, dtype);
+       }
+       {
+               struct lp_type dtype;
+               dtype = lp_int_type(type);
+               dtype.width *= 2;
+               lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, dtype);
+       }
 
        bld_base->soa = 1;
        bld_base->emit_store = radeon_llvm_emit_store;
@@ -1998,6 +2064,31 @@ void radeon_llvm_context_init(struct radeon_llvm_context *ctx, const char *tripl
        bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
        bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
        bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
+
+       bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
+       bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
+       bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
+       bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
+
+       bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
+       bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
+
+       bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
+       bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
+       bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
+       bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
+
+       bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
+       bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
+       bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
+       bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
 }
 
 void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
@@ -2047,7 +2138,9 @@ void radeon_llvm_finalize_module(struct radeon_llvm_context *ctx)
        LLVMAddInstructionCombiningPass(gallivm->passmgr);
 
        /* Run the pass */
+       LLVMInitializeFunctionPassManager(gallivm->passmgr);
        LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
+       LLVMFinalizeFunctionPassManager(gallivm->passmgr);
 
        LLVMDisposeBuilder(gallivm->builder);
        LLVMDisposePassManager(gallivm->passmgr);
@@ -2065,10 +2158,7 @@ void radeon_llvm_dispose(struct radeon_llvm_context *ctx)
        FREE(ctx->temps);
        ctx->temps = NULL;
        ctx->temps_count = 0;
-       FREE(ctx->loop);
-       ctx->loop = NULL;
-       ctx->loop_depth_max = 0;
-       FREE(ctx->branch);
-       ctx->branch = NULL;
-       ctx->branch_depth_max = 0;
+       FREE(ctx->flow);
+       ctx->flow = NULL;
+       ctx->flow_depth_max = 0;
 }