#include <llvm-c/Core.h>
#include <llvm-c/Transforms/Scalar.h>
/* Data for if/else/endif and bgnloop/endloop control flow structures.
 *
 * One stack entry describes either a loop or an if/else construct:
 * loop entries have loop_entry_block set, if/else entries leave it NULL
 * (this is how get_innermost_loop distinguishes the two).
 */
struct radeon_llvm_flow {
	/* Loop exit or next part of if/else/endif. */
	LLVMBasicBlockRef next_block;
	/* Loop header to branch back to on "continue"; NULL for if/else. */
	LLVMBasicBlockRef loop_entry_block;
};
+
LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
enum tgsi_opcode_type type)
{
case TGSI_TYPE_UNSIGNED:
case TGSI_TYPE_SIGNED:
return LLVMInt32TypeInContext(ctx);
+ case TGSI_TYPE_UNSIGNED64:
+ case TGSI_TYPE_SIGNED64:
+ return LLVMInt64TypeInContext(ctx);
case TGSI_TYPE_DOUBLE:
return LLVMDoubleTypeInContext(ctx);
case TGSI_TYPE_UNTYPED:
return index;
}
-static struct radeon_llvm_loop *get_current_loop(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_current_flow(struct radeon_llvm_context *ctx)
{
- return ctx->loop_depth > 0 ? ctx->loop + (ctx->loop_depth - 1) : NULL;
+ if (ctx->flow_depth > 0)
+ return &ctx->flow[ctx->flow_depth - 1];
+ return NULL;
}
-static struct radeon_llvm_branch *get_current_branch(struct radeon_llvm_context *ctx)
+static struct radeon_llvm_flow *
+get_innermost_loop(struct radeon_llvm_context *ctx)
{
- return ctx->branch_depth > 0 ?
- ctx->branch + (ctx->branch_depth - 1) : NULL;
+ for (unsigned i = ctx->flow_depth; i > 0; --i) {
+ if (ctx->flow[i - 1].loop_entry_block)
+ return &ctx->flow[i - 1];
+ }
+ return NULL;
+}
/* Push a fresh entry onto the control-flow stack, growing its backing
 * storage when full.
 *
 * The returned entry has both blocks cleared; the caller fills in
 * next_block and, for loops, loop_entry_block.
 */
static struct radeon_llvm_flow *
push_flow(struct radeon_llvm_context *ctx)
{
	struct radeon_llvm_flow *flow;

	if (ctx->flow_depth >= ctx->flow_depth_max) {
		/* Double the capacity; RADEON_LLVM_INITIAL_CF_DEPTH covers
		 * the first allocation when flow_depth is still 0.
		 */
		unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
		/* NOTE(review): the REALLOC result is not checked for NULL;
		 * presumably allocation failure is treated as fatal here --
		 * confirm against the rest of the driver.
		 */
		ctx->flow = REALLOC(ctx->flow,
				    ctx->flow_depth_max * sizeof(*ctx->flow),
				    new_max * sizeof(*ctx->flow));
		ctx->flow_depth_max = new_max;
	}

	flow = &ctx->flow[ctx->flow_depth];
	ctx->flow_depth++;

	flow->next_block = NULL;
	flow->loop_entry_block = NULL;
	return flow;
}
unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
}
}
- case TGSI_FILE_INPUT:
- result = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle)];
+ case TGSI_FILE_INPUT: {
+ unsigned index = reg->Register.Index;
+ LLVMValueRef input[4];
+
+ /* I don't think doing this for vertex shaders is beneficial.
+ * For those, we want to make sure the VMEM loads are executed
+ * only once. Fragment shaders don't care much, because
+ * v_interp instructions are much cheaper than VMEM loads.
+ */
+ if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
+ ctx->load_input(ctx, index, &ctx->input_decls[index], input);
+ else
+ memcpy(input, &ctx->inputs[index * 4], sizeof(input));
+
+ result = input[swizzle];
+
if (tgsi_type_is_64bit(type)) {
ptr = result;
- ptr2 = ctx->inputs[radeon_llvm_reg_index_soa(reg->Register.Index, swizzle + 1)];
+ ptr2 = input[swizzle + 1];
return radeon_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
}
break;
+ }
case TGSI_FILE_TEMPORARY:
if (reg->Register.Index >= ctx->temps_count)
* FIXME: We shouldn't need to have the non-alloca
* code path for arrays. LLVM should be smart enough to
* promote allocas into registers when profitable.
+ *
+ * LLVM 3.8 crashes with this.
*/
- if (array_size > 16) {
+ if (HAVE_LLVM >= 0x0309 && array_size > 16) {
array_alloca = LLVMBuildAlloca(builder,
LLVMArrayType(bld_base->base.vec_type,
array_size), "array");
{
unsigned idx;
for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
- if (ctx->load_input)
- ctx->load_input(ctx, idx, decl);
+ if (ctx->load_input) {
+ ctx->input_decls[idx] = *decl;
+
+ if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
+ ctx->load_input(ctx, idx, decl,
+ &ctx->inputs[idx * 4]);
+ }
}
}
break;
}
}
+static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
+{
+ char buf[32];
+ /* Subtract 1 so that the number shown is that of the corresponding
+ * opcode in the TGSI dump, e.g. an if block has the same suffix as
+ * the instruction number of the corresponding TGSI IF.
+ */
+ snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
+ LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
+}
+
+/* Append a basic block at the level of the parent flow.
+ */
+static LLVMBasicBlockRef append_basic_block(struct radeon_llvm_context *ctx,
+ const char *name)
+{
+ struct gallivm_state *gallivm = &ctx->gallivm;
+
+ assert(ctx->flow_depth >= 1);
+
+ if (ctx->flow_depth >= 2) {
+ struct radeon_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
+
+ return LLVMInsertBasicBlockInContext(gallivm->context,
+ flow->next_block, name);
+ }
+
+ return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
+}
+
+/* Emit a branch to the given default target for the current block if
+ * applicable -- that is, if the current block does not already contain a
+ * branch from a break or continue.
+ */
+static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
+{
+ if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
+ LLVMBuildBr(builder, target);
+}
+
static void bgnloop_emit(const struct lp_build_tgsi_action *action,
struct lp_build_tgsi_context *bld_base,
struct lp_build_emit_data *emit_data)
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBasicBlockRef loop_block;
- LLVMBasicBlockRef endloop_block;
- endloop_block = LLVMAppendBasicBlockInContext(gallivm->context,
- ctx->main_fn, "ENDLOOP");
- loop_block = LLVMInsertBasicBlockInContext(gallivm->context,
- endloop_block, "LOOP");
- LLVMBuildBr(gallivm->builder, loop_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, loop_block);
-
- if (++ctx->loop_depth > ctx->loop_depth_max) {
- unsigned new_max = ctx->loop_depth_max << 1;
-
- if (!new_max)
- new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
- ctx->loop = REALLOC(ctx->loop, ctx->loop_depth_max *
- sizeof(ctx->loop[0]),
- new_max * sizeof(ctx->loop[0]));
- ctx->loop_depth_max = new_max;
- }
-
- ctx->loop[ctx->loop_depth - 1].loop_block = loop_block;
- ctx->loop[ctx->loop_depth - 1].endloop_block = endloop_block;
+ struct radeon_llvm_flow *flow = push_flow(ctx);
+ flow->loop_entry_block = append_basic_block(ctx, "LOOP");
+ flow->next_block = append_basic_block(ctx, "ENDLOOP");
+ set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
+ LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
+ LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
}
static void brk_emit(const struct lp_build_tgsi_action *action,
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+ struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(gallivm->builder, current_loop->endloop_block);
+ LLVMBuildBr(gallivm->builder, flow->next_block);
}
static void cont_emit(const struct lp_build_tgsi_action *action,
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+ struct radeon_llvm_flow *flow = get_innermost_loop(ctx);
- LLVMBuildBr(gallivm->builder, current_loop->loop_block);
+ LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
}
static void else_emit(const struct lp_build_tgsi_action *action,
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
-
- /* We need to add a terminator to the current block if the previous
- * instruction was an ENDIF.Example:
- * IF
- * [code]
- * IF
- * [code]
- * ELSE
- * [code]
- * ENDIF <--
- * ELSE<--
- * [code]
- * ENDIF
- */
+ struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
+ LLVMBasicBlockRef endif_block;
- if (current_block != current_branch->if_block) {
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
- if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
- current_branch->has_else = 1;
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
+ assert(!current_branch->loop_entry_block);
+
+ endif_block = append_basic_block(ctx, "ENDIF");
+ emit_default_branch(gallivm->builder, endif_block);
+
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+ set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
+
+ current_branch->next_block = endif_block;
}
static void endif_emit(const struct lp_build_tgsi_action *action,
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_branch *current_branch = get_current_branch(ctx);
- LLVMBasicBlockRef current_block = LLVMGetInsertBlock(gallivm->builder);
+ struct radeon_llvm_flow *current_branch = get_current_flow(ctx);
- /* If we have consecutive ENDIF instructions, then the first ENDIF
- * will not have a terminator, so we need to add one. */
- if (current_block != current_branch->if_block
- && current_block != current_branch->else_block
- && !LLVMGetBasicBlockTerminator(current_block)) {
+ assert(!current_branch->loop_entry_block);
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
- if (!LLVMGetBasicBlockTerminator(current_branch->else_block)) {
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->else_block);
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
-
- if (!LLVMGetBasicBlockTerminator(current_branch->if_block)) {
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->if_block);
- LLVMBuildBr(gallivm->builder, current_branch->endif_block);
- }
+ emit_default_branch(gallivm->builder, current_branch->next_block);
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
+ set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
- LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->endif_block);
- ctx->branch_depth--;
+ ctx->flow_depth--;
}
static void endloop_emit(const struct lp_build_tgsi_action *action,
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- struct radeon_llvm_loop *current_loop = get_current_loop(ctx);
+ struct radeon_llvm_flow *current_loop = get_current_flow(ctx);
- if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(gallivm->builder))) {
- LLVMBuildBr(gallivm->builder, current_loop->loop_block);
- }
+ assert(current_loop->loop_entry_block);
+
+ emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->endloop_block);
- ctx->loop_depth--;
+ LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
+ set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
+ ctx->flow_depth--;
}
static void if_cond_emit(const struct lp_build_tgsi_action *action,
{
struct radeon_llvm_context *ctx = radeon_llvm_context(bld_base);
struct gallivm_state *gallivm = bld_base->base.gallivm;
- LLVMBasicBlockRef if_block, else_block, endif_block;
-
- endif_block = LLVMAppendBasicBlockInContext(gallivm->context,
- ctx->main_fn, "ENDIF");
- if_block = LLVMInsertBasicBlockInContext(gallivm->context,
- endif_block, "IF");
- else_block = LLVMInsertBasicBlockInContext(gallivm->context,
- endif_block, "ELSE");
- LLVMBuildCondBr(gallivm->builder, cond, if_block, else_block);
- LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
-
- if (++ctx->branch_depth > ctx->branch_depth_max) {
- unsigned new_max = ctx->branch_depth_max << 1;
+ struct radeon_llvm_flow *flow = push_flow(ctx);
+ LLVMBasicBlockRef if_block;
- if (!new_max)
- new_max = RADEON_LLVM_INITIAL_CF_DEPTH;
-
- ctx->branch = REALLOC(ctx->branch, ctx->branch_depth_max *
- sizeof(ctx->branch[0]),
- new_max * sizeof(ctx->branch[0]));
- ctx->branch_depth_max = new_max;
- }
-
- ctx->branch[ctx->branch_depth - 1].endif_block = endif_block;
- ctx->branch[ctx->branch_depth - 1].if_block = if_block;
- ctx->branch[ctx->branch_depth - 1].else_block = else_block;
- ctx->branch[ctx->branch_depth - 1].has_else = 0;
+ if_block = append_basic_block(ctx, "IF");
+ flow->next_block = append_basic_block(ctx, "ELSE");
+ set_basicblock_name(if_block, "if", bld_base->pc);
+ LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
+ LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
}
static void if_emit(const struct lp_build_tgsi_action *action,
LLVMContextRef context = bld_base->base.gallivm->context;
switch (emit_data->inst->Instruction.Opcode) {
- case TGSI_OPCODE_USEQ: pred = LLVMIntEQ; break;
- case TGSI_OPCODE_USNE: pred = LLVMIntNE; break;
- case TGSI_OPCODE_USGE: pred = LLVMIntUGE; break;
- case TGSI_OPCODE_USLT: pred = LLVMIntULT; break;
- case TGSI_OPCODE_ISGE: pred = LLVMIntSGE; break;
- case TGSI_OPCODE_ISLT: pred = LLVMIntSLT; break;
+ case TGSI_OPCODE_USEQ:
+ case TGSI_OPCODE_U64SEQ: pred = LLVMIntEQ; break;
+ case TGSI_OPCODE_USNE:
+ case TGSI_OPCODE_U64SNE: pred = LLVMIntNE; break;
+ case TGSI_OPCODE_USGE:
+ case TGSI_OPCODE_U64SGE: pred = LLVMIntUGE; break;
+ case TGSI_OPCODE_USLT:
+ case TGSI_OPCODE_U64SLT: pred = LLVMIntULT; break;
+ case TGSI_OPCODE_ISGE:
+ case TGSI_OPCODE_I64SGE: pred = LLVMIntSGE; break;
+ case TGSI_OPCODE_ISLT:
+ case TGSI_OPCODE_I64SLT: pred = LLVMIntSLT; break;
default:
assert(!"unknown instruction");
pred = 0;
LLVMValueRef cmp, val;
- if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
+ if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_I64SSG) {
+ cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int64_bld.zero, "");
+ val = LLVMBuildSelect(builder, cmp, bld_base->int64_bld.one, emit_data->args[0], "");
+ cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int64_bld.zero, "");
+ val = LLVMBuildSelect(builder, cmp, val, LLVMConstInt(bld_base->int64_bld.elem_type, -1, true), "");
+ } else if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ISSG) {
cmp = LLVMBuildICmp(builder, LLVMIntSGT, emit_data->args[0], bld_base->int_bld.zero, "");
val = LLVMBuildSelect(builder, cmp, bld_base->int_bld.one, emit_data->args[0], "");
cmp = LLVMBuildICmp(builder, LLVMIntSGE, val, bld_base->int_bld.zero, "");
*
* The hardware already implements the correct behavior.
*/
- lp_build_const_int32(gallivm, 1)
+ LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
};
emit_data->output[emit_data->chan] =
LLVMValueRef args[2] = {
emit_data->args[0],
/* Don't generate code for handling zero: */
- lp_build_const_int32(gallivm, 1)
+ LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), 1, 0)
};
LLVMValueRef msb =
default:
assert(0);
case TGSI_OPCODE_IMAX:
+ case TGSI_OPCODE_I64MAX:
op = LLVMIntSGT;
break;
case TGSI_OPCODE_IMIN:
+ case TGSI_OPCODE_I64MIN:
op = LLVMIntSLT;
break;
case TGSI_OPCODE_UMAX:
+ case TGSI_OPCODE_U64MAX:
op = LLVMIntUGT;
break;
case TGSI_OPCODE_UMIN:
+ case TGSI_OPCODE_U64MIN:
op = LLVMIntULT;
break;
}
dbl_type.width *= 2;
lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, dbl_type);
}
+ {
+ struct lp_type dtype;
+ dtype = lp_uint_type(type);
+ dtype.width *= 2;
+ lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, dtype);
+ }
+ {
+ struct lp_type dtype;
+ dtype = lp_int_type(type);
+ dtype.width *= 2;
+ lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, dtype);
+ }
bld_base->soa = 1;
bld_base->emit_store = radeon_llvm_emit_store;
bld_base->op_actions[TGSI_OPCODE_UCMP].emit = emit_ucmp;
bld_base->op_actions[TGSI_OPCODE_UP2H].fetch_args = up2h_fetch_args;
bld_base->op_actions[TGSI_OPCODE_UP2H].emit = emit_up2h;
+
+ bld_base->op_actions[TGSI_OPCODE_I64MAX].emit = emit_minmax_int;
+ bld_base->op_actions[TGSI_OPCODE_I64MIN].emit = emit_minmax_int;
+ bld_base->op_actions[TGSI_OPCODE_U64MAX].emit = emit_minmax_int;
+ bld_base->op_actions[TGSI_OPCODE_U64MIN].emit = emit_minmax_int;
+ bld_base->op_actions[TGSI_OPCODE_I64ABS].emit = emit_iabs;
+ bld_base->op_actions[TGSI_OPCODE_I64SSG].emit = emit_ssg;
+ bld_base->op_actions[TGSI_OPCODE_I64NEG].emit = emit_ineg;
+
+ bld_base->op_actions[TGSI_OPCODE_U64SEQ].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_U64SNE].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_U64SGE].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_U64SLT].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_I64SGE].emit = emit_icmp;
+ bld_base->op_actions[TGSI_OPCODE_I64SLT].emit = emit_icmp;
+
+ bld_base->op_actions[TGSI_OPCODE_U64ADD].emit = emit_uadd;
+ bld_base->op_actions[TGSI_OPCODE_U64SHL].emit = emit_shl;
+ bld_base->op_actions[TGSI_OPCODE_U64SHR].emit = emit_ushr;
+ bld_base->op_actions[TGSI_OPCODE_I64SHR].emit = emit_ishr;
+
+ bld_base->op_actions[TGSI_OPCODE_U64MOD].emit = emit_umod;
+ bld_base->op_actions[TGSI_OPCODE_I64MOD].emit = emit_mod;
+ bld_base->op_actions[TGSI_OPCODE_U64DIV].emit = emit_udiv;
+ bld_base->op_actions[TGSI_OPCODE_I64DIV].emit = emit_idiv;
}
void radeon_llvm_create_func(struct radeon_llvm_context *ctx,
LLVMAddInstructionCombiningPass(gallivm->passmgr);
/* Run the pass */
+ LLVMInitializeFunctionPassManager(gallivm->passmgr);
LLVMRunFunctionPassManager(gallivm->passmgr, ctx->main_fn);
+ LLVMFinalizeFunctionPassManager(gallivm->passmgr);
LLVMDisposeBuilder(gallivm->builder);
LLVMDisposePassManager(gallivm->passmgr);
FREE(ctx->temps);
ctx->temps = NULL;
ctx->temps_count = 0;
- FREE(ctx->loop);
- ctx->loop = NULL;
- ctx->loop_depth_max = 0;
- FREE(ctx->branch);
- ctx->branch = NULL;
- ctx->branch_depth_max = 0;
+ FREE(ctx->flow);
+ ctx->flow = NULL;
+ ctx->flow_depth_max = 0;
}