struct draw_llvm *
draw_llvm_create(struct draw_context *draw)
{
- struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+ struct draw_llvm *llvm;
+#ifdef PIPE_ARCH_X86
util_cpu_detect();
+ /* Require SSE2 due to LLVM PR6960; return NULL on CPUs without it. */
+ if (!util_cpu_caps.has_sse2)
+ return NULL;
+#endif
+
+ llvm = CALLOC_STRUCT( draw_llvm );
llvm->draw = draw;
llvm->engine = draw->engine;
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
/* TODO: Add more passes */
+ LLVMAddCFGSimplificationPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
LLVMAddConstantPropagationPass(llvm->pass);
if(util_cpu_caps.has_sse4_1) {
/* FIXME: There is a bug in this pass, whereby the combination of fptosi
*/
LLVMAddInstructionCombiningPass(llvm->pass);
}
- LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
LLVMAddGVNPass(llvm->pass);
- LLVMAddCFGSimplificationPass(llvm->pass);
init_globals(llvm);
#if 0
- LLVMDumpModule(llvm->module);
+ LLVMDumpModule(lp_build_module);
#endif
return llvm;
void
draw_llvm_destroy(struct draw_llvm *llvm)
{
+ LLVMDisposePassManager(llvm->pass);
+
FREE(llvm);
}
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
+ lp_debug_dump_value(variant->function);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function);
if (0) {
- LLVMDumpValue(variant->function);
+ lp_debug_dump_value(variant->function);
debug_printf("\n");
}
variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
struct draw_context *draw = llvm->draw;
unsigned i, j;
struct lp_build_context bld;
+ struct lp_build_context bld_int;
struct lp_build_loop_state lp_loop;
struct lp_type vs_type = lp_type_float_vec(32);
const int max_vertices = 4;
LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef fetch_max;
arg_types[0] = llvm->context_ptr_type; /* context */
arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
LLVMPositionBuilderAtEnd(builder, block);
lp_build_context_init(&bld, builder, vs_type);
+ lp_build_context_init(&bld_int, builder, lp_type_int(32));
step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+ fetch_max = LLVMBuildSub(builder, fetch_count,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "fetch_max");
+
lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop);
{
LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
builder,
lp_loop.counter,
LLVMConstInt(LLVMInt32Type(), i, 0), "");
- LLVMValueRef fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
- &true_index, 1, "");
+ LLVMValueRef fetch_ptr;
+
+ /* Clamp the index to fetch_max (fetch_count - 1): when
+ * fetch_count % 4 != 0, some of the 4 vertex fetches on the
+ * last iteration would otherwise index out of bounds. */
+ true_index = lp_build_min(&bld_int, true_index, fetch_max);
+
+ fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function_elts);
+ lp_debug_dump_value(variant->function_elts);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
if (0) {
- LLVMDumpValue(variant->function_elts);
+ lp_debug_dump_value(variant->function_elts);
debug_printf("\n");
}
variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(