#include "gallivm/lp_bld_debug.h"
#include "gallivm/lp_bld_tgsi.h"
#include "gallivm/lp_bld_printf.h"
-#include "gallivm/lp_bld_init.h"
#include "tgsi/tgsi_exec.h"
/* generates the draw jit function */
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var);
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var);
static void
init_globals(struct draw_llvm *llvm)
struct draw_llvm *
draw_llvm_create(struct draw_context *draw)
{
- struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm );
+ struct draw_llvm *llvm;
+#ifdef PIPE_ARCH_X86
util_cpu_detect();
+ /* require SSE2 due to LLVM PR6960. */
+ if (!util_cpu_caps.has_sse2)
+ return NULL;
+#endif
+
+ llvm = CALLOC_STRUCT( draw_llvm );
llvm->draw = draw;
llvm->engine = draw->engine;
/* These are the passes currently listed in llvm-c/Transforms/Scalar.h,
* but there are more on SVN. */
/* TODO: Add more passes */
+ LLVMAddCFGSimplificationPass(llvm->pass);
+ LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
LLVMAddConstantPropagationPass(llvm->pass);
if(util_cpu_caps.has_sse4_1) {
/* FIXME: There is a bug in this pass, whereby the combination of fptosi
*/
LLVMAddInstructionCombiningPass(llvm->pass);
}
- LLVMAddPromoteMemoryToRegisterPass(llvm->pass);
LLVMAddGVNPass(llvm->pass);
- LLVMAddCFGSimplificationPass(llvm->pass);
init_globals(llvm);
#if 0
- LLVMDumpModule(llvm->module);
+ LLVMDumpModule(lp_build_module);
#endif
return llvm;
void
draw_llvm_destroy(struct draw_llvm *llvm)
{
- free(llvm);
+ LLVMDisposePassManager(llvm->pass); /* dispose of the pass manager kept in
+ * llvm->pass first; it has to happen
+ * while the struct is still valid */
+
+ FREE(llvm); /* then release the draw_llvm struct itself */
}
struct draw_llvm_variant *
llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs);
draw_llvm_generate(llvm, variant);
+ draw_llvm_generate_elts(llvm, variant);
return variant;
}
NULL /*pos*/,
inputs,
outputs,
- NULL/*sampler*/);
+ NULL/*sampler*/,
+ &llvm->draw->vs.vertex_shader->info);
}
#if DEBUG_STORE
LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr,
&indices, 1, "");
LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf);
+ LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf);
LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf);
- LLVMValueRef stride = LLVMBuildMul(builder,
- vb_stride,
- index, "");
+ LLVMValueRef cond;
+ LLVMValueRef stride;
+
+ cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, "");
+
+ index = LLVMBuildSelect(builder, cond, index, vb_max_index, "");
+
+ stride = LLVMBuildMul(builder, vb_stride, index, "");
vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer");
*/
#ifdef DEBUG
if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
- LLVMDumpValue(variant->function);
+ lp_debug_dump_value(variant->function);
assert(0);
}
#endif
LLVMRunFunctionPassManager(llvm->pass, variant->function);
if (0) {
- LLVMDumpValue(variant->function);
+ lp_debug_dump_value(variant->function);
debug_printf("\n");
}
variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function);
lp_disassemble(variant->jit_func);
}
+/**
+ * Emit and JIT-compile the "draw_llvm_shader_elts" function for a variant.
+ *
+ * The generated function is added to llvm->module with a void return type
+ * and seven parameters (context, io, vbuffers, fetch_elts, fetch_count,
+ * stride, vb).  Its body loops over the entries of fetch_elts in groups of
+ * max_vertices (4): each slot loads one entry from fetch_elts, every vertex
+ * element is fetched with generate_fetch() using that entry as the index,
+ * and the attributes are passed through convert_to_soa(), generate_vs()
+ * and convert_to_aos(), the latter receiving the io pointer computed for
+ * the current loop counter.
+ *
+ * On return variant->function_elts holds the LLVM function value and
+ * variant->jit_func_elts the pointer obtained from LLVMGetPointerToGlobal().
+ *
+ * @param llvm     supplies the module, pass manager, argument types and the
+ *                 draw context whose vertex elements and shader info are read
+ * @param variant  receives function_elts and jit_func_elts
+ */
+static void
+draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant)
+{
+ LLVMTypeRef arg_types[7];
+ LLVMTypeRef func_type;
+ LLVMValueRef context_ptr;
+ LLVMBasicBlockRef block;
+ LLVMBuilderRef builder;
+ LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr;
+ LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr;
+ struct draw_context *draw = llvm->draw;
+ unsigned i, j;
+ struct lp_build_context bld;
+ struct lp_build_context bld_int;
+ struct lp_build_loop_state lp_loop;
+ struct lp_type vs_type = lp_type_float_vec(32);
+ const int max_vertices = 4; /* vertices handled per loop iteration; also used as the loop step */
+ LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS];
+ LLVMValueRef fetch_max;
+
+ arg_types[0] = llvm->context_ptr_type; /* context */
+ arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */
+ arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */
+ arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */
+ arg_types[4] = LLVMInt32Type(); /* fetch_count */
+ arg_types[5] = LLVMInt32Type(); /* stride */
+ arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */
+
+ func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
+
+ variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type);
+ LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv);
+ for(i = 0; i < Elements(arg_types); ++i) /* tag every pointer-typed parameter as noalias */
+ if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind)
+ LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute);
+
+ context_ptr = LLVMGetParam(variant->function_elts, 0);
+ io_ptr = LLVMGetParam(variant->function_elts, 1);
+ vbuffers_ptr = LLVMGetParam(variant->function_elts, 2);
+ fetch_elts = LLVMGetParam(variant->function_elts, 3);
+ fetch_count = LLVMGetParam(variant->function_elts, 4);
+ stride = LLVMGetParam(variant->function_elts, 5); /* only named below; not otherwise read in this function */
+ vb_ptr = LLVMGetParam(variant->function_elts, 6);
+
+ lp_build_name(context_ptr, "context");
+ lp_build_name(io_ptr, "io");
+ lp_build_name(vbuffers_ptr, "vbuffers");
+ lp_build_name(fetch_elts, "fetch_elts");
+ lp_build_name(fetch_count, "fetch_count");
+ lp_build_name(stride, "stride");
+ lp_build_name(vb_ptr, "vb");
+
+ /*
+ * Function body
+ */
+
+ block = LLVMAppendBasicBlock(variant->function_elts, "entry");
+ builder = LLVMCreateBuilder();
+ LLVMPositionBuilderAtEnd(builder, block);
+
+ lp_build_context_init(&bld, builder, vs_type); /* NOTE(review): bld is not referenced again in this function */
+ lp_build_context_init(&bld_int, builder, lp_type_int(32)); /* used for the index clamp in the loop */
+
+ step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0);
+
+ fetch_max = LLVMBuildSub(builder, fetch_count,
+ LLVMConstInt(LLVMInt32Type(), 1, 0),
+ "fetch_max"); /* fetch_count - 1, the upper clamp for fetch_elts indices;
+ * NOTE(review): this wraps if fetch_count is 0 -- confirm
+ * callers never pass 0 */
+
+ lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); /* counter starts at 0 */
+ {
+ LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS];
+ LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } };
+ LLVMValueRef io;
+ const LLVMValueRef (*ptr_aos)[NUM_CHANNELS];
+
+ io_itr = lp_loop.counter;
+ io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); /* io_ptr indexed by the loop counter */
+#if DEBUG_STORE
+ lp_build_printf(builder, " --- io %d = %p, loop counter %d\n",
+ io_itr, io, lp_loop.counter);
+#endif
+ for (i = 0; i < NUM_CHANNELS; ++i) { /* i is the slot within the current group;
+ * NOTE(review): the bound is NUM_CHANNELS although
+ * the slots are vertices -- presumably equal to
+ * max_vertices, confirm */
+ LLVMValueRef true_index = LLVMBuildAdd(
+ builder,
+ lp_loop.counter,
+ LLVMConstInt(LLVMInt32Type(), i, 0), "");
+ LLVMValueRef fetch_ptr;
+
+ /* make sure we're not out of bounds which can happen
+ * if fetch_count % 4 != 0, because on the last iteration
+ * a few of the 4 vertex fetches will be out of bounds */
+ true_index = lp_build_min(&bld_int, true_index, fetch_max);
+
+ fetch_ptr = LLVMBuildGEP(builder, fetch_elts,
+ &true_index, 1, "");
+ true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); /* from here on true_index is the value read from fetch_elts */
+ for (j = 0; j < draw->pt.nr_vertex_elements; ++j) {
+ struct pipe_vertex_element *velem = &draw->pt.vertex_element[j];
+ LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(),
+ velem->vertex_buffer_index,
+ 0);
+ LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr,
+ &vb_index, 1, "");
+ generate_fetch(builder, vbuffers_ptr,
+ &aos_attribs[j][i], velem, vb, true_index); /* result lands in aos_attribs[j][i]: element j, slot i */
+ }
+ }
+ convert_to_soa(builder, aos_attribs, inputs,
+ draw->pt.nr_vertex_elements);
+
+ ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; /* same storage as inputs, viewed through a pointer-to-const-row type */
+ generate_vs(llvm,
+ builder,
+ outputs,
+ ptr_aos,
+ context_ptr);
+
+ convert_to_aos(builder, io, outputs,
+ draw->vs.vertex_shader->info.num_outputs,
+ max_vertices);
+ }
+ lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); /* presumably advances the counter by step and leaves the
+ * loop once counter >= fetch_count -- confirm against
+ * lp_build_loop_end_cond() */
+
+ LLVMBuildRetVoid(builder);
+
+ LLVMDisposeBuilder(builder);
+
+ /*
+ * Translate the LLVM IR into machine code.
+ */
+#ifdef DEBUG
+ if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { /* a nonzero result is handled as a verification failure */
+ lp_debug_dump_value(variant->function_elts);
+ assert(0);
+ }
+#endif
+
+ LLVMRunFunctionPassManager(llvm->pass, variant->function_elts);
+
+ if (0) { /* disabled debugging aid: dump the function after the passes have run */
+ lp_debug_dump_value(variant->function_elts);
+ debug_printf("\n");
+ }
+ variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal(
+ llvm->draw->engine, variant->function_elts);
+
+ if (0) /* disabled debugging aid */
+ lp_disassemble(variant->jit_func_elts);
+}
+
void
draw_llvm_make_variant_key(struct draw_llvm *llvm,
struct draw_llvm_variant_key *key)