X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fdraw%2Fdraw_llvm.c;h=cfafa183c81e16c25a67b0dad68b94ceaa051fc3;hb=8ad3e0b55df50beac8ba3c5cafa0be79641a4977;hp=e3ef9e425fa6bc21fddf1fdb516acc02fdab40a1;hpb=35489ef285f1fde234b2b9bbb91fdc41fddefc02;p=mesa.git diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index e3ef9e425fa..cfafa183c81 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -10,7 +10,6 @@ #include "gallivm/lp_bld_debug.h" #include "gallivm/lp_bld_tgsi.h" #include "gallivm/lp_bld_printf.h" -#include "gallivm/lp_bld_init.h" #include "tgsi/tgsi_exec.h" @@ -25,6 +24,8 @@ /* generates the draw jit function */ static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); static void init_globals(struct draw_llvm *llvm) @@ -160,9 +161,16 @@ create_vertex_header(struct draw_llvm *llvm, int data_elems) struct draw_llvm * draw_llvm_create(struct draw_context *draw) { - struct draw_llvm *llvm = CALLOC_STRUCT( draw_llvm ); + struct draw_llvm *llvm; +#ifdef PIPE_ARCH_X86 util_cpu_detect(); + /* require SSE2 due to LLVM PR6960. */ + if (!util_cpu_caps.has_sse2) + return NULL; +#endif + + llvm = CALLOC_STRUCT( draw_llvm ); llvm->draw = draw; llvm->engine = draw->engine; @@ -181,6 +189,8 @@ draw_llvm_create(struct draw_context *draw) /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, * but there are more on SVN. */ /* TODO: Add more passes */ + LLVMAddCFGSimplificationPass(llvm->pass); + LLVMAddPromoteMemoryToRegisterPass(llvm->pass); LLVMAddConstantPropagationPass(llvm->pass); if(util_cpu_caps.has_sse4_1) { /* FIXME: There is a bug in this pass, whereby the combination of fptosi @@ -189,15 +199,13 @@ draw_llvm_create(struct draw_context *draw) */ LLVMAddInstructionCombiningPass(llvm->pass); } - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); LLVMAddGVNPass(llvm->pass); - LLVMAddCFGSimplificationPass(llvm->pass); init_globals(llvm); #if 0 - LLVMDumpModule(llvm->module); + LLVMDumpModule(lp_build_module); #endif return llvm; @@ -206,7 +214,9 @@ draw_llvm_create(struct draw_context *draw) void draw_llvm_destroy(struct draw_llvm *llvm) { - free(llvm); + LLVMDisposePassManager(llvm->pass); + + FREE(llvm); } struct draw_llvm_variant * @@ -219,6 +229,7 @@ draw_llvm_prepare(struct draw_llvm *llvm, int num_inputs) llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); draw_llvm_generate(llvm, variant); + draw_llvm_generate_elts(llvm, variant); return variant; } @@ -253,7 +264,8 @@ generate_vs(struct draw_llvm *llvm, NULL /*pos*/, inputs, outputs, - NULL/*sampler*/); + NULL/*sampler*/, + &llvm->draw->vs.vertex_shader->info); } #if DEBUG_STORE @@ -286,10 +298,16 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); + LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); - LLVMValueRef stride = LLVMBuildMul(builder, - vb_stride, - index, ""); + LLVMValueRef cond; + LLVMValueRef stride; + + cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); + + index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); + + stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); @@ -673,7 +691,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) */ #ifdef DEBUG if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - LLVMDumpValue(variant->function); + lp_debug_dump_value(variant->function); assert(0); } #endif @@ -681,7 +699,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) LLVMRunFunctionPassManager(llvm->pass, variant->function); if (0) { - LLVMDumpValue(variant->function); + lp_debug_dump_value(variant->function); debug_printf("\n"); } variant->jit_func = (draw_jit_vert_func)LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); @@ -690,6 +708,158 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) lp_disassemble(variant->jit_func); } + +static void +draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +{ + LLVMTypeRef arg_types[7]; + LLVMTypeRef func_type; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; + LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; + struct draw_context *draw = llvm->draw; + unsigned i, j; + struct lp_build_context bld; + struct lp_build_context bld_int; + struct lp_build_loop_state lp_loop; + struct lp_type vs_type = lp_type_float_vec(32); + const int max_vertices = 4; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + LLVMValueRef fetch_max; + + arg_types[0] = llvm->context_ptr_type; /* context */ + arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ + arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ + arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ + arg_types[4] = LLVMInt32Type(); /* fetch_count */ + arg_types[5] = LLVMInt32Type(); /* stride */ + arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ + + func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); + + variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", func_type); + LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); + for(i = 0; i < Elements(arg_types); ++i) + if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant->function_elts, 0); + io_ptr = LLVMGetParam(variant->function_elts, 1); + vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); + fetch_elts = LLVMGetParam(variant->function_elts, 3); + fetch_count = LLVMGetParam(variant->function_elts, 4); + stride = LLVMGetParam(variant->function_elts, 5); + vb_ptr = LLVMGetParam(variant->function_elts, 6); + + lp_build_name(context_ptr, "context"); + lp_build_name(io_ptr, "io"); + lp_build_name(vbuffers_ptr, "vbuffers"); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_count, "fetch_count"); + lp_build_name(stride, "stride"); + lp_build_name(vb_ptr, "vb"); + + /* + * Function body + */ + + block = LLVMAppendBasicBlock(variant->function_elts, "entry"); + builder = LLVMCreateBuilder(); + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, builder, vs_type); + lp_build_context_init(&bld_int, builder, lp_type_int(32)); + + step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + + fetch_max = LLVMBuildSub(builder, fetch_count, + LLVMConstInt(LLVMInt32Type(), 1, 0), + "fetch_max"); + + lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + { + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef io; + const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + + io_itr = lp_loop.counter; + io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); +#if DEBUG_STORE + lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", + io_itr, io, lp_loop.counter); +#endif + for (i = 0; i < NUM_CHANNELS; ++i) { + LLVMValueRef true_index = LLVMBuildAdd( + builder, + lp_loop.counter, + LLVMConstInt(LLVMInt32Type(), i, 0), ""); + LLVMValueRef fetch_ptr; + + /* make sure we're not out of bounds which can happen + * if fetch_count % 4 != 0, because on the last iteration + * a few of the 4 vertex fetches will be out of bounds */ + true_index = lp_build_min(&bld_int, true_index, fetch_max); + + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { + struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; + LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), + velem->vertex_buffer_index, + 0); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, + &vb_index, 1, ""); + generate_fetch(builder, vbuffers_ptr, + &aos_attribs[j][i], velem, vb, true_index); + } + } + convert_to_soa(builder, aos_attribs, inputs, + draw->pt.nr_vertex_elements); + + ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + generate_vs(llvm, + builder, + outputs, + ptr_aos, + context_ptr); + + convert_to_aos(builder, io, outputs, + draw->vs.vertex_shader->info.num_outputs, + max_vertices); + } + lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); + + LLVMBuildRetVoid(builder); + + LLVMDisposeBuilder(builder); + + /* + * Translate the LLVM IR into machine code. + */ +#ifdef DEBUG + if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { + lp_debug_dump_value(variant->function_elts); + assert(0); + } +#endif + + LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + + if (0) { + lp_debug_dump_value(variant->function_elts); + debug_printf("\n"); + } + variant->jit_func_elts = (draw_jit_vert_func_elts)LLVMGetPointerToGlobal( + llvm->draw->engine, variant->function_elts); + + if (0) + lp_disassemble(variant->jit_func_elts); +} + void draw_llvm_make_variant_key(struct draw_llvm *llvm, struct draw_llvm_variant_key *key)