X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fdraw%2Fdraw_llvm.c;h=e08221eb3929a820130bf8d3a1c5d13f43d5ae19;hb=c790c2c7598dea2d5a5b0bfbe47732956e1e89a6;hp=7fb86d7cb275f5f5c5bfe7ffa56658f1fc76e98c;hpb=1218430e1200a08cd64b6555d3fd1fd0274ad9e5;p=mesa.git diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 7fb86d7cb27..e08221eb392 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -31,6 +31,9 @@ #include "draw_vs.h" #include "gallivm/lp_bld_arit.h" +#include "gallivm/lp_bld_logic.h" +#include "gallivm/lp_bld_const.h" +#include "gallivm/lp_bld_swizzle.h" #include "gallivm/lp_bld_struct.h" #include "gallivm/lp_bld_type.h" #include "gallivm/lp_bld_flow.h" @@ -39,269 +42,381 @@ #include "gallivm/lp_bld_printf.h" #include "gallivm/lp_bld_intr.h" #include "gallivm/lp_bld_init.h" +#include "gallivm/lp_bld_type.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" -#include "util/u_cpu_detect.h" #include "util/u_math.h" #include "util/u_pointer.h" #include "util/u_string.h" +#include "util/u_simple_list.h" -#include #define DEBUG_STORE 0 -/* generates the draw jit function */ -static void -draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); + +/** + * This function is called by the gallivm "garbage collector" when + * the LLVM global data structures are freed. We must free all LLVM-related + * data. Specifically, all JIT'd shader variants. + */ static void -draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *var); +draw_llvm_garbage_collect_callback(void *cb_data) +{ + struct draw_llvm *llvm = (struct draw_llvm *) cb_data; + struct draw_context *draw = llvm->draw; + struct draw_llvm_variant_list_item *li; + + /* Ensure prepare will be run and shaders recompiled */ + assert(!draw->suspend_flushing); + draw_do_flush(draw, DRAW_FLUSH_STATE_CHANGE); + + /* free all shader variants */ + li = first_elem(&llvm->vs_variants_list); + while (!at_end(&llvm->vs_variants_list, li)) { + struct draw_llvm_variant_list_item *next = next_elem(li); + draw_llvm_destroy_variant(li->base); + li = next; + } + + /* Null-out these pointers so they get remade next time they're needed. + * See the accessor functions below. + */ + llvm->context_ptr_type = NULL; + llvm->buffer_ptr_type = NULL; + llvm->vb_ptr_type = NULL; + llvm->vertex_header_ptr_type = NULL; +} + static void -init_globals(struct draw_llvm *llvm) +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var, + boolean elts); + + +/** + * Create LLVM type for struct draw_jit_texture + */ +static LLVMTypeRef +create_jit_texture_type(struct gallivm_state *gallivm, const char *struct_name) { + LLVMTargetDataRef target = gallivm->target; LLVMTypeRef texture_type; + LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context); + + elem_types[DRAW_JIT_TEXTURE_WIDTH] = + elem_types[DRAW_JIT_TEXTURE_HEIGHT] = + elem_types[DRAW_JIT_TEXTURE_DEPTH] = + elem_types[DRAW_JIT_TEXTURE_FIRST_LEVEL] = + elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = int32_type; + elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = + elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = + LLVMArrayType(int32_type, PIPE_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_DATA] = + LLVMArrayType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), + PIPE_MAX_TEXTURE_LEVELS); + elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = + elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = + elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatTypeInContext(gallivm->context); + elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = + LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); + + texture_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); + +#if HAVE_LLVM < 0x0300 + LLVMAddTypeName(gallivm->module, struct_name, texture_type); + + /* Make sure the target's struct layout cache doesn't return + * stale/invalid data. + */ + LLVMInvalidateStructLayout(gallivm->target, texture_type); +#endif - /* struct draw_jit_texture */ - { - LLVMTypeRef elem_types[DRAW_JIT_TEXTURE_NUM_FIELDS]; - - elem_types[DRAW_JIT_TEXTURE_WIDTH] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_HEIGHT] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_DEPTH] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_LAST_LEVEL] = LLVMInt32Type(); - elem_types[DRAW_JIT_TEXTURE_ROW_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); - elem_types[DRAW_JIT_TEXTURE_IMG_STRIDE] = - LLVMArrayType(LLVMInt32Type(), DRAW_MAX_TEXTURE_LEVELS); - elem_types[DRAW_JIT_TEXTURE_DATA] = - LLVMArrayType(LLVMPointerType(LLVMInt8Type(), 0), - DRAW_MAX_TEXTURE_LEVELS); - elem_types[DRAW_JIT_TEXTURE_MIN_LOD] = LLVMFloatType(); - elem_types[DRAW_JIT_TEXTURE_MAX_LOD] = LLVMFloatType(); - elem_types[DRAW_JIT_TEXTURE_LOD_BIAS] = LLVMFloatType(); - elem_types[DRAW_JIT_TEXTURE_BORDER_COLOR] = - LLVMArrayType(LLVMFloatType(), 4); - - texture_type = LLVMStructType(elem_types, Elements(elem_types), 0); - - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_WIDTH); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_HEIGHT); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_DEPTH); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_LAST_LEVEL); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_ROW_STRIDE); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_IMG_STRIDE); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_DATA); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_MIN_LOD); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_MAX_LOD); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_LOD_BIAS); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, - llvm->target, texture_type, - DRAW_JIT_TEXTURE_BORDER_COLOR); - LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, - llvm->target, texture_type); - - LLVMAddTypeName(llvm->module, "texture", texture_type); - } + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, width, + target, texture_type, + DRAW_JIT_TEXTURE_WIDTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, height, + target, texture_type, + DRAW_JIT_TEXTURE_HEIGHT); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, depth, + target, texture_type, + DRAW_JIT_TEXTURE_DEPTH); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, first_level, + target, texture_type, + DRAW_JIT_TEXTURE_FIRST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, last_level, + target, texture_type, + DRAW_JIT_TEXTURE_LAST_LEVEL); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, row_stride, + target, texture_type, + DRAW_JIT_TEXTURE_ROW_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, img_stride, + target, texture_type, + DRAW_JIT_TEXTURE_IMG_STRIDE); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, data, + target, texture_type, + DRAW_JIT_TEXTURE_DATA); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, min_lod, + target, texture_type, + DRAW_JIT_TEXTURE_MIN_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, max_lod, + target, texture_type, + DRAW_JIT_TEXTURE_MAX_LOD); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, lod_bias, + target, texture_type, + DRAW_JIT_TEXTURE_LOD_BIAS); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_texture, border_color, + target, texture_type, + DRAW_JIT_TEXTURE_BORDER_COLOR); + + LP_CHECK_STRUCT_SIZE(struct draw_jit_texture, target, texture_type); + + return texture_type; +} - /* struct draw_jit_context */ - { - LLVMTypeRef elem_types[3]; - LLVMTypeRef context_type; +/** + * Create LLVM type for struct draw_jit_texture + */ +static LLVMTypeRef +create_jit_context_type(struct gallivm_state *gallivm, + LLVMTypeRef texture_type, const char *struct_name) +{ + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMTypeRef elem_types[5]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMPointerType(float_type, 0); /* vs_constants */ + elem_types[1] = LLVMPointerType(float_type, 0); /* gs_constants */ + elem_types[2] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), + DRAW_TOTAL_CLIP_PLANES), 0); + elem_types[3] = LLVMPointerType(float_type, 0); /* viewport */ + elem_types[4] = LLVMArrayType(texture_type, + PIPE_MAX_VERTEX_SAMPLERS); /* textures */ + context_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); +#if HAVE_LLVM < 0x0300 + LLVMAddTypeName(gallivm->module, struct_name, context_type); + + LLVMInvalidateStructLayout(gallivm->target, context_type); +#endif - elem_types[0] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[1] = LLVMPointerType(LLVMFloatType(), 0); /* vs_constants */ - elem_types[2] = LLVMArrayType(texture_type, - PIPE_MAX_VERTEX_SAMPLERS); /* textures */ + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, + target, context_type, 0); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, + target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, planes, + target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, + target, context_type, + DRAW_JIT_CTX_TEXTURES); + LP_CHECK_STRUCT_SIZE(struct draw_jit_context, + target, context_type); + + return context_type; +} - context_type = LLVMStructType(elem_types, Elements(elem_types), 0); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants, - llvm->target, context_type, 0); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, gs_constants, - llvm->target, context_type, 1); - LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, textures, - llvm->target, context_type, - DRAW_JIT_CTX_TEXTURES); - LP_CHECK_STRUCT_SIZE(struct draw_jit_context, - llvm->target, context_type); +/** + * Create LLVM type for struct pipe_vertex_buffer + */ +static LLVMTypeRef +create_jit_vertex_buffer_type(struct gallivm_state *gallivm, const char *struct_name) +{ + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef elem_types[4]; + LLVMTypeRef vb_type; - LLVMAddTypeName(llvm->module, "draw_jit_context", context_type); + elem_types[0] = + elem_types[1] = LLVMInt32TypeInContext(gallivm->context); + elem_types[2] = + elem_types[3] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* vs_constants */ - llvm->context_ptr_type = LLVMPointerType(context_type, 0); - } - { - LLVMTypeRef buffer_ptr = LLVMPointerType(LLVMIntType(8), 0); - llvm->buffer_ptr_type = LLVMPointerType(buffer_ptr, 0); - } - /* struct pipe_vertex_buffer */ - { - LLVMTypeRef elem_types[4]; - LLVMTypeRef vb_type; - - elem_types[0] = LLVMInt32Type(); - elem_types[1] = LLVMInt32Type(); - elem_types[2] = LLVMInt32Type(); - elem_types[3] = LLVMPointerType(LLVMOpaqueType(), 0); /* vs_constants */ + vb_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); +#if HAVE_LLVM < 0x0300 + LLVMAddTypeName(gallivm->module, struct_name, vb_type); - vb_type = LLVMStructType(elem_types, Elements(elem_types), 0); + LLVMInvalidateStructLayout(gallivm->target, vb_type); +#endif - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, - llvm->target, vb_type, 0); - LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, - llvm->target, vb_type, 2); - LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, - llvm->target, vb_type); + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, stride, + target, vb_type, 0); + LP_CHECK_MEMBER_OFFSET(struct pipe_vertex_buffer, buffer_offset, + target, vb_type, 1); - LLVMAddTypeName(llvm->module, "pipe_vertex_buffer", vb_type); + LP_CHECK_STRUCT_SIZE(struct pipe_vertex_buffer, target, vb_type); - llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); - } + return vb_type; } + +/** + * Create LLVM type for struct vertex_header; + */ static LLVMTypeRef -create_vertex_header(struct draw_llvm *llvm, int data_elems) +create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems) { - /* struct vertex_header */ - LLVMTypeRef elem_types[3]; + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef elem_types[4]; LLVMTypeRef vertex_header; char struct_name[24]; util_snprintf(struct_name, 23, "vertex_header%d", data_elems); - elem_types[0] = LLVMIntType(32); - elem_types[1] = LLVMArrayType(LLVMFloatType(), 4); - elem_types[2] = LLVMArrayType(elem_types[1], data_elems); + elem_types[DRAW_JIT_VERTEX_VERTEX_ID] = LLVMIntTypeInContext(gallivm->context, 32); + elem_types[DRAW_JIT_VERTEX_CLIP] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); + elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4); + elem_types[DRAW_JIT_VERTEX_DATA] = LLVMArrayType(elem_types[1], data_elems); + + vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); +#if HAVE_LLVM < 0x0300 + LLVMAddTypeName(gallivm->module, struct_name, vertex_header); - vertex_header = LLVMStructType(elem_types, Elements(elem_types), 0); + LLVMInvalidateStructLayout(gallivm->target, vertex_header); +#endif /* these are bit-fields and we can't take address of them LP_CHECK_MEMBER_OFFSET(struct vertex_header, clipmask, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_CLIPMASK); LP_CHECK_MEMBER_OFFSET(struct vertex_header, edgeflag, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_EDGEFLAG); LP_CHECK_MEMBER_OFFSET(struct vertex_header, pad, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_PAD); LP_CHECK_MEMBER_OFFSET(struct vertex_header, vertex_id, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_VERTEX_ID); */ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_CLIP); + LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos, + target, vertex_header, + DRAW_JIT_VERTEX_PRE_CLIP_POS); LP_CHECK_MEMBER_OFFSET(struct vertex_header, data, - llvm->target, vertex_header, + target, vertex_header, DRAW_JIT_VERTEX_DATA); - LLVMAddTypeName(llvm->module, struct_name, vertex_header); + assert(LLVMABISizeOfType(target, vertex_header) == + offsetof(struct vertex_header, data[data_elems])); - return LLVMPointerType(vertex_header, 0); + return vertex_header; } -struct draw_llvm * -draw_llvm_create(struct draw_context *draw) + +/** + * Create LLVM types for various structures. + */ +static void +create_jit_types(struct draw_llvm *llvm) { - struct draw_llvm *llvm; + struct gallivm_state *gallivm = llvm->gallivm; + LLVMTypeRef texture_type, context_type, buffer_type, vb_type; - llvm = CALLOC_STRUCT( draw_llvm ); - if (!llvm) - return NULL; + texture_type = create_jit_texture_type(gallivm, "texture"); - llvm->draw = draw; - llvm->engine = draw->engine; + context_type = create_jit_context_type(gallivm, texture_type, "draw_jit_context"); + llvm->context_ptr_type = LLVMPointerType(context_type, 0); - debug_assert(llvm->engine); + buffer_type = LLVMPointerType(LLVMIntTypeInContext(gallivm->context, 8), 0); + llvm->buffer_ptr_type = LLVMPointerType(buffer_type, 0); - llvm->module = LLVMModuleCreateWithName("draw_llvm"); - llvm->provider = LLVMCreateModuleProviderForExistingModule(llvm->module); + vb_type = create_jit_vertex_buffer_type(gallivm, "pipe_vertex_buffer"); + llvm->vb_ptr_type = LLVMPointerType(vb_type, 0); +} - LLVMAddModuleProvider(llvm->engine, llvm->provider); - llvm->target = LLVMGetExecutionEngineTargetData(llvm->engine); +static LLVMTypeRef +get_context_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->context_ptr_type) + create_jit_types(llvm); + return llvm->context_ptr_type; +} - llvm->pass = LLVMCreateFunctionPassManager(llvm->provider); - LLVMAddTargetData(llvm->target, llvm->pass); - if ((gallivm_debug & GALLIVM_DEBUG_NO_OPT) == 0) { - /* These are the passes currently listed in llvm-c/Transforms/Scalar.h, - * but there are more on SVN. */ - /* TODO: Add more passes */ +static LLVMTypeRef +get_buffer_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->buffer_ptr_type) + create_jit_types(llvm); + return llvm->buffer_ptr_type; +} - LLVMAddCFGSimplificationPass(llvm->pass); - if (HAVE_LLVM >= 0x207 && sizeof(void*) == 4) { - /* For LLVM >= 2.7 and 32-bit build, use this order of passes to - * avoid generating bad code. - * Test with piglit glsl-vs-sqrt-zero test. - */ - LLVMAddConstantPropagationPass(llvm->pass); - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - } - else { - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - LLVMAddConstantPropagationPass(llvm->pass); - } +static LLVMTypeRef +get_vb_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->vb_ptr_type) + create_jit_types(llvm); + return llvm->vb_ptr_type; +} - if(util_cpu_caps.has_sse4_1) { - /* FIXME: There is a bug in this pass, whereby the combination of fptosi - * and sitofp (necessary for trunc/floor/ceil/round implementation) - * somehow becomes invalid code. - */ - LLVMAddInstructionCombiningPass(llvm->pass); - } - LLVMAddGVNPass(llvm->pass); - } else { - /* We need at least this pass to prevent the backends to fail in - * unexpected ways. - */ - LLVMAddPromoteMemoryToRegisterPass(llvm->pass); - } +static LLVMTypeRef +get_vertex_header_ptr_type(struct draw_llvm *llvm) +{ + if (!llvm->vertex_header_ptr_type) + create_jit_types(llvm); + return llvm->vertex_header_ptr_type; +} + + +/** + * Create per-context LLVM info. + */ +struct draw_llvm * +draw_llvm_create(struct draw_context *draw, struct gallivm_state *gallivm) +{ + struct draw_llvm *llvm; + + llvm = CALLOC_STRUCT( draw_llvm ); + if (!llvm) + return NULL; - init_globals(llvm); + lp_build_init(); + + llvm->draw = draw; + llvm->gallivm = gallivm; if (gallivm_debug & GALLIVM_DEBUG_IR) { - LLVMDumpModule(llvm->module); + LLVMDumpModule(llvm->gallivm->module); } llvm->nr_variants = 0; make_empty_list(&llvm->vs_variants_list); + gallivm_register_garbage_collector_callback( + draw_llvm_garbage_collect_callback, llvm); + return llvm; } + +/** + * Free per-context LLVM info. + */ void draw_llvm_destroy(struct draw_llvm *llvm) { - LLVMDisposePassManager(llvm->pass); + gallivm_remove_garbage_collector_callback( + draw_llvm_garbage_collect_callback, llvm); + /* XXX free other draw_llvm data? */ FREE(llvm); } + +/** + * Create LLVM-generated code for a vertex shader. + */ struct draw_llvm_variant * draw_llvm_create_variant(struct draw_llvm *llvm, unsigned num_inputs, @@ -310,6 +425,7 @@ draw_llvm_create_variant(struct draw_llvm *llvm, struct draw_llvm_variant *variant; struct llvm_vertex_shader *shader = llvm_vertex_shader(llvm->draw->vs.vertex_shader); + LLVMTypeRef vertex_header; variant = MALLOC(sizeof *variant + shader->variant_key_size - @@ -321,10 +437,12 @@ draw_llvm_create_variant(struct draw_llvm *llvm, memcpy(&variant->key, key, shader->variant_key_size); - llvm->vertex_header_ptr_type = create_vertex_header(llvm, num_inputs); + vertex_header = create_jit_vertex_header(llvm->gallivm, num_inputs); + + llvm->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); - draw_llvm_generate(llvm, variant); - draw_llvm_generate_elts(llvm, variant); + draw_llvm_generate(llvm, variant, FALSE); /* linear */ + draw_llvm_generate(llvm, variant, TRUE); /* elts */ variant->shader = shader; variant->list_item_global.base = variant; @@ -335,17 +453,20 @@ draw_llvm_create_variant(struct draw_llvm *llvm, return variant; } + static void generate_vs(struct draw_llvm *llvm, LLVMBuilderRef builder, - LLVMValueRef (*outputs)[NUM_CHANNELS], - const LLVMValueRef (*inputs)[NUM_CHANNELS], + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], + const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], + const struct lp_bld_tgsi_system_values *system_values, LLVMValueRef context_ptr, - struct lp_build_sampler_soa *draw_sampler) + struct lp_build_sampler_soa *draw_sampler, + boolean clamp_vertex_color) { const struct tgsi_token *tokens = llvm->draw->vs.vertex_shader->state.tokens; struct lp_type vs_type; - LLVMValueRef consts_ptr = draw_jit_context_vs_constants(builder, context_ptr); + LLVMValueRef consts_ptr = draw_jit_context_vs_constants(llvm->gallivm, context_ptr); struct lp_build_sampler_soa *sampler = 0; memset(&vs_type, 0, sizeof vs_type); @@ -362,42 +483,75 @@ generate_vs(struct draw_llvm *llvm, tgsi_dump(tokens, 0); } - if (llvm->draw->num_sampler_views && - llvm->draw->num_samplers) + if (llvm->draw->num_sampler_views && llvm->draw->num_samplers) sampler = draw_sampler; - lp_build_tgsi_soa(builder, + lp_build_tgsi_soa(llvm->gallivm, tokens, vs_type, NULL /*struct lp_build_mask_context *mask*/, consts_ptr, + system_values, NULL /*pos*/, inputs, outputs, sampler, &llvm->draw->vs.vertex_shader->info); + + { + LLVMValueRef out; + unsigned chan, attrib; + struct lp_build_context bld; + struct tgsi_shader_info* info = &llvm->draw->vs.vertex_shader->info; + lp_build_context_init(&bld, llvm->gallivm, vs_type); + + for (attrib = 0; attrib < info->num_outputs; ++attrib) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + if (outputs[attrib][chan]) { + switch (info->output_semantic_name[attrib]) { + case TGSI_SEMANTIC_COLOR: + case TGSI_SEMANTIC_BCOLOR: + if (clamp_vertex_color) { + out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); + out = lp_build_clamp(&bld, out, bld.zero, bld.one); + LLVMBuildStore(builder, out, outputs[attrib][chan]); + } + break; + case TGSI_SEMANTIC_FOG: + if (chan == 1 || chan == 2) + LLVMBuildStore(builder, bld.zero, outputs[attrib][chan]); + else if (chan == 3) + LLVMBuildStore(builder, bld.one, outputs[attrib][chan]); + break; + } + } + } + } + } } + #if DEBUG_STORE static void print_vectorf(LLVMBuilderRef builder, LLVMValueRef vec) { LLVMValueRef val[4]; val[0] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + lp_build_const_int32(gallivm, 0), ""); val[1] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + lp_build_const_int32(gallivm, 1), ""); val[2] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + lp_build_const_int32(gallivm, 2), ""); val[3] = LLVMBuildExtractElement(builder, vec, - LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_const_int32(gallivm, 3), ""); lp_build_printf(builder, "vector = [%f, %f, %f, %f]\n", val[0], val[1], val[2], val[3]); } #endif + static void -generate_fetch(LLVMBuilderRef builder, +generate_fetch(struct gallivm_state *gallivm, LLVMValueRef vbuffers_ptr, LLVMValueRef *res, struct pipe_vertex_element *velem, @@ -405,26 +559,23 @@ generate_fetch(LLVMBuilderRef builder, LLVMValueRef index, LLVMValueRef instance_id) { - LLVMValueRef indices = LLVMConstInt(LLVMInt64Type(), velem->vertex_buffer_index, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef indices = + LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), + velem->vertex_buffer_index, 0); LLVMValueRef vbuffer_ptr = LLVMBuildGEP(builder, vbuffers_ptr, &indices, 1, ""); - LLVMValueRef vb_stride = draw_jit_vbuffer_stride(builder, vbuf); - LLVMValueRef vb_max_index = draw_jit_vbuffer_max_index(builder, vbuf); - LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(builder, vbuf); - LLVMValueRef cond; + LLVMValueRef vb_stride = draw_jit_vbuffer_stride(gallivm, vbuf); + LLVMValueRef vb_buffer_offset = draw_jit_vbuffer_offset(gallivm, vbuf); LLVMValueRef stride; if (velem->instance_divisor) { /* array index = instance_id / instance_divisor */ index = LLVMBuildUDiv(builder, instance_id, - LLVMConstInt(LLVMInt32Type(), velem->instance_divisor, 0), + lp_build_const_int32(gallivm, velem->instance_divisor), "instance_divisor"); } - /* limit index to min(inex, vb_max_index) */ - cond = LLVMBuildICmp(builder, LLVMIntULE, index, vb_max_index, ""); - index = LLVMBuildSelect(builder, cond, index, vb_max_index, ""); - stride = LLVMBuildMul(builder, vb_stride, index, ""); vbuffer_ptr = LLVMBuildLoad(builder, vbuffer_ptr, "vbuffer"); @@ -433,23 +584,25 @@ generate_fetch(LLVMBuilderRef builder, vb_buffer_offset, ""); stride = LLVMBuildAdd(builder, stride, - LLVMConstInt(LLVMInt32Type(), velem->src_offset, 0), + lp_build_const_int32(gallivm, velem->src_offset), ""); /*lp_build_printf(builder, "vbuf index = %d, stride is %d\n", indices, stride);*/ vbuffer_ptr = LLVMBuildGEP(builder, vbuffer_ptr, &stride, 1, ""); - *res = draw_llvm_translate_from(builder, vbuffer_ptr, velem->src_format); + *res = draw_llvm_translate_from(gallivm, vbuffer_ptr, velem->src_format); } + static LLVMValueRef -aos_to_soa(LLVMBuilderRef builder, +aos_to_soa(struct gallivm_state *gallivm, LLVMValueRef val0, LLVMValueRef val1, LLVMValueRef val2, LLVMValueRef val3, LLVMValueRef channel) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef ex, res; ex = LLVMBuildExtractElement(builder, val0, @@ -457,77 +610,80 @@ aos_to_soa(LLVMBuilderRef builder, res = LLVMBuildInsertElement(builder, LLVMConstNull(LLVMTypeOf(val0)), ex, - LLVMConstInt(LLVMInt32Type(), 0, 0), + lp_build_const_int32(gallivm, 0), ""); ex = LLVMBuildExtractElement(builder, val1, channel, ""); res = LLVMBuildInsertElement(builder, res, ex, - LLVMConstInt(LLVMInt32Type(), 1, 0), + lp_build_const_int32(gallivm, 1), ""); ex = LLVMBuildExtractElement(builder, val2, channel, ""); res = LLVMBuildInsertElement(builder, res, ex, - LLVMConstInt(LLVMInt32Type(), 2, 0), + lp_build_const_int32(gallivm, 2), ""); ex = LLVMBuildExtractElement(builder, val3, channel, ""); res = LLVMBuildInsertElement(builder, res, ex, - LLVMConstInt(LLVMInt32Type(), 3, 0), + lp_build_const_int32(gallivm, 3), ""); return res; } + static void -soa_to_aos(LLVMBuilderRef builder, - LLVMValueRef soa[NUM_CHANNELS], - LLVMValueRef aos[NUM_CHANNELS]) +soa_to_aos(struct gallivm_state *gallivm, + LLVMValueRef soa[TGSI_NUM_CHANNELS], + LLVMValueRef aos[TGSI_NUM_CHANNELS]) { + LLVMBuilderRef builder = gallivm->builder; LLVMValueRef comp; int i = 0; - debug_assert(NUM_CHANNELS == 4); + debug_assert(TGSI_NUM_CHANNELS == 4); aos[0] = LLVMConstNull(LLVMTypeOf(soa[0])); aos[1] = aos[2] = aos[3] = aos[0]; - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef channel = LLVMConstInt(LLVMInt32Type(), i, 0); + for (i = 0; i < TGSI_NUM_CHANNELS; ++i) { + LLVMValueRef channel = lp_build_const_int32(gallivm, i); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 0, 0), ""); + lp_build_const_int32(gallivm, 0), ""); aos[0] = LLVMBuildInsertElement(builder, aos[0], comp, channel, ""); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 1, 0), ""); + lp_build_const_int32(gallivm, 1), ""); aos[1] = LLVMBuildInsertElement(builder, aos[1], comp, channel, ""); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 2, 0), ""); + lp_build_const_int32(gallivm, 2), ""); aos[2] = LLVMBuildInsertElement(builder, aos[2], comp, channel, ""); comp = LLVMBuildExtractElement(builder, soa[i], - LLVMConstInt(LLVMInt32Type(), 3, 0), ""); + lp_build_const_int32(gallivm, 3), ""); aos[3] = LLVMBuildInsertElement(builder, aos[3], comp, channel, ""); } } + static void -convert_to_soa(LLVMBuilderRef builder, - LLVMValueRef (*aos)[NUM_CHANNELS], - LLVMValueRef (*soa)[NUM_CHANNELS], +convert_to_soa(struct gallivm_state *gallivm, + LLVMValueRef (*aos)[TGSI_NUM_CHANNELS], + LLVMValueRef (*soa)[TGSI_NUM_CHANNELS], int num_attribs) { int i; - debug_assert(NUM_CHANNELS == 4); + debug_assert(TGSI_NUM_CHANNELS == 4); for (i = 0; i < num_attribs; ++i) { LLVMValueRef val0 = aos[i][0]; @@ -535,34 +691,51 @@ convert_to_soa(LLVMBuilderRef builder, LLVMValueRef val2 = aos[i][2]; LLVMValueRef val3 = aos[i][3]; - soa[i][0] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 0, 0)); - soa[i][1] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 1, 0)); - soa[i][2] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 2, 0)); - soa[i][3] = aos_to_soa(builder, val0, val1, val2, val3, - LLVMConstInt(LLVMInt32Type(), 3, 0)); + soa[i][0] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 0)); + soa[i][1] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 1)); + soa[i][2] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 2)); + soa[i][3] = aos_to_soa(gallivm, val0, val1, val2, val3, + lp_build_const_int32(gallivm, 3)); } } + static void -store_aos(LLVMBuilderRef builder, +store_aos(struct gallivm_state *gallivm, LLVMValueRef io_ptr, LLVMValueRef index, - LLVMValueRef value) + LLVMValueRef value, + LLVMValueRef clipmask, boolean have_clipdist) { - LLVMValueRef id_ptr = draw_jit_header_id(builder, io_ptr); - LLVMValueRef data_ptr = draw_jit_header_data(builder, io_ptr); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef id_ptr = draw_jit_header_id(gallivm, io_ptr); + LLVMValueRef data_ptr = draw_jit_header_data(gallivm, io_ptr); LLVMValueRef indices[3]; + LLVMValueRef val; + int vertex_id_pad_edgeflag; - indices[0] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[0] = lp_build_const_int32(gallivm, 0); indices[1] = index; - indices[2] = LLVMConstInt(LLVMInt32Type(), 0, 0); + indices[2] = lp_build_const_int32(gallivm, 0); + + /* If this assertion fails, it means we need to update the bit twidding + * code here. See struct vertex_header in draw_private.h. + */ + assert(DRAW_TOTAL_CLIP_PLANES==14); + /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */ + vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES); + if (have_clipdist) + vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1); + val = lp_build_const_int32(gallivm, vertex_id_pad_edgeflag); + /* OR with the clipmask */ + val = LLVMBuildOr(builder, val, clipmask, ""); + + /* store vertex header */ + LLVMBuildStore(builder, val, id_ptr); - /* undefined vertex */ - LLVMBuildStore(builder, LLVMConstInt(LLVMInt32Type(), - 0xffff, 0), id_ptr); #if DEBUG_STORE lp_build_printf(builder, " ---- %p storing attribute %d (io = %p)\n", data_ptr, index, io_ptr); @@ -571,7 +744,7 @@ store_aos(LLVMBuilderRef builder, /*lp_build_printf(builder, " ---- %p storing at %d (%p) ", io_ptr, index, data_ptr); print_vectorf(builder, value);*/ data_ptr = LLVMBuildBitCast(builder, data_ptr, - LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatType(), 4), 0), 0), + LLVMPointerType(LLVMArrayType(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), 4), 0), 0), "datavec"); data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 2, ""); @@ -583,10 +756,10 @@ store_aos(LLVMBuilderRef builder, LLVMValueRef gep0, gep1, gep2, gep3; data_ptr = LLVMBuildGEP(builder, data_ptr, indices, 3, ""); - idx0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - idx1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - idx2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - idx3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + idx0 = lp_build_const_int32(gallivm, 0); + idx1 = lp_build_const_int32(gallivm, 1); + idx2 = lp_build_const_int32(gallivm, 2); + idx3 = lp_build_const_int32(gallivm, 3); x = LLVMBuildExtractElement(builder, value, idx0, ""); @@ -612,21 +785,26 @@ store_aos(LLVMBuilderRef builder, #endif } + static void -store_aos_array(LLVMBuilderRef builder, +store_aos_array(struct gallivm_state *gallivm, LLVMValueRef io_ptr, - LLVMValueRef aos[NUM_CHANNELS], + LLVMValueRef aos[TGSI_NUM_CHANNELS], int attrib, - int num_outputs) + int num_outputs, + LLVMValueRef clipmask, + boolean have_clipdist) { - LLVMValueRef attr_index = LLVMConstInt(LLVMInt32Type(), attrib, 0); - LLVMValueRef ind0 = LLVMConstInt(LLVMInt32Type(), 0, 0); - LLVMValueRef ind1 = LLVMConstInt(LLVMInt32Type(), 1, 0); - LLVMValueRef ind2 = LLVMConstInt(LLVMInt32Type(), 2, 0); - LLVMValueRef ind3 = LLVMConstInt(LLVMInt32Type(), 3, 0); + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib); + LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3); LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; - - debug_assert(NUM_CHANNELS == 4); + LLVMValueRef clipmask0, clipmask1, clipmask2, clipmask3; + + debug_assert(TGSI_NUM_CHANNELS == 4); io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); @@ -637,24 +815,36 @@ store_aos_array(LLVMBuilderRef builder, io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + clipmask0 = LLVMBuildExtractElement(builder, clipmask, + ind0, ""); + clipmask1 = LLVMBuildExtractElement(builder, clipmask, + ind1, ""); + clipmask2 = LLVMBuildExtractElement(builder, clipmask, + ind2, ""); + clipmask3 = LLVMBuildExtractElement(builder, clipmask, + ind3, ""); + #if DEBUG_STORE - lp_build_printf(builder, " io = %p, indexes[%d, %d, %d, %d]\n", - io_ptr, ind0, ind1, ind2, ind3); + lp_build_printf(builder, "io = %p, indexes[%d, %d, %d, %d]\n, clipmask0 = %x, clipmask1 = %x, clipmask2 = %x, clipmask3 = %x\n", + io_ptr, ind0, ind1, ind2, ind3, clipmask0, clipmask1, clipmask2, clipmask3); #endif - - store_aos(builder, io0_ptr, attr_index, aos[0]); - store_aos(builder, io1_ptr, attr_index, aos[1]); - store_aos(builder, io2_ptr, attr_index, aos[2]); - store_aos(builder, io3_ptr, attr_index, aos[3]); + /* store for each of the 4 vertices */ + store_aos(gallivm, io0_ptr, attr_index, aos[0], clipmask0, have_clipdist); + store_aos(gallivm, io1_ptr, attr_index, aos[1], clipmask1, have_clipdist); + store_aos(gallivm, io2_ptr, attr_index, aos[2], clipmask2, have_clipdist); + store_aos(gallivm, io3_ptr, attr_index, aos[3], clipmask3, have_clipdist); } + static void -convert_to_aos(LLVMBuilderRef builder, +convert_to_aos(struct gallivm_state *gallivm, LLVMValueRef io, - LLVMValueRef (*outputs)[NUM_CHANNELS], + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], + LLVMValueRef clipmask, int num_outputs, - int max_vertices) + int max_vertices, boolean have_clipdist) { + LLVMBuilderRef builder = gallivm->builder; unsigned chan, attrib; #if DEBUG_STORE @@ -663,8 +853,8 @@ convert_to_aos(LLVMBuilderRef builder, for (attrib = 0; attrib < num_outputs; ++attrib) { LLVMValueRef soa[4]; LLVMValueRef aos[4]; - for(chan = 0; chan < NUM_CHANNELS; ++chan) { - if(outputs[attrib][chan]) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + if (outputs[attrib][chan]) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[attrib][chan], ""); lp_build_name(out, "output%u.%c", attrib, "xyzw"[chan]); /*lp_build_printf(builder, "output %d : %d ", @@ -672,355 +862,637 @@ convert_to_aos(LLVMBuilderRef builder, LLVMConstInt(LLVMInt32Type(), chan, 0)); print_vectorf(builder, out);*/ soa[chan] = out; - } else + } + else { soa[chan] = 0; + } } - soa_to_aos(builder, soa, aos); - store_aos_array(builder, + soa_to_aos(gallivm, soa, aos); + store_aos_array(gallivm, io, aos, attrib, - num_outputs); + num_outputs, + clipmask, have_clipdist); } #if DEBUG_STORE lp_build_printf(builder, " # storing end\n"); #endif } + +/** + * Stores original vertex positions in clip coordinates + * There is probably a more efficient way to do this, 4 floats at once + * rather than extracting each element one by one. + * idx is the output to store things too, if pre_clip_pos is set + * we store the pos to the idx, if not we store the clipvertex to it. + */ static void -draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +store_clip(struct gallivm_state *gallivm, + LLVMValueRef io_ptr, + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], + boolean pre_clip_pos, int idx) { - LLVMTypeRef arg_types[8]; - LLVMTypeRef func_type; - LLVMValueRef context_ptr; - LLVMBasicBlockRef block; - LLVMBuilderRef builder; - LLVMValueRef start, end, count, stride, step, io_itr; - LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; - LLVMValueRef instance_id; - struct draw_context *draw = llvm->draw; - unsigned i, j; - struct lp_build_context bld; - struct lp_build_loop_state lp_loop; - const int max_vertices = 4; - LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; - void *code; - struct lp_build_sampler_soa *sampler = 0; - - arg_types[0] = llvm->context_ptr_type; /* context */ - arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ - arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ - arg_types[3] = LLVMInt32Type(); /* start */ - arg_types[4] = LLVMInt32Type(); /* count */ - arg_types[5] = LLVMInt32Type(); /* stride */ - arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ - arg_types[7] = LLVMInt32Type(); /* instance_id */ - - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - - variant->function = LLVMAddFunction(llvm->module, "draw_llvm_shader", func_type); - LLVMSetFunctionCallConv(variant->function, LLVMCCallConv); - for(i = 0; i < Elements(arg_types); ++i) - if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function, i), LLVMNoAliasAttribute); - - context_ptr = LLVMGetParam(variant->function, 0); - io_ptr = LLVMGetParam(variant->function, 1); - vbuffers_ptr = LLVMGetParam(variant->function, 2); - start = LLVMGetParam(variant->function, 3); - count = LLVMGetParam(variant->function, 4); - stride = LLVMGetParam(variant->function, 5); - vb_ptr = LLVMGetParam(variant->function, 6); - instance_id = LLVMGetParam(variant->function, 7); - - lp_build_name(context_ptr, "context"); - lp_build_name(io_ptr, "io"); - lp_build_name(vbuffers_ptr, "vbuffers"); - lp_build_name(start, "start"); - lp_build_name(count, "count"); - lp_build_name(stride, "stride"); - lp_build_name(vb_ptr, "vb"); - lp_build_name(instance_id, "instance_id"); - - /* - * Function body - */ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef out[4]; + LLVMValueRef indices[2]; + LLVMValueRef io0_ptr, io1_ptr, io2_ptr, io3_ptr; + LLVMValueRef clip_ptr0, clip_ptr1, clip_ptr2, clip_ptr3; + LLVMValueRef clip0_ptr, clip1_ptr, clip2_ptr, clip3_ptr; + LLVMValueRef out0elem, out1elem, out2elem, out3elem; + int i; - block = LLVMAppendBasicBlock(variant->function, "entry"); - builder = LLVMCreateBuilder(); - LLVMPositionBuilderAtEnd(builder, block); + LLVMValueRef ind0 = lp_build_const_int32(gallivm, 0); + LLVMValueRef ind1 = lp_build_const_int32(gallivm, 1); + LLVMValueRef ind2 = lp_build_const_int32(gallivm, 2); + LLVMValueRef ind3 = lp_build_const_int32(gallivm, 3); + + indices[0] = + indices[1] = lp_build_const_int32(gallivm, 0); + + out[0] = LLVMBuildLoad(builder, outputs[idx][0], ""); /*x0 x1 x2 x3*/ + out[1] = LLVMBuildLoad(builder, outputs[idx][1], ""); /*y0 y1 y2 y3*/ + out[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 z2 z3*/ + out[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 w2 w3*/ + + io0_ptr = LLVMBuildGEP(builder, io_ptr, &ind0, 1, ""); + io1_ptr = LLVMBuildGEP(builder, io_ptr, &ind1, 1, ""); + io2_ptr = LLVMBuildGEP(builder, io_ptr, &ind2, 1, ""); + io3_ptr = LLVMBuildGEP(builder, io_ptr, &ind3, 1, ""); + + if (!pre_clip_pos) { + clip_ptr0 = draw_jit_header_clip(gallivm, io0_ptr); + clip_ptr1 = draw_jit_header_clip(gallivm, io1_ptr); + clip_ptr2 = draw_jit_header_clip(gallivm, io2_ptr); + clip_ptr3 = draw_jit_header_clip(gallivm, io3_ptr); + } else { + clip_ptr0 = draw_jit_header_pre_clip_pos(gallivm, io0_ptr); + clip_ptr1 = draw_jit_header_pre_clip_pos(gallivm, io1_ptr); + clip_ptr2 = draw_jit_header_pre_clip_pos(gallivm, io2_ptr); + clip_ptr3 = draw_jit_header_pre_clip_pos(gallivm, io3_ptr); + } - lp_build_context_init(&bld, builder, lp_type_int(32)); + for (i = 0; i<4; i++) { + clip0_ptr = LLVMBuildGEP(builder, clip_ptr0, indices, 2, ""); /* x0 */ + clip1_ptr = LLVMBuildGEP(builder, clip_ptr1, indices, 2, ""); /* x1 */ + clip2_ptr = LLVMBuildGEP(builder, clip_ptr2, indices, 2, ""); /* x2 */ + clip3_ptr = LLVMBuildGEP(builder, clip_ptr3, indices, 2, ""); /* x3 */ + + out0elem = LLVMBuildExtractElement(builder, out[i], ind0, ""); /* x0 */ + out1elem = LLVMBuildExtractElement(builder, out[i], ind1, ""); /* x1 */ + out2elem = LLVMBuildExtractElement(builder, out[i], ind2, ""); /* x2 */ + out3elem = LLVMBuildExtractElement(builder, out[i], ind3, ""); /* x3 */ + + LLVMBuildStore(builder, out0elem, clip0_ptr); + LLVMBuildStore(builder, out1elem, clip1_ptr); + LLVMBuildStore(builder, out2elem, clip2_ptr); + LLVMBuildStore(builder, out3elem, clip3_ptr); + + indices[1]= LLVMBuildAdd(builder, indices[1], ind1, ""); + } - end = lp_build_add(&bld, start, count); +} - step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); - /* code generated texture sampling */ - sampler = draw_llvm_sampler_soa_create( - draw_llvm_variant_key_samplers(&variant->key), - context_ptr); +/** + * Equivalent of _mm_set1_ps(a) + */ +static LLVMValueRef +vec4f_from_scalar(struct gallivm_state *gallivm, + LLVMValueRef a, + const char *name) +{ + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMValueRef res = LLVMGetUndef(LLVMVectorType(float_type, 4)); + int i; -#if DEBUG_STORE - lp_build_printf(builder, "start = %d, end = %d, step = %d\n", - start, end, step); -#endif - lp_build_loop_begin(builder, start, &lp_loop); - { - LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; - LLVMValueRef io; - const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + for (i = 0; i < 4; ++i) { + LLVMValueRef index = lp_build_const_int32(gallivm, i); + res = LLVMBuildInsertElement(gallivm->builder, res, a, + index, i == 3 ? name : ""); + } - io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); - io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); -#if DEBUG_STORE - lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", - io_itr, io, lp_loop.counter); -#endif - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef true_index = LLVMBuildAdd( - builder, - lp_loop.counter, - LLVMConstInt(LLVMInt32Type(), i, 0), ""); - for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { - struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; - LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), - velem->vertex_buffer_index, - 0); - LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, - &vb_index, 1, ""); - generate_fetch(builder, vbuffers_ptr, - &aos_attribs[j][i], velem, vb, true_index, - instance_id); - } - } - convert_to_soa(builder, aos_attribs, inputs, - draw->pt.nr_vertex_elements); + return res; +} - ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; - generate_vs(llvm, - builder, - outputs, - ptr_aos, - context_ptr, - sampler); - convert_to_aos(builder, io, outputs, - draw->vs.vertex_shader->info.num_outputs, - max_vertices); +/** + * Transforms the outputs for viewport mapping + */ +static void +generate_viewport(struct draw_llvm *llvm, + LLVMBuilderRef builder, + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], + LLVMValueRef context_ptr) +{ + int i; + struct gallivm_state *gallivm = llvm->gallivm; + struct lp_type f32_type = lp_type_float_vec(32); + LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 w2 w3*/ + LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr); + + /* for 1/w convention*/ + out3 = LLVMBuildFDiv(builder, const1, out3, ""); + LLVMBuildStore(builder, out3, outputs[0][3]); + + /* Viewport Mapping */ + for (i=0; i<3; i++) { + LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 x2 x3*/ + LLVMValueRef scale; + LLVMValueRef trans; + LLVMValueRef scale_i; + LLVMValueRef trans_i; + LLVMValueRef index; + + index = lp_build_const_int32(gallivm, i); + scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + index = lp_build_const_int32(gallivm, i+4); + trans_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); + + scale = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, scale_i, ""), "scale"); + trans = vec4f_from_scalar(gallivm, LLVMBuildLoad(builder, trans_i, ""), "trans"); + + /* divide by w */ + out = LLVMBuildFMul(builder, out, out3, ""); + /* mult by scale */ + out = LLVMBuildFMul(builder, out, scale, ""); + /* add translation */ + out = LLVMBuildFAdd(builder, out, trans, ""); + + /* store transformed outputs */ + LLVMBuildStore(builder, out, outputs[0][i]); } - lp_build_loop_end_cond(builder, end, step, LLVMIntUGE, &lp_loop); - - sampler->destroy(sampler); - -#ifdef PIPE_ARCH_X86 - /* Avoid corrupting the FPU stack on 32bit OSes. */ - lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); -#endif + +} - LLVMBuildRetVoid(builder); - LLVMDisposeBuilder(builder); +/** + * Returns clipmask as 4xi32 bitmask for the 4 vertices + */ +static LLVMValueRef +generate_clipmask(struct draw_llvm *llvm, + LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], + boolean clip_xy, + boolean clip_z, + boolean clip_user, + boolean clip_halfz, + unsigned ucp_enable, + LLVMValueRef context_ptr, + boolean *have_clipdist) +{ + struct gallivm_state *gallivm = llvm->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef mask; /* stores the <4xi32> clipmasks */ + LLVMValueRef test, temp; + LLVMValueRef zero, shift; + LLVMValueRef pos_x, pos_y, pos_z, pos_w; + LLVMValueRef cv_x, cv_y, cv_z, cv_w; + LLVMValueRef plane1, planes, plane_ptr, sum; + struct lp_type f32_type = lp_type_float_vec(32); + const unsigned pos = draw_current_shader_position_output(llvm->draw); + const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw); + int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance; + bool have_cd = false; + unsigned cd[2]; + + cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0); + cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1); + + if (cd[0] != pos || cd[1] != pos) + have_cd = true; + + mask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); + zero = lp_build_const_vec(gallivm, f32_type, 0); /* 0.0f 0.0f 0.0f 0.0f */ + shift = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1); /* 1 1 1 1 */ /* - * Translate the LLVM IR into machine code. + * load clipvertex and position from correct locations. + * if they are the same just load them once. */ -#ifdef DEBUG - if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) { - lp_debug_dump_value(variant->function); - assert(0); + pos_x = LLVMBuildLoad(builder, outputs[pos][0], ""); /*x0 x1 x2 x3*/ + pos_y = LLVMBuildLoad(builder, outputs[pos][1], ""); /*y0 y1 y2 y3*/ + pos_z = LLVMBuildLoad(builder, outputs[pos][2], ""); /*z0 z1 z2 z3*/ + pos_w = LLVMBuildLoad(builder, outputs[pos][3], ""); /*w0 w1 w2 w3*/ + + if (clip_user && cv != pos) { + cv_x = LLVMBuildLoad(builder, outputs[cv][0], ""); /*x0 x1 x2 x3*/ + cv_y = LLVMBuildLoad(builder, outputs[cv][1], ""); /*y0 y1 y2 y3*/ + cv_z = LLVMBuildLoad(builder, outputs[cv][2], ""); /*z0 z1 z2 z3*/ + cv_w = LLVMBuildLoad(builder, outputs[cv][3], ""); /*w0 w1 w2 w3*/ + } else { + cv_x = pos_x; + cv_y = pos_y; + cv_z = pos_z; + cv_w = pos_w; } -#endif - LLVMRunFunctionPassManager(llvm->pass, variant->function); + /* Cliptest, for hardwired planes */ + if (clip_xy) { + /* plane 1 */ + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); + temp = shift; + test = LLVMBuildAnd(builder, test, temp, ""); + mask = test; + + /* plane 2 */ + test = LLVMBuildFAdd(builder, pos_x, pos_w, ""); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 3 */ + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + + /* plane 4 */ + test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } - if (gallivm_debug & GALLIVM_DEBUG_IR) { - lp_debug_dump_value(variant->function); - debug_printf("\n"); + if (clip_z) { + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 16); + if (clip_halfz) { + /* plane 5 */ + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + else { + /* plane 5 */ + test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + /* plane 6 */ + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); + temp = LLVMBuildShl(builder, temp, shift, ""); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + + if (clip_user) { + LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr); + LLVMValueRef indices[3]; + + /* userclip planes */ + while (ucp_enable) { + unsigned plane_idx = ffs(ucp_enable)-1; + ucp_enable &= ~(1 << plane_idx); + plane_idx += 6; + + if (have_cd && num_written_clipdistance) { + LLVMValueRef clipdist; + int i; + i = plane_idx - 6; + + *have_clipdist = TRUE; + if (i < 4) { + clipdist = LLVMBuildLoad(builder, outputs[cd[0]][i], ""); + } else { + clipdist = LLVMBuildLoad(builder, outputs[cd[1]][i-4], ""); + } + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, clipdist); + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } else { + indices[0] = lp_build_const_int32(gallivm, 0); + indices[1] = lp_build_const_int32(gallivm, plane_idx); + + indices[2] = lp_build_const_int32(gallivm, 0); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_x"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_x"); + sum = LLVMBuildFMul(builder, planes, cv_x, ""); + + indices[2] = lp_build_const_int32(gallivm, 1); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_y"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_y"); + test = LLVMBuildFMul(builder, planes, cv_y, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = lp_build_const_int32(gallivm, 2); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_z"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_z"); + test = LLVMBuildFMul(builder, planes, cv_z, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + indices[2] = lp_build_const_int32(gallivm, 3); + plane_ptr = LLVMBuildGEP(builder, planes_ptr, indices, 3, ""); + plane1 = LLVMBuildLoad(builder, plane_ptr, "plane_w"); + planes = vec4f_from_scalar(gallivm, plane1, "plane4_w"); + test = LLVMBuildFMul(builder, planes, cv_w, ""); + sum = LLVMBuildFAdd(builder, sum, test, ""); + + test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, sum); + temp = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 1 << plane_idx); + test = LLVMBuildAnd(builder, test, temp, ""); + mask = LLVMBuildOr(builder, mask, test, ""); + } + } } + return mask; +} - code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function); - variant->jit_func = (draw_jit_vert_func)pointer_to_func(code); - if (gallivm_debug & GALLIVM_DEBUG_ASM) { - lp_disassemble(code); +/** + * Returns boolean if any clipping has occurred + * Used zero/non-zero i32 value to represent boolean + */ +static void +clipmask_bool(struct gallivm_state *gallivm, + LLVMValueRef clipmask, + LLVMValueRef ret_ptr) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMValueRef temp; + int i; + + for (i=0; i<4; i++) { + temp = LLVMBuildExtractElement(builder, clipmask, + lp_build_const_int32(gallivm, i) , ""); + ret = LLVMBuildOr(builder, ret, temp, ""); } - lp_func_delete_body(variant->function); + + LLVMBuildStore(builder, ret, ret_ptr); } static void -draw_llvm_generate_elts(struct draw_llvm *llvm, struct draw_llvm_variant *variant) +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, + boolean elts) { + struct gallivm_state *gallivm = llvm->gallivm; + LLVMContextRef context = gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); LLVMTypeRef arg_types[8]; LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; - LLVMValueRef fetch_elts, fetch_count, stride, step, io_itr; + LLVMValueRef end, start; + LLVMValueRef count, fetch_elts, fetch_count; + LLVMValueRef stride, step, io_itr; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; - LLVMValueRef instance_id; + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + LLVMValueRef one = lp_build_const_int32(gallivm, 1); struct draw_context *draw = llvm->draw; + const struct tgsi_shader_info *vs_info = &draw->vs.vertex_shader->info; unsigned i, j; struct lp_build_context bld; struct lp_build_loop_state lp_loop; const int max_vertices = 4; - LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][NUM_CHANNELS]; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef fetch_max; void *code; struct lp_build_sampler_soa *sampler = 0; - - arg_types[0] = llvm->context_ptr_type; /* context */ - arg_types[1] = llvm->vertex_header_ptr_type; /* vertex_header */ - arg_types[2] = llvm->buffer_ptr_type; /* vbuffers */ - arg_types[3] = LLVMPointerType(LLVMInt32Type(), 0); /* fetch_elts * */ - arg_types[4] = LLVMInt32Type(); /* fetch_count */ - arg_types[5] = LLVMInt32Type(); /* stride */ - arg_types[6] = llvm->vb_ptr_type; /* pipe_vertex_buffer's */ - arg_types[7] = LLVMInt32Type(); /* instance_id */ - - func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0); - - variant->function_elts = LLVMAddFunction(llvm->module, "draw_llvm_shader_elts", - func_type); - LLVMSetFunctionCallConv(variant->function_elts, LLVMCCallConv); - for(i = 0; i < Elements(arg_types); ++i) - if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) - LLVMAddAttribute(LLVMGetParam(variant->function_elts, i), + LLVMValueRef ret, ret_ptr; + const boolean bypass_viewport = variant->key.bypass_viewport; + const boolean enable_cliptest = variant->key.clip_xy || + variant->key.clip_z || + variant->key.clip_user; + LLVMValueRef variant_func; + const unsigned pos = draw_current_shader_position_output(llvm->draw); + const unsigned cv = draw_current_shader_clipvertex_output(llvm->draw); + boolean have_clipdist = FALSE; + struct lp_bld_tgsi_system_values system_values; + + memset(&system_values, 0, sizeof(system_values)); + + arg_types[0] = get_context_ptr_type(llvm); /* context */ + arg_types[1] = get_vertex_header_ptr_type(llvm); /* vertex_header */ + arg_types[2] = get_buffer_ptr_type(llvm); /* vbuffers */ + if (elts) + arg_types[3] = LLVMPointerType(int32_type, 0);/* fetch_elts * */ + else + arg_types[3] = int32_type; /* start */ + arg_types[4] = int32_type; /* fetch_count / count */ + arg_types[5] = int32_type; /* stride */ + arg_types[6] = get_vb_ptr_type(llvm); /* pipe_vertex_buffer's */ + arg_types[7] = int32_type; /* instance_id */ + + func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); + + variant_func = LLVMAddFunction(gallivm->module, + elts ? "draw_llvm_shader_elts" : "draw_llvm_shader", + func_type); + + if (elts) + variant->function_elts = variant_func; + else + variant->function = variant_func; + + LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); + for (i = 0; i < Elements(arg_types); ++i) + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant_func, i), LLVMNoAliasAttribute); - context_ptr = LLVMGetParam(variant->function_elts, 0); - io_ptr = LLVMGetParam(variant->function_elts, 1); - vbuffers_ptr = LLVMGetParam(variant->function_elts, 2); - fetch_elts = LLVMGetParam(variant->function_elts, 3); - fetch_count = LLVMGetParam(variant->function_elts, 4); - stride = LLVMGetParam(variant->function_elts, 5); - vb_ptr = LLVMGetParam(variant->function_elts, 6); - instance_id = LLVMGetParam(variant->function_elts, 7); + context_ptr = LLVMGetParam(variant_func, 0); + io_ptr = LLVMGetParam(variant_func, 1); + vbuffers_ptr = LLVMGetParam(variant_func, 2); + stride = LLVMGetParam(variant_func, 5); + vb_ptr = LLVMGetParam(variant_func, 6); + system_values.instance_id = LLVMGetParam(variant_func, 7); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); lp_build_name(vbuffers_ptr, "vbuffers"); - lp_build_name(fetch_elts, "fetch_elts"); - lp_build_name(fetch_count, "fetch_count"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); - lp_build_name(instance_id, "instance_id"); + lp_build_name(system_values.instance_id, "instance_id"); + + if (elts) { + fetch_elts = LLVMGetParam(variant_func, 3); + fetch_count = LLVMGetParam(variant_func, 4); + lp_build_name(fetch_elts, "fetch_elts"); + lp_build_name(fetch_count, "fetch_count"); + start = count = NULL; + } + else { + start = LLVMGetParam(variant_func, 3); + count = LLVMGetParam(variant_func, 4); + lp_build_name(start, "start"); + lp_build_name(count, "count"); + fetch_elts = fetch_count = NULL; + } /* * Function body */ - block = LLVMAppendBasicBlock(variant->function_elts, "entry"); - builder = LLVMCreateBuilder(); + block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); + builder = gallivm->builder; LLVMPositionBuilderAtEnd(builder, block); - lp_build_context_init(&bld, builder, lp_type_int(32)); + lp_build_context_init(&bld, gallivm, lp_type_int(32)); - step = LLVMConstInt(LLVMInt32Type(), max_vertices, 0); + /* function will return non-zero i32 value if any clipped vertices */ + ret_ptr = lp_build_alloca(gallivm, int32_type, ""); + LLVMBuildStore(builder, zero, ret_ptr); /* code generated texture sampling */ sampler = draw_llvm_sampler_soa_create( draw_llvm_variant_key_samplers(&variant->key), context_ptr); - fetch_max = LLVMBuildSub(builder, fetch_count, - LLVMConstInt(LLVMInt32Type(), 1, 0), - "fetch_max"); + if (elts) { + start = zero; + end = fetch_count; + } + else { + end = lp_build_add(&bld, start, count); + } + + step = lp_build_const_int32(gallivm, max_vertices); - lp_build_loop_begin(builder, LLVMConstInt(LLVMInt32Type(), 0, 0), &lp_loop); + fetch_max = LLVMBuildSub(builder, end, one, "fetch_max"); + + lp_build_loop_begin(&lp_loop, gallivm, start); { - LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS]; - LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][NUM_CHANNELS] = { { 0 } }; + LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; + LLVMValueRef aos_attribs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS] = { { 0 } }; LLVMValueRef io; - const LLVMValueRef (*ptr_aos)[NUM_CHANNELS]; + LLVMValueRef clipmask; /* holds the clipmask value */ + const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS]; + + if (elts) + io_itr = lp_loop.counter; + else + io_itr = LLVMBuildSub(builder, lp_loop.counter, start, ""); - io_itr = lp_loop.counter; io = LLVMBuildGEP(builder, io_ptr, &io_itr, 1, ""); #if DEBUG_STORE lp_build_printf(builder, " --- io %d = %p, loop counter %d\n", io_itr, io, lp_loop.counter); #endif - for (i = 0; i < NUM_CHANNELS; ++i) { - LLVMValueRef true_index = LLVMBuildAdd( - builder, - lp_loop.counter, - LLVMConstInt(LLVMInt32Type(), i, 0), ""); - LLVMValueRef fetch_ptr; + system_values.vertex_id = lp_build_zero(gallivm, lp_type_uint_vec(32)); + for (i = 0; i < TGSI_NUM_CHANNELS; ++i) { + LLVMValueRef true_index = + LLVMBuildAdd(builder, + lp_loop.counter, + lp_build_const_int32(gallivm, i), ""); /* make sure we're not out of bounds which can happen * if fetch_count % 4 != 0, because on the last iteration * a few of the 4 vertex fetches will be out of bounds */ true_index = lp_build_min(&bld, true_index, fetch_max); - fetch_ptr = LLVMBuildGEP(builder, fetch_elts, - &true_index, 1, ""); - true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + if (elts) { + LLVMValueRef fetch_ptr; + fetch_ptr = LLVMBuildGEP(builder, fetch_elts, + &true_index, 1, ""); + true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); + } + + system_values.vertex_id = LLVMBuildInsertElement(gallivm->builder, + system_values.vertex_id, true_index, + lp_build_const_int32(gallivm, i), ""); for (j = 0; j < draw->pt.nr_vertex_elements; ++j) { struct pipe_vertex_element *velem = &draw->pt.vertex_element[j]; - LLVMValueRef vb_index = LLVMConstInt(LLVMInt32Type(), - velem->vertex_buffer_index, - 0); - LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, - &vb_index, 1, ""); - generate_fetch(builder, vbuffers_ptr, + LLVMValueRef vb_index = + lp_build_const_int32(gallivm, velem->vertex_buffer_index); + LLVMValueRef vb = LLVMBuildGEP(builder, vb_ptr, &vb_index, 1, ""); + generate_fetch(gallivm, vbuffers_ptr, &aos_attribs[j][i], velem, vb, true_index, - instance_id); + system_values.instance_id); } } - convert_to_soa(builder, aos_attribs, inputs, + convert_to_soa(gallivm, aos_attribs, inputs, draw->pt.nr_vertex_elements); - ptr_aos = (const LLVMValueRef (*)[NUM_CHANNELS]) inputs; + ptr_aos = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) inputs; generate_vs(llvm, builder, outputs, ptr_aos, + &system_values, context_ptr, - sampler); + sampler, + variant->key.clamp_vertex_color); + + /* store original positions in clip before further manipulation */ + store_clip(gallivm, io, outputs, 0, cv); + store_clip(gallivm, io, outputs, 1, pos); + + /* do cliptest */ + if (enable_cliptest) { + /* allocate clipmask, assign it integer type */ + clipmask = generate_clipmask(llvm, outputs, + variant->key.clip_xy, + variant->key.clip_z, + variant->key.clip_user, + variant->key.clip_halfz, + variant->key.ucp_enable, + context_ptr, &have_clipdist); + /* return clipping boolean value for function */ + clipmask_bool(gallivm, clipmask, ret_ptr); + } + else { + clipmask = lp_build_const_int_vec(gallivm, lp_type_int_vec(32), 0); + } + + /* do viewport mapping */ + if (!bypass_viewport) { + generate_viewport(llvm, builder, outputs, context_ptr); + } - convert_to_aos(builder, io, outputs, - draw->vs.vertex_shader->info.num_outputs, - max_vertices); + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(gallivm, io, outputs, clipmask, + vs_info->num_outputs, max_vertices, have_clipdist); } - lp_build_loop_end_cond(builder, fetch_count, step, LLVMIntUGE, &lp_loop); - sampler->destroy(sampler); - -#ifdef PIPE_ARCH_X86 - /* Avoid corrupting the FPU stack on 32bit OSes. */ - lp_build_intrinsic(builder, "llvm.x86.mmx.emms", LLVMVoidType(), NULL, 0); -#endif + lp_build_loop_end_cond(&lp_loop, end, step, LLVMIntUGE); - LLVMBuildRetVoid(builder); + sampler->destroy(sampler); - LLVMDisposeBuilder(builder); + ret = LLVMBuildLoad(builder, ret_ptr, ""); + LLVMBuildRet(builder, ret); /* * Translate the LLVM IR into machine code. */ #ifdef DEBUG - if(LLVMVerifyFunction(variant->function_elts, LLVMPrintMessageAction)) { - lp_debug_dump_value(variant->function_elts); + if (LLVMVerifyFunction(variant_func, LLVMPrintMessageAction)) { + lp_debug_dump_value(variant_func); assert(0); } #endif - LLVMRunFunctionPassManager(llvm->pass, variant->function_elts); + LLVMRunFunctionPassManager(gallivm->passmgr, variant_func); if (gallivm_debug & GALLIVM_DEBUG_IR) { - lp_debug_dump_value(variant->function_elts); + lp_debug_dump_value(variant_func); debug_printf("\n"); } - code = LLVMGetPointerToGlobal(llvm->draw->engine, variant->function_elts); - variant->jit_func_elts = (draw_jit_vert_func_elts)pointer_to_func(code); + code = LLVMGetPointerToGlobal(gallivm->engine, variant_func); + if (elts) + variant->jit_func_elts = (draw_jit_vert_func_elts) pointer_to_func(code); + else + variant->jit_func = (draw_jit_vert_func) pointer_to_func(code); if (gallivm_debug & GALLIVM_DEBUG_ASM) { lp_disassemble(code); } - lp_func_delete_body(variant->function_elts); + lp_func_delete_body(variant_func); } @@ -1033,11 +1505,23 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key = (struct draw_llvm_variant_key *)store; + key->clamp_vertex_color = llvm->draw->rasterizer->clamp_vertex_color; /**/ + /* Presumably all variants of the shader should have the same * number of vertex elements - ie the number of shader inputs. */ key->nr_vertex_elements = llvm->draw->pt.nr_vertex_elements; + /* will have to rig this up properly later */ + key->clip_xy = llvm->draw->clip_xy; + key->clip_z = llvm->draw->clip_z; + key->clip_user = llvm->draw->clip_user; + key->bypass_viewport = llvm->draw->identity_viewport; + key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; + key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); + key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; + key->pad = 0; + /* All variants of this shader will have the same value for * nr_samplers. Not yet trying to compact away holes in the * sampler array. @@ -1061,29 +1545,30 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) return key; } + void draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned sampler_idx, uint32_t width, uint32_t height, uint32_t depth, - uint32_t last_level, - uint32_t row_stride[DRAW_MAX_TEXTURE_LEVELS], - uint32_t img_stride[DRAW_MAX_TEXTURE_LEVELS], - const void *data[DRAW_MAX_TEXTURE_LEVELS]) + uint32_t first_level, uint32_t last_level, + uint32_t row_stride[PIPE_MAX_TEXTURE_LEVELS], + uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], + const void *data[PIPE_MAX_TEXTURE_LEVELS]) { unsigned j; struct draw_jit_texture *jit_tex; assert(sampler_idx < PIPE_MAX_VERTEX_SAMPLERS); - jit_tex = &draw->llvm->jit_context.textures[sampler_idx]; jit_tex->width = width; jit_tex->height = height; jit_tex->depth = depth; + jit_tex->first_level = first_level; jit_tex->last_level = last_level; - for (j = 0; j <= last_level; j++) { + for (j = first_level; j <= last_level; j++) { jit_tex->data[j] = data[j]; jit_tex->row_stride[j] = row_stride[j]; jit_tex->img_stride[j] = img_stride[j]; @@ -1103,7 +1588,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw) jit_tex->min_lod = draw->samplers[i]->min_lod; jit_tex->max_lod = draw->samplers[i]->max_lod; jit_tex->lod_bias = draw->samplers[i]->lod_bias; - COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color); + COPY_4V(jit_tex->border_color, draw->samplers[i]->border_color.f); } } } @@ -1113,19 +1598,16 @@ void draw_llvm_destroy_variant(struct draw_llvm_variant *variant) { struct draw_llvm *llvm = variant->llvm; - struct draw_context *draw = llvm->draw; if (variant->function_elts) { - if (variant->function_elts) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function_elts); + LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, + variant->function_elts); LLVMDeleteFunction(variant->function_elts); } if (variant->function) { - if (variant->function) - LLVMFreeMachineCodeForFunction(draw->engine, - variant->function); + LLVMFreeMachineCodeForFunction(llvm->gallivm->engine, + variant->function); LLVMDeleteFunction(variant->function); }