From: Roland Scheidegger Date: Sun, 13 Nov 2016 15:32:24 +0000 (+0100) Subject: draw: unify linear and elts draw jit functions X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=4e1be31f01e279c35de09333fc3e3a1e72fad599;p=mesa.git draw: unify linear and elts draw jit functions The code for elts and linear paths was nearly 100% identical by now - with the elts path simply having some additional gather for the elements in the main loop (with some additional small differences before the main loop). Hence nuke the separate functions and decide this at jit shader execution time (simply based on the presence of the elts pointer). Some analysis shows that the generated vs jit functions seem to be just very minimally more complex than the former elts functions, and almost none of the additional complexity is in the main loop (basically just the branch logic for the branch fetching the actual indices). Compared to linear, the code size of the function is of course a bit larger; however, the actual executed code in the main loop appears to be near 100% identical (the additional code looking up indices is skipped as expected). So, I would not expect a (meaningful) performance difference with the generated code, with either elts or linear. This does, however, roughly halve the compilation time (the compiled shaders should also use only half the memory of course). 
Reviewed-by: Jose Fonseca --- diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index a02c893fb6e..2478b1157cd 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -63,8 +63,7 @@ static void -draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var, - boolean elts); +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *var); struct draw_gs_llvm_iface { @@ -577,17 +576,13 @@ draw_llvm_create_variant(struct draw_llvm *llvm, variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); - draw_llvm_generate(llvm, variant, FALSE); /* linear */ - draw_llvm_generate(llvm, variant, TRUE); /* elts */ + draw_llvm_generate(llvm, variant); gallivm_compile_module(variant->gallivm); variant->jit_func = (draw_jit_vert_func) gallivm_jit_function(variant->gallivm, variant->function); - variant->jit_func_elts = (draw_jit_vert_func_elts) - gallivm_jit_function(variant->gallivm, variant->function_elts); - gallivm_free_ir(variant->gallivm); variant->list_item_global.base = variant; @@ -1565,25 +1560,23 @@ draw_gs_llvm_epilogue(const struct lp_build_tgsi_gs_iface *gs_base, } static void -draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, - boolean elts) +draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant) { struct gallivm_state *gallivm = variant->gallivm; LLVMContextRef context = gallivm->context; LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); LLVMTypeRef arg_types[11]; - unsigned num_arg_types = - elts ? 
ARRAY_SIZE(arg_types) : ARRAY_SIZE(arg_types) - 1; + unsigned num_arg_types = ARRAY_SIZE(arg_types); LLVMTypeRef func_type; LLVMValueRef context_ptr; LLVMBasicBlockRef block; LLVMBuilderRef builder; char func_name[64]; struct lp_type vs_type; - LLVMValueRef count, fetch_elts, fetch_elt_max; - LLVMValueRef vertex_id_offset, start_instance, start; + LLVMValueRef count, fetch_elts, start_or_maxelt, start; + LLVMValueRef vertex_id_offset, start_instance; LLVMValueRef stride, step, io_itr; - LLVMValueRef ind_vec; + LLVMValueRef ind_vec, ind_vec_store, have_elts, fetch_max, tmp; LLVMValueRef io_ptr, vbuffers_ptr, vb_ptr; LLVMValueRef vb_stride[PIPE_MAX_ATTRIBS]; LLVMValueRef map_ptr[PIPE_MAX_ATTRIBS]; @@ -1597,9 +1590,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, unsigned i, j; struct lp_build_context bld, bldivec, blduivec; struct lp_build_loop_state lp_loop; + struct lp_build_if_state if_ctx; const int vector_length = lp_native_vector_width / 32; LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; - LLVMValueRef fetch_max; struct lp_build_sampler_soa *sampler = 0; LLVMValueRef ret, clipmask_bool_ptr; struct draw_llvm_variant_key *key = &variant->key; @@ -1624,8 +1617,8 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, memset(&system_values, 0, sizeof(system_values)); - util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u_%s", - variant->shader->variants_cached, elts ? 
"elts" : "linear"); + util_snprintf(func_name, sizeof(func_name), "draw_llvm_vs_variant%u", + variant->shader->variants_cached); i = 0; arg_types[i++] = get_context_ptr_type(variant); /* context */ @@ -1638,19 +1631,13 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, arg_types[i++] = int32_type; /* instance_id */ arg_types[i++] = int32_type; /* vertex_id_offset */ arg_types[i++] = int32_type; /* start_instance */ - if (elts) { - arg_types[i++] = LLVMPointerType(int32_type, 0); /* fetch_elts */ - } + arg_types[i++] = LLVMPointerType(int32_type, 0); /* fetch_elts */ func_type = LLVMFunctionType(LLVMInt8TypeInContext(context), arg_types, num_arg_types, 0); variant_func = LLVMAddFunction(gallivm->module, func_name, func_type); - - if (elts) - variant->function_elts = variant_func; - else - variant->function = variant_func; + variant->function = variant_func; LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); for (i = 0; i < num_arg_types; ++i) @@ -1661,6 +1648,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, io_ptr = LLVMGetParam(variant_func, 1); vbuffers_ptr = LLVMGetParam(variant_func, 2); count = LLVMGetParam(variant_func, 3); + start_or_maxelt = LLVMGetParam(variant_func, 4); /* * XXX: stride is actually unused. The stride we use is strictly calculated * from the number of outputs (including the draw_extra outputs). 
@@ -1672,29 +1660,19 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, system_values.instance_id = LLVMGetParam(variant_func, 7); vertex_id_offset = LLVMGetParam(variant_func, 8); start_instance = LLVMGetParam(variant_func, 9); + fetch_elts = LLVMGetParam(variant_func, 10); lp_build_name(context_ptr, "context"); lp_build_name(io_ptr, "io"); lp_build_name(vbuffers_ptr, "vbuffers"); lp_build_name(count, "count"); + lp_build_name(start_or_maxelt, "start_or_maxelt"); lp_build_name(stride, "stride"); lp_build_name(vb_ptr, "vb"); lp_build_name(system_values.instance_id, "instance_id"); lp_build_name(vertex_id_offset, "vertex_id_offset"); lp_build_name(start_instance, "start_instance"); - - if (elts) { - fetch_elt_max = LLVMGetParam(variant_func, 4); - fetch_elts = LLVMGetParam(variant_func, 10); - lp_build_name(fetch_elts, "fetch_elts"); - lp_build_name(fetch_elt_max, "fetch_elt_max"); - start = NULL; - } - else { - start = LLVMGetParam(variant_func, 4); - lp_build_name(start, "start"); - fetch_elts = NULL; - } + lp_build_name(fetch_elts, "fetch_elts"); /* * Function body @@ -1735,19 +1713,30 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, ind_vec = LLVMBuildInsertElement(builder, ind_vec, index, index, ""); } + fetch_max = lp_build_alloca(gallivm, int32_type, "fetch_max"); + ind_vec_store = lp_build_alloca(gallivm, bldivec.vec_type, "ind_vec"); - if (elts) { - fetch_max = count; - start = blduivec.zero; + have_elts = LLVMBuildICmp(builder, LLVMIntNE, + LLVMConstPointerNull(arg_types[10]), fetch_elts, ""); + lp_build_if(&if_ctx, gallivm, have_elts); + { + LLVMBuildStore(builder, ind_vec, ind_vec_store); + LLVMBuildStore(builder, count, fetch_max); } - else { - fetch_max = lp_build_add(&bld, start, count); - start = lp_build_broadcast_scalar(&blduivec, start); - ind_vec = lp_build_add(&blduivec, start, ind_vec); + lp_build_else(&if_ctx); + { + tmp = lp_build_add(&bld, count, start_or_maxelt); + 
LLVMBuildStore(builder, tmp, fetch_max); + start = lp_build_broadcast_scalar(&bldivec, start_or_maxelt); + tmp = lp_build_add(&bldivec, start, ind_vec); + LLVMBuildStore(builder, tmp, ind_vec_store); } + lp_build_endif(&if_ctx); + fetch_max = LLVMBuildLoad(builder, fetch_max, ""); fetch_max = LLVMBuildSub(builder, fetch_max, bld.one, "fetch_max"); - fetch_max = lp_build_broadcast_scalar(&blduivec, fetch_max); + fetch_max = lp_build_broadcast_scalar(&bldivec, fetch_max); + ind_vec = LLVMBuildLoad(builder, ind_vec_store, ""); /* * Pre-calculate everything which is constant per shader invocation. @@ -1847,7 +1836,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, LLVMValueRef inputs[PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS]; LLVMValueRef io; LLVMValueRef clipmask; /* holds the clipmask value */ - LLVMValueRef true_index_array; + LLVMValueRef true_index_array, index_store; const LLVMValueRef (*ptr_aos)[TGSI_NUM_CHANNELS]; io_itr = lp_loop.counter; @@ -1875,8 +1864,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, */ true_index_array = lp_build_min(&bldivec, true_index_array, fetch_max); - if (elts) { + index_store = lp_build_alloca_undef(gallivm, bldivec.vec_type, "index_store"); + LLVMBuildStore(builder, true_index_array, index_store); + lp_build_if(&if_ctx, gallivm, have_elts); + { /* * Note: you'd expect some comparison/clamp against fetch_elt_max * here. 
@@ -1906,7 +1898,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, 32, 32, TRUE, fetch_elts, true_index_array, FALSE); + LLVMBuildStore(builder, true_index_array, index_store); } + lp_build_endif(&if_ctx); + + true_index_array = LLVMBuildLoad(builder, index_store, ""); for (j = 0; j < key->nr_vertex_elements; ++j) { struct pipe_vertex_element *velem = &key->vertex_element[j]; diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index 8d4b4ae6db4..2ddf249672e 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -266,26 +266,13 @@ typedef boolean struct vertex_header *io, const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS], unsigned count, - unsigned start, + unsigned start_or_maxelt, unsigned stride, struct pipe_vertex_buffer *vertex_buffers, unsigned instance_id, unsigned vertex_id_offset, - unsigned start_instance); - - -typedef boolean -(*draw_jit_vert_func_elts)(struct draw_jit_context *context, - struct vertex_header *io, - const struct draw_vertex_buffer vbuffers[PIPE_MAX_ATTRIBS], - unsigned count, - unsigned fetch_max_elt, - unsigned stride, - struct pipe_vertex_buffer *vertex_buffers, - unsigned instance_id, - unsigned vertex_id_offset, - unsigned start_instance, - const unsigned *fetch_elts); + unsigned start_instance, + const unsigned *fetch_elts); typedef int @@ -393,9 +380,7 @@ struct draw_llvm_variant LLVMTypeRef vertex_header_ptr_type; LLVMValueRef function; - LLVMValueRef function_elts; draw_jit_vert_func jit_func; - draw_jit_vert_func_elts jit_func_elts; struct llvm_vertex_shader *shader; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index 73069c9de65..0277cbfc83a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -354,6 +354,8 @@ 
llvm_pipeline_generic(struct draw_pt_middle_end *middle, boolean free_prim_info = FALSE; unsigned opt = fpme->opt; boolean clipped = 0; + unsigned start_or_maxelt, vid_base; + const unsigned *elts; llvm_vert_info.count = fetch_info->count; llvm_vert_info.vertex_size = fpme->vertex_size; @@ -373,29 +375,27 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle, draw->statistics.vs_invocations += fetch_info->count; } - if (fetch_info->linear) - clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context, - llvm_vert_info.verts, - draw->pt.user.vbuffer, - fetch_info->count, - fetch_info->start, - fpme->vertex_size, - draw->pt.vertex_buffer, - draw->instance_id, - draw->start_index, - draw->start_instance); - else - clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, - llvm_vert_info.verts, - draw->pt.user.vbuffer, - fetch_info->count, - draw->pt.user.eltMax, - fpme->vertex_size, - draw->pt.vertex_buffer, - draw->instance_id, - draw->pt.user.eltBias, - draw->start_instance, - fetch_info->elts); + if (fetch_info->linear) { + start_or_maxelt = fetch_info->start; + vid_base = draw->start_index; + elts = NULL; + } + else { + start_or_maxelt = draw->pt.user.eltMax; + vid_base = draw->pt.user.eltBias; + elts = fetch_info->elts; + } + clipped = fpme->current_variant->jit_func(&fpme->llvm->jit_context, + llvm_vert_info.verts, + draw->pt.user.vbuffer, + fetch_info->count, + start_or_maxelt, + fpme->vertex_size, + draw->pt.vertex_buffer, + draw->instance_id, + vid_base, + draw->start_instance, + elts); /* Finished with fetch and vs: */