X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fdraw%2Fdraw_pt_fetch_shade_pipeline_llvm.c;h=4789cda931fbe88e1fe1d771dc4e7d356f19c4c2;hb=877128505431adaf817dc8069172ebe4a1cdf5d8;hp=d33969ac7091ac120c9350a3f085bca88bcbd646;hpb=8ebfcf31eb905b7d47e520c04420620ae21bdf4e;p=mesa.git diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index d33969ac709..4789cda931f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2010 VMWare, Inc. + * Copyright 2010 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -18,7 +18,7 @@ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. @@ -27,13 +27,16 @@ #include "util/u_math.h" #include "util/u_memory.h" +#include "util/u_prim.h" #include "draw/draw_context.h" #include "draw/draw_gs.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_prim_assembler.h" #include "draw/draw_vs.h" #include "draw/draw_llvm.h" +#include "gallivm/lp_bld_init.h" struct llvm_middle_end { @@ -57,41 +60,90 @@ struct llvm_middle_end { static void -llvm_middle_end_prepare( struct draw_pt_middle_end *middle, - unsigned in_prim, - unsigned opt, - unsigned *max_vertices ) +llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) { - struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct llvm_vertex_shader *shader = - llvm_vertex_shader(draw->vs.vertex_shader); - struct draw_llvm_variant_key key; - struct draw_llvm_variant *variant = NULL; - struct draw_llvm_variant_list_item *li; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_gs_llvm_variant_key *key; + struct draw_gs_llvm_variant *variant = NULL; + struct draw_gs_llvm_variant_list_item *li; + struct llvm_geometry_shader *shader = llvm_geometry_shader(gs); + char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE]; unsigned i; - unsigned instance_id_index = ~0; + key = draw_gs_llvm_make_variant_key(fpme->llvm, store); - unsigned out_prim = (draw->gs.geometry_shader ? - draw->gs.geometry_shader->output_primitive : - in_prim); + /* Search shader's list of variants for the key */ + li = first_elem(&shader->variants); + while (!at_end(&shader->variants, li)) { + if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) { + variant = li->base; + break; + } + li = next_elem(li); + } - /* Add one to num_outputs because the pipeline occasionally tags on - * an additional texcoord, eg for AA lines. - */ - unsigned nr = MAX2( shader->base.info.num_inputs, - shader->base.info.num_outputs + 1 ); + if (variant) { + /* found the variant, move to head of global list (for LRU) */ + move_to_head(&fpme->llvm->gs_variants_list, + &variant->list_item_global); + } + else { + /* Need to create new variant */ - /* Scan for instanceID system value. - */ - for (i = 0; i < shader->base.info.num_inputs; i++) { - if (shader->base.info.input_semantic_name[i] == TGSI_SEMANTIC_INSTANCEID) { - instance_id_index = i; - break; + /* First check if we've created too many variants. If so, free + * 25% of the LRU to avoid using too much memory. + */ + if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_gs_llvm_variant_list_item *item; + if (is_empty_list(&fpme->llvm->gs_variants_list)) { + break; + } + item = last_elem(&fpme->llvm->gs_variants_list); + assert(item); + assert(item->base); + draw_gs_llvm_destroy_variant(item->base); + } + } + + variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key); + + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->gs_variants_list, + &variant->list_item_global); + fpme->llvm->nr_gs_variants++; + shader->variants_cached++; } } + gs->current_variant = variant; +} + +/** + * Prepare/validate middle part of the vertex pipeline. + * NOTE: if you change this function, also look at the non-LLVM + * function fetch_pipeline_prepare() for similar changes. + */ +static void +llvm_middle_end_prepare( struct draw_pt_middle_end *middle, + unsigned in_prim, + unsigned opt, + unsigned *max_vertices ) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + const unsigned out_prim = gs ? gs->output_primitive : + u_assembled_prim(in_prim); + const unsigned nr = MAX2(vs->info.num_inputs, + draw_total_vs_outputs(draw)); + fpme->input_prim = in_prim; fpme->opt = opt; @@ -102,76 +154,134 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - /* XXX: it's not really gl rasterization rules we care about here, - * but gl vs dx9 clip spaces. - */ draw_pt_post_vs_prepare( fpme->post_vs, - (boolean)draw->bypass_clipping, - (boolean)(draw->identity_viewport), - (boolean)draw->rasterizer->gl_rasterization_rules, - (draw->vs.edgeflag_output ? TRUE : FALSE) ); + out_prim == PIPE_PRIM_POINTS ? + draw->clip_points_xy : draw->clip_xy, + draw->clip_z, + draw->clip_user, + draw->guard_band_xy, + draw->identity_viewport, + draw->rasterizer->clip_halfz, + (draw->vs.edgeflag_output ? TRUE : FALSE) ); - draw_pt_so_emit_prepare( fpme->so_emit ); + draw_pt_so_emit_prepare( fpme->so_emit, gs == NULL ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, out_prim, max_vertices ); - *max_vertices = MAX2( *max_vertices, - DRAW_PIPE_MAX_VERTICES ); + *max_vertices = MAX2( *max_vertices, 4096 ); } else { - *max_vertices = DRAW_PIPE_MAX_VERTICES; + /* limit max fetches by limiting max_vertices */ + *max_vertices = 4096; } /* return even number */ *max_vertices = *max_vertices & ~1; - draw_llvm_make_variant_key(fpme->llvm, &key); + /* Find/create the vertex shader variant */ + { + struct draw_llvm_variant_key *key; + struct draw_llvm_variant *variant = NULL; + struct draw_llvm_variant_list_item *li; + struct llvm_vertex_shader *shader = llvm_vertex_shader(vs); + char store[DRAW_LLVM_MAX_VARIANT_KEY_SIZE]; + unsigned i; - li = first_elem(&shader->variants); - while(!at_end(&shader->variants, li)) { - if(memcmp(&li->base->key, &key, sizeof key) == 0) { - variant = li->base; - break; + key = draw_llvm_make_variant_key(fpme->llvm, store); + + /* Search shader's list of variants for the key */ + li = first_elem(&shader->variants); + while (!at_end(&shader->variants, li)) { + if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) { + variant = li->base; + break; + } + li = next_elem(li); } - li = next_elem(li); - } - if (variant) { - move_to_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); - } - else { - unsigned i; - if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) { - /* - * XXX: should we flush here ? + if (variant) { + /* found the variant, move to head of global list (for LRU) */ + move_to_head(&fpme->llvm->vs_variants_list, + &variant->list_item_global); + } + else { + /* Need to create new variant */ + + /* First check if we've created too many variants. If so, free + * 25% of the LRU to avoid using too much memory. */ - for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { - struct draw_llvm_variant_list_item *item = - last_elem(&fpme->llvm->vs_variants_list); - draw_llvm_destroy_variant(item->base); + if (fpme->llvm->nr_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_llvm_variant_list_item *item; + if (is_empty_list(&fpme->llvm->vs_variants_list)) { + break; + } + item = last_elem(&fpme->llvm->vs_variants_list); + assert(item); + assert(item->base); + draw_llvm_destroy_variant(item->base); + } } - } - variant = draw_llvm_create_variant(fpme->llvm, nr); + variant = draw_llvm_create_variant(fpme->llvm, nr, key); - if (variant) { - insert_at_head(&shader->variants, &variant->list_item_local); - insert_at_head(&fpme->llvm->vs_variants_list, &variant->list_item_global); - fpme->llvm->nr_variants++; - shader->variants_cached++; + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->vs_variants_list, + &variant->list_item_global); + fpme->llvm->nr_variants++; + shader->variants_cached++; + } } + + fpme->current_variant = variant; + } + + if (gs) { + llvm_middle_end_prepare_gs(fpme); } +} - fpme->current_variant = variant; - /*XXX we only support one constant buffer */ - fpme->llvm->jit_context.vs_constants = - draw->pt.user.vs_constants[0]; - fpme->llvm->jit_context.gs_constants = - draw->pt.user.gs_constants[0]; +/** + * Bind/update constant buffer pointers, clip planes and viewport dims. + * These are "light weight" parameters which aren't baked into the + * generated code. Updating these items is much cheaper than revalidating + * and rebuilding the generated pipeline code. + */ +static void +llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle) +{ + struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned i; + + for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) { + int num_consts = + draw->pt.user.vs_constants_size[i] / (sizeof(float) * 4); + fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; + fpme->llvm->jit_context.num_vs_constants[i] = num_consts; + } + for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) { + int num_consts = + draw->pt.user.gs_constants_size[i] / (sizeof(float) * 4); + fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; + fpme->llvm->gs_jit_context.num_constants[i] = num_consts; + } + + fpme->llvm->jit_context.planes = + (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; + fpme->llvm->gs_jit_context.planes = + (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; + + fpme->llvm->jit_context.viewport = (float *) draw->viewports[0].scale; + fpme->llvm->gs_jit_context.viewport = (float *) draw->viewports[0].scale; } @@ -204,7 +314,7 @@ static void emit(struct pt_emit *emit, static void llvm_pipeline_generic( struct draw_pt_middle_end *middle, const struct draw_fetch_info *fetch_info, - const struct draw_prim_info *prim_info ) + const struct draw_prim_info *in_prim_info ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_context *draw = fpme->draw; @@ -213,35 +323,52 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, struct draw_vertex_info llvm_vert_info; struct draw_vertex_info gs_vert_info; struct draw_vertex_info *vert_info; + struct draw_prim_info ia_prim_info; + struct draw_vertex_info ia_vert_info; + const struct draw_prim_info *prim_info = in_prim_info; + boolean free_prim_info = FALSE; unsigned opt = fpme->opt; + unsigned clipped = 0; llvm_vert_info.count = fetch_info->count; llvm_vert_info.vertex_size = fpme->vertex_size; llvm_vert_info.stride = fpme->vertex_size; llvm_vert_info.verts = (struct vertex_header *)MALLOC(fpme->vertex_size * - align(fetch_info->count, 4)); + align(fetch_info->count, lp_native_vector_width / 32)); if (!llvm_vert_info.verts) { assert(0); return; } + if (draw->collect_statistics) { + draw->statistics.ia_vertices += prim_info->count; + draw->statistics.ia_primitives += + u_decomposed_prims_for_vertices(prim_info->prim, prim_info->count); + draw->statistics.vs_invocations += fetch_info->count; + } + if (fetch_info->linear) - fpme->current_variant->jit_func( &fpme->llvm->jit_context, + clipped = fpme->current_variant->jit_func( &fpme->llvm->jit_context, llvm_vert_info.verts, - (const char **)draw->pt.user.vbuffer, + draw->pt.user.vbuffer, fetch_info->start, fetch_info->count, fpme->vertex_size, - draw->pt.vertex_buffer ); + draw->pt.vertex_buffer, + draw->instance_id, + draw->start_index); else - fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, + clipped = fpme->current_variant->jit_func_elts( &fpme->llvm->jit_context, llvm_vert_info.verts, - (const char **)draw->pt.user.vbuffer, + draw->pt.user.vbuffer, fetch_info->elts, + draw->pt.user.eltMax, fetch_info->count, fpme->vertex_size, - draw->pt.vertex_buffer); + draw->pt.vertex_buffer, + draw->instance_id, + draw->pt.user.eltBias); /* Finished with fetch and vs: */ @@ -250,40 +377,72 @@ llvm_pipeline_generic( struct draw_pt_middle_end *middle, if ((opt & PT_SHADE) && gshader) { + struct draw_vertex_shader *vshader = draw->vs.vertex_shader; draw_geometry_shader_run(gshader, draw->pt.user.gs_constants, + draw->pt.user.gs_constants_size, vert_info, prim_info, + &vshader->info, &gs_vert_info, &gs_prim_info); FREE(vert_info->verts); vert_info = &gs_vert_info; prim_info = &gs_prim_info; + } else { + if (draw_prim_assembler_is_required(draw, prim_info, vert_info)) { + draw_prim_assembler_run(draw, prim_info, vert_info, + &ia_prim_info, &ia_vert_info); + + if (ia_vert_info.count) { + FREE(vert_info->verts); + vert_info = &ia_vert_info; + prim_info = &ia_prim_info; + free_prim_info = TRUE; + } + } + } + if (prim_info->count == 0) { + debug_printf("GS/IA didn't emit any vertices!\n"); + + FREE(vert_info->verts); + if (free_prim_info) { + FREE(prim_info->primitive_lengths); + } + return; } /* stream output needs to be done before clipping */ - draw_pt_so_emit( fpme->so_emit, - vert_info, - prim_info ); + draw_pt_so_emit( fpme->so_emit, vert_info, prim_info ); - if (draw_pt_post_vs_run( fpme->post_vs, vert_info )) { - opt |= PT_PIPELINE; - } + draw_stats_clipper_primitives(draw, prim_info); - /* Do we need to run the pipeline? + /* + * if there's no position, need to stop now, or the latter stages + * will try to access non-existent position output. */ - if (opt & PT_PIPELINE) { - pipeline( fpme, - vert_info, - prim_info ); - } - else { - emit( fpme->emit, - vert_info, - prim_info ); + if (draw_current_shader_position_output(draw) != -1) { + if ((opt & PT_SHADE) && gshader) { + clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info ); + } + if (clipped) { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? Now will come here if clipped + */ + if (opt & PT_PIPELINE) { + pipeline( fpme, vert_info, prim_info ); + } + else { + emit( fpme->emit, vert_info, prim_info ); + } } FREE(vert_info->verts); + if (free_prim_info) { + FREE(prim_info->primitive_lengths); + } } @@ -291,7 +450,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -307,6 +467,7 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, prim_info.count = draw_count; prim_info.elts = draw_elts; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; @@ -316,7 +477,8 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, unsigned start, - unsigned count) + unsigned count, + unsigned prim_flags) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -332,6 +494,7 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, prim_info.count = count; prim_info.elts = NULL; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &count; @@ -345,7 +508,8 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, - unsigned draw_count ) + unsigned draw_count, + unsigned prim_flags ) { struct llvm_middle_end *fpme = (struct llvm_middle_end *)middle; struct draw_fetch_info fetch_info; @@ -361,6 +525,7 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, prim_info.count = draw_count; prim_info.elts = draw_elts; prim_info.prim = fpme->input_prim; + prim_info.flags = prim_flags; prim_info.primitive_count = 1; prim_info.primitive_lengths = &draw_count; @@ -392,9 +557,6 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); - if (fpme->llvm) - draw_llvm_destroy( fpme->llvm ); - FREE(middle); } @@ -404,7 +566,7 @@ draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) { struct llvm_middle_end *fpme = 0; - if (!draw->engine) + if (!draw->llvm) return NULL; fpme = CALLOC_STRUCT( llvm_middle_end ); @@ -412,6 +574,7 @@ draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) goto fail; fpme->base.prepare = llvm_middle_end_prepare; + fpme->base.bind_parameters = llvm_middle_end_bind_parameters; fpme->base.run = llvm_middle_end_run; fpme->base.run_linear = llvm_middle_end_linear_run; fpme->base.run_linear_elts = llvm_middle_end_linear_run_elts; @@ -436,7 +599,7 @@ draw_pt_fetch_pipeline_or_emit_llvm(struct draw_context *draw) if (!fpme->so_emit) goto fail; - fpme->llvm = draw_llvm_create(draw); + fpme->llvm = draw->llvm; if (!fpme->llvm) goto fail;