From: Zack Rusin Date: Mon, 31 May 2010 06:20:34 +0000 (-0400) Subject: gallium: a lot more complete implementation of stream output X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=c9db97c8229689060fab0edee7df717f804b99ce;p=mesa.git gallium: a lot more complete implementation of stream output interface wise we have everything needed by d3d10 and gl transform feedback. the draw module misses implementation of some corner cases (e.g. when stream output wants different number of components per output than normal rendering paths) --- diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index 3b202b5bc77..7c8db19f5c3 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -33,6 +33,7 @@ C_SOURCES = \ draw/draw_pt_fetch_shade_emit.c \ draw/draw_pt_fetch_shade_pipeline.c \ draw/draw_pt_post_vs.c \ + draw/draw_pt_so_emit.c \ draw/draw_pt_util.c \ draw/draw_pt_varray.c \ draw/draw_pt_vcache.c \ diff --git a/src/gallium/auxiliary/SConscript b/src/gallium/auxiliary/SConscript index af4d5edcf86..6242ab0c59d 100644 --- a/src/gallium/auxiliary/SConscript +++ b/src/gallium/auxiliary/SConscript @@ -78,6 +78,7 @@ source = [ 'draw/draw_pt_fetch_shade_emit.c', 'draw/draw_pt_fetch_shade_pipeline.c', 'draw/draw_pt_post_vs.c', + 'draw/draw_pt_so_emit.c', 'draw/draw_pt_util.c', 'draw/draw_pt_varray.c', 'draw/draw_pt_vcache.c', diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 3e3ea320cc0..b9032468da3 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -177,6 +177,22 @@ void draw_pt_emit_destroy( struct pt_emit *emit ); struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); +/******************************************************************************* + * HW stream output emit: + */ +struct pt_so_emit; + +void draw_pt_so_emit_prepare( struct pt_so_emit *emit, + unsigned prim ); + +void draw_pt_so_emit( struct pt_so_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride ); + +void draw_pt_so_emit_destroy( struct pt_so_emit *emit ); + +struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw ); /******************************************************************************* * API vertex fetch: diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 3b4237245e6..57502768444 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -39,7 +39,6 @@ struct pt_emit { struct draw_context *draw; struct translate *translate; - struct translate *so_translate; struct translate_cache *cache; unsigned prim; @@ -47,51 +46,6 @@ struct pt_emit { const struct vertex_info *vinfo; }; -static void -prepare_so_emit( struct pt_emit *emit, - const struct vertex_info *vinfo ) -{ - struct draw_context *draw = emit->draw; - unsigned i; - struct translate_key hw_key; - unsigned dst_offset = 0; - unsigned output_stride = 0; - boolean has_so = (draw->so.state.num_outputs > 0); - - if (has_so) { - - for (i = 0; i < draw->so.state.num_outputs; ++i) { - unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); - unsigned output_format = draw->so.state.format[i]; - unsigned output_bytes = util_format_get_blocksize(output_format); - - hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; - hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - hw_key.element[i].input_buffer = 0; - hw_key.element[i].input_offset = src_offset; - hw_key.element[i].instance_divisor = 0; - hw_key.element[i].output_format = output_format; - hw_key.element[i].output_offset = dst_offset; - - dst_offset += output_bytes; - output_stride += output_bytes; - } - hw_key.nr_elements = draw->so.state.num_outputs; - hw_key.output_stride = output_stride; - - if (!emit->so_translate || - translate_key_compare(&emit->so_translate->key, &hw_key) != 0) - { - translate_key_sanitize(&hw_key); - emit->so_translate = translate_cache_find(emit->cache, &hw_key); - } - } else { - /* no stream output */ - emit->so_translate = NULL; - } -} - - void draw_pt_emit_prepare( struct pt_emit *emit, unsigned prim, unsigned *max_vertices ) @@ -168,8 +122,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); - prepare_so_emit( emit, vinfo ); - /* even number */ *max_vertices = *max_vertices & ~1; } @@ -184,7 +136,6 @@ void draw_pt_emit( struct pt_emit *emit, { struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; - struct translate *so_translate = emit->so_translate; struct vbuf_render *render = draw->render; void *hw_verts; @@ -236,18 +187,6 @@ void draw_pt_emit( struct pt_emit *emit, draw->instance_id, hw_verts ); - if (so_translate) { - void *so_buffer = draw->so.buffers[0]; - - /* XXX we only support single output buffer right now */ - debug_assert(draw->so.num_buffers >= 0); - - so_translate->set_buffer(translate, 0, vertex_data, - stride, ~0); - so_translate->run(translate, 0, vertex_count, - draw->instance_id, so_buffer); - } - render->unmap_vertices( render, 0, vertex_count - 1 ); @@ -267,7 +206,6 @@ void draw_pt_emit_linear(struct pt_emit *emit, { struct draw_context *draw = emit->draw; struct translate *translate = emit->translate; - struct translate *so_translate = emit->so_translate; struct vbuf_render *render = draw->render; void *hw_verts; @@ -309,18 +247,6 @@ void draw_pt_emit_linear(struct pt_emit *emit, draw->instance_id, hw_verts); - if (so_translate) { - void *so_buffer = draw->so.buffers[0]; - - /* XXX we only support single output buffer right now */ - debug_assert(draw->so.num_buffers >= 0); - - so_translate->set_buffer(translate, 0, - vertex_data, stride, count - 1); - so_translate->run(translate, 0, count, - draw->instance_id, so_buffer); - } - if (0) { unsigned i; for (i = 0; i < count; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index da5106463a7..0d15ba26423 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -40,6 +40,7 @@ struct fetch_pipeline_middle_end { struct draw_context *draw; struct pt_emit *emit; + struct pt_so_emit *so_emit; struct pt_fetch *fetch; struct pt_post_vs *post_vs; @@ -100,6 +101,8 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? true : false) ); + draw_pt_so_emit_prepare( fpme->so_emit, prim ); + if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, prim, @@ -174,6 +177,12 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, fpme->vertex_size); } + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size ); + if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, fetch_count, @@ -258,6 +267,12 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, fpme->vertex_size); } + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size ); + if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, count, @@ -336,6 +351,12 @@ static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle fpme->vertex_size); } + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size ); + if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, count, @@ -385,6 +406,9 @@ static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) if (fpme->emit) draw_pt_emit_destroy( fpme->emit ); + if (fpme->so_emit) + draw_pt_so_emit_destroy( fpme->so_emit ); + if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); @@ -419,6 +443,10 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * if (!fpme->emit) goto fail; + fpme->so_emit = draw_pt_so_emit_create( draw ); + if (!fpme->so_emit) + goto fail; + return &fpme->base; fail: diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index d56889b5f65..584a1a53d46 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -40,6 +40,7 @@ struct llvm_middle_end { struct draw_context *draw; struct pt_emit *emit; + struct pt_so_emit *so_emit; struct pt_fetch *fetch; struct pt_post_vs *post_vs; @@ -104,6 +105,7 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, (boolean)draw->rasterizer->gl_rasterization_rules, (draw->vs.edgeflag_output ? true : false) ); + draw_pt_so_emit_prepare( fpme->so_emit, prim ); if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, prim, @@ -175,6 +177,12 @@ static void llvm_middle_end_run( struct draw_pt_middle_end *middle, fpme->vertex_size, draw->pt.vertex_buffer ); + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size ); + if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, fetch_count, @@ -239,6 +247,12 @@ static void llvm_middle_end_linear_run( struct draw_pt_middle_end *middle, fpme->vertex_size, draw->pt.vertex_buffer ); + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size ); + if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, count, @@ -294,6 +308,12 @@ llvm_middle_end_linear_run_elts( struct draw_pt_middle_end *middle, fpme->vertex_size, draw->pt.vertex_buffer ); + /* stream output needs to be done before clipping */ + draw_pt_so_emit( fpme->so_emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size ); + if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, count, @@ -367,6 +387,9 @@ static void llvm_middle_end_destroy( struct draw_pt_middle_end *middle ) if (fpme->emit) draw_pt_emit_destroy( fpme->emit ); + if (fpme->so_emit) + draw_pt_so_emit_destroy( fpme->so_emit ); + if (fpme->post_vs) draw_pt_post_vs_destroy( fpme->post_vs ); @@ -409,6 +432,10 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit_llvm( struct draw_cont if (!fpme->emit) goto fail; + fpme->so_emit = draw_pt_so_emit_create( draw ); + if (!fpme->so_emit) + goto fail; + fpme->llvm = draw_llvm_create(draw); if (!fpme->llvm) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c new file mode 100644 index 00000000000..b982e4c529c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c @@ -0,0 +1,191 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "util/u_memory.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" +#include "util/u_format.h" + +struct pt_so_emit { + struct draw_context *draw; + + struct translate *translate; + + struct translate_cache *cache; + unsigned prim; + + const struct vertex_info *vinfo; + boolean has_so; +}; + +static void +prepare_so_emit( struct pt_so_emit *emit, + const struct vertex_info *vinfo ) +{ + struct draw_context *draw = emit->draw; + unsigned i; + struct translate_key hw_key; + unsigned dst_offset = 0; + unsigned output_stride = 0; + + if (emit->has_so) { + for (i = 0; i < draw->so.state.num_outputs; ++i) { + unsigned src_offset = (draw->so.state.register_index[i] * 4 * + sizeof(float) ); + unsigned output_format; + unsigned emit_sz = 0; + /*unsigned output_bytes = util_format_get_blocksize(output_format); + unsigned nr_compo = util_format_get_nr_components(output_format);*/ + + output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit); + emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit); + + /* doesn't handle EMIT_OMIT */ + assert(emit_sz != 0); + + hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL; + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = 0; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].instance_divisor = 0; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + output_stride += emit_sz; + } + hw_key.nr_elements = draw->so.state.num_outputs; + hw_key.output_stride = output_stride; + + if (!emit->translate || + translate_key_compare(&emit->translate->key, &hw_key) != 0) + { + translate_key_sanitize(&hw_key); + emit->translate = translate_cache_find(emit->cache, &hw_key); + } + } else { + /* no stream output */ + emit->translate = NULL; + } +} + + +void draw_pt_so_emit_prepare( struct pt_so_emit *emit, + unsigned prim ) +{ + struct draw_context *draw = emit->draw; + boolean ok; + + emit->has_so = (draw->so.state.num_outputs > 0); + + if (!emit->has_so) + return; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + emit->prim = prim; + + ok = draw->render->set_primitive(draw->render, emit->prim); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: */ + emit->vinfo = draw->render->get_vertex_info(draw->render); + + prepare_so_emit( emit, emit->vinfo ); +} + + +void draw_pt_so_emit( struct pt_so_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride ) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render *render = draw->render; + void *so_buffer; + + if (!emit->has_so) + return; + + so_buffer = draw->so.buffers[0]; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation??*/ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + if (vertex_count == 0) + return; + + if (vertex_count >= UNDEFINED_VERTEX_ID) { + assert(0); + return; + } + + + /* XXX we only support single output buffer right now */ + debug_assert(draw->so.num_buffers >= 0); + + translate->set_buffer(translate, 0, vertex_data, + stride, ~0); + translate->run(translate, 0, vertex_count, + draw->instance_id, so_buffer); +} + + +struct pt_so_emit *draw_pt_so_emit_create( struct draw_context *draw ) +{ + struct pt_so_emit *emit = CALLOC_STRUCT(pt_so_emit); + if (!emit) + return NULL; + + emit->draw = draw; + emit->cache = translate_cache_create(); + if (!emit->cache) { + FREE(emit); + return NULL; + } + + return emit; +} + +void draw_pt_so_emit_destroy( struct pt_so_emit *emit ) +{ + if (emit->cache) + translate_cache_destroy(emit->cache); + + FREE(emit); +} diff --git a/src/gallium/drivers/softpipe/sp_state_so.c b/src/gallium/drivers/softpipe/sp_state_so.c new file mode 100644 index 00000000000..49bde22e979 --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_state_so.c @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2010 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" + +#include "util/u_format.h" +#include "util/u_memory.h" +#include "draw/draw_context.h" + + +void * +softpipe_create_stream_output_state(struct pipe_context *pipe, + const struct pipe_stream_output_state *templ) +{ + struct sp_so_state *so; + so = (struct sp_so_state *) CALLOC_STRUCT(sp_so_state); + + if (so) { + so->base.num_outputs = templ->num_outputs; + so->base.stride = templ->stride; + memcpy(so->base.output_buffer, + templ->output_buffer, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_index, + templ->register_index, + sizeof(int) * templ->num_outputs); + memcpy(so->base.register_mask, + templ->register_mask, + sizeof(ubyte) * templ->num_outputs); + } + return so; +} + +void +softpipe_bind_stream_output_state(struct pipe_context *pipe, + void *so) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + struct sp_so_state *sp_so = (struct sp_so_state *) so; + + softpipe->so = sp_so; + + softpipe->dirty |= SP_NEW_SO; + + if (sp_so) + draw_set_so_state(softpipe->draw, &sp_so->base); +} + +void +softpipe_delete_stream_output_state(struct pipe_context *pipe, void *so) +{ + FREE( so ); +} + +void +softpipe_set_stream_output_buffers(struct pipe_context *pipe, + struct pipe_resource **buffers, + int *offsets, + int num_buffers) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + int i; + void *map_buffers[PIPE_MAX_SO_BUFFERS]; + + assert(num_buffers <= PIPE_MAX_SO_BUFFERS); + + softpipe->dirty |= SP_NEW_SO_BUFFERS; + + for (i = 0; i < num_buffers; ++i) { + void *mapped = softpipe_resource(buffers[i])->data; + if (offsets[i] >= 0) + map_buffers[i] = ((char*)mapped) + offsets[i]; + else { + /* this is a buffer append */ + assert(!"appending not implemented"); + map_buffers[i] = mapped; + } + } + draw_set_mapped_so_buffers(softpipe->draw, map_buffers, num_buffers); +} diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 33ee066c0c9..771bff524bd 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -348,9 +348,19 @@ struct pipe_resource struct pipe_stream_output_state { + /**< number of the output buffer to insert each element into */ + int output_buffer[PIPE_MAX_SHADER_OUTPUTS]; + /**< which register to grab each output from */ + int register_index[PIPE_MAX_SHADER_OUTPUTS]; /**< format for each output */ - enum pipe_format format[PIPE_MAX_SHADER_OUTPUTS]; + /**< TGSI_WRITEMASK signifying which components to output */ + ubyte register_mask[PIPE_MAX_SHADER_OUTPUTS]; + /**< number of outputs */ int num_outputs; + + /**< stride for an entire vertex, only used if all output_buffers + * are 0 */ + unsigned stride; }; /**