From 874b9265601fcc05b1e32e2be029f3ac6a966c97 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 27 Aug 2007 19:30:11 +0100 Subject: [PATCH] Split out vertex shader/cache/fetch functionality from draw_prim.c --- src/mesa/pipe/draw/draw_arrays.c | 6 +- src/mesa/pipe/draw/draw_prim.c | 416 +----------------------- src/mesa/pipe/draw/draw_vertex.h | 19 ++ src/mesa/pipe/draw/draw_vertex_cache.c | 175 ++++++++++ src/mesa/pipe/draw/draw_vertex_fetch.c | 101 ++++++ src/mesa/pipe/draw/draw_vertex_shader.c | 203 ++++++++++++ src/mesa/sources | 3 + 7 files changed, 513 insertions(+), 410 deletions(-) create mode 100644 src/mesa/pipe/draw/draw_vertex_cache.c create mode 100644 src/mesa/pipe/draw/draw_vertex_fetch.c create mode 100644 src/mesa/pipe/draw/draw_vertex_shader.c diff --git a/src/mesa/pipe/draw/draw_arrays.c b/src/mesa/pipe/draw/draw_arrays.c index f6bf174dc0b..b7d06dd5a70 100644 --- a/src/mesa/pipe/draw/draw_arrays.c +++ b/src/mesa/pipe/draw/draw_arrays.c @@ -55,7 +55,11 @@ draw_arrays(struct draw_context *draw, unsigned prim, /* tell drawing pipeline we're beginning drawing */ draw->pipeline.first->begin( draw->pipeline.first ); - draw_invalidate_vcache( draw ); + /* XXX: Shouldn't really be needed - cache should be invalidated + * after setting new vertex buffers, vertex elements, but not + * between draws. + */ + draw_vertex_cache_invalidate( draw ); draw_set_prim( draw, prim ); diff --git a/src/mesa/pipe/draw/draw_prim.c b/src/mesa/pipe/draw/draw_prim.c index 09616cf315c..b68cca57064 100644 --- a/src/mesa/pipe/draw/draw_prim.c +++ b/src/mesa/pipe/draw/draw_prim.c @@ -58,299 +58,6 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { }; -static INLINE unsigned -compute_clipmask(float cx, float cy, float cz, float cw) -{ - unsigned mask; -#if defined(macintosh) || defined(__powerpc__) - /* on powerpc cliptest is 17% faster in this way. */ - mask = (((cw < cx) << CLIP_RIGHT_SHIFT)); - mask |= (((cw < -cx) << CLIP_LEFT_SHIFT)); - mask |= (((cw < cy) << CLIP_TOP_SHIFT)); - mask |= (((cw < -cy) << CLIP_BOTTOM_SHIFT)); - mask |= (((cw < cz) << CLIP_FAR_SHIFT)); - mask |= (((cw < -cz) << CLIP_NEAR_SHIFT)); -#else /* !defined(macintosh)) */ - mask = 0x0; - if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT; - if ( cx + cw < 0) mask |= CLIP_LEFT_BIT; - if (-cy + cw < 0) mask |= CLIP_TOP_BIT; - if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT; - if (-cz + cw < 0) mask |= CLIP_FAR_BIT; - if ( cz + cw < 0) mask |= CLIP_NEAR_BIT; -#endif /* defined(macintosh) */ - return mask; -} - - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * XXX this might be a temporary thing. - */ -static void -fetch_attrib4(const void *ptr, unsigned format, float attrib[4]) -{ - /* defaults */ - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; - switch (format) { - case PIPE_FORMAT_R32G32B32A32_FLOAT: - attrib[3] = ((float *) ptr)[3]; - /* fall-through */ - case PIPE_FORMAT_R32G32B32_FLOAT: - attrib[2] = ((float *) ptr)[2]; - /* fall-through */ - case PIPE_FORMAT_R32G32_FLOAT: - attrib[1] = ((float *) ptr)[1]; - /* fall-through */ - case PIPE_FORMAT_R32_FLOAT: - attrib[0] = ((float *) ptr)[0]; - break; - default: - assert(0); - } -} - -#if !defined(XSTDCALL) -#if defined(WIN32) -#define XSTDCALL __stdcall -#else -#define XSTDCALL -#endif -#endif - -#if defined(USE_X86_ASM) || defined(SLANG_X86) -typedef void (XSTDCALL *sse2_function)( - const struct tgsi_exec_vector *input, - struct tgsi_exec_vector *output, - float (*constant)[4], - struct tgsi_exec_vector *temporary ); -#endif - -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static void -run_vertex_program(struct draw_context *draw, - unsigned elts[4], unsigned count, - struct vertex_header *vOut[]) -{ - struct tgsi_exec_machine machine; - unsigned int j; - -#if 0 - FILE *file = stdout; -#endif - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(count <= 4); - assert(draw->vertex_shader.outputs_written & (1 << TGSI_ATTRIB_POS)); - -#if 0 - if( file == NULL ) { - file = fopen( "vs-exec.txt", "wt" ); - } -#endif - -#ifdef DEBUG - memset( &machine, 0, sizeof( machine ) ); -#endif - - /* init machine state */ - tgsi_exec_machine_init(&machine, - draw->vertex_shader.tokens, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/ ); - - /* Consts does not require 16 byte alignment. */ - machine.Consts = (float (*)[4]) draw->mapped_constants; - - machine.Inputs = ALIGN16_ASSIGN(inputs); - machine.Outputs = ALIGN16_ASSIGN(outputs); - - -#if 0 - { - unsigned attr; - for (attr = 0; attr < 16; attr++) { - if (draw->vertex_shader.inputs_read & (1 << attr)) { - unsigned buf = draw->vertex_element[attr].vertex_buffer_index; - fprintf(file, "attr %d: buf_off %d src_off %d pitch %d\n", - attr, - draw->vertex_buffer[buf].buffer_offset, - draw->vertex_element[attr].src_offset, - draw->vertex_buffer[buf].pitch); - } - } - } -#endif - - /* load machine inputs */ - for (j = 0; j < count; j++) { - unsigned attr; - for (attr = 0; attr < 16; attr++) { - if (draw->vertex_shader.inputs_read & (1 << attr)) { - unsigned buf = draw->vertex_element[attr].vertex_buffer_index; - const void *src - = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf] - + draw->vertex_buffer[buf].buffer_offset - + draw->vertex_element[attr].src_offset - + elts[j] * draw->vertex_buffer[buf].pitch); - float p[4]; - - fetch_attrib4(src, draw->vertex_element[attr].src_format, p); - - machine.Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/ - machine.Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/ - machine.Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/ - machine.Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/ -#if 0 - fprintf(file, "Input vertex %d: attr %d: %f %f %f %f\n", - j, attr, p[0], p[1], p[2], p[3]); - fflush( file ); -#endif - } - } - } - -#if 0 - printf("Vertex shader Constants:\n"); - { - int i; - for (i = 0; i < 4; i++) { - printf(" %d: %f %f %f %f\n", i, - machine.Consts[i][0], - machine.Consts[i][1], - machine.Consts[i][2], - machine.Consts[i][3]); - } - } -#endif - - /* run shader */ - if( draw->vertex_shader.executable != NULL ) { -#if defined(USE_X86_ASM) || defined(SLANG_X86) - sse2_function func = (sse2_function) draw->vertex_shader.executable; - func( - machine.Inputs, - machine.Outputs, - machine.Consts, - machine.Temps ); -#else - assert( 0 ); -#endif - } - else { - tgsi_exec_machine_run( &machine ); - } - -#if 0 - for (i = 0; i < 4; i++) { - fprintf(file, "VS result: %f %f %f %f\n", - machine.Outputs[0].xyzw[0].f[i], - machine.Outputs[0].xyzw[1].f[i], - machine.Outputs[0].xyzw[2].f[i], - machine.Outputs[0].xyzw[3].f[i]); - } - fflush( file ); -#endif - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - /* Handle attr[0] (position) specially: */ - x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j]; - - vOut[j]->clipmask = compute_clipmask(x, y, z, w) | draw->user_clipmask; - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; -#if 0 - fprintf(file, "Vert %d: wincoord: %f %f %f %f\n", j, - vOut[j]->data[0][0], - vOut[j]->data[0][1], - vOut[j]->data[0][2], - vOut[j]->data[0][3]); - fflush( file ); -#endif - - /* remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->vertex_info.num_attribs; slot++) { - vOut[j]->data[slot][0] = machine.Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine.Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine.Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine.Outputs[slot].xyzw[3].f[j]; -#if 0 - fprintf(file, "output attrib slot %d: %f %f %f %f vert %p\n", - slot, - vOut[j]->data[slot][0], - vOut[j]->data[slot][1], - vOut[j]->data[slot][2], - vOut[j]->data[slot][3], vOut[j]); -#endif - } - } /* loop over vertices */ -} - - -/** - * Called by the draw module when the vertx cache needs to be flushed. - * This involves running the vertex shader. - */ -static void transform_vertices( struct draw_context *draw ) -{ - unsigned i, j; - - /* run vertex shader on vertex cache entries, four per invokation */ - for (i = 0; i < draw->vs.queue_nr; i += 4) { - struct vertex_header *dests[4]; - unsigned elts[4]; - int n; - - for (j = 0; j < 4; j++) { - elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].dest; - } - - n = MIN2(4, draw->vs.queue_nr - i); - assert(n > 0); - assert(n <= 4); - - run_vertex_program(draw, elts, n, dests); - } - - draw->vs.queue_nr = 0; -} - - void draw_flush( struct draw_context *draw ) { struct draw_stage *first = draw->pipeline.first; @@ -358,7 +65,7 @@ void draw_flush( struct draw_context *draw ) /* Make sure all vertices are available: */ - transform_vertices(draw); + draw_vertex_cache_validate(draw); switch (draw->reduced_prim) { case RP_TRI: @@ -385,24 +92,12 @@ void draw_flush( struct draw_context *draw ) } draw->pq.queue_nr = 0; - draw->vcache.referenced = 0; - draw->vcache.overflow = 0; -} - -void draw_invalidate_vcache( struct draw_context *draw ) -{ - unsigned i; - - assert(draw->pq.queue_nr == 0); - assert(draw->vs.queue_nr == 0); - assert(draw->vcache.referenced == 0); - - for (i = 0; i < Elements( draw->vcache.idx ); i++) - draw->vcache.idx[i] = ~0; + draw_vertex_cache_unreference( draw ); } + /* Return a pointer to a freshly queued primitive header. Ensure that * there is room in the vertex cache for a maximum of "nr_verts" new * vertices. Flush primitive and/or vertex queues if necessary to @@ -412,76 +107,15 @@ static struct prim_header *get_queued_prim( struct draw_context *draw, unsigned nr_verts ) { if (draw->pq.queue_nr + 1 >= PRIM_QUEUE_LENGTH || - draw->vcache.overflow + nr_verts >= VCACHE_OVERFLOW) + draw_vertex_cache_check_space( draw, nr_verts )) + { draw_flush( draw ); - - /* The vs queue is sized so that this can never happen: - */ - assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH); - - return &draw->pq.queue[draw->pq.queue_nr++]; -} - - -/* Check if vertex is in cache, otherwise add it. It won't go through - * VS yet, not until there is a flush operation or the VS queue fills up. - */ -static struct vertex_header *get_vertex( struct draw_context *draw, - unsigned i ) -{ - unsigned slot = (i + (i>>5)) & 31; - - /* Cache miss? - */ - if (draw->vcache.idx[slot] != i) { - - /* If slot is in use, use the overflow area: - */ - if (draw->vcache.referenced & (1 << slot)) - slot = VCACHE_SIZE + draw->vcache.overflow++; - else - draw->vcache.referenced |= (1 << slot); /* slot now in use */ - - draw->vcache.idx[slot] = i; - - /* Add to vertex shader queue: - */ - draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot]; - draw->vs.queue[draw->vs.queue_nr].elt = i; - draw->vs.queue_nr++; - - /* Need to set the vertex's edge flag here. If we're being called - * by do_ef_triangle(), that function needs edge flag info! - */ - draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */ } - return draw->vcache.vertex[slot]; -} - - -static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const unsigned *elts = (const unsigned *) draw->mapped_elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ushort *elts = (const ushort *) draw->mapped_elts; - return get_vertex( draw, elts[i] ); + return &draw->pq.queue[draw->pq.queue_nr++]; } -static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ubyte *elts = (const ubyte *) draw->mapped_elts; - return get_vertex( draw, elts[i] ); -} static void do_point( struct draw_context *draw, @@ -536,7 +170,7 @@ static void do_ef_triangle( struct draw_context *draw, struct vertex_header *v0 = draw->get_vertex( draw, i0 ); struct vertex_header *v1 = draw->get_vertex( draw, i1 ); struct vertex_header *v2 = draw->get_vertex( draw, i2 ); - + prim->reset_line_stipple = reset_stipple; prim->edgeflags = ef_mask & ((v0->edgeflag << 0) | @@ -719,42 +353,6 @@ draw_set_prim( struct draw_context *draw, unsigned prim ) } -/** - * Tell the drawing context about the index/element buffer to use - * (ala glDrawElements) - * If no element buffer is to be used (i.e. glDrawArrays) then this - * should be called with eltSize=0 and elements=NULL. - * - * \param draw the drawing context - * \param eltSize size of each element (1, 2 or 4 bytes) - * \param elements the element buffer ptr - */ -void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ) -{ - /* choose the get_vertex() function to use */ - switch (eltSize) { - case 0: - draw->get_vertex = get_vertex; - break; - case 1: - draw->get_vertex = get_ubyte_elt_vertex; - break; - case 2: - draw->get_vertex = get_ushort_elt_vertex; - break; - case 4: - draw->get_vertex = get_uint_elt_vertex; - break; - default: - assert(0); - } - draw->mapped_elts = elements; - draw->eltSize = eltSize; -} - - /** * Tell drawing context where to find mapped vertex buffers. */ diff --git a/src/mesa/pipe/draw/draw_vertex.h b/src/mesa/pipe/draw/draw_vertex.h index c104aa11437..5d22e01d5cc 100644 --- a/src/mesa/pipe/draw/draw_vertex.h +++ b/src/mesa/pipe/draw/draw_vertex.h @@ -61,4 +61,23 @@ struct vertex_info +struct draw_context; + +extern int draw_vertex_cache_check_space( struct draw_context *draw, + unsigned nr_verts ); + +extern void draw_vertex_cache_validate( struct draw_context *draw ); +extern void draw_vertex_cache_invalidate( struct draw_context *draw ); +extern void draw_vertex_cache_unreference( struct draw_context *draw ); + +extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); + +struct tgsi_exec_machine; + +extern void draw_vertex_fetch( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ); + + #endif /* DRAW_VERTEX_H */ diff --git a/src/mesa/pipe/draw/draw_vertex_cache.c b/src/mesa/pipe/draw/draw_vertex_cache.c new file mode 100644 index 00000000000..f1b0cb14bd3 --- /dev/null +++ b/src/mesa/pipe/draw/draw_vertex_cache.c @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vertex.h" + + +void draw_vertex_cache_invalidate( struct draw_context *draw ) +{ + unsigned i; + + assert(draw->pq.queue_nr == 0); + assert(draw->vs.queue_nr == 0); + assert(draw->vcache.referenced == 0); + + for (i = 0; i < Elements( draw->vcache.idx ); i++) + draw->vcache.idx[i] = ~0; +} + + +/* Check if vertex is in cache, otherwise add it. It won't go through + * VS yet, not until there is a flush operation or the VS queue fills up. + */ +static struct vertex_header *get_vertex( struct draw_context *draw, + unsigned i ) +{ + unsigned slot = (i + (i>>5)) & 31; + + /* Cache miss? + */ + if (draw->vcache.idx[slot] != i) { + + /* If slot is in use, use the overflow area: + */ + if (draw->vcache.referenced & (1 << slot)) + slot = VCACHE_SIZE + draw->vcache.overflow++; + else + draw->vcache.referenced |= (1 << slot); /* slot now in use */ + + draw->vcache.idx[slot] = i; + + /* Add to vertex shader queue: + */ + draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot]; + draw->vs.queue[draw->vs.queue_nr].elt = i; + draw->vs.queue_nr++; + + /* Need to set the vertex's edge flag here. If we're being called + * by do_ef_triangle(), that function needs edge flag info! + */ + draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */ + } + + return draw->vcache.vertex[slot]; +} + + +static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw, + unsigned i ) +{ + const unsigned *elts = (const unsigned *) draw->mapped_elts; + return get_vertex( draw, elts[i] ); +} + + +static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw, + unsigned i ) +{ + const ushort *elts = (const ushort *) draw->mapped_elts; + return get_vertex( draw, elts[i] ); +} + + +static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw, + unsigned i ) +{ + const ubyte *elts = (const ubyte *) draw->mapped_elts; + return get_vertex( draw, elts[i] ); +} + + + +void draw_vertex_cache_validate( struct draw_context *draw ) +{ + draw_vertex_shader_queue_flush( draw ); +} + +void draw_vertex_cache_unreference( struct draw_context *draw ) +{ + draw->vcache.referenced = 0; + draw->vcache.overflow = 0; +} + + +int draw_vertex_cache_check_space( struct draw_context *draw, + unsigned nr_verts ) +{ + if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) { + /* The vs queue is sized so that this can never happen: + */ + assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH); + return TRUE; + } + else + return FALSE; +} + + + +/** + * Tell the drawing context about the index/element buffer to use + * (ala glDrawElements) + * If no element buffer is to be used (i.e. glDrawArrays) then this + * should be called with eltSize=0 and elements=NULL. + * + * \param draw the drawing context + * \param eltSize size of each element (1, 2 or 4 bytes) + * \param elements the element buffer ptr + */ +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, void *elements ) +{ + /* choose the get_vertex() function to use */ + switch (eltSize) { + case 0: + draw->get_vertex = get_vertex; + break; + case 1: + draw->get_vertex = get_ubyte_elt_vertex; + break; + case 2: + draw->get_vertex = get_ushort_elt_vertex; + break; + case 4: + draw->get_vertex = get_uint_elt_vertex; + break; + default: + assert(0); + } + draw->mapped_elts = elements; + draw->eltSize = eltSize; +} + diff --git a/src/mesa/pipe/draw/draw_vertex_fetch.c b/src/mesa/pipe/draw/draw_vertex_fetch.c new file mode 100644 index 00000000000..0dbbdf17f2f --- /dev/null +++ b/src/mesa/pipe/draw/draw_vertex_fetch.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vertex.h" + +#include "pipe/tgsi/exec/tgsi_core.h" + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * XXX this might be a temporary thing. + */ +static void +fetch_attrib4(const void *ptr, unsigned format, float attrib[4]) +{ + /* defaults */ + attrib[1] = 0.0; + attrib[2] = 0.0; + attrib[3] = 1.0; + switch (format) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + attrib[3] = ((float *) ptr)[3]; + /* fall-through */ + case PIPE_FORMAT_R32G32B32_FLOAT: + attrib[2] = ((float *) ptr)[2]; + /* fall-through */ + case PIPE_FORMAT_R32G32_FLOAT: + attrib[1] = ((float *) ptr)[1]; + /* fall-through */ + case PIPE_FORMAT_R32_FLOAT: + attrib[0] = ((float *) ptr)[0]; + break; + default: + assert(0); + } +} + +void draw_vertex_fetch( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) +{ + unsigned j; + + + /* load machine inputs */ + for (j = 0; j < count; j++) { + unsigned attr; + for (attr = 0; attr < 16; attr++) { + if (draw->vertex_shader.inputs_read & (1 << attr)) { + unsigned buf = draw->vertex_element[attr].vertex_buffer_index; + const void *src + = (const void *) ((const ubyte *) draw->mapped_vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[attr].src_offset + + elts[j] * draw->vertex_buffer[buf].pitch); + float p[4]; + + fetch_attrib4(src, draw->vertex_element[attr].src_format, p); + + machine->Inputs[attr].xyzw[0].f[j] = p[0]; /*X*/ + machine->Inputs[attr].xyzw[1].f[j] = p[1]; /*Y*/ + machine->Inputs[attr].xyzw[2].f[j] = p[2]; /*Z*/ + machine->Inputs[attr].xyzw[3].f[j] = p[3]; /*W*/ + } + } + } +} + diff --git a/src/mesa/pipe/draw/draw_vertex_shader.c b/src/mesa/pipe/draw/draw_vertex_shader.c new file mode 100644 index 00000000000..8effc74cbee --- /dev/null +++ b/src/mesa/pipe/draw/draw_vertex_shader.c @@ -0,0 +1,203 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vertex.h" + +#include "pipe/tgsi/exec/tgsi_core.h" + +static INLINE unsigned +compute_clipmask(float cx, float cy, float cz, float cw) +{ + unsigned mask = 0; + + if (-cx + cw < 0) mask |= CLIP_RIGHT_BIT; + if ( cx + cw < 0) mask |= CLIP_LEFT_BIT; + if (-cy + cw < 0) mask |= CLIP_TOP_BIT; + if ( cy + cw < 0) mask |= CLIP_BOTTOM_BIT; + if (-cz + cw < 0) mask |= CLIP_FAR_BIT; + if ( cz + cw < 0) mask |= CLIP_NEAR_BIT; + + return mask; +} + + + + +#if !defined(XSTDCALL) +#if defined(WIN32) +#define XSTDCALL __stdcall +#else +#define XSTDCALL +#endif +#endif + +#if defined(USE_X86_ASM) || defined(SLANG_X86) +typedef void (XSTDCALL *sse2_function)( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary ); +#endif + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. + * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +run_vertex_program(struct draw_context *draw, + unsigned elts[4], unsigned count, + struct vertex_header *vOut[]) +{ + struct tgsi_exec_machine machine; + unsigned int j; + + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + assert(draw->vertex_shader.outputs_written & (1 << TGSI_ATTRIB_POS)); + +#ifdef DEBUG + memset( &machine, 0, sizeof( machine ) ); +#endif + + /* init machine state */ + tgsi_exec_machine_init(&machine, + draw->vertex_shader.tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); + + /* Consts does not require 16 byte alignment. */ + machine.Consts = (float (*)[4]) draw->mapped_constants; + + machine.Inputs = ALIGN16_ASSIGN(inputs); + machine.Outputs = ALIGN16_ASSIGN(outputs); + + draw_vertex_fetch( draw, &machine, elts, count ); + + + /* run shader */ + if( draw->vertex_shader.executable != NULL ) { +#if defined(USE_X86_ASM) || defined(SLANG_X86) + sse2_function func = (sse2_function) draw->vertex_shader.executable; + func( + machine.Inputs, + machine.Outputs, + machine.Consts, + machine.Temps ); +#else + assert( 0 ); +#endif + } + else { + tgsi_exec_machine_run( &machine ); + } + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + + /* Handle attr[0] (position) specially: */ + x = vOut[j]->clip[0] = machine.Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine.Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine.Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine.Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(x, y, z, w) | draw->user_clipmask; + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + + /* remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->vertex_info.num_attribs; slot++) { + vOut[j]->data[slot][0] = machine.Outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = machine.Outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = machine.Outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = machine.Outputs[slot].xyzw[3].f[j]; + } + } /* loop over vertices */ +} + + +/** + * Called by the draw module when the vertx cache needs to be flushed. + * This involves running the vertex shader. + */ +void draw_vertex_shader_queue_flush( struct draw_context *draw ) +{ + unsigned i, j; + + /* run vertex shader on vertex cache entries, four per invokation */ + for (i = 0; i < draw->vs.queue_nr; i += 4) { + struct vertex_header *dests[4]; + unsigned elts[4]; + int n; + + for (j = 0; j < 4; j++) { + elts[j] = draw->vs.queue[i + j].elt; + dests[j] = draw->vs.queue[i + j].dest; + } + + n = MIN2(4, draw->vs.queue_nr - i); + assert(n > 0); + assert(n <= 4); + + run_vertex_program(draw, elts, n, dests); + } + + draw->vs.queue_nr = 0; +} + diff --git a/src/mesa/sources b/src/mesa/sources index 53969774f9e..be24f93f749 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -164,6 +164,9 @@ DRAW_SOURCES = \ pipe/draw/draw_offset.c \ pipe/draw/draw_prim.c \ pipe/draw/draw_twoside.c \ + pipe/draw/draw_vertex_cache.c \ + pipe/draw/draw_vertex_fetch.c \ + pipe/draw/draw_vertex_shader.c \ pipe/draw/draw_unfilled.c TGSIEXEC_SOURCES = \ -- 2.30.2