From: Brian Date: Fri, 17 Aug 2007 17:40:53 +0000 (-0600) Subject: shade four vertices at a time X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=83f428e799d598732494f60601c5984e09829a81;p=mesa.git shade four vertices at a time --- diff --git a/src/mesa/pipe/softpipe/sp_draw_arrays.c b/src/mesa/pipe/softpipe/sp_draw_arrays.c index 69fd6908bb9..bb32553cf4b 100644 --- a/src/mesa/pipe/softpipe/sp_draw_arrays.c +++ b/src/mesa/pipe/softpipe/sp_draw_arrays.c @@ -31,10 +31,15 @@ */ +/** TEMP */ +#include "main/context.h" +#include "main/macros.h" + #include "pipe/p_defines.h" #include "pipe/p_context.h" #include "pipe/p_winsys.h" + #include "sp_context.h" #include "sp_state.h" @@ -48,35 +53,43 @@ #if defined __GNUC__ -#define USE_ALIGNED_ATTRIBS 1 -#define ALIGN16_SUFFIX __attribute__(( aligned( 16 ) )) +#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME[SIZE] __attribute__(( aligned( 16 ) )) +#define ALIGN16_ASSIGN(P) P #else -#define USE_ALIGNED_ATTRIBS 0 -#define ALIGN16_SUFFIX +#define ALIGN16_DECL(TYPE, NAME, SIZE) TYPE NAME[SIZE + 1] +#define ALIGN16_ASSIGN(P) align16(P) #endif static struct softpipe_context *sp_global = NULL; + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. + * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ static void -run_vertex_program2(struct draw_context *draw, - const void *vbuffer, unsigned elem, - struct vertex_header *vOut) +run_vertex_program(struct draw_context *draw, + const void *vbuffer, unsigned elts[4], unsigned count, + struct vertex_header *vOut[]) { #if 1 struct softpipe_context *sp = sp_global; #endif struct tgsi_exec_machine machine; - int i; + unsigned int j; -#if USE_ALIGNED_ATTRIBS - struct tgsi_exec_vector inputs[PIPE_ATTRIB_MAX] ALIGN16_SUFFIX; - struct tgsi_exec_vector outputs[PIPE_ATTRIB_MAX] ALIGN16_SUFFIX; -#else - struct tgsi_exec_vector inputs[PIPE_ATTRIB_MAX + 1]; - struct tgsi_exec_vector outputs[PIPE_ATTRIB_MAX + 1]; -#endif + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); #ifdef DEBUG memset( &machine, 0, sizeof( machine ) ); @@ -92,54 +105,34 @@ run_vertex_program2(struct draw_context *draw, /* Consts does not require 16 byte alignment. */ machine.Consts = sp->vs.constants->constant; -#if USE_ALIGNED_ATTRIBS - machine.Inputs = inputs; - machine.Outputs = outputs; -#else - machine.Inputs = (struct tgsi_exec_vector *) tgsi_align_128bit( inputs ); - machine.Outputs = (struct tgsi_exec_vector *) tgsi_align_128bit( outputs ); -#endif + machine.Inputs = ALIGN16_ASSIGN(inputs); + machine.Outputs = ALIGN16_ASSIGN(outputs); - { + /* load machine inputs */ + for (j = 0; j < count; j++) { const void *mapped = vbuffer; const float *vIn, *cIn; vIn = (const float *) ((const ubyte *) mapped + draw->vertex_buffer[0].buffer_offset + draw->vertex_element[0].src_offset - + elem * draw->vertex_buffer[0].pitch); + + elts[j] * draw->vertex_buffer[0].pitch); cIn = (const float *) ((const ubyte *) mapped + draw->vertex_buffer[3].buffer_offset + draw->vertex_element[3].src_offset - + elem * draw->vertex_buffer[3].pitch); - /*X*/ - machine.Inputs[0].xyzw[0].f[0] = vIn[0]; - machine.Inputs[0].xyzw[0].f[1] = vIn[0]; - machine.Inputs[0].xyzw[0].f[2] = vIn[0]; - machine.Inputs[0].xyzw[0].f[3] = vIn[0]; - - /*Y*/ - machine.Inputs[0].xyzw[1].f[0] = vIn[1]; - machine.Inputs[0].xyzw[1].f[1] = vIn[1]; - machine.Inputs[0].xyzw[1].f[2] = vIn[1]; - machine.Inputs[0].xyzw[1].f[3] = vIn[1]; - - /*Z*/ - machine.Inputs[0].xyzw[2].f[0] = vIn[2]; - machine.Inputs[0].xyzw[2].f[1] = vIn[2]; - machine.Inputs[0].xyzw[2].f[2] = vIn[2]; - machine.Inputs[0].xyzw[2].f[3] = vIn[2]; - - /*W*/ - machine.Inputs[0].xyzw[3].f[0] = 1.0; - machine.Inputs[0].xyzw[3].f[1] = 1.0; - machine.Inputs[0].xyzw[3].f[2] = 1.0; - machine.Inputs[0].xyzw[3].f[3] = 1.0; - - printf("VS Input: %f %f %f %f\n", - vIn[0], vIn[1], vIn[2], 1.0); + + elts[j] * draw->vertex_buffer[3].pitch); + + machine.Inputs[0].xyzw[0].f[j] = vIn[0]; /*X*/ + machine.Inputs[0].xyzw[1].f[j] = vIn[1]; /*Y*/ + machine.Inputs[0].xyzw[2].f[j] = vIn[2]; /*Z*/ + machine.Inputs[0].xyzw[3].f[j] = 1.0; /*W*/ +#if 0 + printf("VS Input %d: %f %f %f %f\n", + j, vIn[0], vIn[1], vIn[2], 1.0); +#endif } +#if 0 printf("Consts:\n"); for (i = 0; i < 4; i++) { printf(" %d: %f %f %f %f\n", i, @@ -148,48 +141,49 @@ run_vertex_program2(struct draw_context *draw, machine.Consts[i][2], machine.Consts[i][3]); } - +#endif /* run shader */ tgsi_exec_machine_run( &machine ); - /* store result pos */ +#if 0 printf("VS result: %f %f %f %f\n", outputs[0].xyzw[0].f[0], outputs[0].xyzw[1].f[0], outputs[0].xyzw[2].f[0], outputs[0].xyzw[3].f[0]); - { - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; +#endif + + /* store machine results */ + for (j = 0; j < count; j++) { float x, y, z, w; - x = outputs[0].xyzw[0].f[0]; - y = outputs[0].xyzw[1].f[0]; - z = outputs[0].xyzw[2].f[0]; - w = outputs[0].xyzw[3].f[0]; + x = outputs[0].xyzw[0].f[j]; + y = outputs[0].xyzw[1].f[j]; + z = outputs[0].xyzw[2].f[j]; + w = outputs[0].xyzw[3].f[j]; /* divide by w */ x /= w; y /= w; z /= w; - w = 1.0f / w; + w = 1.0 / w; /* Viewport */ - vOut->data[0][0] = scale[0] * x + trans[0]; - vOut->data[0][1] = scale[1] * y + trans[1]; - vOut->data[0][2] = scale[2] * z + trans[2]; - vOut->data[0][3] = w; + vOut[j]->data[0][0] = scale[0] * x + trans[0]; + vOut[j]->data[0][1] = scale[1] * y + trans[1]; + vOut[j]->data[0][2] = scale[2] * z + trans[2]; + vOut[j]->data[0][3] = w; +#if 0 printf("wincoord: %f %f %f\n", - vOut->data[0][0], - vOut->data[0][1], - vOut->data[0][2]); - - vOut->data[1][0] = 1.0; - vOut->data[1][1] = 1.0; - vOut->data[1][2] = 1.0; - vOut->data[1][3] = 1.0; - + vOut[j]->data[0][0], + vOut[j]->data[0][1], + vOut[j]->data[0][2]); +#endif + vOut[j]->data[1][0] = 1.0; + vOut[j]->data[1][1] = 1.0; + vOut[j]->data[1][2] = 1.0; + vOut[j]->data[1][3] = 1.0; } #if 0 @@ -209,14 +203,12 @@ run_vertex_program2(struct draw_context *draw, * \param elem which element of the vertex buffer to use as input * \param vOut the output vertex */ +#if 0 static void run_vertex_program(struct draw_context *draw, const void *vbuffer, unsigned elem, struct vertex_header *vOut) { - run_vertex_program2(draw, vbuffer, elem, vOut); - -#if 0 const float *vIn, *cIn; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -268,8 +260,8 @@ run_vertex_program(struct draw_context *draw, vOut->data[1][2] = cIn[2]; vOut->data[1][3] = 1.0; } -#endif } +#endif /** @@ -278,7 +270,7 @@ run_vertex_program(struct draw_context *draw, */ static void vs_flush( struct draw_context *draw ) { - unsigned i; + unsigned i, j; /* We're not really running a vertex shader yet, so flushing the vs * queue is just a matter of building the vertices and returning. @@ -286,7 +278,11 @@ static void vs_flush( struct draw_context *draw ) /* Actually, I'm cheating even more and pre-building them still * with the mesa/vf module. So it's very easy... */ +#if 0 for (i = 0; i < draw->vs.queue_nr; i++) { +#else + for (i = 0; i < draw->vs.queue_nr; i+=4) { +#endif /* Would do the following steps here: * * 1) Loop over vertex element descriptors, fetch data from each @@ -302,10 +298,27 @@ static void vs_flush( struct draw_context *draw ) * * In this version, just do the last step: */ +#if 0 const unsigned elt = draw->vs.queue[i].elt; struct vertex_header *dest = draw->vs.queue[i].dest; run_vertex_program(draw, draw->mapped_vbuffer, elt, dest); +#else + struct vertex_header *dests[4]; + unsigned elts[4]; + int n; + + for (j = 0; j < 4; j++) { + elts[j] = draw->vs.queue[i + j].elt; + dests[j] = draw->vs.queue[i + j].dest; + } + + n = MIN2(4, draw->vs.queue_nr - i); + assert(n > 0); + assert(n <= 4); + + run_vertex_program(draw, draw->mapped_vbuffer, elts, n, dests); +#endif } draw->vs.queue_nr = 0; }