X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fgallium%2Fauxiliary%2Fdraw%2Fdraw_vs_exec.c;h=54a2b2ab040fd3acf6f9b80d9ef6459a691f8181;hb=201ac414d4df00745e487a6ffbc9979a2e70f0c6;hp=364693e0b491992af4598e6a45b3ef0dd3f1bcee;hpb=3088eb59497ec8621e003ce3bc87025f257c0a92;p=mesa.git diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 364693e0b49..54a2b2ab040 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -38,155 +38,124 @@ #include "draw_context.h" #include "draw_vs.h" +#include "tgsi/util/tgsi_parse.h" -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<machine, + tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. */ static void -vs_exec_run( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ) +vs_exec_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { - struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(count <= 4); - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + struct tgsi_exec_machine *machine = evs->machine; + unsigned int i, j; + unsigned slot; - machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + machine->Consts = constants; - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - /* run interpreter */ - tgsi_exec_machine_run( machine ); - - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. + /* Swizzle inputs. */ - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->clipmask = 0; - vOut[j]->edgeflag = 1; - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; - } - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. + for (j = 0; j < max_vertices; j++) { +#if 0 + debug_printf("%d) Input vert:\n", i + j); + for (slot = 0; slot < shader->info.num_inputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + input[slot][0], + input[slot][1], + input[slot][2], + input[slot][3]); + } +#endif + + for (slot = 0; slot < shader->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run interpreter */ + tgsi_exec_machine_run( machine ); + + /* Unswizzle all output results. */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - -#if 0 /*DEBUG*/ - printf("Post xform vert:\n"); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("%d: %f %f %f %f\n", slot, - vOut[j]->data[slot][0], - vOut[j]->data[slot][1], - vOut[j]->data[slot][2], - vOut[j]->data[slot][3]); - } -#endif - - - } /* loop over vertices */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + + } + +#if 0 + debug_printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < shader->info.num_outputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); + } +#endif + + output = (float (*)[4])((char *)output + output_stride); + } + + } } + static void vs_exec_delete( struct draw_vertex_shader *dvs ) { + FREE((void*) dvs->state.tokens); FREE( dvs ); } @@ -195,15 +164,22 @@ struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { - struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader ); + struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); + uint nt = tgsi_num_tokens(state->tokens); if (vs == NULL) return NULL; - vs->state = *state; - vs->prepare = vs_exec_prepare; - vs->run = vs_exec_run; - vs->delete = vs_exec_delete; + /* we make a private copy of the tokens */ + vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); + tgsi_scan_shader(state->tokens, &vs->base.info); + + + vs->base.prepare = vs_exec_prepare; + vs->base.run_linear = vs_exec_run_linear; + vs->base.delete = vs_exec_delete; + vs->machine = &draw->machine; + - return vs; + return &vs->base; }