* Brian Paul
*/
-#include "pipe/p_util.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
#include "pipe/p_shader_tokens.h"
#include "draw_private.h"
#include "draw_context.h"
#include "draw_vs.h"
-#include "tgsi/util/tgsi_parse.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_exec.h"
struct exec_vertex_shader {
{
struct exec_vertex_shader *evs = exec_vertex_shader(shader);
- /* specify the vertex program to interpret/execute */
- tgsi_exec_machine_bind_shader(evs->machine,
- shader->state.tokens,
- PIPE_MAX_SAMPLERS,
- NULL /*samplers*/ );
-
- draw_update_vertex_fetch( draw );
-}
-
-
-/**
- * Transform vertices with the current vertex program/shader
- * Up to four vertices can be shaded at a time.
- * \param vbuffer the input vertex data
- * \param elts indexes of four input vertices
- * \param count number of vertices to shade [1..4]
- * \param vOut array of pointers to four output vertices
- */
-static boolean
-vs_exec_run( struct draw_vertex_shader *shader,
- struct draw_context *draw,
- const unsigned *elts,
- unsigned count,
- void *vOut,
- unsigned vertex_size)
-{
- struct exec_vertex_shader *evs = exec_vertex_shader(shader);
- struct tgsi_exec_machine *machine = evs->machine;
- unsigned int i, j;
- unsigned int clipped = 0;
- struct tgsi_exec_vector *outputs = 0;
- const float *scale = draw->viewport.scale;
- const float *trans = draw->viewport.translate;
-
- assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION);
-
- machine->Consts = (const float (*)[4]) draw->user.constants;
-
- if (draw->rasterizer->bypass_vs) {
- /* outputs are just the inputs */
- outputs = machine->Inputs;
- }
- else {
- outputs = machine->Outputs;
+ /* Specify the vertex program to interpret/execute.
+ * Avoid rebinding when possible.
+ */
+ if (evs->machine->Tokens != shader->state.tokens) {
+ tgsi_exec_machine_bind_shader(evs->machine,
+ shader->state.tokens,
+ draw->vs.tgsi.sampler);
}
-
- for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
- unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
- draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices );
-
-#if 0
- for (j = 0; j < max_vertices; j++) {
- unsigned slot;
- debug_printf("%d) Input vert:\n", i + j);
- for (slot = 0; slot < shader->info.num_inputs; slot++) {
- debug_printf("\t%d: %f %f %f %f\n", slot,
- machine->Inputs[slot].xyzw[0].f[j],
- machine->Inputs[slot].xyzw[1].f[j],
- machine->Inputs[slot].xyzw[2].f[j],
- machine->Inputs[slot].xyzw[3].f[j]);
- }
- }
-#endif
-
-
- if (!draw->rasterizer->bypass_vs) {
- /* run interpreter */
- tgsi_exec_machine_run( machine );
- }
-
- /* store machine results */
- for (j = 0; j < max_vertices; j++) {
- unsigned slot;
- float x, y, z, w;
- struct vertex_header *out =
- draw_header_from_block(vOut, vertex_size, i + j);
-
- /* Handle attr[0] (position) specially:
- *
- * XXX: Computing the clipmask should be done in the vertex
- * program as a set of DP4 instructions appended to the
- * user-provided code.
- */
- x = out->clip[0] = outputs[0].xyzw[0].f[j];
- y = out->clip[1] = outputs[0].xyzw[1].f[j];
- z = out->clip[2] = outputs[0].xyzw[2].f[j];
- w = out->clip[3] = outputs[0].xyzw[3].f[j];
-
- if (!draw->rasterizer->bypass_clipping) {
- out->clipmask = compute_clipmask(out->clip, draw->plane,
- draw->nr_planes);
- clipped += out->clipmask;
-
- /* divide by w */
- w = 1.0f / w;
- x *= w;
- y *= w;
- z *= w;
- }
- else {
- out->clipmask = 0;
- }
- out->edgeflag = 1;
- out->vertex_id = UNDEFINED_VERTEX_ID;
-
- if (!draw->identity_viewport) {
- /* Viewport mapping */
- out->data[0][0] = x * scale[0] + trans[0];
- out->data[0][1] = y * scale[1] + trans[1];
- out->data[0][2] = z * scale[2] + trans[2];
- out->data[0][3] = w;
- }
- else
- {
- out->data[0][0] = x;
- out->data[0][1] = y;
- out->data[0][2] = z;
- out->data[0][3] = w;
- }
-
- /* Remaining attributes are packed into sequential post-transform
- * vertex attrib slots.
- */
- for (slot = 1; slot < draw->num_vs_outputs; slot++) {
- out->data[slot][0] = outputs[slot].xyzw[0].f[j];
- out->data[slot][1] = outputs[slot].xyzw[1].f[j];
- out->data[slot][2] = outputs[slot].xyzw[2].f[j];
- out->data[slot][3] = outputs[slot].xyzw[3].f[j];
- }
-
-#if 0 /*DEBUG*/
- printf("%d) Post xform vert:\n", i + j);
- for (slot = 0; slot < draw->num_vs_outputs; slot++) {
- printf("\t%d: %f %f %f %f\n", slot,
- out->data[slot][0],
- out->data[slot][1],
- out->data[slot][2],
- out->data[slot][3]);
- }
-#endif
- } /* loop over vertices */
- }
- return clipped != 0;
}
+
/* Simplified vertex shader interface for the pt paths. Given the
* complexity of code-generating vertex fetch, shader execution, clip
* testing and viewport mapping together in one routine, this interface
* only runs the shader; the other steps are done separately.
vs_exec_run_linear( struct draw_vertex_shader *shader,
const float (*input)[4],
float (*output)[4],
- const float (*constants)[4],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned const_size[PIPE_MAX_CONSTANT_BUFFERS],
unsigned count,
unsigned input_stride,
unsigned output_stride )
struct tgsi_exec_machine *machine = evs->machine;
unsigned int i, j;
unsigned slot;
+ boolean clamp_vertex_color = shader->draw->rasterizer->clamp_vertex_color;
+
+ tgsi_exec_set_constant_buffers(machine, PIPE_MAX_CONSTANT_BUFFERS,
+ constants, const_size);
- machine->Consts = constants;
+ if (shader->info.uses_instanceid) {
+ unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_INSTANCEID];
+ assert(i < Elements(machine->SystemValue));
+ for (j = 0; j < TGSI_QUAD_SIZE; j++)
+ machine->SystemValue[i].i[j] = shader->draw->instance_id;
+ }
for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
}
#endif
+ if (shader->info.uses_vertexid) {
+ unsigned vid = machine->SysSemanticToIndex[TGSI_SEMANTIC_VERTEXID];
+ assert(vid < Elements(machine->SystemValue));
+ machine->SystemValue[vid].i[j] = i + j;
+ }
+
for (slot = 0; slot < shader->info.num_inputs; slot++) {
+#if 0
+ assert(!util_is_inf_or_nan(input[slot][0]));
+ assert(!util_is_inf_or_nan(input[slot][1]));
+ assert(!util_is_inf_or_nan(input[slot][2]));
+ assert(!util_is_inf_or_nan(input[slot][3]));
+#endif
machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
input = (const float (*)[4])((const char *)input + input_stride);
}
+ tgsi_set_exec_mask(machine,
+ 1,
+ max_vertices > 1,
+ max_vertices > 2,
+ max_vertices > 3);
+
/* run interpreter */
tgsi_exec_machine_run( machine );
*/
for (j = 0; j < max_vertices; j++) {
for (slot = 0; slot < shader->info.num_outputs; slot++) {
- output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
- output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
- output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
- output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
-
+ unsigned name = shader->info.output_semantic_name[slot];
+ if(clamp_vertex_color &&
+ (name == TGSI_SEMANTIC_COLOR || name == TGSI_SEMANTIC_BCOLOR))
+ {
+ output[slot][0] = CLAMP(machine->Outputs[slot].xyzw[0].f[j], 0.0f, 1.0f);
+ output[slot][1] = CLAMP(machine->Outputs[slot].xyzw[1].f[j], 0.0f, 1.0f);
+ output[slot][2] = CLAMP(machine->Outputs[slot].xyzw[2].f[j], 0.0f, 1.0f);
+ output[slot][3] = CLAMP(machine->Outputs[slot].xyzw[3].f[j], 0.0f, 1.0f);
+ }
+ else if (name == TGSI_SEMANTIC_FOG) {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = 0;
+ output[slot][2] = 0;
+ output[slot][3] = 1;
+ } else
+ {
+ output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
+ output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
+ output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
+ output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
+ }
}
#if 0
output[slot][1],
output[slot][2],
output[slot][3]);
+ assert(!util_is_inf_or_nan(output[slot][0]));
}
#endif
const struct pipe_shader_state *state)
{
struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
- uint nt = tgsi_num_tokens(state->tokens);
if (vs == NULL)
return NULL;
/* we make a private copy of the tokens */
- vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0]));
- tgsi_scan_shader(state->tokens, &vs->base.info);
+ vs->base.state.tokens = tgsi_dup_tokens(state->tokens);
+ if (!vs->base.state.tokens) {
+ FREE(vs);
+ return NULL;
+ }
+ tgsi_scan_shader(state->tokens, &vs->base.info);
+ vs->base.state.stream_output = state->stream_output;
+ vs->base.draw = draw;
vs->base.prepare = vs_exec_prepare;
- vs->base.run = vs_exec_run;
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
- vs->machine = &draw->machine;
-
+ vs->base.create_variant = draw_vs_create_variant_generic;
+ vs->machine = draw->vs.tgsi.machine;
return &vs->base;
}