draw: split off all the extra functionality in the vertex shader
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_exec.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Brian Paul
32 */
33
34 #include "pipe/p_util.h"
35 #include "pipe/p_shader_tokens.h"
36
37 #include "draw_private.h"
38 #include "draw_context.h"
39 #include "draw_vs.h"
40
41 #include "tgsi/util/tgsi_parse.h"
42
43
44 struct exec_vertex_shader {
45 struct draw_vertex_shader base;
46 struct tgsi_exec_machine *machine;
47 };
48
/* Downcast a generic vertex shader to the exec (interpreter) variant.
 * This is a plain pointer cast, so it is only meaningful because
 * 'base' is the first member of struct exec_vertex_shader.
 */
static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs )
{
   struct exec_vertex_shader *evs = (struct exec_vertex_shader *) vs;

   return evs;
}
53
54
55 /* Not required for run_linear.
56 */
57 static void
58 vs_exec_prepare( struct draw_vertex_shader *shader,
59 struct draw_context *draw )
60 {
61 struct exec_vertex_shader *evs = exec_vertex_shader(shader);
62
63 /* specify the vertex program to interpret/execute */
64 tgsi_exec_machine_bind_shader(evs->machine,
65 shader->state.tokens,
66 PIPE_MAX_SAMPLERS,
67 NULL /*samplers*/ );
68
69 draw_update_vertex_fetch( draw );
70 }
71
72
73 /**
74 * Transform vertices with the current vertex program/shader
75 * Up to four vertices can be shaded at a time.
76 * \param vbuffer the input vertex data
77 * \param elts indexes of four input vertices
78 * \param count number of vertices to shade [1..4]
79 * \param vOut array of pointers to four output vertices
80 */
81 static boolean
82 vs_exec_run( struct draw_vertex_shader *shader,
83 struct draw_context *draw,
84 const unsigned *elts,
85 unsigned count,
86 void *vOut,
87 unsigned vertex_size)
88 {
89 struct exec_vertex_shader *evs = exec_vertex_shader(shader);
90 struct tgsi_exec_machine *machine = evs->machine;
91 unsigned int i, j;
92 unsigned int clipped = 0;
93 struct tgsi_exec_vector *outputs = 0;
94 const float *scale = draw->viewport.scale;
95 const float *trans = draw->viewport.translate;
96
97 assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION);
98
99 machine->Consts = (const float (*)[4]) draw->user.constants;
100
101 if (draw->rasterizer->bypass_vs) {
102 /* outputs are just the inputs */
103 outputs = machine->Inputs;
104 }
105 else {
106 outputs = machine->Outputs;
107 }
108
109 for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
110 unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
111 draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices );
112
113 #if 0
114 for (j = 0; j < max_vertices; j++) {
115 unsigned slot;
116 debug_printf("%d) Input vert:\n", i + j);
117 for (slot = 0; slot < shader->info.num_inputs; slot++) {
118 debug_printf("\t%d: %f %f %f %f\n", slot,
119 machine->Inputs[slot].xyzw[0].f[j],
120 machine->Inputs[slot].xyzw[1].f[j],
121 machine->Inputs[slot].xyzw[2].f[j],
122 machine->Inputs[slot].xyzw[3].f[j]);
123 }
124 }
125 #endif
126
127
128 if (!draw->rasterizer->bypass_vs) {
129 /* run interpreter */
130 tgsi_exec_machine_run( machine );
131 }
132
133 /* store machine results */
134 for (j = 0; j < max_vertices; j++) {
135 unsigned slot;
136 float x, y, z, w;
137 struct vertex_header *out =
138 draw_header_from_block(vOut, vertex_size, i + j);
139
140 /* Handle attr[0] (position) specially:
141 *
142 * XXX: Computing the clipmask should be done in the vertex
143 * program as a set of DP4 instructions appended to the
144 * user-provided code.
145 */
146 x = out->clip[0] = outputs[0].xyzw[0].f[j];
147 y = out->clip[1] = outputs[0].xyzw[1].f[j];
148 z = out->clip[2] = outputs[0].xyzw[2].f[j];
149 w = out->clip[3] = outputs[0].xyzw[3].f[j];
150
151 if (!draw->rasterizer->bypass_clipping) {
152 out->clipmask = compute_clipmask(out->clip, draw->plane,
153 draw->nr_planes);
154 clipped += out->clipmask;
155
156 /* divide by w */
157 w = 1.0f / w;
158 x *= w;
159 y *= w;
160 z *= w;
161 }
162 else {
163 out->clipmask = 0;
164 }
165 out->edgeflag = 1;
166 out->vertex_id = UNDEFINED_VERTEX_ID;
167
168 if (!draw->identity_viewport) {
169 /* Viewport mapping */
170 out->data[0][0] = x * scale[0] + trans[0];
171 out->data[0][1] = y * scale[1] + trans[1];
172 out->data[0][2] = z * scale[2] + trans[2];
173 out->data[0][3] = w;
174 }
175 else
176 {
177 out->data[0][0] = x;
178 out->data[0][1] = y;
179 out->data[0][2] = z;
180 out->data[0][3] = w;
181 }
182
183 /* Remaining attributes are packed into sequential post-transform
184 * vertex attrib slots.
185 */
186 for (slot = 1; slot < draw->num_vs_outputs; slot++) {
187 out->data[slot][0] = outputs[slot].xyzw[0].f[j];
188 out->data[slot][1] = outputs[slot].xyzw[1].f[j];
189 out->data[slot][2] = outputs[slot].xyzw[2].f[j];
190 out->data[slot][3] = outputs[slot].xyzw[3].f[j];
191 }
192
193 #if 0 /*DEBUG*/
194 printf("%d) Post xform vert:\n", i + j);
195 for (slot = 0; slot < draw->num_vs_outputs; slot++) {
196 printf("\t%d: %f %f %f %f\n", slot,
197 out->data[slot][0],
198 out->data[slot][1],
199 out->data[slot][2],
200 out->data[slot][3]);
201 }
202 #endif
203 } /* loop over vertices */
204 }
205 return clipped != 0;
206 }
207
208
209
210 /* Simplified vertex shader interface for the pt paths. Given the
211 * complexity of code-generating all the above operations together,
212 * it's time to try doing all the other stuff separately.
213 */
214 static void
215 vs_exec_run_linear( struct draw_vertex_shader *shader,
216 const float (*input)[4],
217 float (*output)[4],
218 const float (*constants)[4],
219 unsigned count,
220 unsigned input_stride,
221 unsigned output_stride )
222 {
223 struct exec_vertex_shader *evs = exec_vertex_shader(shader);
224 struct tgsi_exec_machine *machine = evs->machine;
225 unsigned int i, j;
226 unsigned slot;
227
228 machine->Consts = constants;
229
230 for (i = 0; i < count; i += MAX_TGSI_VERTICES) {
231 unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i);
232
233 /* Swizzle inputs.
234 */
235 for (j = 0; j < max_vertices; j++) {
236 #if 0
237 debug_printf("%d) Input vert:\n", i + j);
238 for (slot = 0; slot < shader->info.num_inputs; slot++) {
239 debug_printf("\t%d: %f %f %f %f\n", slot,
240 input[slot][0],
241 input[slot][1],
242 input[slot][2],
243 input[slot][3]);
244 }
245 #endif
246
247 for (slot = 0; slot < shader->info.num_inputs; slot++) {
248 machine->Inputs[slot].xyzw[0].f[j] = input[slot][0];
249 machine->Inputs[slot].xyzw[1].f[j] = input[slot][1];
250 machine->Inputs[slot].xyzw[2].f[j] = input[slot][2];
251 machine->Inputs[slot].xyzw[3].f[j] = input[slot][3];
252 }
253
254 input = (const float (*)[4])((const char *)input + input_stride);
255 }
256
257 /* run interpreter */
258 tgsi_exec_machine_run( machine );
259
260 /* Unswizzle all output results.
261 */
262 for (j = 0; j < max_vertices; j++) {
263 for (slot = 0; slot < shader->info.num_outputs; slot++) {
264 output[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
265 output[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
266 output[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
267 output[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
268
269 }
270
271 #if 0
272 debug_printf("%d) Post xform vert:\n", i + j);
273 for (slot = 0; slot < shader->info.num_outputs; slot++) {
274 debug_printf("\t%d: %f %f %f %f\n", slot,
275 output[slot][0],
276 output[slot][1],
277 output[slot][2],
278 output[slot][3]);
279 }
280 #endif
281
282 output = (float (*)[4])((char *)output + output_stride);
283 }
284
285 }
286 }
287
288
289
290
291 static void
292 vs_exec_delete( struct draw_vertex_shader *dvs )
293 {
294 FREE((void*) dvs->state.tokens);
295 FREE( dvs );
296 }
297
298
299 struct draw_vertex_shader *
300 draw_create_vs_exec(struct draw_context *draw,
301 const struct pipe_shader_state *state)
302 {
303 struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader );
304 uint nt = tgsi_num_tokens(state->tokens);
305
306 if (vs == NULL)
307 return NULL;
308
309 /* we make a private copy of the tokens */
310 vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0]));
311 tgsi_scan_shader(state->tokens, &vs->base.info);
312
313
314 vs->base.prepare = vs_exec_prepare;
315 vs->base.run = vs_exec_run;
316 vs->base.run_linear = vs_exec_run_linear;
317 vs->base.delete = vs_exec_delete;
318 vs->machine = &draw->machine;
319
320
321 return &vs->base;
322 }