gallium: Fix build on WinXP.
[mesa.git] / src / mesa / pipe / draw / draw_vertex_shader.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Brian Paul
32 */
33
34 #include "pipe/p_util.h"
35 #include "pipe/p_shader_tokens.h"
36 #if defined(__i386__) || defined(__386__)
37 #include "pipe/tgsi/exec/tgsi_sse2.h"
38 #endif
39 #include "draw_private.h"
40 #include "draw_context.h"
41
42 #include "x86/rtasm/x86sse.h"
43 #include "pipe/llvm/gallivm.h"
44
45
46 #define DBG_VS 0
47
48
49 static INLINE unsigned
50 compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
51 {
52 unsigned mask = 0;
53 unsigned i;
54
55 /* Do the hardwired planes first:
56 */
57 if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
58 if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
59 if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
60 if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
61 if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
62 if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
63
64 /* Followed by any remaining ones:
65 */
66 for (i = 6; i < nr; i++) {
67 if (dot4(clip, plane[i]) < 0)
68 mask |= (1<<i);
69 }
70
71 return mask;
72 }
73
74
75 typedef void (XSTDCALL *codegen_function) (
76 const struct tgsi_exec_vector *input,
77 struct tgsi_exec_vector *output,
78 float (*constant)[4],
79 struct tgsi_exec_vector *temporary );
80
81
82 /**
83 * Transform vertices with the current vertex program/shader
84 * Up to four vertices can be shaded at a time.
85 * \param vbuffer the input vertex data
86 * \param elts indexes of four input vertices
87 * \param count number of vertices to shade [1..4]
88 * \param vOut array of pointers to four output vertices
89 */
90 static void
91 run_vertex_program(struct draw_context *draw,
92 unsigned elts[4], unsigned count,
93 struct vertex_header *vOut[])
94 {
95 struct tgsi_exec_machine *machine = &draw->machine;
96 unsigned int j;
97
98 ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
99 ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
100 const float *scale = draw->viewport.scale;
101 const float *trans = draw->viewport.translate;
102
103 assert(count <= 4);
104 assert(draw->vertex_shader->state->output_semantic_name[0]
105 == TGSI_SEMANTIC_POSITION);
106
107 /* Consts does not require 16 byte alignment. */
108 machine->Consts = (float (*)[4]) draw->user.constants;
109
110 machine->Inputs = ALIGN16_ASSIGN(inputs);
111 machine->Outputs = ALIGN16_ASSIGN(outputs);
112
113 draw->vertex_fetch.fetch_func( draw, machine, elts, count );
114
115 /* run shader */
116 #if defined(__i386__) || defined(__386__)
117 if (draw->use_sse) {
118 /* SSE */
119 /* cast away const */
120 struct draw_vertex_shader *shader
121 = (struct draw_vertex_shader *)draw->vertex_shader;
122 codegen_function func
123 = (codegen_function) x86_get_func( &shader->sse2_program );
124 func(
125 machine->Inputs,
126 machine->Outputs,
127 machine->Consts,
128 machine->Temps );
129 }
130 else
131 #endif
132 {
133 /* interpreter */
134 tgsi_exec_machine_run( machine );
135 }
136
137 /* store machine results */
138 for (j = 0; j < count; j++) {
139 unsigned slot;
140 float x, y, z, w;
141
142 /* Handle attr[0] (position) specially:
143 *
144 * XXX: Computing the clipmask should be done in the vertex
145 * program as a set of DP4 instructions appended to the
146 * user-provided code.
147 */
148 x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
149 y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
150 z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
151 w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
152
153 vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
154 vOut[j]->edgeflag = 1;
155
156 /* divide by w */
157 w = 1.0f / w;
158 x *= w;
159 y *= w;
160 z *= w;
161
162 /* Viewport mapping */
163 vOut[j]->data[0][0] = x * scale[0] + trans[0];
164 vOut[j]->data[0][1] = y * scale[1] + trans[1];
165 vOut[j]->data[0][2] = z * scale[2] + trans[2];
166 vOut[j]->data[0][3] = w;
167
168 #if DBG_VS
169 printf("output[%d]win: %f %f %f %f\n", j,
170 vOut[j]->data[0][0],
171 vOut[j]->data[0][1],
172 vOut[j]->data[0][2],
173 vOut[j]->data[0][3]);
174 #endif
175 /* Remaining attributes are packed into sequential post-transform
176 * vertex attrib slots.
177 */
178 for (slot = 1; slot < draw->num_vs_outputs; slot++) {
179 vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
180 vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
181 vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
182 vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
183 #if DBG_VS
184 printf("output[%d][%d]: %f %f %f %f\n", j, slot,
185 vOut[j]->data[slot][0],
186 vOut[j]->data[slot][1],
187 vOut[j]->data[slot][2],
188 vOut[j]->data[slot][3]);
189 #endif
190 }
191 } /* loop over vertices */
192 }
193
194
195 /**
196 * Run the vertex shader on all vertices in the vertex queue.
197 * Called by the draw module when the vertx cache needs to be flushed.
198 */
199 void
200 draw_vertex_shader_queue_flush(struct draw_context *draw)
201 {
202 unsigned i;
203
204 assert(draw->vs.queue_nr != 0);
205
206 /* XXX: do this on statechange:
207 */
208 draw_update_vertex_fetch( draw );
209
210 // fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
211 #ifdef MESA_LLVM
212 if (draw->vertex_shader->llvm_prog) {
213 draw_vertex_shader_queue_flush_llvm(draw);
214 return;
215 }
216 #endif
217
218 /* run vertex shader on vertex cache entries, four per invokation */
219 for (i = 0; i < draw->vs.queue_nr; i += 4) {
220 struct vertex_header *dests[4];
221 unsigned elts[4];
222 int j, n = MIN2(4, draw->vs.queue_nr - i);
223
224 for (j = 0; j < n; j++) {
225 elts[j] = draw->vs.queue[i + j].elt;
226 dests[j] = draw->vs.queue[i + j].dest;
227 }
228
229 for ( ; j < 4; j++) {
230 elts[j] = elts[0];
231 dests[j] = dests[0];
232 }
233
234 assert(n > 0);
235 assert(n <= 4);
236
237 run_vertex_program(draw, elts, n, dests);
238 }
239
240 draw->vs.queue_nr = 0;
241 }
242
243
244 struct draw_vertex_shader *
245 draw_create_vertex_shader(struct draw_context *draw,
246 const struct pipe_shader_state *shader)
247 {
248 struct draw_vertex_shader *vs;
249
250 vs = CALLOC_STRUCT( draw_vertex_shader );
251 if (vs == NULL) {
252 return NULL;
253 }
254
255 vs->state = shader;
256
257 #ifdef MESA_LLVM
258 vs->llvm_prog = gallivm_from_tgsi(shader->tokens, GALLIVM_VS);
259 draw->engine = gallivm_global_cpu_engine();
260 if (!draw->engine) {
261 draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
262 }
263 else {
264 gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
265 }
266 #elif defined(__i386__) || defined(__386__)
267 if (draw->use_sse) {
268 /* cast-away const */
269 struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
270
271 x86_init_func( &vs->sse2_program );
272 tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, &vs->sse2_program );
273 }
274 #endif
275
276 return vs;
277 }
278
279
280 void
281 draw_bind_vertex_shader(struct draw_context *draw,
282 struct draw_vertex_shader *dvs)
283 {
284 draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
285
286 draw->vertex_shader = dvs;
287 draw->num_vs_outputs = dvs->state->num_outputs;
288
289 /* specify the fragment program to interpret/execute */
290 tgsi_exec_machine_init(&draw->machine,
291 draw->vertex_shader->state->tokens,
292 PIPE_MAX_SAMPLERS,
293 NULL /*samplers*/ );
294 }
295
296
/**
 * Destroy a vertex shader created by draw_create_vertex_shader().
 *
 * On x86 builds the shader's sse2_program is released before the
 * shader object itself is freed.
 *
 * \param draw  the draw context (not used by this function)
 * \param dvs   the shader to destroy
 */
void
draw_delete_vertex_shader(struct draw_context *draw,
                          struct draw_vertex_shader *dvs)
{
#if defined(__i386__) || defined(__386__)
   struct x86_function *sse_func = (struct x86_function *) &dvs->sse2_program;

   x86_release_func( sse_func );
#endif

   FREE( dvs );
}