5ca659dbf590341bddc599eff289ad532d3b1732
[mesa.git] / src / mesa / pipe / draw / draw_vertex_shader.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /*
29 * Authors:
30 * Keith Whitwell <keith@tungstengraphics.com>
31 * Brian Paul
32 */
33
34 #include "pipe/p_util.h"
35 #include "pipe/p_shader_tokens.h"
36 #if defined(__i386__) || defined(__386__)
37 #include "pipe/tgsi/exec/tgsi_sse2.h"
38 #endif
39 #include "draw_private.h"
40 #include "draw_context.h"
41
42 #include "x86/rtasm/x86sse.h"
43 #include "pipe/llvm/gallivm.h"
44
45
46 #define DBG_VS 0
47
48
49 static INLINE unsigned
50 compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr)
51 {
52 unsigned mask = 0;
53 unsigned i;
54
55 /* Do the hardwired planes first:
56 */
57 if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT;
58 if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT;
59 if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT;
60 if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT;
61 if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT;
62 if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT;
63
64 /* Followed by any remaining ones:
65 */
66 for (i = 6; i < nr; i++) {
67 if (dot4(clip, plane[i]) < 0)
68 mask |= (1<<i);
69 }
70
71 return mask;
72 }
73
74
75 typedef void (XSTDCALL *codegen_function) (
76 const struct tgsi_exec_vector *input,
77 struct tgsi_exec_vector *output,
78 float (*constant)[4],
79 struct tgsi_exec_vector *temporary );
80
81
82 /**
83 * Transform vertices with the current vertex program/shader
84 * Up to four vertices can be shaded at a time.
85 * \param vbuffer the input vertex data
86 * \param elts indexes of four input vertices
87 * \param count number of vertices to shade [1..4]
88 * \param vOut array of pointers to four output vertices
89 */
90 static void
91 run_vertex_program(struct draw_context *draw,
92 unsigned elts[4], unsigned count,
93 struct vertex_header *vOut[])
94 {
95 struct tgsi_exec_machine *machine = &draw->machine;
96 unsigned int j;
97
98 ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX);
99 ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX);
100 const float *scale = draw->viewport.scale;
101 const float *trans = draw->viewport.translate;
102
103 assert(count <= 4);
104 assert(draw->vertex_shader->state->output_semantic_name[0]
105 == TGSI_SEMANTIC_POSITION);
106
107 /* Consts does not require 16 byte alignment. */
108 machine->Consts = (float (*)[4]) draw->user.constants;
109
110 machine->Inputs = ALIGN16_ASSIGN(inputs);
111 machine->Outputs = ALIGN16_ASSIGN(outputs);
112
113 draw_vertex_fetch( draw, machine, elts, count );
114
115 /* run shader */
116 #if defined(__i386__) || defined(__386__)
117 if (draw->use_sse) {
118 /* SSE */
119 /* cast away const */
120 struct draw_vertex_shader *shader
121 = (struct draw_vertex_shader *)draw->vertex_shader;
122 codegen_function func
123 = (codegen_function) x86_get_func( &shader->sse2_program );
124 func(
125 machine->Inputs,
126 machine->Outputs,
127 machine->Consts,
128 machine->Temps );
129 }
130 else
131 #endif
132 {
133 /* interpreter */
134 tgsi_exec_machine_run( machine );
135 }
136
137 /* store machine results */
138 for (j = 0; j < count; j++) {
139 unsigned slot;
140 float x, y, z, w;
141
142 /* Handle attr[0] (position) specially:
143 *
144 * XXX: Computing the clipmask should be done in the vertex
145 * program as a set of DP4 instructions appended to the
146 * user-provided code.
147 */
148 x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j];
149 y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j];
150 z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j];
151 w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j];
152
153 vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes);
154 vOut[j]->edgeflag = 1;
155
156 /* divide by w */
157 w = 1.0f / w;
158 x *= w;
159 y *= w;
160 z *= w;
161
162 /* Viewport mapping */
163 vOut[j]->data[0][0] = x * scale[0] + trans[0];
164 vOut[j]->data[0][1] = y * scale[1] + trans[1];
165 vOut[j]->data[0][2] = z * scale[2] + trans[2];
166 vOut[j]->data[0][3] = w;
167
168 #if DBG_VS
169 printf("output[%d]win: %f %f %f %f\n", j,
170 vOut[j]->data[0][0],
171 vOut[j]->data[0][1],
172 vOut[j]->data[0][2],
173 vOut[j]->data[0][3]);
174 #endif
175 /* Remaining attributes are packed into sequential post-transform
176 * vertex attrib slots.
177 */
178 for (slot = 1; slot < draw->num_vs_outputs; slot++) {
179 vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j];
180 vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j];
181 vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j];
182 vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j];
183 #if DBG_VS
184 printf("output[%d][%d]: %f %f %f %f\n", j, slot,
185 vOut[j]->data[slot][0],
186 vOut[j]->data[slot][1],
187 vOut[j]->data[slot][2],
188 vOut[j]->data[slot][3]);
189 #endif
190 }
191 } /* loop over vertices */
192 }
193
194
195 /**
196 * Run the vertex shader on all vertices in the vertex queue.
197 * Called by the draw module when the vertx cache needs to be flushed.
198 */
199 void
200 draw_vertex_shader_queue_flush(struct draw_context *draw)
201 {
202 unsigned i, j;
203
204 // fprintf(stderr, " q(%d) ", draw->vs.queue_nr );
205 #ifdef MESA_LLVM
206 if (draw->vertex_shader->llvm_prog) {
207 draw_vertex_shader_queue_flush_llvm(draw);
208 return;
209 }
210 #endif
211
212 /* run vertex shader on vertex cache entries, four per invokation */
213 for (i = 0; i < draw->vs.queue_nr; i += 4) {
214 struct vertex_header *dests[4];
215 unsigned elts[4];
216 int n;
217
218 for (j = 0; j < 4; j++) {
219 elts[j] = draw->vs.queue[i + j].elt;
220 dests[j] = draw->vs.queue[i + j].dest;
221 }
222
223 n = MIN2(4, draw->vs.queue_nr - i);
224 assert(n > 0);
225 assert(n <= 4);
226
227 run_vertex_program(draw, elts, n, dests);
228 }
229
230 draw->vs.queue_nr = 0;
231 }
232
233
234 struct draw_vertex_shader *
235 draw_create_vertex_shader(struct draw_context *draw,
236 const struct pipe_shader_state *shader)
237 {
238 struct draw_vertex_shader *vs;
239
240 vs = CALLOC_STRUCT( draw_vertex_shader );
241 if (vs == NULL) {
242 return NULL;
243 }
244
245 vs->state = shader;
246
247 #ifdef MESA_LLVM
248 vs->llvm_prog = gallivm_from_tgsi(shader->tokens, GALLIVM_VS);
249 draw->engine = gallivm_global_cpu_engine();
250 if (!draw->engine) {
251 draw->engine = gallivm_cpu_engine_create(vs->llvm_prog);
252 }
253 else {
254 gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog);
255 }
256 #elif defined(__i386__) || defined(__386__)
257 if (draw->use_sse) {
258 /* cast-away const */
259 struct pipe_shader_state *sh = (struct pipe_shader_state *) shader;
260
261 x86_init_func( &vs->sse2_program );
262 tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, &vs->sse2_program );
263 }
264 #endif
265
266 return vs;
267 }
268
269
270 void
271 draw_bind_vertex_shader(struct draw_context *draw,
272 struct draw_vertex_shader *dvs)
273 {
274 draw_flush(draw);
275 draw->vertex_shader = dvs;
276
277 draw->num_vs_outputs = dvs->state->num_outputs;
278
279 /* specify the fragment program to interpret/execute */
280 tgsi_exec_machine_init(&draw->machine,
281 draw->vertex_shader->state->tokens,
282 PIPE_MAX_SAMPLERS,
283 NULL /*samplers*/ );
284 }
285
286
/**
 * Destroy a vertex shader made by draw_create_vertex_shader().
 *
 * On x86 builds the shader's SSE2 code buffer is released before the
 * shader object itself is freed.
 */
void
draw_delete_vertex_shader(struct draw_context *draw,
                          struct draw_vertex_shader *dvs)
{
#if defined(__i386__) || defined(__386__)
   struct x86_function *code = (struct x86_function *) &dvs->sse2_program;

   x86_release_func( code );
#endif

   FREE( dvs );
}