Another typo
[mesa.git] / src / mesa / tnl / t_vb_arbprogram.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.3
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
 * \file t_vb_arbprogram.c
27 * Compile vertex programs to an intermediate representation.
28 * Execute vertex programs over a buffer of vertices.
29 * \author Keith Whitwell, Brian Paul
30 */
31
32 #include "glheader.h"
33 #include "context.h"
34 #include "imports.h"
35 #include "macros.h"
36 #include "mtypes.h"
37 #include "arbprogparse.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40 #include "math/m_translate.h"
41 #include "t_context.h"
42 #include "t_pipeline.h"
43 #include "t_vp_build.h"
44 #include "t_vb_arbprogram.h"
45
/* Set to non-zero to have compile_vertex_program() print a disassembly
 * of every compiled program.
 */
#define DISASSEM 0
47
48 /*--------------------------------------------------------------------------- */
49
/**
 * Static description of one opcode, used for disassembly and to tell
 * the compiler how many source operands to load.
 */
struct opcode_info {
   GLuint nr_args;       /* number of source operands */
   const char *string;   /* mnemonic printed by the disassembler */
   /* disassembly callback; receives the instruction and this entry */
   void (*print)( union instruction , const struct opcode_info * );
};
55
/**
 * State carried through the compilation of one vertex program.
 */
struct compilation {
   GLuint reg_active;        /* bit (1 << idx) set once FILE_REG register idx
                              * has been chosen as a destination */
   union instruction *csr;   /* cursor: next free slot in the output
                              * instruction array */
};
60
61
/* Fetch the struct arb_vp_machine stored in a pipeline stage's privatePtr. */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
63
64
65
/**
 * Set x to positive or negative infinity.
 *
 * Three variants are selected at compile time:
 *  - USE_IEEE or _WIN32: store the IEEE-754 single-precision bit
 *    patterns 0x7F800000 / 0xFF800000 directly through a GLuint pointer.
 *  - VMS: use +/- __MAXFLOAT instead; this is the only variant that
 *    also defines IS_INF_OR_NAN.
 *  - otherwise: assign +/- HUGE_VAL cast to GLfloat.
 *
 * XXX: FIXME - type punning: the first variant accesses a float object
 * through a GLuint lvalue.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  x = __MAXFLOAT
#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT)
#else
#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
#endif
82
/* Thin wrapper so the frexpf() call can be swapped out in one place. */
#define FREXPF(a,b) frexpf(a,b)

/* Replicate element 0 of a 4-vector into elements 1..3. */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])

/* Store an integer bit pattern into the object x via the 'i' member of
 * fi_type.
 *
 * FIXME: more type punning (despite use of fi_type...)
 * NOTE(review): the expansion is not parenthesised, so it is only safe
 * when used as a complete statement.
 */
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
90
91
92 static GLfloat RoughApproxLog2(GLfloat t)
93 {
94 return LOG2(t);
95 }
96
97 static GLfloat RoughApproxPow2(GLfloat t)
98 {
99 #if 0
100 /* This isn't nearly accurate enough - it discards all of t's
101 * fractional bits!
102 */
103 fi_type fi;
104 fi.i = (GLint) t;
105 fi.i = (fi.i << 23) + 0x3f800000;
106 return fi.f;
107 #else
108 return (GLfloat) _mesa_pow(2.0, t);
109 #endif
110 }
111
112 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
113 {
114 #if 0
115 return RoughApproxPow2(y * RoughApproxLog2(x));
116 #else
117 return (GLfloat) _mesa_pow(x, y);
118 #endif
119 }
120
121
122
123
124
125 /**
126 * Perform a reduced swizzle:
127 */
128 static void do_RSW( struct arb_vp_machine *m, union instruction op )
129 {
130 GLfloat *result = m->File[0][op.rsw.dst];
131 const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
132 GLuint swz = op.rsw.swz;
133 GLuint neg = op.rsw.neg;
134
135 result[0] = arg0[GET_RSW(swz, 0)];
136 result[1] = arg0[GET_RSW(swz, 1)];
137 result[2] = arg0[GET_RSW(swz, 2)];
138 result[3] = arg0[GET_RSW(swz, 3)];
139
140 if (neg) {
141 if (neg & 0x1) result[0] = -result[0];
142 if (neg & 0x2) result[1] = -result[1];
143 if (neg & 0x4) result[2] = -result[2];
144 if (neg & 0x8) result[3] = -result[3];
145 }
146 }
147
148 /* Used to implement write masking. To make things easier for the sse
149 * generator I've gone back to a 1 argument version of this function
150 * (dst.msk = arg), rather than the semantically cleaner (dst = SEL
151 * arg0, arg1, msk)
152 *
153 * That means this is the only instruction which doesn't write a full
154 * 4 dwords out. This would make such a program harder to analyse,
155 * but it looks like analysis is going to take place on a higher level
156 * anyway.
157 */
158 static void do_MSK( struct arb_vp_machine *m, union instruction op )
159 {
160 GLfloat *dst = m->File[0][op.msk.dst];
161 const GLfloat *arg = m->File[op.msk.file][op.msk.idx];
162
163 if (op.msk.mask & 0x1) dst[0] = arg[0];
164 if (op.msk.mask & 0x2) dst[1] = arg[1];
165 if (op.msk.mask & 0x4) dst[2] = arg[2];
166 if (op.msk.mask & 0x8) dst[3] = arg[3];
167 }
168
169
170 static void do_PRT( struct arb_vp_machine *m, union instruction op )
171 {
172 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
173
174 _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
175 arg0[0], arg0[1], arg0[2], arg0[3]);
176 }
177
178
179 /**
180 * The traditional ALU and texturing instructions. All operate on
181 * internal registers and ignore write masks and swizzling issues.
182 */
183
184 static void do_ABS( struct arb_vp_machine *m, union instruction op )
185 {
186 GLfloat *result = m->File[0][op.alu.dst];
187 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
188
189 result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
190 result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
191 result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
192 result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
193 }
194
195 static void do_ADD( struct arb_vp_machine *m, union instruction op )
196 {
197 GLfloat *result = m->File[0][op.alu.dst];
198 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
199 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
200
201 result[0] = arg0[0] + arg1[0];
202 result[1] = arg0[1] + arg1[1];
203 result[2] = arg0[2] + arg1[2];
204 result[3] = arg0[3] + arg1[3];
205 }
206
207
208 static void do_DP3( struct arb_vp_machine *m, union instruction op )
209 {
210 GLfloat *result = m->File[0][op.alu.dst];
211 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
212 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
213
214 result[0] = (arg0[0] * arg1[0] +
215 arg0[1] * arg1[1] +
216 arg0[2] * arg1[2]);
217
218 PUFF(result);
219 }
220
221
222
223 static void do_DP4( struct arb_vp_machine *m, union instruction op )
224 {
225 GLfloat *result = m->File[0][op.alu.dst];
226 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
227 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
228
229 result[0] = (arg0[0] * arg1[0] +
230 arg0[1] * arg1[1] +
231 arg0[2] * arg1[2] +
232 arg0[3] * arg1[3]);
233
234 PUFF(result);
235 }
236
237 static void do_DPH( struct arb_vp_machine *m, union instruction op )
238 {
239 GLfloat *result = m->File[0][op.alu.dst];
240 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
241 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
242
243 result[0] = (arg0[0] * arg1[0] +
244 arg0[1] * arg1[1] +
245 arg0[2] * arg1[2] +
246 1.0 * arg1[3]);
247
248 PUFF(result);
249 }
250
251 static void do_DST( struct arb_vp_machine *m, union instruction op )
252 {
253 GLfloat *result = m->File[0][op.alu.dst];
254 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
255 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
256
257 result[0] = 1.0F;
258 result[1] = arg0[1] * arg1[1];
259 result[2] = arg0[2];
260 result[3] = arg1[3];
261 }
262
263
264 static void do_EX2( struct arb_vp_machine *m, union instruction op )
265 {
266 GLfloat *result = m->File[0][op.alu.dst];
267 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
268
269 result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
270 PUFF(result);
271 }
272
273 static void do_EXP( struct arb_vp_machine *m, union instruction op )
274 {
275 GLfloat *result = m->File[0][op.alu.dst];
276 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
277 GLfloat tmp = arg0[0];
278 GLfloat flr_tmp = FLOORF(tmp);
279
280 /* KW: nvvertexec has an optimized version of this which is pretty
281 * hard to understand/validate, but avoids the RoughApproxPow2.
282 */
283 result[0] = (GLfloat) (1 << (int)flr_tmp);
284 result[1] = tmp - flr_tmp;
285 result[2] = RoughApproxPow2(tmp);
286 result[3] = 1.0F;
287 }
288
289 static void do_FLR( struct arb_vp_machine *m, union instruction op )
290 {
291 GLfloat *result = m->File[0][op.alu.dst];
292 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
293
294 result[0] = FLOORF(arg0[0]);
295 result[1] = FLOORF(arg0[1]);
296 result[2] = FLOORF(arg0[2]);
297 result[3] = FLOORF(arg0[3]);
298 }
299
300 static void do_FRC( struct arb_vp_machine *m, union instruction op )
301 {
302 GLfloat *result = m->File[0][op.alu.dst];
303 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
304
305 result[0] = arg0[0] - FLOORF(arg0[0]);
306 result[1] = arg0[1] - FLOORF(arg0[1]);
307 result[2] = arg0[2] - FLOORF(arg0[2]);
308 result[3] = arg0[3] - FLOORF(arg0[3]);
309 }
310
311 static void do_LG2( struct arb_vp_machine *m, union instruction op )
312 {
313 GLfloat *result = m->File[0][op.alu.dst];
314 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
315
316 result[0] = RoughApproxLog2(arg0[0]);
317 PUFF(result);
318 }
319
320
321
322 static void do_LIT( struct arb_vp_machine *m, union instruction op )
323 {
324 GLfloat *result = m->File[0][op.alu.dst];
325 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
326
327 const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
328 GLfloat tmp[4];
329
330 tmp[0] = MAX2(arg0[0], 0.0F);
331 tmp[1] = MAX2(arg0[1], 0.0F);
332 tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
333
334 result[0] = 1.0;
335 result[1] = tmp[0];
336 result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
337 result[3] = 1.0;
338 }
339
340
341 static void do_LOG( struct arb_vp_machine *m, union instruction op )
342 {
343 GLfloat *result = m->File[0][op.alu.dst];
344 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
345 GLfloat tmp = FABSF(arg0[0]);
346 int exponent;
347 GLfloat mantissa = FREXPF(tmp, &exponent);
348
349 result[0] = (GLfloat) (exponent - 1);
350 result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
351 result[2] = result[0] + LOG2(result[1]);
352 result[3] = 1.0;
353 }
354
355 static void do_MAX( struct arb_vp_machine *m, union instruction op )
356 {
357 GLfloat *result = m->File[0][op.alu.dst];
358 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
359 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
360
361 result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
362 result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
363 result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
364 result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
365 }
366
367
368 static void do_MIN( struct arb_vp_machine *m, union instruction op )
369 {
370 GLfloat *result = m->File[0][op.alu.dst];
371 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
372 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
373
374 result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
375 result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
376 result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
377 result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
378 }
379
380 static void do_MOV( struct arb_vp_machine *m, union instruction op )
381 {
382 GLfloat *result = m->File[0][op.alu.dst];
383 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
384
385 result[0] = arg0[0];
386 result[1] = arg0[1];
387 result[2] = arg0[2];
388 result[3] = arg0[3];
389 }
390
391 static void do_MUL( struct arb_vp_machine *m, union instruction op )
392 {
393 GLfloat *result = m->File[0][op.alu.dst];
394 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
395 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
396
397 result[0] = arg0[0] * arg1[0];
398 result[1] = arg0[1] * arg1[1];
399 result[2] = arg0[2] * arg1[2];
400 result[3] = arg0[3] * arg1[3];
401 }
402
403
404 static void do_POW( struct arb_vp_machine *m, union instruction op )
405 {
406 GLfloat *result = m->File[0][op.alu.dst];
407 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
408 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
409
410 result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
411 PUFF(result);
412 }
413
414 static void do_REL( struct arb_vp_machine *m, union instruction op )
415 {
416 GLfloat *result = m->File[0][op.alu.dst];
417 GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);
418 const GLfloat *arg0 = m->File[op.alu.file0][idx];
419
420 result[0] = arg0[0];
421 result[1] = arg0[1];
422 result[2] = arg0[2];
423 result[3] = arg0[3];
424 }
425
426 static void do_RCP( struct arb_vp_machine *m, union instruction op )
427 {
428 GLfloat *result = m->File[0][op.alu.dst];
429 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
430
431 result[0] = 1.0F / arg0[0];
432 PUFF(result);
433 }
434
435 static void do_RSQ( struct arb_vp_machine *m, union instruction op )
436 {
437 GLfloat *result = m->File[0][op.alu.dst];
438 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
439
440 result[0] = INV_SQRTF(FABSF(arg0[0]));
441 PUFF(result);
442 }
443
444
445 static void do_SGE( struct arb_vp_machine *m, union instruction op )
446 {
447 GLfloat *result = m->File[0][op.alu.dst];
448 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
449 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
450
451 result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
452 result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
453 result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
454 result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
455 }
456
457
458 static void do_SLT( struct arb_vp_machine *m, union instruction op )
459 {
460 GLfloat *result = m->File[0][op.alu.dst];
461 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
462 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
463
464 result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
465 result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
466 result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
467 result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
468 }
469
470 static void do_SUB( struct arb_vp_machine *m, union instruction op )
471 {
472 GLfloat *result = m->File[0][op.alu.dst];
473 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
474 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
475
476 result[0] = arg0[0] - arg1[0];
477 result[1] = arg0[1] - arg1[1];
478 result[2] = arg0[2] - arg1[2];
479 result[3] = arg0[3] - arg1[3];
480 }
481
482
483 static void do_XPD( struct arb_vp_machine *m, union instruction op )
484 {
485 GLfloat *result = m->File[0][op.alu.dst];
486 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
487 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
488
489 result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
490 result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
491 result[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];
492 }
493
494 static void do_NOP( struct arb_vp_machine *m, union instruction op )
495 {
496 }
497
498 /* Some useful debugging functions:
499 */
500 static void print_mask( GLuint mask )
501 {
502 _mesa_printf(".");
503 if (mask&0x1) _mesa_printf("x");
504 if (mask&0x2) _mesa_printf("y");
505 if (mask&0x4) _mesa_printf("z");
506 if (mask&0x8) _mesa_printf("w");
507 }
508
509 static void print_reg( GLuint file, GLuint reg )
510 {
511 static const char *reg_file[] = {
512 "REG",
513 "LOCAL_PARAM",
514 "ENV_PARAM",
515 "STATE_VAR",
516 };
517
518 if (file == 0) {
519 if (reg == REG_RES)
520 _mesa_printf("RES");
521 else if (reg >= REG_ARG0 && reg <= REG_ARG1)
522 _mesa_printf("ARG%d", reg - REG_ARG0);
523 else if (reg >= REG_TMP0 && reg <= REG_TMP11)
524 _mesa_printf("TMP%d", reg - REG_TMP0);
525 else if (reg >= REG_IN0 && reg <= REG_IN31)
526 _mesa_printf("IN%d", reg - REG_IN0);
527 else if (reg >= REG_OUT0 && reg <= REG_OUT14)
528 _mesa_printf("OUT%d", reg - REG_OUT0);
529 else if (reg == REG_ADDR)
530 _mesa_printf("ADDR");
531 else if (reg == REG_ID)
532 _mesa_printf("ID");
533 else
534 _mesa_printf("REG%d", reg);
535 }
536 else
537 _mesa_printf("%s:%d", reg_file[file], reg);
538 }
539
540
541 static void print_RSW( union instruction op, const struct opcode_info *info )
542 {
543 GLuint swz = op.rsw.swz;
544 GLuint neg = op.rsw.neg;
545 GLuint i;
546
547 _mesa_printf("%s ", info->string);
548 print_reg(0, op.rsw.dst);
549 _mesa_printf(", ");
550 print_reg(op.rsw.file0, op.rsw.idx0);
551 _mesa_printf(".");
552 for (i = 0; i < 4; i++, swz >>= 2) {
553 const char *cswz = "xyzw";
554 if (neg & (1<<i))
555 _mesa_printf("-");
556 _mesa_printf("%c", cswz[swz&0x3]);
557 }
558 _mesa_printf("\n");
559 }
560
561
562 static void print_ALU( union instruction op, const struct opcode_info *info )
563 {
564 _mesa_printf("%s ", info->string);
565 print_reg(0, op.alu.dst);
566 _mesa_printf(", ");
567 print_reg(op.alu.file0, op.alu.idx0);
568 if (info->nr_args > 1) {
569 _mesa_printf(", ");
570 print_reg(op.alu.file1, op.alu.idx1);
571 }
572 _mesa_printf("\n");
573 }
574
575 static void print_MSK( union instruction op, const struct opcode_info *info )
576 {
577 _mesa_printf("%s ", info->string);
578 print_reg(0, op.msk.dst);
579 print_mask(op.msk.mask);
580 _mesa_printf(", ");
581 print_reg(op.msk.file, op.msk.idx);
582 _mesa_printf("\n");
583 }
584
585
586 static void print_NOP( union instruction op, const struct opcode_info *info )
587 {
588 }
589
/* Instruction class tags.
 * NOTE(review): no use of these is visible in this part of the file --
 * confirm whether they are still needed.
 */
#define NOP 0
#define ALU 1
#define SWZ 2
593
/**
 * Per-opcode description table, indexed by opcode number (see
 * _tnl_disassem_vba_insn() and cvp_emit_inst()).  Each entry gives the
 * number of source arguments, the mnemonic and the disassembly
 * callback.
 *
 * NOTE(review): entries are positional, so their order presumably has
 * to match both the opcode enumeration and the opcode_func[] table --
 * confirm before inserting or reordering entries.
 */
static const struct opcode_info opcode_info[] =
{
   { 1, "ABS", print_ALU },
   { 2, "ADD", print_ALU },
   { 1, "ARL", print_NOP },
   { 2, "DP3", print_ALU },
   { 2, "DP4", print_ALU },
   { 2, "DPH", print_ALU },
   { 2, "DST", print_ALU },
   { 0, "END", print_NOP },
   { 1, "EX2", print_ALU },
   { 1, "EXP", print_ALU },
   { 1, "FLR", print_ALU },
   { 1, "FRC", print_ALU },
   { 1, "LG2", print_ALU },
   { 1, "LIT", print_ALU },
   { 1, "LOG", print_ALU },
   { 3, "MAD", print_NOP },
   { 2, "MAX", print_ALU },
   { 2, "MIN", print_ALU },
   { 1, "MOV", print_ALU },
   { 2, "MUL", print_ALU },
   { 2, "POW", print_ALU },
   { 1, "PRT", print_ALU }, /* PRINT */
   { 1, "RCC", print_NOP },
   { 1, "RCP", print_ALU },
   { 1, "RSQ", print_ALU },
   { 2, "SGE", print_ALU },
   { 2, "SLT", print_ALU },
   { 2, "SUB", print_ALU },
   { 1, "SWZ", print_NOP },
   { 2, "XPD", print_ALU },
   { 1, "RSW", print_RSW },
   { 2, "MSK", print_MSK },
   { 1, "REL", print_ALU },
};
630
631 void _tnl_disassem_vba_insn( union instruction op )
632 {
633 const struct opcode_info *info = &opcode_info[op.alu.opcode];
634 info->print( op, info );
635 }
636
637
/**
 * Table of functions that execute one machine instruction each.
 *
 * NOTE(review): entries are positional and line up one-to-one with
 * opcode_info[] above (do_NOP fills the ARL, END, MAD and RCC slots,
 * do_RSW fills both the SWZ and RSW slots) -- presumably the table is
 * indexed by opcode at execution time; confirm against the interpreter
 * loop before reordering.
 */
static void (* const opcode_func[])(struct arb_vp_machine *, union instruction) =
{
   do_ABS,
   do_ADD,
   do_NOP,
   do_DP3,
   do_DP4,
   do_DPH,
   do_DST,
   do_NOP,
   do_EX2,
   do_EXP,
   do_FLR,
   do_FRC,
   do_LG2,
   do_LIT,
   do_LOG,
   do_NOP,
   do_MAX,
   do_MIN,
   do_MOV,
   do_MUL,
   do_POW,
   do_PRT,
   do_NOP,
   do_RCP,
   do_RSQ,
   do_SGE,
   do_SLT,
   do_SUB,
   do_RSW,
   do_XPD,
   do_RSW,
   do_MSK,
   do_REL,
};
674
675 static union instruction *cvp_next_instruction( struct compilation *cp )
676 {
677 union instruction *op = cp->csr++;
678 op->dword = 0;
679 return op;
680 }
681
682 static struct reg cvp_make_reg( GLuint file, GLuint idx )
683 {
684 struct reg reg;
685 reg.file = file;
686 reg.idx = idx;
687 return reg;
688 }
689
690 static struct reg cvp_emit_rel( struct compilation *cp,
691 struct reg reg,
692 struct reg tmpreg )
693 {
694 union instruction *op = cvp_next_instruction(cp);
695 op->alu.opcode = REL;
696 op->alu.file0 = reg.file;
697 op->alu.idx0 = reg.idx;
698 op->alu.dst = tmpreg.idx;
699 return tmpreg;
700 }
701
702
703 static struct reg cvp_load_reg( struct compilation *cp,
704 GLuint file,
705 GLuint index,
706 GLuint rel,
707 GLuint tmpidx )
708 {
709 struct reg tmpreg = cvp_make_reg(FILE_REG, tmpidx);
710 struct reg reg;
711
712 switch (file) {
713 case PROGRAM_TEMPORARY:
714 return cvp_make_reg(FILE_REG, REG_TMP0 + index);
715
716 case PROGRAM_INPUT:
717 return cvp_make_reg(FILE_REG, REG_IN0 + index);
718
719 case PROGRAM_OUTPUT:
720 return cvp_make_reg(FILE_REG, REG_OUT0 + index);
721
722 /* These two aren't populated by the parser?
723 */
724 case PROGRAM_LOCAL_PARAM:
725 reg = cvp_make_reg(FILE_LOCAL_PARAM, index);
726 if (rel)
727 return cvp_emit_rel(cp, reg, tmpreg);
728 else
729 return reg;
730
731 case PROGRAM_ENV_PARAM:
732 reg = cvp_make_reg(FILE_ENV_PARAM, index);
733 if (rel)
734 return cvp_emit_rel(cp, reg, tmpreg);
735 else
736 return reg;
737
738 case PROGRAM_STATE_VAR:
739 reg = cvp_make_reg(FILE_STATE_PARAM, index);
740 if (rel)
741 return cvp_emit_rel(cp, reg, tmpreg);
742 else
743 return reg;
744
745 /* Invalid values:
746 */
747 case PROGRAM_WRITE_ONLY:
748 case PROGRAM_ADDRESS:
749 default:
750 assert(0);
751 return tmpreg; /* can't happen */
752 }
753 }
754
755 static struct reg cvp_emit_arg( struct compilation *cp,
756 const struct vp_src_register *src,
757 GLuint arg )
758 {
759 struct reg reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr, arg );
760 union instruction rsw, noop;
761
762 /* Emit any necessary swizzling.
763 */
764 rsw.dword = 0;
765 rsw.rsw.neg = src->Negate ? WRITEMASK_XYZW : 0;
766 rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) |
767 (GET_SWZ(src->Swizzle, 1) << 2) |
768 (GET_SWZ(src->Swizzle, 2) << 4) |
769 (GET_SWZ(src->Swizzle, 3) << 6));
770
771 noop.dword = 0;
772 noop.rsw.neg = 0;
773 noop.rsw.swz = RSW_NOOP;
774
775 if (rsw.dword != noop.dword) {
776 union instruction *op = cvp_next_instruction(cp);
777 struct reg rsw_reg = cvp_make_reg(FILE_REG, REG_ARG0 + arg);
778 op->dword = rsw.dword;
779 op->rsw.opcode = RSW;
780 op->rsw.file0 = reg.file;
781 op->rsw.idx0 = reg.idx;
782 op->rsw.dst = rsw_reg.idx;
783 return rsw_reg;
784 }
785 else
786 return reg;
787 }
788
789 static GLuint cvp_choose_result( struct compilation *cp,
790 const struct vp_dst_register *dst,
791 union instruction *fixup )
792 {
793 GLuint mask = dst->WriteMask;
794 GLuint idx;
795
796 switch (dst->File) {
797 case PROGRAM_TEMPORARY:
798 idx = REG_TMP0 + dst->Index;
799 break;
800 case PROGRAM_OUTPUT:
801 idx = REG_OUT0 + dst->Index;
802 break;
803 default:
804 assert(0);
805 return REG_RES; /* can't happen */
806 }
807
808 /* Optimization: When writing (with a writemask) to an undefined
809 * value for the first time, the writemask may be ignored.
810 */
811 if (mask != WRITEMASK_XYZW && (cp->reg_active & (1 << idx))) {
812 fixup->msk.opcode = MSK;
813 fixup->msk.dst = idx;
814 fixup->msk.file = FILE_REG;
815 fixup->msk.idx = REG_RES;
816 fixup->msk.mask = mask;
817 cp->reg_active |= 1 << idx;
818 return REG_RES;
819 }
820 else {
821 fixup->dword = 0;
822 cp->reg_active |= 1 << idx;
823 return idx;
824 }
825 }
826
827 static struct reg cvp_emit_rsw( struct compilation *cp,
828 GLuint dst,
829 struct reg src,
830 GLuint neg,
831 GLuint swz,
832 GLboolean force)
833 {
834 struct reg retval;
835
836 if (swz != RSW_NOOP || neg != 0) {
837 union instruction *op = cvp_next_instruction(cp);
838 op->rsw.opcode = RSW;
839 op->rsw.dst = dst;
840 op->rsw.file0 = src.file;
841 op->rsw.idx0 = src.idx;
842 op->rsw.neg = neg;
843 op->rsw.swz = swz;
844
845 retval.file = FILE_REG;
846 retval.idx = dst;
847 return retval;
848 }
849 else if (force) {
850 /* Oops. Degenerate case:
851 */
852 union instruction *op = cvp_next_instruction(cp);
853 op->alu.opcode = VP_OPCODE_MOV;
854 op->alu.dst = dst;
855 op->alu.file0 = src.file;
856 op->alu.idx0 = src.idx;
857
858 retval.file = FILE_REG;
859 retval.idx = dst;
860 return retval;
861 }
862 else {
863 return src;
864 }
865 }
866
867
868 static void cvp_emit_inst( struct compilation *cp,
869 const struct vp_instruction *inst )
870 {
871 const struct opcode_info *info = &opcode_info[inst->Opcode];
872 union instruction *op;
873 union instruction fixup;
874 struct reg reg[3];
875 GLuint result, i;
876
877 assert(sizeof(*op) == sizeof(GLuint));
878
879 /* Need to handle SWZ, ARL specially.
880 */
881 switch (inst->Opcode) {
882 /* Split into mul and add:
883 */
884 case VP_OPCODE_MAD:
885 result = cvp_choose_result( cp, &inst->DstReg, &fixup );
886 for (i = 0; i < 3; i++)
887 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0+i );
888
889 op = cvp_next_instruction(cp);
890 op->alu.opcode = VP_OPCODE_MUL;
891 op->alu.file0 = reg[0].file;
892 op->alu.idx0 = reg[0].idx;
893 op->alu.file1 = reg[1].file;
894 op->alu.idx1 = reg[1].idx;
895 op->alu.dst = REG_ARG0;
896
897 op = cvp_next_instruction(cp);
898 op->alu.opcode = VP_OPCODE_ADD;
899 op->alu.file0 = FILE_REG;
900 op->alu.idx0 = REG_ARG0;
901 op->alu.file1 = reg[2].file;
902 op->alu.idx1 = reg[2].idx;
903 op->alu.dst = result;
904 break;
905
906 case VP_OPCODE_ARL:
907 reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
908
909 op = cvp_next_instruction(cp);
910 op->alu.opcode = VP_OPCODE_FLR;
911 op->alu.dst = REG_ADDR;
912 op->alu.file0 = reg[0].file;
913 op->alu.idx0 = reg[0].idx;
914 break;
915
916 case VP_OPCODE_SWZ: {
917 GLuint swz0 = 0, swz1 = 0;
918 GLuint neg0 = 0, neg1 = 0;
919 GLuint mask = 0;
920
921 /* Translate 3-bit-per-element swizzle into two 2-bit swizzles,
922 * one from the source register the other from a constant
923 * {0,0,0,1}.
924 */
925 for (i = 0; i < 4; i++) {
926 GLuint swzelt = GET_SWZ(inst->SrcReg[0].Swizzle, i);
927 if (swzelt >= SWIZZLE_ZERO) {
928 neg0 |= inst->SrcReg[0].Negate & (1<<i);
929 if (swzelt == SWIZZLE_ONE)
930 swz0 |= SWIZZLE_W << (i*2);
931 else if (i < SWIZZLE_W)
932 swz0 |= i << (i*2);
933 }
934 else {
935 mask |= 1<<i;
936 neg1 |= inst->SrcReg[0].Negate & (1<<i);
937 swz1 |= swzelt << (i*2);
938 }
939 }
940
941 result = cvp_choose_result( cp, &inst->DstReg, &fixup );
942 reg[0].file = FILE_REG;
943 reg[0].idx = REG_ID;
944 reg[1] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
945
946 if (mask == WRITEMASK_XYZW) {
947 cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE);
948
949 }
950 else if (mask == 0) {
951 cvp_emit_rsw(cp, result, reg[1], neg1, swz1, GL_TRUE);
952 }
953 else {
954 cvp_emit_rsw(cp, result, reg[0], neg0, swz0, GL_TRUE);
955 reg[1] = cvp_emit_rsw(cp, REG_ARG0, reg[1], neg1, swz1, GL_FALSE);
956
957 op = cvp_next_instruction(cp);
958 op->msk.opcode = MSK;
959 op->msk.dst = result;
960 op->msk.file = reg[1].file;
961 op->msk.idx = reg[1].idx;
962 op->msk.mask = mask;
963 }
964
965 if (result == REG_RES) {
966 op = cvp_next_instruction(cp);
967 op->dword = fixup.dword;
968 }
969 break;
970 }
971 case VP_OPCODE_PRINT:
972 case VP_OPCODE_END:
973 break;
974
975 default:
976 result = cvp_choose_result( cp, &inst->DstReg, &fixup );
977 for (i = 0; i < info->nr_args; i++)
978 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
979
980 op = cvp_next_instruction(cp);
981 op->alu.opcode = inst->Opcode;
982 op->alu.file0 = reg[0].file;
983 op->alu.idx0 = reg[0].idx;
984 op->alu.file1 = reg[1].file;
985 op->alu.idx1 = reg[1].idx;
986 op->alu.dst = result;
987
988 if (result == REG_RES) {
989 op = cvp_next_instruction(cp);
990 op->dword = fixup.dword;
991 }
992 break;
993 }
994 }
995
996 static void free_tnl_data( struct vertex_program *program )
997 {
998 struct tnl_compiled_program *p = program->TnlData;
999 if (p->compiled_func) free((void *)p->compiled_func);
1000 free(p);
1001 program->TnlData = NULL;
1002 }
1003
1004 static void compile_vertex_program( struct vertex_program *program,
1005 GLboolean try_codegen )
1006 {
1007 struct compilation cp;
1008 struct tnl_compiled_program *p = CALLOC_STRUCT(tnl_compiled_program);
1009 GLuint i;
1010
1011 _mesa_printf("%s\n", __FUNCTION__);
1012
1013 if (program->TnlData)
1014 free_tnl_data( program );
1015
1016 program->TnlData = p;
1017
1018 /* Initialize cp. Note that ctx and VB aren't used in compilation
1019 * so we don't have to worry about statechanges:
1020 */
1021 memset(&cp, 0, sizeof(cp));
1022 cp.csr = p->instructions;
1023
1024 /* Compile instructions:
1025 */
1026 for (i = 0; i < program->Base.NumInstructions; i++) {
1027 cvp_emit_inst(&cp, &program->Instructions[i]);
1028 }
1029
1030 /* Finish up:
1031 */
1032 p->nr_instructions = cp.csr - p->instructions;
1033
1034 /* Print/disassemble:
1035 */
1036 if (DISASSEM) {
1037 for (i = 0; i < p->nr_instructions; i++) {
1038 _tnl_disassem_vba_insn(p->instructions[i]);
1039 }
1040 _mesa_printf("\n\n");
1041 }
1042
1043 #ifdef USE_SSE_ASM
1044 if (try_codegen)
1045 _tnl_sse_codegen_vertex_program(p);
1046 #endif
1047
1048 }
1049
1050
1051
1052
1053 /* ----------------------------------------------------------------------
1054 * Execution
1055 */
1056 static void userclip( GLcontext *ctx,
1057 GLvector4f *clip,
1058 GLubyte *clipmask,
1059 GLubyte *clipormask,
1060 GLubyte *clipandmask )
1061 {
1062 GLuint p;
1063
1064 for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
1065 if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
1066 GLuint nr, i;
1067 const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
1068 const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
1069 const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
1070 const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
1071 GLfloat *coord = (GLfloat *)clip->data;
1072 GLuint stride = clip->stride;
1073 GLuint count = clip->count;
1074
1075 for (nr = 0, i = 0 ; i < count ; i++) {
1076 GLfloat dp = (coord[0] * a +
1077 coord[1] * b +
1078 coord[2] * c +
1079 coord[3] * d);
1080
1081 if (dp < 0) {
1082 nr++;
1083 clipmask[i] |= CLIP_USER_BIT;
1084 }
1085
1086 STRIDE_F(coord, stride);
1087 }
1088
1089 if (nr > 0) {
1090 *clipormask |= CLIP_USER_BIT;
1091 if (nr == count) {
1092 *clipandmask |= CLIP_USER_BIT;
1093 return;
1094 }
1095 }
1096 }
1097 }
1098 }
1099
1100
1101 static GLboolean do_ndc_cliptest( struct arb_vp_machine *m )
1102 {
1103 GLcontext *ctx = m->ctx;
1104 TNLcontext *tnl = TNL_CONTEXT(ctx);
1105 struct vertex_buffer *VB = m->VB;
1106
1107 /* Cliptest and perspective divide. Clip functions must clear
1108 * the clipmask.
1109 */
1110 m->ormask = 0;
1111 m->andmask = CLIP_ALL_BITS;
1112
1113 if (tnl->NeedNdcCoords) {
1114 VB->NdcPtr =
1115 _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
1116 &m->ndcCoords,
1117 m->clipmask,
1118 &m->ormask,
1119 &m->andmask );
1120 }
1121 else {
1122 VB->NdcPtr = NULL;
1123 _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
1124 NULL,
1125 m->clipmask,
1126 &m->ormask,
1127 &m->andmask );
1128 }
1129
1130 if (m->andmask) {
1131 /* All vertices are outside the frustum */
1132 return GL_FALSE;
1133 }
1134
1135 /* Test userclip planes. This contributes to VB->ClipMask.
1136 */
1137 if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) {
1138 userclip( ctx,
1139 VB->ClipPtr,
1140 m->clipmask,
1141 &m->ormask,
1142 &m->andmask );
1143
1144 if (m->andmask) {
1145 return GL_FALSE;
1146 }
1147 }
1148
1149 VB->ClipAndMask = m->andmask;
1150 VB->ClipOrMask = m->ormask;
1151 VB->ClipMask = m->clipmask;
1152
1153 return GL_TRUE;
1154 }
1155
1156
1157 static INLINE void call_func( struct tnl_compiled_program *p,
1158 struct arb_vp_machine *m )
1159 {
1160 p->compiled_func(m);
1161 }
1162
1163 /**
1164 * Execute the given vertex program.
1165 *
1166 * TODO: Integrate the t_vertex.c code here, to build machine vertices
1167 * directly at this point.
1168 *
 * TODO: Eliminate the VB struct entirely and just use
 * struct arb_vp_machine.
1171 */
1172 static GLboolean
1173 run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
1174 {
1175 struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1176 ctx->VertexProgram.Current :
1177 ctx->_TnlProgram);
1178 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1179 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1180 struct tnl_compiled_program *p;
1181 GLuint i, j, outputs;
1182
1183 if (!program || program->IsNVProgram)
1184 return GL_TRUE;
1185
1186 if (program->Parameters) {
1187 _mesa_load_state_parameters(ctx, program->Parameters);
1188 }
1189
1190 p = (struct tnl_compiled_program *)program->TnlData;
1191 assert(p);
1192
1193 /* Initialize regs where necessary:
1194 */
1195 ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
1196
1197 m->nr_inputs = m->nr_outputs = 0;
1198
1199 for (i = 0; i < _TNL_ATTRIB_MAX; i++) {
1200 if (program->InputsRead & (1<<i)) {
1201 GLuint j = m->nr_inputs++;
1202 m->input[j].idx = i;
1203 m->input[j].data = (GLfloat *)m->VB->AttribPtr[i]->data;
1204 m->input[j].stride = m->VB->AttribPtr[i]->stride;
1205 m->input[j].size = m->VB->AttribPtr[i]->size;
1206 ASSIGN_4V(m->File[0][REG_IN0 + i], 0, 0, 0, 1);
1207 }
1208 }
1209
1210 for (i = 0; i < 15; i++) {
1211 if (program->OutputsWritten & (1<<i)) {
1212 GLuint j = m->nr_outputs++;
1213 m->output[j].idx = i;
1214 m->output[j].data = (GLfloat *)m->attribs[i].data;
1215 }
1216 }
1217
1218
1219 /* Run the actual program:
1220 */
1221 for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
1222 for (j = 0; j < m->nr_inputs; j++) {
1223 GLuint idx = REG_IN0 + m->input[j].idx;
1224 switch (m->input[j].size) {
1225 case 4: m->File[0][idx][3] = m->input[j].data[3];
1226 case 3: m->File[0][idx][2] = m->input[j].data[2];
1227 case 2: m->File[0][idx][1] = m->input[j].data[1];
1228 case 1: m->File[0][idx][0] = m->input[j].data[0];
1229 }
1230
1231 STRIDE_F(m->input[j].data, m->input[j].stride);
1232 }
1233
1234 if (p->compiled_func) {
1235 call_func( p, m );
1236 }
1237 else {
1238 for (j = 0; j < p->nr_instructions; j++) {
1239 union instruction inst = p->instructions[j];
1240 opcode_func[inst.alu.opcode]( m, inst );
1241 }
1242 }
1243
1244 for (j = 0; j < m->nr_outputs; j++) {
1245 GLuint idx = REG_OUT0 + m->output[j].idx;
1246 m->output[j].data[0] = m->File[0][idx][0];
1247 m->output[j].data[1] = m->File[0][idx][1];
1248 m->output[j].data[2] = m->File[0][idx][2];
1249 m->output[j].data[3] = m->File[0][idx][3];
1250 m->output[j].data += 4;
1251 }
1252 }
1253
1254 /* Setup the VB pointers so that the next pipeline stages get
1255 * their data from the right place (the program output arrays).
1256 *
1257 * TODO: 1) Have tnl use these RESULT values for outputs rather
1258 * than trying to shoe-horn inputs and outputs into one set of
1259 * values.
1260 *
1261 * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1262 * and build machine vertices here.
1263 */
1264 VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS];
1265 VB->ClipPtr->count = VB->Count;
1266
1267 outputs = program->OutputsWritten;
1268
1269 if (outputs & (1<<VERT_RESULT_COL0)) {
1270 VB->ColorPtr[0] = &m->attribs[VERT_RESULT_COL0];
1271 VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0];
1272 }
1273
1274 if (outputs & (1<<VERT_RESULT_BFC0)) {
1275 VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0];
1276 }
1277
1278 if (outputs & (1<<VERT_RESULT_COL1)) {
1279 VB->SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1];
1280 VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0];
1281 }
1282
1283 if (outputs & (1<<VERT_RESULT_BFC1)) {
1284 VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1];
1285 }
1286
1287 if (outputs & (1<<VERT_RESULT_FOGC)) {
1288 VB->FogCoordPtr = &m->attribs[VERT_RESULT_FOGC];
1289 VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr;
1290 }
1291
1292 if (outputs & (1<<VERT_RESULT_PSIZ)) {
1293 VB->PointSizePtr = &m->attribs[VERT_RESULT_PSIZ];
1294 VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ];
1295 }
1296
1297 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1298 if (outputs & (1<<(VERT_RESULT_TEX0+i))) {
1299 VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i];
1300 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i];
1301 }
1302 }
1303
1304 #if 0
1305 for (i = 0; i < VB->Count; i++) {
1306 printf("Out %d: %f %f %f %f %f %f %f %f\n", i,
1307 VEC_ELT(VB->ClipPtr, GLfloat, i)[0],
1308 VEC_ELT(VB->ClipPtr, GLfloat, i)[1],
1309 VEC_ELT(VB->ClipPtr, GLfloat, i)[2],
1310 VEC_ELT(VB->ClipPtr, GLfloat, i)[3],
1311 VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[0],
1312 VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[1],
1313 VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[2],
1314 VEC_ELT(VB->TexCoordPtr[0], GLfloat, i)[3]);
1315 }
1316 #endif
1317
1318 /* Perform NDC and cliptest operations:
1319 */
1320 return do_ndc_cliptest(m);
1321 }
1322
1323
1324 static void
1325 validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
1326 {
1327 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1328 struct vertex_program *program =
1329 (ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : 0);
1330
1331 if (!program && ctx->_MaintainTnlProgram) {
1332 program = ctx->_TnlProgram;
1333 }
1334
1335 if (program) {
1336 if (!program->TnlData)
1337 compile_vertex_program( program, m->try_codegen );
1338
1339 /* Grab the state GL state and put into registers:
1340 */
1341 m->File[FILE_LOCAL_PARAM] = program->Base.LocalParams;
1342 m->File[FILE_ENV_PARAM] = ctx->VertexProgram.Parameters;
1343 m->File[FILE_STATE_PARAM] = program->Parameters->ParameterValues;
1344 }
1345 }
1346
1347
1348
1349
1350
1351
1352
1353 /**
1354 * Called the first time stage->run is called. In effect, don't
1355 * allocate data until the first time the stage is run.
1356 */
1357 static GLboolean init_vertex_program( GLcontext *ctx,
1358 struct tnl_pipeline_stage *stage )
1359 {
1360 TNLcontext *tnl = TNL_CONTEXT(ctx);
1361 struct vertex_buffer *VB = &(tnl->vb);
1362 struct arb_vp_machine *m;
1363 const GLuint size = VB->Size;
1364 GLuint i;
1365
1366 stage->privatePtr = MALLOC(sizeof(*m));
1367 m = ARB_VP_MACHINE(stage);
1368 if (!m)
1369 return GL_FALSE;
1370
1371 /* arb_vertex_machine struct should subsume the VB:
1372 */
1373 m->VB = VB;
1374 m->ctx = ctx;
1375
1376 m->File[0] = ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);
1377
1378 if (_mesa_getenv("MESA_EXPERIMENTAL"))
1379 m->try_codegen = 1;
1380
1381 /* Allocate arrays of vertex output values */
1382 for (i = 0; i < VERT_RESULT_MAX; i++) {
1383 _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 );
1384 m->attribs[i].size = 4;
1385 }
1386
1387 /* a few other misc allocations */
1388 _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 );
1389 m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 );
1390
1391 if (ctx->_MaintainTnlProgram)
1392 _mesa_allow_light_in_model( ctx, GL_FALSE );
1393
1394 return GL_TRUE;
1395 }
1396
1397
1398
1399
1400 /**
1401 * Destructor for this pipeline stage.
1402 */
1403 static void dtr( struct tnl_pipeline_stage *stage )
1404 {
1405 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1406
1407 if (m) {
1408 GLuint i;
1409
1410 /* free the vertex program result arrays */
1411 for (i = 0; i < VERT_RESULT_MAX; i++)
1412 _mesa_vector4f_free( &m->attribs[i] );
1413
1414 /* free misc arrays */
1415 _mesa_vector4f_free( &m->ndcCoords );
1416 ALIGN_FREE( m->clipmask );
1417 ALIGN_FREE( m->File[0] );
1418
1419 FREE( m );
1420 stage->privatePtr = NULL;
1421 }
1422 }
1423
1424 /**
1425 * Public description of this pipeline stage.
1426 */
1427 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage =
1428 {
1429 "vertex-program",
1430 NULL, /* private_data */
1431 init_vertex_program, /* create */
1432 dtr, /* destroy */
1433 validate_vertex_program, /* validate */
1434 run_arb_vertex_program /* run */
1435 };