Rename temp_flag to temp_in_use. Use ctx->Const.MaxVertexProgramTemps
[mesa.git] / src / mesa / tnl / t_vb_arbprogram.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.3
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file t_vb_arbprogram.c
27 * Compile vertex programs to an intermediate representation.
28 * Execute vertex programs over a buffer of vertices.
29 * \author Keith Whitwell, Brian Paul
30 */
31
32 #include "glheader.h"
33 #include "context.h"
34 #include "imports.h"
35 #include "macros.h"
36 #include "mtypes.h"
37 #include "arbprogparse.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40 #include "math/m_translate.h"
41 #include "t_context.h"
42 #include "t_pipeline.h"
43 #include "t_vp_build.h"
44
/* Define to see the compiled program on stderr:
 */
#define DISASSEM 0


/* New, internal instructions.  They are numbered directly after the
 * last public opcode (VP_OPCODE_XPD) so that one table, opcode_info[],
 * can be indexed by both kinds of opcode.
 */
#define IN1 (VP_OPCODE_XPD+1)   /* input-to-reg MOV, 1 component */
#define IN2 (IN1+1)             /* input-to-reg MOV, 2 components */
#define IN3 (IN1+2)             /* input-to-reg MOV, 3 components */
#define IN4 (IN1+3)             /* input-to-reg MOV, 4 components */
#define OUT (IN1+4)             /* reg-to-output MOV */
#define OUM (IN1+5)             /* reg-to-output MOV with mask */
#define RSW (IN1+6)             /* reduced swizzle (xyzw only, one negate bit) */
#define MSK (IN1+7)             /* reg-to-reg MOV with mask */
#define PAR (IN1+8)             /* parameter-to-reg MOV */
#define PRL (IN1+9)             /* parameter-to-reg MOV, relatively addressed */
62
63
/* Layout of the internal register file (struct arb_vp_machine::reg):
 *
 *    0      -- Scratch (Arg0)
 *    1      -- Scratch (Arg1)
 *    2      -- Scratch (Arg2)
 *    3      -- Scratch (Result)
 *    4..31  -- Program Temporary 0..27
 *    32..63 -- State/Input/Const shadow 0..31
 */

/* Scratch registers */
#define REG_ARG0 0
#define REG_ARG1 1
#define REG_ARG2 2
#define REG_RES 3

/* Program temporaries: [REG_TMP0, REG_TMP_MAX) */
#define REG_TMP0 4
#define REG_TMP_MAX 32
#define REG_TMP_NR (REG_TMP_MAX-REG_TMP0)

/* Shadow copies of parameters, state and inputs: [REG_PAR0, REG_PAR_MAX) */
#define REG_PAR0 32
#define REG_PAR_MAX 64
#define REG_PAR_NR (REG_PAR_MAX-REG_PAR0)

/* Total number of registers */
#define REG_MAX 64

/* First register number that no longer fits the 4-bit dst field of
 * the SWZ instruction encoding.
 */
#define REG_SWZDST_MAX 16
94
95 /* ARB_vp instructions are broken down into one or more of the
96 * following micro-instructions, each representable in a 32 bit packed
97 * structure.
98 */
99
100
101 union instruction {
102 struct {
103 GLuint opcode:6;
104 GLuint dst:5;
105 GLuint arg0:6;
106 GLuint arg1:6;
107 GLuint elt:2; /* x,y,z or w */
108 GLuint pad:7;
109 } scl;
110
111
112 struct {
113 GLuint opcode:6;
114 GLuint dst:5;
115 GLuint arg0:6;
116 GLuint arg1:6;
117 GLuint arg2:6;
118 GLuint pad:3;
119 } vec;
120
121 struct {
122 GLuint opcode:6;
123 GLuint dst:4; /* NOTE! REG 0..16 only! */
124 GLuint arg0:6;
125 GLuint neg:4;
126 GLuint swz:12;
127 } swz;
128
129 struct {
130 GLuint opcode:6;
131 GLuint dst:6;
132 GLuint arg0:6;
133 GLuint neg:1; /* 1 bit only */
134 GLuint swz:8; /* xyzw only */
135 GLuint pad:5;
136 } rsw;
137
138 struct {
139 GLuint opcode:6;
140 GLuint reg:6;
141 GLuint file:5;
142 GLuint idx:8; /* plenty? */
143 GLuint rel:1;
144 GLuint pad:6;
145 } inr;
146
147
148 struct {
149 GLuint opcode:6;
150 GLuint reg:6;
151 GLuint file:5;
152 GLuint idx:8; /* plenty? */
153 GLuint mask:4;
154 GLuint pad:3;
155 } out;
156
157 struct {
158 GLuint opcode:6;
159 GLuint dst:5;
160 GLuint arg0:6;
161 GLuint mask:4;
162 GLuint pad:11;
163 } msk;
164
165 GLuint dword;
166 };
167
168
169
170 struct compilation {
171 struct {
172 GLuint file:5;
173 GLuint idx:8;
174 } reg[REG_PAR_NR];
175
176 GLuint par_active;
177 GLuint par_protected;
178 GLuint tmp_active;
179
180 union instruction *csr;
181
182 struct vertex_buffer *VB; /* for input sizes! */
183 };
184
185 /*--------------------------------------------------------------------------- */
186
187 /*!
188 * Private storage for the vertex program pipeline stage.
189 */
190 struct arb_vp_machine {
191 GLfloat reg[REG_MAX][4]; /* Program temporaries, shadowed parameters and inputs,
192 plus some internal values */
193
194 GLfloat (*File[8])[4]; /* Src/Dest for PAR/PRL instructions. */
195 GLint AddressReg;
196
197 union instruction store[1024];
198 union instruction *instructions;
199 GLint nr_instructions;
200
201 GLvector4f attribs[VERT_RESULT_MAX]; /**< result vectors. */
202 GLvector4f ndcCoords; /**< normalized device coords */
203 GLubyte *clipmask; /**< clip flags */
204 GLubyte ormask, andmask; /**< for clipping */
205
206 GLuint vtx_nr; /**< loop counter */
207
208 struct vertex_buffer *VB;
209 GLcontext *ctx;
210 };
211
212
213 /*--------------------------------------------------------------------------- */
214
215 struct opcode_info {
216 GLuint type;
217 GLuint nr_args;
218 const char *string;
219 void (*func)( struct arb_vp_machine *, union instruction );
220 void (*print)( union instruction , const struct opcode_info * );
221 };
222
223
/* Fetch a pipeline stage's private pointer as a struct arb_vp_machine *.
 * The argument is parenthesized so that any pointer expression may be
 * passed, not just a plain identifier.
 */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)((stage)->privatePtr))
225
226
227
/**
 * Set x to positive or negative infinity.
 *
 * Every variant expands to a single parenthesized assignment
 * expression, and the argument is parenthesized, so the macros can be
 * used with any lvalue expression.
 *
 * XXX: FIXME - type punning.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&(x)) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT)
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

#define FREXPF(a,b) frexpf(a,b)

/* Replicate element 0 of a 4-vector into elements 1..3. */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])

/* Overwrite the bit pattern of the float lvalue x with the integer
 * 'bits'.
 *
 * FIXME: more type punning (despite use of fi_type...)
 */
#define SET_FLOAT_BITS(x, bits) ( ((fi_type *) (void *) &(x))->i = (bits) )
252
253
254 static GLfloat RoughApproxLog2(GLfloat t)
255 {
256 return LOG2(t);
257 }
258
259 static GLfloat RoughApproxPow2(GLfloat t)
260 {
261 GLfloat q;
262 #ifdef USE_IEEE
263 GLint ii = (GLint) t;
264 ii = (ii < 23) + 0x3f800000;
265 SET_FLOAT_BITS(q, ii);
266 q = *((GLfloat *) (void *)&ii);
267 #else
268 q = (GLfloat) pow(2.0, floor_t0);
269 #endif
270 return q;
271 }
272
273 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
274 {
275 #if 0
276 return (GLfloat) exp(y * log(x));
277 #else
278 return (GLfloat) _mesa_pow(x, y);
279 #endif
280 }
281
282
283 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
284
285
286
287
288 /**
289 * This is probably the least-optimal part of the process, have to
290 * multiply out the stride to access each incoming input value.
291 */
292 static GLfloat *get_input( struct arb_vp_machine *m, GLuint index )
293 {
294 return VEC_ELT(m->VB->AttribPtr[index], GLfloat, m->vtx_nr);
295 }
296
297
298 /**
299 * Fetch a 4-element float vector from the given source register.
300 * Deal with the possibility that not all elements are present.
301 */
302 static void do_IN1( struct arb_vp_machine *m, union instruction op )
303 {
304 GLfloat *result = m->reg[op.inr.reg];
305 const GLfloat *src = get_input(m, op.inr.idx);
306
307 result[0] = src[0];
308 result[1] = 0;
309 result[2] = 0;
310 result[3] = 1;
311 }
312
313 static void do_IN2( struct arb_vp_machine *m, union instruction op )
314 {
315 GLfloat *result = m->reg[op.inr.reg];
316 const GLfloat *src = get_input(m, op.inr.idx);
317
318 result[0] = src[0];
319 result[1] = src[1];
320 result[2] = 0;
321 result[3] = 1;
322 }
323
324 static void do_IN3( struct arb_vp_machine *m, union instruction op )
325 {
326 GLfloat *result = m->reg[op.inr.reg];
327 const GLfloat *src = get_input(m, op.inr.idx);
328
329 result[0] = src[0];
330 result[1] = src[1];
331 result[2] = src[2];
332 result[3] = 1;
333 }
334
335 static void do_IN4( struct arb_vp_machine *m, union instruction op )
336 {
337 GLfloat *result = m->reg[op.inr.reg];
338 const GLfloat *src = get_input(m, op.inr.idx);
339
340 result[0] = src[0];
341 result[1] = src[1];
342 result[2] = src[2];
343 result[3] = src[3];
344 }
345
346 /**
347 * Perform a reduced swizzle:
348 */
349 static void do_RSW( struct arb_vp_machine *m, union instruction op )
350 {
351 GLfloat *result = m->reg[op.rsw.dst];
352 const GLfloat *arg0 = m->reg[op.rsw.arg0];
353 GLuint swz = op.rsw.swz;
354 GLuint neg = op.rsw.neg;
355 GLuint i;
356
357 if (neg)
358 for (i = 0; i < 4; i++, swz >>= 2)
359 result[i] = -arg0[swz & 0x3];
360 else
361 for (i = 0; i < 4; i++, swz >>= 2)
362 result[i] = arg0[swz & 0x3];
363 }
364
365
366
367 /**
368 * Store 4 floats into an external address.
369 */
370 static void do_OUM( struct arb_vp_machine *m, union instruction op )
371 {
372 GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
373 const GLfloat *value = m->reg[op.out.reg];
374
375 if (op.out.mask & 0x1) dst[0] = value[0];
376 if (op.out.mask & 0x2) dst[1] = value[1];
377 if (op.out.mask & 0x4) dst[2] = value[2];
378 if (op.out.mask & 0x8) dst[3] = value[3];
379 }
380
381 static void do_OUT( struct arb_vp_machine *m, union instruction op )
382 {
383 GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
384 const GLfloat *value = m->reg[op.out.reg];
385
386 dst[0] = value[0];
387 dst[1] = value[1];
388 dst[2] = value[2];
389 dst[3] = value[3];
390 }
391
392 /* Register-to-register MOV with writemask.
393 */
394 static void do_MSK( struct arb_vp_machine *m, union instruction op )
395 {
396 GLfloat *dst = m->reg[op.msk.dst];
397 const GLfloat *arg0 = m->reg[op.msk.arg0];
398
399 if (op.msk.mask & 0x1) dst[0] = arg0[0];
400 if (op.msk.mask & 0x2) dst[1] = arg0[1];
401 if (op.msk.mask & 0x4) dst[2] = arg0[2];
402 if (op.msk.mask & 0x8) dst[3] = arg0[3];
403 }
404
405
406 /* Retreive parameters and other constant values:
407 */
408 static void do_PAR( struct arb_vp_machine *m, union instruction op )
409 {
410 GLfloat *result = m->reg[op.inr.reg];
411 const GLfloat *src = m->File[op.inr.file][op.inr.idx];
412
413 result[0] = src[0];
414 result[1] = src[1];
415 result[2] = src[2];
416 result[3] = src[3];
417 }
418
419
420 #define RELADDR_MASK (MAX_NV_VERTEX_PROGRAM_PARAMS-1)
421
422 static void do_PRL( struct arb_vp_machine *m, union instruction op )
423 {
424 GLfloat *result = m->reg[op.inr.reg];
425 GLuint index = (op.inr.idx + m->AddressReg) & RELADDR_MASK;
426 const GLfloat *src = m->File[op.inr.file][index];
427
428 result[0] = src[0];
429 result[1] = src[1];
430 result[2] = src[2];
431 result[3] = src[3];
432 }
433
434 static void do_PRT( struct arb_vp_machine *m, union instruction op )
435 {
436 const GLfloat *arg0 = m->reg[op.vec.arg0];
437
438 _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
439 arg0[0], arg0[1], arg0[2], arg0[3]);
440 }
441
442
443 /**
444 * The traditional ALU and texturing instructions. All operate on
445 * internal registers and ignore write masks and swizzling issues.
446 */
447
448 static void do_ABS( struct arb_vp_machine *m, union instruction op )
449 {
450 GLfloat *result = m->reg[op.vec.dst];
451 const GLfloat *arg0 = m->reg[op.vec.arg0];
452
453 result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
454 result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
455 result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
456 result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
457 }
458
459 static void do_ADD( struct arb_vp_machine *m, union instruction op )
460 {
461 GLfloat *result = m->reg[op.vec.dst];
462 const GLfloat *arg0 = m->reg[op.vec.arg0];
463 const GLfloat *arg1 = m->reg[op.vec.arg1];
464
465 result[0] = arg0[0] + arg1[0];
466 result[1] = arg0[1] + arg1[1];
467 result[2] = arg0[2] + arg1[2];
468 result[3] = arg0[3] + arg1[3];
469 }
470
471
472 static void do_ARL( struct arb_vp_machine *m, union instruction op )
473 {
474 const GLfloat *arg0 = m->reg[op.out.reg];
475 m->AddressReg = (GLint) floor(arg0[0]);
476 }
477
478
479 static void do_DP3( struct arb_vp_machine *m, union instruction op )
480 {
481 GLfloat *result = m->reg[op.scl.dst];
482 const GLfloat *arg0 = m->reg[op.scl.arg0];
483 const GLfloat *arg1 = m->reg[op.scl.arg1];
484
485 result[0] = (arg0[0] * arg1[0] +
486 arg0[1] * arg1[1] +
487 arg0[2] * arg1[2]);
488
489 PUFF(result);
490 }
491
#if 0
/* Disabled 4x4 matrix * vector instruction.
 *
 * NOTE(review): as originally written this could not compile (it
 * initialized an array of pointers from a single register and used
 * undeclared mat0..mat3).  This version ASSUMES the matrix rows live
 * in four consecutive registers starting at op.scl.arg1 -- confirm
 * before enabling, and make sure arg1 + 3 stays below REG_MAX.
 */
static void do_MAT4( struct arb_vp_machine *m, union instruction op )
{
   GLfloat *result = m->reg[op.scl.dst];
   const GLfloat *arg0 = m->reg[op.scl.arg0];
   GLfloat tmp[4];
   GLuint row;

   /* Accumulate into a temporary so that the destination may safely
    * alias the source vector.
    */
   for (row = 0; row < 4; row++) {
      const GLfloat *mat = m->reg[op.scl.arg1 + row];

      tmp[row] = (arg0[0] * mat[0] + arg0[1] * mat[1] +
                  arg0[2] * mat[2] + arg0[3] * mat[3]);
   }

   for (row = 0; row < 4; row++)
      result[row] = tmp[row];
}
#endif
505
506
507 static void do_DP4( struct arb_vp_machine *m, union instruction op )
508 {
509 GLfloat *result = m->reg[op.scl.dst];
510 const GLfloat *arg0 = m->reg[op.scl.arg0];
511 const GLfloat *arg1 = m->reg[op.scl.arg1];
512
513 result[0] = (arg0[0] * arg1[0] +
514 arg0[1] * arg1[1] +
515 arg0[2] * arg1[2] +
516 arg0[3] * arg1[3]);
517
518 PUFF(result);
519 }
520
521 static void do_DPH( struct arb_vp_machine *m, union instruction op )
522 {
523 GLfloat *result = m->reg[op.scl.dst];
524 const GLfloat *arg0 = m->reg[op.scl.arg0];
525 const GLfloat *arg1 = m->reg[op.scl.arg1];
526
527 result[0] = (arg0[0] * arg1[0] +
528 arg0[1] * arg1[1] +
529 arg0[2] * arg1[2] +
530 1.0 * arg1[3]);
531
532 PUFF(result);
533 }
534
535 static void do_DST( struct arb_vp_machine *m, union instruction op )
536 {
537 GLfloat *result = m->reg[op.vec.dst];
538 const GLfloat *arg0 = m->reg[op.vec.arg0];
539 const GLfloat *arg1 = m->reg[op.vec.arg1];
540
541 result[0] = 1.0F;
542 result[1] = arg0[1] * arg1[1];
543 result[2] = arg0[2];
544 result[3] = arg1[3];
545 }
546
547
548 static void do_EX2( struct arb_vp_machine *m, union instruction op )
549 {
550 GLfloat *result = m->reg[op.scl.dst];
551 const GLfloat *arg0 = m->reg[op.scl.arg0];
552
553 result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
554 PUFF(result);
555 }
556
557 static void do_EXP( struct arb_vp_machine *m, union instruction op )
558 {
559 GLfloat *result = m->reg[op.vec.dst];
560 const GLfloat *arg0 = m->reg[op.vec.arg0];
561 GLfloat tmp = arg0[0];
562 GLfloat flr_tmp = FLOORF(tmp);
563
564 /* KW: nvvertexec has an optimized version of this which is pretty
565 * hard to understand/validate, but avoids the RoughApproxPow2.
566 */
567 result[0] = (GLfloat) (1 << (int)flr_tmp);
568 result[1] = tmp - flr_tmp;
569 result[2] = RoughApproxPow2(tmp);
570 result[3] = 1.0F;
571 }
572
573 static void do_FLR( struct arb_vp_machine *m, union instruction op )
574 {
575 GLfloat *result = m->reg[op.vec.dst];
576 const GLfloat *arg0 = m->reg[op.vec.arg0];
577
578 result[0] = FLOORF(arg0[0]);
579 result[1] = FLOORF(arg0[1]);
580 result[2] = FLOORF(arg0[2]);
581 result[3] = FLOORF(arg0[3]);
582 }
583
584 static void do_FRC( struct arb_vp_machine *m, union instruction op )
585 {
586 GLfloat *result = m->reg[op.vec.dst];
587 const GLfloat *arg0 = m->reg[op.vec.arg0];
588
589 result[0] = arg0[0] - FLOORF(arg0[0]);
590 result[1] = arg0[1] - FLOORF(arg0[1]);
591 result[2] = arg0[2] - FLOORF(arg0[2]);
592 result[3] = arg0[3] - FLOORF(arg0[3]);
593 }
594
595 static void do_LG2( struct arb_vp_machine *m, union instruction op )
596 {
597 GLfloat *result = m->reg[op.scl.dst];
598 const GLfloat *arg0 = m->reg[op.scl.arg0];
599
600 result[0] = RoughApproxLog2(arg0[0]);
601 PUFF(result);
602 }
603
604
605
606 static void do_LIT( struct arb_vp_machine *m, union instruction op )
607 {
608 GLfloat *result = m->reg[op.vec.dst];
609 const GLfloat *arg0 = m->reg[op.vec.arg0];
610
611 const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
612 GLfloat tmp[4];
613
614 tmp[0] = MAX2(arg0[0], 0.0F);
615 tmp[1] = MAX2(arg0[1], 0.0F);
616 tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
617
618 result[0] = 1.0;
619 result[1] = tmp[0];
620 result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
621 result[3] = 1.0;
622 }
623
624
625 static void do_LOG( struct arb_vp_machine *m, union instruction op )
626 {
627 GLfloat *result = m->reg[op.vec.dst];
628 const GLfloat *arg0 = m->reg[op.vec.arg0];
629 GLfloat tmp = FABSF(arg0[0]);
630 int exponent;
631 GLfloat mantissa = FREXPF(tmp, &exponent);
632
633 result[0] = (GLfloat) (exponent - 1);
634 result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
635 result[2] = result[0] + LOG2(result[1]);
636 result[3] = 1.0;
637 }
638
639
640 static void do_MAD( struct arb_vp_machine *m, union instruction op )
641 {
642 GLfloat *result = m->reg[op.vec.dst];
643 const GLfloat *arg0 = m->reg[op.vec.arg0];
644 const GLfloat *arg1 = m->reg[op.vec.arg1];
645 const GLfloat *arg2 = m->reg[op.vec.arg2];
646
647 result[0] = arg0[0] * arg1[0] + arg2[0];
648 result[1] = arg0[1] * arg1[1] + arg2[1];
649 result[2] = arg0[2] * arg1[2] + arg2[2];
650 result[3] = arg0[3] * arg1[3] + arg2[3];
651 }
652
653 static void do_MAX( struct arb_vp_machine *m, union instruction op )
654 {
655 GLfloat *result = m->reg[op.vec.dst];
656 const GLfloat *arg0 = m->reg[op.vec.arg0];
657 const GLfloat *arg1 = m->reg[op.vec.arg1];
658
659 result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
660 result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
661 result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
662 result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
663 }
664
665
666 static void do_MIN( struct arb_vp_machine *m, union instruction op )
667 {
668 GLfloat *result = m->reg[op.vec.dst];
669 const GLfloat *arg0 = m->reg[op.vec.arg0];
670 const GLfloat *arg1 = m->reg[op.vec.arg1];
671
672 result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
673 result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
674 result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
675 result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
676 }
677
678 static void do_MOV( struct arb_vp_machine *m, union instruction op )
679 {
680 GLfloat *result = m->reg[op.vec.dst];
681 const GLfloat *arg0 = m->reg[op.vec.arg0];
682
683 result[0] = arg0[0];
684 result[1] = arg0[1];
685 result[2] = arg0[2];
686 result[3] = arg0[3];
687 }
688
689 static void do_MUL( struct arb_vp_machine *m, union instruction op )
690 {
691 GLfloat *result = m->reg[op.vec.dst];
692 const GLfloat *arg0 = m->reg[op.vec.arg0];
693 const GLfloat *arg1 = m->reg[op.vec.arg1];
694
695 result[0] = arg0[0] * arg1[0];
696 result[1] = arg0[1] * arg1[1];
697 result[2] = arg0[2] * arg1[2];
698 result[3] = arg0[3] * arg1[3];
699 }
700
701
702 static void do_POW( struct arb_vp_machine *m, union instruction op )
703 {
704 GLfloat *result = m->reg[op.scl.dst];
705 const GLfloat *arg0 = m->reg[op.scl.arg0];
706 const GLfloat *arg1 = m->reg[op.scl.arg1];
707
708 result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
709 PUFF(result);
710 }
711
712 static void do_RCP( struct arb_vp_machine *m, union instruction op )
713 {
714 GLfloat *result = m->reg[op.scl.dst];
715 const GLfloat *arg0 = m->reg[op.scl.arg0];
716
717 result[0] = 1.0F / arg0[0];
718 PUFF(result);
719 }
720
721 static void do_RSQ( struct arb_vp_machine *m, union instruction op )
722 {
723 GLfloat *result = m->reg[op.scl.dst];
724 const GLfloat *arg0 = m->reg[op.scl.arg0];
725
726 result[0] = INV_SQRTF(FABSF(arg0[0]));
727 PUFF(result);
728 }
729
730
731 static void do_SGE( struct arb_vp_machine *m, union instruction op )
732 {
733 GLfloat *result = m->reg[op.vec.dst];
734 const GLfloat *arg0 = m->reg[op.vec.arg0];
735 const GLfloat *arg1 = m->reg[op.vec.arg1];
736
737 result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
738 result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
739 result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
740 result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
741 }
742
743
744 static void do_SLT( struct arb_vp_machine *m, union instruction op )
745 {
746 GLfloat *result = m->reg[op.vec.dst];
747 const GLfloat *arg0 = m->reg[op.vec.arg0];
748 const GLfloat *arg1 = m->reg[op.vec.arg1];
749
750 result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
751 result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
752 result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
753 result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
754 }
755
756 static void do_SWZ( struct arb_vp_machine *m, union instruction op )
757 {
758 GLfloat *result = m->reg[op.swz.dst];
759 const GLfloat *arg0 = m->reg[op.swz.arg0];
760 GLuint swz = op.swz.swz;
761 GLuint neg = op.swz.neg;
762 GLuint i;
763
764 for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
765 switch (swz & 0x7) {
766 case SWIZZLE_ZERO: result[i] = 0.0; break;
767 case SWIZZLE_ONE: result[i] = 1.0; break;
768 default: result[i] = arg0[swz & 0x7]; break;
769 }
770 if (neg & 0x1) result[i] = -result[i];
771 }
772 }
773
774 static void do_SUB( struct arb_vp_machine *m, union instruction op )
775 {
776 GLfloat *result = m->reg[op.vec.dst];
777 const GLfloat *arg0 = m->reg[op.vec.arg0];
778 const GLfloat *arg1 = m->reg[op.vec.arg1];
779
780 result[0] = arg0[0] - arg1[0];
781 result[1] = arg0[1] - arg1[1];
782 result[2] = arg0[2] - arg1[2];
783 result[3] = arg0[3] - arg1[3];
784 }
785
786
787 static void do_XPD( struct arb_vp_machine *m, union instruction op )
788 {
789 GLfloat *result = m->reg[op.vec.dst];
790 const GLfloat *arg0 = m->reg[op.vec.arg0];
791 const GLfloat *arg1 = m->reg[op.vec.arg1];
792
793 result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
794 result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
795 result[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];
796 }
797
798 static void do_NOP( struct arb_vp_machine *m, union instruction op )
799 {
800 }
801
802 /* Some useful debugging functions:
803 */
804 static void print_reg( GLuint reg )
805 {
806 if (reg == REG_RES)
807 _mesa_printf("RES");
808 else if (reg >= REG_ARG0 && reg <= REG_ARG2)
809 _mesa_printf("ARG%d", reg - REG_ARG0);
810 else if (reg >= REG_TMP0 && reg < REG_TMP_MAX)
811 _mesa_printf("TMP%d", reg - REG_TMP0);
812 else if (reg >= REG_PAR0 && reg < REG_PAR_MAX)
813 _mesa_printf("PAR%d", reg - REG_PAR0);
814 else
815 _mesa_printf("???");
816 }
817
818 static void print_mask( GLuint mask )
819 {
820 _mesa_printf(".");
821 if (mask&0x1) _mesa_printf("x");
822 if (mask&0x2) _mesa_printf("y");
823 if (mask&0x4) _mesa_printf("z");
824 if (mask&0x8) _mesa_printf("w");
825 }
826
827 static void print_extern( GLuint file, GLuint idx )
828 {
829 static const char *reg_file[] = {
830 "TEMPORARY",
831 "INPUT",
832 "OUTPUT",
833 "LOCAL_PARAM",
834 "ENV_PARAM",
835 "NAMED_PARAM",
836 "STATE_VAR",
837 "WRITE_ONLY",
838 "ADDRESS"
839 };
840
841 _mesa_printf("%s:%d", reg_file[file], idx);
842 }
843
844
845
846 static void print_SWZ( union instruction op, const struct opcode_info *info )
847 {
848 GLuint swz = op.swz.swz;
849 GLuint neg = op.swz.neg;
850 GLuint i;
851
852 _mesa_printf("%s ", info->string);
853 print_reg(op.swz.dst);
854 _mesa_printf(", ");
855 print_reg(op.swz.arg0);
856 _mesa_printf(".");
857 for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
858 const char *cswz = "xyzw01??";
859 if (neg & 0x1)
860 _mesa_printf("-");
861 _mesa_printf("%c", cswz[swz&0x7]);
862 }
863 _mesa_printf("\n");
864 }
865
866 static void print_RSW( union instruction op, const struct opcode_info *info )
867 {
868 GLuint swz = op.rsw.swz;
869 GLuint neg = op.rsw.neg;
870 GLuint i;
871
872 _mesa_printf("%s ", info->string);
873 print_reg(op.rsw.dst);
874 _mesa_printf(", ");
875 print_reg(op.rsw.arg0);
876 _mesa_printf(".");
877 for (i = 0; i < 4; i++, swz >>= 2) {
878 const char *cswz = "xyzw";
879 if (neg)
880 _mesa_printf("-");
881 _mesa_printf("%c", cswz[swz&0x3]);
882 }
883 _mesa_printf("\n");
884 }
885
886
887 static void print_SCL( union instruction op, const struct opcode_info *info )
888 {
889 _mesa_printf("%s ", info->string);
890 print_reg(op.scl.dst);
891 _mesa_printf(", ");
892 print_reg(op.scl.arg0);
893 if (info->nr_args > 1) {
894 _mesa_printf(", ");
895 print_reg(op.scl.arg1);
896 }
897 _mesa_printf("\n");
898 }
899
900
901 static void print_VEC( union instruction op, const struct opcode_info *info )
902 {
903 _mesa_printf("%s ", info->string);
904 print_reg(op.vec.dst);
905 _mesa_printf(", ");
906 print_reg(op.vec.arg0);
907 if (info->nr_args > 1) {
908 _mesa_printf(", ");
909 print_reg(op.vec.arg1);
910 }
911 if (info->nr_args > 2) {
912 _mesa_printf(", ");
913 print_reg(op.vec.arg2);
914 }
915 _mesa_printf("\n");
916 }
917
918 static void print_MSK( union instruction op, const struct opcode_info *info )
919 {
920 _mesa_printf("%s ", info->string);
921 print_reg(op.msk.dst);
922 print_mask(op.msk.mask);
923 _mesa_printf(", ");
924 print_reg(op.msk.arg0);
925 _mesa_printf("\n");
926 }
927
928 static void print_IN( union instruction op, const struct opcode_info *info )
929 {
930 _mesa_printf("%s ", info->string);
931 print_reg(op.inr.reg);
932 _mesa_printf(", ");
933 print_extern(op.inr.file, op.inr.idx);
934 _mesa_printf("\n");
935 }
936
937 static void print_OUT( union instruction op, const struct opcode_info *info )
938 {
939 _mesa_printf("%s ", info->string);
940 print_extern(op.out.file, op.out.idx);
941 if (op.out.opcode == OUM)
942 print_mask(op.out.mask);
943 _mesa_printf(", ");
944 print_reg(op.out.reg);
945 _mesa_printf("\n");
946 }
947
948 static void print_NOP( union instruction op, const struct opcode_info *info )
949 {
950 }
951
952 #define NOP 0
953 #define VEC 1
954 #define SCL 2
955 #define SWZ 3
956
957 static const struct opcode_info opcode_info[] =
958 {
959 { VEC, 1, "ABS", do_ABS, print_VEC },
960 { VEC, 2, "ADD", do_ADD, print_VEC },
961 { OUT, 1, "ARL", do_ARL, print_OUT },
962 { SCL, 2, "DP3", do_DP3, print_SCL },
963 { SCL, 2, "DP4", do_DP4, print_SCL },
964 { SCL, 2, "DPH", do_DPH, print_SCL },
965 { VEC, 2, "DST", do_DST, print_VEC },
966 { NOP, 0, "END", do_NOP, print_NOP },
967 { SCL, 1, "EX2", do_EX2, print_VEC },
968 { VEC, 1, "EXP", do_EXP, print_VEC },
969 { VEC, 1, "FLR", do_FLR, print_VEC },
970 { VEC, 1, "FRC", do_FRC, print_VEC },
971 { SCL, 1, "LG2", do_LG2, print_VEC },
972 { VEC, 1, "LIT", do_LIT, print_VEC },
973 { VEC, 1, "LOG", do_LOG, print_VEC },
974 { VEC, 3, "MAD", do_MAD, print_VEC },
975 { VEC, 2, "MAX", do_MAX, print_VEC },
976 { VEC, 2, "MIN", do_MIN, print_VEC },
977 { VEC, 1, "MOV", do_MOV, print_VEC },
978 { VEC, 2, "MUL", do_MUL, print_VEC },
979 { SCL, 2, "POW", do_POW, print_VEC },
980 { VEC, 1, "PRT", do_PRT, print_VEC }, /* PRINT */
981 { NOP, 1, "RCC", do_NOP, print_NOP },
982 { SCL, 1, "RCP", do_RCP, print_VEC },
983 { SCL, 1, "RSQ", do_RSQ, print_VEC },
984 { VEC, 2, "SGE", do_SGE, print_VEC },
985 { VEC, 2, "SLT", do_SLT, print_VEC },
986 { VEC, 2, "SUB", do_SUB, print_VEC },
987 { SWZ, 1, "SWZ", do_SWZ, print_SWZ },
988 { VEC, 2, "XPD", do_XPD, print_VEC },
989 { IN4, 1, "IN1", do_IN1, print_IN }, /* Internals */
990 { IN4, 1, "IN2", do_IN2, print_IN },
991 { IN4, 1, "IN3", do_IN3, print_IN },
992 { IN4, 1, "IN4", do_IN4, print_IN },
993 { OUT, 1, "OUT", do_OUT, print_OUT },
994 { OUT, 1, "OUM", do_OUM, print_OUT },
995 { SWZ, 1, "RSW", do_RSW, print_RSW },
996 { MSK, 1, "MSK", do_MSK, print_MSK },
997 { IN4, 1, "PAR", do_PAR, print_IN },
998 { IN4, 1, "PRL", do_PRL, print_IN },
999 };
1000
1001
1002 static GLuint cvp_load_reg( struct compilation *cp,
1003 GLuint file,
1004 GLuint index,
1005 GLuint rel )
1006 {
1007 GLuint i, op;
1008
1009 if (file == PROGRAM_TEMPORARY)
1010 return index + REG_TMP0;
1011
1012 /* Don't try to cache relatively addressed values yet:
1013 */
1014 if (!rel) {
1015 for (i = 0; i < REG_PAR_NR; i++) {
1016 if ((cp->par_active & (1<<i)) &&
1017 cp->reg[i].file == file &&
1018 cp->reg[i].idx == index) {
1019 cp->par_protected |= (1<<i);
1020 return i + REG_PAR0;
1021 }
1022 }
1023 }
1024
1025 /* Not already loaded, so identify a slot and load it.
1026 * TODO: preload these values once only!
1027 * TODO: better eviction strategy!
1028 */
1029 if (cp->par_active == ~0) {
1030 assert(cp->par_protected != ~0);
1031 cp->par_active = cp->par_protected;
1032 }
1033
1034 i = ffs(~cp->par_active);
1035 assert(i);
1036 i--;
1037
1038
1039 if (file == PROGRAM_INPUT)
1040 op = IN1 + cp->VB->AttribPtr[index]->size - 1;
1041 else if (rel)
1042 op = PRL;
1043 else
1044 op = PAR;
1045
1046 cp->csr->dword = 0;
1047 cp->csr->inr.opcode = op;
1048 cp->csr->inr.reg = i + REG_PAR0;
1049 cp->csr->inr.file = file;
1050 cp->csr->inr.idx = index;
1051 cp->csr++;
1052
1053 cp->reg[i].file = file;
1054 cp->reg[i].idx = index;
1055 cp->par_protected |= (1<<i);
1056 cp->par_active |= (1<<i);
1057 return i + REG_PAR0;
1058 }
1059
1060 static void cvp_release_regs( struct compilation *cp )
1061 {
1062 cp->par_protected = 0;
1063 }
1064
1065
1066
1067 static GLuint cvp_emit_arg( struct compilation *cp,
1068 const struct vp_src_register *src,
1069 GLuint arg )
1070 {
1071 GLuint reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr );
1072 union instruction rsw, noop;
1073
1074 /* Emit any necessary swizzling.
1075 */
1076 rsw.dword = 0;
1077 rsw.rsw.neg = src->Negate ? 1 : 0;
1078 rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) |
1079 (GET_SWZ(src->Swizzle, 1) << 2) |
1080 (GET_SWZ(src->Swizzle, 2) << 4) |
1081 (GET_SWZ(src->Swizzle, 3) << 6));
1082
1083 noop.dword = 0;
1084 noop.rsw.neg = 0;
1085 noop.rsw.swz = ((0<<0) |
1086 (1<<2) |
1087 (2<<4) |
1088 (3<<6));
1089
1090 if (rsw.dword != noop.dword) {
1091 GLuint rsw_reg = arg;
1092 cp->csr->dword = rsw.dword;
1093 cp->csr->rsw.opcode = RSW;
1094 cp->csr->rsw.arg0 = reg;
1095 cp->csr->rsw.dst = rsw_reg;
1096 cp->csr++;
1097 return rsw_reg;
1098 }
1099 else
1100 return reg;
1101 }
1102
1103 static GLuint cvp_choose_result( struct compilation *cp,
1104 const struct vp_dst_register *dst,
1105 union instruction *fixup,
1106 GLuint maxreg)
1107 {
1108 GLuint mask = dst->WriteMask;
1109
1110 if (dst->File == PROGRAM_TEMPORARY) {
1111
1112 /* Optimization: When writing (with a writemask) to an undefined
1113 * value for the first time, the writemask may be ignored. In
1114 * practise this means that the MSK instruction to implement the
1115 * writemask can be dropped.
1116 */
1117 if (dst->Index < maxreg &&
1118 (mask == 0xf || !(cp->tmp_active & (1<<dst->Index)))) {
1119 fixup->dword = 0;
1120 cp->tmp_active |= (1<<dst->Index);
1121 return REG_TMP0 + dst->Index;
1122 }
1123 else if (mask != 0xf) {
1124 fixup->msk.opcode = MSK;
1125 fixup->msk.arg0 = REG_RES;
1126 fixup->msk.dst = REG_TMP0 + dst->Index;
1127 fixup->msk.mask = mask;
1128 cp->tmp_active |= (1<<dst->Index);
1129 return REG_RES;
1130 }
1131 else {
1132 fixup->vec.opcode = VP_OPCODE_MOV;
1133 fixup->vec.arg0 = REG_RES;
1134 fixup->vec.dst = REG_TMP0 + dst->Index;
1135 cp->tmp_active |= (1<<dst->Index);
1136 return REG_RES;
1137 }
1138 }
1139 else {
1140 assert(dst->File == PROGRAM_OUTPUT);
1141 fixup->out.opcode = (mask == 0xf) ? OUT : OUM;
1142 fixup->out.reg = REG_RES;
1143 fixup->out.file = dst->File;
1144 fixup->out.idx = dst->Index;
1145 fixup->out.mask = mask;
1146 return REG_RES;
1147 }
1148 }
1149
1150
1151 static void cvp_emit_inst( struct compilation *cp,
1152 const struct vp_instruction *inst )
1153 {
1154 const struct opcode_info *info = &opcode_info[inst->Opcode];
1155 union instruction fixup;
1156 GLuint reg[3];
1157 GLuint result, i;
1158
1159 /* Need to handle SWZ, ARL specially.
1160 */
1161 switch (info->type) {
1162 case OUT:
1163 assert(inst->Opcode == VP_OPCODE_ARL);
1164 reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
1165
1166 cp->csr->dword = 0;
1167 cp->csr->out.opcode = inst->Opcode;
1168 cp->csr->out.reg = reg[0];
1169 cp->csr->out.file = PROGRAM_ADDRESS;
1170 cp->csr->out.idx = 0;
1171 break;
1172 case SWZ:
1173 assert(inst->Opcode == VP_OPCODE_SWZ);
1174 result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_SWZDST_MAX );
1175
1176 reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
1177
1178 cp->csr->dword = 0;
1179 cp->csr->swz.opcode = VP_OPCODE_SWZ;
1180 cp->csr->swz.arg0 = reg[0];
1181 cp->csr->swz.dst = result;
1182 cp->csr->swz.neg = inst->SrcReg[0].Negate;
1183 cp->csr->swz.swz = inst->SrcReg[0].Swizzle;
1184 cp->csr++;
1185
1186 if (result == REG_RES) {
1187 cp->csr->dword = fixup.dword;
1188 cp->csr++;
1189 }
1190 break;
1191
1192 case VEC:
1193 case SCL: /* for now */
1194 result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_MAX );
1195
1196 reg[0] = reg[1] = reg[2] = 0;
1197
1198 for (i = 0; i < info->nr_args; i++)
1199 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
1200
1201 cp->csr->dword = 0;
1202 cp->csr->vec.opcode = inst->Opcode;
1203 cp->csr->vec.arg0 = reg[0];
1204 cp->csr->vec.arg1 = reg[1];
1205 cp->csr->vec.arg2 = reg[2];
1206 cp->csr->vec.dst = result;
1207 cp->csr++;
1208
1209 if (result == REG_RES) {
1210 cp->csr->dword = fixup.dword;
1211 cp->csr++;
1212 }
1213 break;
1214
1215
1216 case NOP:
1217 break;
1218
1219 default:
1220 assert(0);
1221 break;
1222 }
1223
1224 cvp_release_regs( cp );
1225 }
1226
1227
1228 static void compile_vertex_program( struct arb_vp_machine *m,
1229 const struct vertex_program *program )
1230 {
1231 struct compilation cp;
1232 GLuint i;
1233
1234 /* Initialize cp:
1235 */
1236 memset(&cp, 0, sizeof(cp));
1237 cp.VB = m->VB;
1238 cp.csr = m->store;
1239
1240 /* Compile instructions:
1241 */
1242 for (i = 0; i < program->Base.NumInstructions; i++) {
1243 cvp_emit_inst(&cp, &program->Instructions[i]);
1244 }
1245
1246 /* Finish up:
1247 */
1248 m->instructions = m->store;
1249 m->nr_instructions = cp.csr - m->store;
1250
1251
1252 /* Print/disassemble:
1253 */
1254 if (DISASSEM) {
1255 for (i = 0; i < m->nr_instructions; i++) {
1256 union instruction insn = m->instructions[i];
1257 const struct opcode_info *info = &opcode_info[insn.vec.opcode];
1258 info->print( insn, info );
1259 }
1260 _mesa_printf("\n\n");
1261 }
1262 }
1263
1264
1265
1266
1267 /* ----------------------------------------------------------------------
1268 * Execution
1269 */
1270 static void userclip( GLcontext *ctx,
1271 GLvector4f *clip,
1272 GLubyte *clipmask,
1273 GLubyte *clipormask,
1274 GLubyte *clipandmask )
1275 {
1276 GLuint p;
1277
1278 for (p = 0; p < ctx->Const.MaxClipPlanes; p++)
1279 if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
1280 GLuint nr, i;
1281 const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
1282 const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
1283 const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
1284 const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
1285 GLfloat *coord = (GLfloat *)clip->data;
1286 GLuint stride = clip->stride;
1287 GLuint count = clip->count;
1288
1289 for (nr = 0, i = 0 ; i < count ; i++) {
1290 GLfloat dp = (coord[0] * a +
1291 coord[1] * b +
1292 coord[2] * c +
1293 coord[3] * d);
1294
1295 if (dp < 0) {
1296 nr++;
1297 clipmask[i] |= CLIP_USER_BIT;
1298 }
1299
1300 STRIDE_F(coord, stride);
1301 }
1302
1303 if (nr > 0) {
1304 *clipormask |= CLIP_USER_BIT;
1305 if (nr == count) {
1306 *clipandmask |= CLIP_USER_BIT;
1307 return;
1308 }
1309 }
1310 }
1311 }
1312
1313
1314 static GLboolean do_ndc_cliptest( struct arb_vp_machine *m )
1315 {
1316 GLcontext *ctx = m->ctx;
1317 TNLcontext *tnl = TNL_CONTEXT(ctx);
1318 struct vertex_buffer *VB = m->VB;
1319
1320 /* Cliptest and perspective divide. Clip functions must clear
1321 * the clipmask.
1322 */
1323 m->ormask = 0;
1324 m->andmask = CLIP_ALL_BITS;
1325
1326 if (tnl->NeedNdcCoords) {
1327 VB->NdcPtr =
1328 _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
1329 &m->ndcCoords,
1330 m->clipmask,
1331 &m->ormask,
1332 &m->andmask );
1333 }
1334 else {
1335 VB->NdcPtr = NULL;
1336 _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
1337 NULL,
1338 m->clipmask,
1339 &m->ormask,
1340 &m->andmask );
1341 }
1342
1343 if (m->andmask) {
1344 /* All vertices are outside the frustum */
1345 return GL_FALSE;
1346 }
1347
1348 /* Test userclip planes. This contributes to VB->ClipMask.
1349 */
1350 if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) {
1351 userclip( ctx,
1352 VB->ClipPtr,
1353 m->clipmask,
1354 &m->ormask,
1355 &m->andmask );
1356
1357 if (m->andmask) {
1358 return GL_FALSE;
1359 }
1360 }
1361
1362 VB->ClipAndMask = m->andmask;
1363 VB->ClipOrMask = m->ormask;
1364 VB->ClipMask = m->clipmask;
1365
1366 return GL_TRUE;
1367 }
1368
1369
1370
1371
1372 /**
1373 * Execute the given vertex program.
1374 *
1375 * TODO: Integrate the t_vertex.c code here, to build machine vertices
1376 * directly at this point.
1377 *
1378 * TODO: Eliminate the VB struct entirely and just use
1379 * struct arb_vertex_machine.
1380 */
1381 static GLboolean
1382 run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
1383 {
1384 struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1385 ctx->VertexProgram.Current :
1386 ctx->_TnlProgram);
1387 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1388 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1389 GLuint i, j, outputs = program->OutputsWritten;
1390
1391 if (program->Parameters) {
1392 _mesa_load_state_parameters(ctx, program->Parameters);
1393 m->File[PROGRAM_STATE_VAR] = program->Parameters->ParameterValues;
1394 }
1395
1396 /* Run the actual program:
1397 */
1398 for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
1399 for (j = 0; j < m->nr_instructions; j++) {
1400 union instruction inst = m->instructions[j];
1401 opcode_info[inst.vec.opcode].func( m, inst );
1402 }
1403 }
1404
1405 /* Setup the VB pointers so that the next pipeline stages get
1406 * their data from the right place (the program output arrays).
1407 *
1408 * TODO: 1) Have tnl use these RESULT values for outputs rather
1409 * than trying to shoe-horn inputs and outputs into one set of
1410 * values.
1411 *
1412 * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1413 * and build machine vertices here.
1414 */
1415 VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS];
1416 VB->ClipPtr->count = VB->Count;
1417
1418 if (outputs & (1<<VERT_RESULT_COL0)) {
1419 VB->ColorPtr[0] = &m->attribs[VERT_RESULT_COL0];
1420 VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0];
1421 }
1422
1423 if (outputs & (1<<VERT_RESULT_BFC0)) {
1424 VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0];
1425 }
1426
1427 if (outputs & (1<<VERT_RESULT_COL1)) {
1428 VB->SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1];
1429 VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0];
1430 }
1431
1432 if (outputs & (1<<VERT_RESULT_BFC1)) {
1433 VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1];
1434 }
1435
1436 if (outputs & (1<<VERT_RESULT_FOGC)) {
1437 VB->FogCoordPtr = &m->attribs[VERT_RESULT_FOGC];
1438 VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr;
1439 }
1440
1441 if (outputs & (1<<VERT_RESULT_PSIZ)) {
1442 VB->PointSizePtr = &m->attribs[VERT_RESULT_PSIZ];
1443 VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ];
1444 }
1445
1446 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1447 if (outputs & (1<<(VERT_RESULT_TEX0+i))) {
1448 VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i];
1449 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i];
1450 }
1451 }
1452
1453 #if 0
1454 for (i = 0; i < VB->Count; i++) {
1455 printf("Out %d: %f %f %f %f %f %f %f %f\n", i,
1456 VEC_ELT(VB->ClipPtr, GLfloat, i)[0],
1457 VEC_ELT(VB->ClipPtr, GLfloat, i)[1],
1458 VEC_ELT(VB->ClipPtr, GLfloat, i)[2],
1459 VEC_ELT(VB->ClipPtr, GLfloat, i)[3],
1460 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0],
1461 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1],
1462 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2],
1463 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]);
1464 }
1465 #endif
1466
1467 /* Perform NDC and cliptest operations:
1468 */
1469 return do_ndc_cliptest(m);
1470 }
1471
1472
1473 static void
1474 validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
1475 {
1476 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1477 struct vertex_program *program =
1478 (ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : 0);
1479
1480 #if TNL_FIXED_FUNCTION_PROGRAM
1481 if (!program) {
1482 program = ctx->_TnlProgram;
1483 }
1484 #endif
1485
1486 if (program) {
1487 compile_vertex_program( m, program );
1488
1489 /* Grab the state GL state and put into registers:
1490 */
1491 m->File[PROGRAM_LOCAL_PARAM] = program->Base.LocalParams;
1492 m->File[PROGRAM_ENV_PARAM] = ctx->VertexProgram.Parameters;
1493 m->File[PROGRAM_STATE_VAR] = 0;
1494 }
1495 }
1496
1497
1498
1499
1500
1501
1502
1503 /**
1504 * Called the first time stage->run is called. In effect, don't
1505 * allocate data until the first time the stage is run.
1506 */
1507 static GLboolean init_vertex_program( GLcontext *ctx,
1508 struct tnl_pipeline_stage *stage )
1509 {
1510 TNLcontext *tnl = TNL_CONTEXT(ctx);
1511 struct vertex_buffer *VB = &(tnl->vb);
1512 struct arb_vp_machine *m;
1513 const GLuint size = VB->Size;
1514 GLuint i;
1515
1516 stage->privatePtr = MALLOC(sizeof(*m));
1517 m = ARB_VP_MACHINE(stage);
1518 if (!m)
1519 return GL_FALSE;
1520
1521 /* arb_vertex_machine struct should subsume the VB:
1522 */
1523 m->VB = VB;
1524 m->ctx = ctx;
1525
1526 /* Allocate arrays of vertex output values */
1527 for (i = 0; i < VERT_RESULT_MAX; i++) {
1528 _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 );
1529 m->attribs[i].size = 4;
1530 }
1531
1532 /* a few other misc allocations */
1533 _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 );
1534 m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 );
1535
1536
1537 #if TNL_FIXED_FUNCTION_PROGRAM
1538 _mesa_allow_light_in_model( ctx, GL_FALSE );
1539 #endif
1540
1541
1542 return GL_TRUE;
1543 }
1544
1545
1546
1547
1548 /**
1549 * Destructor for this pipeline stage.
1550 */
1551 static void dtr( struct tnl_pipeline_stage *stage )
1552 {
1553 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1554
1555 if (m) {
1556 GLuint i;
1557
1558 /* free the vertex program result arrays */
1559 for (i = 0; i < VERT_RESULT_MAX; i++)
1560 _mesa_vector4f_free( &m->attribs[i] );
1561
1562 /* free misc arrays */
1563 _mesa_vector4f_free( &m->ndcCoords );
1564 ALIGN_FREE( m->clipmask );
1565
1566 FREE( m );
1567 stage->privatePtr = NULL;
1568 }
1569 }
1570
1571 /**
1572 * Public description of this pipeline stage.
1573 */
1574 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage =
1575 {
1576 "vertex-program",
1577 NULL, /* private_data */
1578 init_vertex_program, /* create */
1579 dtr, /* destroy */
1580 validate_vertex_program, /* validate */
1581 run_arb_vertex_program /* run */
1582 };