New software implementation of ARB_vertex_program. Similar in speed
[mesa.git] / src / mesa / tnl / t_vb_arbprogram.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.3
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file t_vb_arbprogram.c
27 * Compile vertex programs to an intermediate representation.
28 * Execute vertex programs over a buffer of vertices.
29 * \author Keith Whitwell, Brian Paul
30 */
31
32 #include "glheader.h"
33 #include "context.h"
34 #include "imports.h"
35 #include "macros.h"
36 #include "mtypes.h"
37 #include "arbprogparse.h"
38 #include "program.h"
39 #include "math/m_matrix.h"
40 #include "math/m_translate.h"
41 #include "t_context.h"
42 #include "t_pipeline.h"
43
44
45
46
/* New, internal instructions:
 *
 * These opcodes are numbered directly after VP_OPCODE_XPD so that they
 * can index the same opcode_info[] table as the ARB opcodes.
 */
#define IN1 (VP_OPCODE_XPD+1) /* input-to-reg MOV, 1 component present */
#define IN2 (IN1+1) /* input-to-reg MOV, 2 components present */
#define IN3 (IN1+2) /* input-to-reg MOV, 3 components present */
#define IN4 (IN1+3) /* input-to-reg MOV, 4 components present */
#define OUT (IN1+4) /* reg-to-output MOV */
#define OUM (IN1+5) /* reg-to-output MOV with mask */
#define RSW (IN1+6) /* reg-to-reg MOV with reduced swizzle */
#define MSK (IN1+7) /* reg-to-reg MOV with mask */
#define PAR (IN1+8) /* parameter-to-reg MOV */
#define PRL (IN1+9) /* parameter-to-reg MOV, relative to the address register */
59
60
/* Layout of register file:

  0 -- Scratch (Arg0)
  1 -- Scratch (Arg1)
  2 -- Scratch (Arg2)
  3 -- Scratch (Result)
  4 -- Program Temporary 0
  ..
  31 -- Program Temporary 27
  32 -- State/Input/Const shadow 0
  ..
  63 -- State/Input/Const shadow 31

*/



#define REG_ARG0 0 /* scratch: swizzled copy of argument 0 */
#define REG_ARG1 1 /* scratch: swizzled copy of argument 1 */
#define REG_ARG2 2 /* scratch: swizzled copy of argument 2 */
#define REG_RES 3 /* scratch: result awaiting a MSK/MOV/OUT/OUM fixup */
#define REG_TMP0 4 /* first program temporary */
#define REG_TMP_MAX 32 /* one past the last program temporary */
#define REG_TMP_NR (REG_TMP_MAX-REG_TMP0)
#define REG_PAR0 32 /* first state/input/const shadow register */
#define REG_PAR_MAX 64 /* one past the last shadow register */
#define REG_PAR_NR (REG_PAR_MAX-REG_PAR0)

#define REG_MAX 64 /* total number of internal registers */
#define REG_SWZDST_MAX 16 /* limit passed to cvp_choose_result() for SWZ
                           * destinations (swz.dst is a 4 bit field) */
91
/* ARB_vp instructions are broken down into one or more of the
 * following micro-instructions, each representable in a 32 bit packed
 * structure.
 *
 * Every view starts with the same 6 bit opcode field, so the opcode can
 * be read through any member.  The bit widths of each view add up to 32.
 */


union instruction {
   /* Scalar operations: result is replicated to all four components. */
   struct {
      GLuint opcode:6;
      GLuint dst:5;
      GLuint arg0:6;
      GLuint arg1:6;
      GLuint elt:2;		/* x,y,z or w */
      GLuint pad:7;
   } scl;


   /* Vector operations with up to three register arguments. */
   struct {
      GLuint opcode:6;
      GLuint dst:5;
      GLuint arg0:6;
      GLuint arg1:6;
      GLuint arg2:6;
      GLuint pad:3;
   } vec;

   /* Extended swizzle: 3 bits of selector and 1 negate bit per component. */
   struct {
      GLuint opcode:6;
      GLuint dst:4;		/* NOTE!  REG 0..15 only (4 bits)! */
      GLuint arg0:6;
      GLuint neg:4;
      GLuint swz:12;
   } swz;

   /* Reduced swizzle: 2 bits of selector per component, one shared negate. */
   struct {
      GLuint opcode:6;
      GLuint dst:6;
      GLuint arg0:6;
      GLuint neg:1;		/* 1 bit only */
      GLuint swz:8;		/* xyzw only */
      GLuint pad:5;
   } rsw;

   /* Load of an external value (input or parameter) into a register. */
   struct {
      GLuint opcode:6;
      GLuint reg:6;
      GLuint file:5;
      GLuint idx:8;		/* plenty? */
      GLuint rel:1;
      GLuint pad:6;
   } inr;


   /* Store of a register to an external location, optionally masked. */
   struct {
      GLuint opcode:6;
      GLuint reg:6;
      GLuint file:5;
      GLuint idx:8;		/* plenty? */
      GLuint mask:4;
      GLuint pad:3;
   } out;

   /* Register-to-register move under a 4 bit writemask. */
   struct {
      GLuint opcode:6;
      GLuint dst:5;
      GLuint arg0:6;
      GLuint mask:4;
      GLuint pad:11;
   } msk;

   /* The whole instruction as one word, used to clear or copy it. */
   GLuint dword;
};
164
165
166
/* Transient state used while translating a vertex program into
 * micro-instructions.
 */
struct compilation {
   /* What each shadow register (REG_PAR0 + i) was last loaded with. */
   struct {
      GLuint file:5;
      GLuint idx:8;
   } reg[REG_PAR_NR];

   GLuint par_active;		/* bit i set: shadow slot i is allocated */
   GLuint par_protected;	/* bit i set: slot i is in use by the instruction
				 * being compiled; cleared by cvp_release_regs() */
   GLuint tmp_active;		/* bit i set: program temporary i has been written */

   union instruction *csr;	/* next free micro-instruction slot */

   struct vertex_buffer *VB;	/* for input sizes! */
};
181
182 /*--------------------------------------------------------------------------- */
183
/*!
 * Private storage for the vertex program pipeline stage.
 */
struct arb_vp_machine {
   GLfloat reg[REG_MAX][4];	/* Program temporaries, shadowed parameters and inputs,
				   plus some internal values */

   GLfloat (*File[8])[4];	/* Src/Dest for PAR/PRL instructions. */
   GLint AddressReg;		/* written by ARL, added to the index by PRL */

   union instruction store[1024];	/* backing storage for compiled code */
/*    GLuint store_size; */

   union instruction *instructions;	/* compiled program (points at store) */
   GLint nr_instructions;		/* number of valid entries in instructions */

   GLvector4f attribs[VERT_RESULT_MAX]; /**< result vectors. */
   GLvector4f ndcCoords;              /**< normalized device coords */
   GLubyte *clipmask;                 /**< clip flags */
   GLubyte ormask, andmask;           /**< for clipping */

   GLuint vtx_nr;		/**< loop counter */

   struct vertex_buffer *VB;
   GLcontext *ctx;
};
210
211
212 /*--------------------------------------------------------------------------- */
213
/* Per-opcode description; one entry of the opcode_info[] table. */
struct opcode_info {
   GLuint type;			/* how the compiler treats it (NOP/VEC/SCL/SWZ or
				 * one of the internal opcode values) */
   GLuint nr_args;		/* number of source arguments */
   const char *string;		/* mnemonic used by the print functions */
   void (*func)( struct arb_vp_machine *, union instruction );	/* executes it */
   void (*print)( union instruction , const struct opcode_info * ); /* disassembles it */
};
221
222
/* Fetch the vertex program machine hung off a pipeline stage.  The
 * argument is parenthesised so that any pointer expression may be passed.
 */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)((stage)->privatePtr))
224
225
226
/**
 * Set x to positive or negative infinity.
 *
 * XXX: FIXME - type punning.
 *
 * Note that IS_INF_OR_NAN() is only defined by the VMS branch.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *)&x) = 0xFF800000 )
#elif defined(VMS)
#define SET_POS_INFINITY(x)  x = __MAXFLOAT
#define SET_NEG_INFINITY(x)  x = -__MAXFLOAT
#define IS_INF_OR_NAN(t)   ((t) == __MAXFLOAT)
#else
#define SET_POS_INFINITY(x)  x = (GLfloat) HUGE_VAL
#define SET_NEG_INFINITY(x)  x = (GLfloat) -HUGE_VAL
#endif

/* Split a float into mantissa and exponent. */
#define FREXPF(a,b) frexpf(a,b)

/* Replicate element 0 of a 4-vector into elements 1..3. */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])

/* FIXME: more type punning (despite use of fi_type...)
 *
 * Stores the integer bit pattern 'bits' into the storage of x.
 */
#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
251
252
253 static GLfloat RoughApproxLog2(GLfloat t)
254 {
255 return LOG2(t);
256 }
257
258 static GLfloat RoughApproxPow2(GLfloat t)
259 {
260 GLfloat q;
261 #ifdef USE_IEEE
262 GLint ii = (GLint) t;
263 ii = (ii < 23) + 0x3f800000;
264 SET_FLOAT_BITS(q, ii);
265 q = *((GLfloat *) (void *)&ii);
266 #else
267 q = (GLfloat) pow(2.0, floor_t0);
268 #endif
269 return q;
270 }
271
272 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
273 {
274 #if 0
275 return (GLfloat) exp(y * log(x));
276 #else
277 return (GLfloat) _mesa_pow(x, y);
278 #endif
279 }
280
281
/* An all-zero 4-vector. */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
283
284
285
286
287 /**
288 * This is probably the least-optimal part of the process, have to
289 * multiply out the stride to access each incoming input value.
290 */
291 static GLfloat *get_input( struct arb_vp_machine *m, GLuint index )
292 {
293 return VEC_ELT(m->VB->AttribPtr[index], GLfloat, m->vtx_nr);
294 }
295
296
297 /**
298 * Fetch a 4-element float vector from the given source register.
299 * Deal with the possibility that not all elements are present.
300 */
301 static void do_IN1( struct arb_vp_machine *m, union instruction op )
302 {
303 GLfloat *result = m->reg[op.inr.reg];
304 const GLfloat *src = get_input(m, op.inr.idx);
305
306 result[0] = src[0];
307 result[1] = 0;
308 result[2] = 0;
309 result[3] = 1;
310 }
311
312 static void do_IN2( struct arb_vp_machine *m, union instruction op )
313 {
314 GLfloat *result = m->reg[op.inr.reg];
315 const GLfloat *src = get_input(m, op.inr.idx);
316
317 result[0] = src[0];
318 result[1] = src[1];
319 result[2] = 0;
320 result[3] = 1;
321 }
322
323 static void do_IN3( struct arb_vp_machine *m, union instruction op )
324 {
325 GLfloat *result = m->reg[op.inr.reg];
326 const GLfloat *src = get_input(m, op.inr.idx);
327
328 result[0] = src[0];
329 result[1] = src[1];
330 result[2] = src[2];
331 result[3] = 1;
332 }
333
334 static void do_IN4( struct arb_vp_machine *m, union instruction op )
335 {
336 GLfloat *result = m->reg[op.inr.reg];
337 const GLfloat *src = get_input(m, op.inr.idx);
338
339 result[0] = src[0];
340 result[1] = src[1];
341 result[2] = src[2];
342 result[3] = src[3];
343 }
344
345 /**
346 * Perform a reduced swizzle:
347 */
348 static void do_RSW( struct arb_vp_machine *m, union instruction op )
349 {
350 GLfloat *result = m->reg[op.rsw.dst];
351 const GLfloat *arg0 = m->reg[op.rsw.arg0];
352 GLuint swz = op.rsw.swz;
353 GLuint neg = op.rsw.neg;
354 GLuint i;
355
356 if (neg)
357 for (i = 0; i < 4; i++, swz >>= 2)
358 result[i] = -arg0[swz & 0x3];
359 else
360 for (i = 0; i < 4; i++, swz >>= 2)
361 result[i] = arg0[swz & 0x3];
362 }
363
364
365
366 /**
367 * Store 4 floats into an external address.
368 */
369 static void do_OUM( struct arb_vp_machine *m, union instruction op )
370 {
371 GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
372 const GLfloat *value = m->reg[op.out.reg];
373
374 if (op.out.mask & 0x1) dst[0] = value[0];
375 if (op.out.mask & 0x2) dst[1] = value[1];
376 if (op.out.mask & 0x4) dst[2] = value[2];
377 if (op.out.mask & 0x8) dst[3] = value[3];
378 }
379
380 static void do_OUT( struct arb_vp_machine *m, union instruction op )
381 {
382 GLfloat *dst = m->attribs[op.out.idx].data[m->vtx_nr];
383 const GLfloat *value = m->reg[op.out.reg];
384
385 dst[0] = value[0];
386 dst[1] = value[1];
387 dst[2] = value[2];
388 dst[3] = value[3];
389 }
390
391 /* Register-to-register MOV with writemask.
392 */
393 static void do_MSK( struct arb_vp_machine *m, union instruction op )
394 {
395 GLfloat *dst = m->reg[op.msk.dst];
396 const GLfloat *arg0 = m->reg[op.msk.arg0];
397
398 if (op.msk.mask & 0x1) dst[0] = arg0[0];
399 if (op.msk.mask & 0x2) dst[1] = arg0[1];
400 if (op.msk.mask & 0x4) dst[2] = arg0[2];
401 if (op.msk.mask & 0x8) dst[3] = arg0[3];
402 }
403
404
405 /* Retreive parameters and other constant values:
406 */
407 static void do_PAR( struct arb_vp_machine *m, union instruction op )
408 {
409 GLfloat *result = m->reg[op.inr.reg];
410 const GLfloat *src = m->File[op.inr.file][op.inr.idx];
411
412 result[0] = src[0];
413 result[1] = src[1];
414 result[2] = src[2];
415 result[3] = src[3];
416 }
417
418
419 #define RELADDR_MASK MAX_NV_VERTEX_PROGRAM_PARAMS
420
421 static void do_PRL( struct arb_vp_machine *m, union instruction op )
422 {
423 GLfloat *result = m->reg[op.inr.reg];
424 GLuint index = (op.inr.idx + m->AddressReg) & RELADDR_MASK;
425 const GLfloat *src = m->File[op.inr.file][index];
426
427 result[0] = src[0];
428 result[1] = src[1];
429 result[2] = src[2];
430 result[3] = src[3];
431 }
432
433 static void do_PRT( struct arb_vp_machine *m, union instruction op )
434 {
435 const GLfloat *arg0 = m->reg[op.vec.arg0];
436
437 _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
438 arg0[0], arg0[1], arg0[2], arg0[3]);
439 }
440
441
442 /**
443 * The traditional ALU and texturing instructions. All operate on
444 * internal registers and ignore write masks and swizzling issues.
445 */
446
447 static void do_ABS( struct arb_vp_machine *m, union instruction op )
448 {
449 GLfloat *result = m->reg[op.vec.dst];
450 const GLfloat *arg0 = m->reg[op.vec.arg0];
451
452 result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
453 result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
454 result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
455 result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
456 }
457
458 static void do_ADD( struct arb_vp_machine *m, union instruction op )
459 {
460 GLfloat *result = m->reg[op.vec.dst];
461 const GLfloat *arg0 = m->reg[op.vec.arg0];
462 const GLfloat *arg1 = m->reg[op.vec.arg1];
463
464 result[0] = arg0[0] + arg1[0];
465 result[1] = arg0[1] + arg1[1];
466 result[2] = arg0[2] + arg1[2];
467 result[3] = arg0[3] + arg1[3];
468 }
469
470
471 static void do_ARL( struct arb_vp_machine *m, union instruction op )
472 {
473 const GLfloat *arg0 = m->reg[op.out.reg];
474 m->AddressReg = (GLint) floor(arg0[0]);
475 }
476
477
478 static void do_DP3( struct arb_vp_machine *m, union instruction op )
479 {
480 GLfloat *result = m->reg[op.scl.dst];
481 const GLfloat *arg0 = m->reg[op.scl.arg0];
482 const GLfloat *arg1 = m->reg[op.scl.arg1];
483
484 result[0] = (arg0[0] * arg1[0] +
485 arg0[1] * arg1[1] +
486 arg0[2] * arg1[2]);
487
488 PUFF(result);
489 }
490
491 static void do_DP4( struct arb_vp_machine *m, union instruction op )
492 {
493 GLfloat *result = m->reg[op.scl.dst];
494 const GLfloat *arg0 = m->reg[op.scl.arg0];
495 const GLfloat *arg1 = m->reg[op.scl.arg1];
496
497 result[0] = (arg0[0] * arg1[0] +
498 arg0[1] * arg1[1] +
499 arg0[2] * arg1[2] +
500 arg0[3] * arg1[3]);
501
502 PUFF(result);
503 }
504
505 static void do_DPH( struct arb_vp_machine *m, union instruction op )
506 {
507 GLfloat *result = m->reg[op.scl.dst];
508 const GLfloat *arg0 = m->reg[op.scl.arg0];
509 const GLfloat *arg1 = m->reg[op.scl.arg1];
510
511 result[0] = (arg0[0] * arg1[0] +
512 arg0[1] * arg1[1] +
513 arg0[2] * arg1[2] +
514 1.0 * arg1[3]);
515
516 PUFF(result);
517 }
518
519 static void do_DST( struct arb_vp_machine *m, union instruction op )
520 {
521 GLfloat *result = m->reg[op.vec.dst];
522 const GLfloat *arg0 = m->reg[op.vec.arg0];
523 const GLfloat *arg1 = m->reg[op.vec.arg1];
524
525 result[0] = 1.0F;
526 result[1] = arg0[1] * arg1[1];
527 result[2] = arg0[2];
528 result[3] = arg1[3];
529 }
530
531
532 static void do_EX2( struct arb_vp_machine *m, union instruction op )
533 {
534 GLfloat *result = m->reg[op.scl.dst];
535 const GLfloat *arg0 = m->reg[op.scl.arg0];
536
537 result[0] = (GLfloat)RoughApproxPow2(arg0[0]);
538 PUFF(result);
539 }
540
541 static void do_EXP( struct arb_vp_machine *m, union instruction op )
542 {
543 GLfloat *result = m->reg[op.vec.dst];
544 const GLfloat *arg0 = m->reg[op.vec.arg0];
545 GLfloat tmp = arg0[0];
546 GLfloat flr_tmp = FLOORF(tmp);
547
548 /* KW: previous definition of this instruction was really messed
549 * up... Maybe the nv instruction is quite different?
550 */
551 result[0] = (GLfloat) (1 << (int)flr_tmp);
552 result[1] = tmp - flr_tmp;
553 result[2] = RoughApproxPow2(tmp);
554 result[3] = 1.0F;
555 }
556
557 static void do_FLR( struct arb_vp_machine *m, union instruction op )
558 {
559 GLfloat *result = m->reg[op.vec.dst];
560 const GLfloat *arg0 = m->reg[op.vec.arg0];
561
562 result[0] = FLOORF(arg0[0]);
563 result[1] = FLOORF(arg0[1]);
564 result[2] = FLOORF(arg0[2]);
565 result[3] = FLOORF(arg0[3]);
566 }
567
568 static void do_FRC( struct arb_vp_machine *m, union instruction op )
569 {
570 GLfloat *result = m->reg[op.vec.dst];
571 const GLfloat *arg0 = m->reg[op.vec.arg0];
572
573 result[0] = arg0[0] - FLOORF(arg0[0]);
574 result[1] = arg0[1] - FLOORF(arg0[1]);
575 result[2] = arg0[2] - FLOORF(arg0[2]);
576 result[3] = arg0[3] - FLOORF(arg0[3]);
577 }
578
579 static void do_LG2( struct arb_vp_machine *m, union instruction op )
580 {
581 GLfloat *result = m->reg[op.scl.dst];
582 const GLfloat *arg0 = m->reg[op.scl.arg0];
583
584 result[0] = RoughApproxLog2(arg0[0]);
585 PUFF(result);
586 }
587
588
589
590 static void do_LIT( struct arb_vp_machine *m, union instruction op )
591 {
592 GLfloat *result = m->reg[op.vec.dst];
593 const GLfloat *arg0 = m->reg[op.vec.arg0];
594
595 const GLfloat epsilon = 1.0F / 256.0F; /* per NV spec */
596 GLfloat tmp[4];
597
598 tmp[0] = MAX2(arg0[0], 0.0F);
599 tmp[1] = MAX2(arg0[1], 0.0F);
600 tmp[3] = CLAMP(arg0[3], -(128.0F - epsilon), (128.0F - epsilon));
601
602 result[0] = 1.0;
603 result[1] = tmp[0];
604 result[2] = (tmp[0] > 0.0) ? RoughApproxPower(tmp[1], tmp[3]) : 0.0F;
605 result[3] = 1.0;
606 }
607
608
609 static void do_LOG( struct arb_vp_machine *m, union instruction op )
610 {
611 GLfloat *result = m->reg[op.vec.dst];
612 const GLfloat *arg0 = m->reg[op.vec.arg0];
613 GLfloat tmp = FABSF(arg0[0]);
614 int exponent;
615 GLfloat mantissa = FREXPF(tmp, &exponent);
616
617 result[0] = (GLfloat) (exponent - 1);
618 result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
619 result[2] = result[0] + LOG2(result[1]);
620 result[3] = 1.0;
621 }
622
623
624 static void do_MAD( struct arb_vp_machine *m, union instruction op )
625 {
626 GLfloat *result = m->reg[op.vec.dst];
627 const GLfloat *arg0 = m->reg[op.vec.arg0];
628 const GLfloat *arg1 = m->reg[op.vec.arg1];
629 const GLfloat *arg2 = m->reg[op.vec.arg2];
630
631 result[0] = arg0[0] * arg1[0] + arg2[0];
632 result[1] = arg0[1] * arg1[1] + arg2[1];
633 result[2] = arg0[2] * arg1[2] + arg2[2];
634 result[3] = arg0[3] * arg1[3] + arg2[3];
635 }
636
637 static void do_MAX( struct arb_vp_machine *m, union instruction op )
638 {
639 GLfloat *result = m->reg[op.vec.dst];
640 const GLfloat *arg0 = m->reg[op.vec.arg0];
641 const GLfloat *arg1 = m->reg[op.vec.arg1];
642
643 result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
644 result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
645 result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
646 result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
647 }
648
649
650 static void do_MIN( struct arb_vp_machine *m, union instruction op )
651 {
652 GLfloat *result = m->reg[op.vec.dst];
653 const GLfloat *arg0 = m->reg[op.vec.arg0];
654 const GLfloat *arg1 = m->reg[op.vec.arg1];
655
656 result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
657 result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
658 result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
659 result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
660 }
661
662 static void do_MOV( struct arb_vp_machine *m, union instruction op )
663 {
664 GLfloat *result = m->reg[op.vec.dst];
665 const GLfloat *arg0 = m->reg[op.vec.arg0];
666
667 result[0] = arg0[0];
668 result[1] = arg0[1];
669 result[2] = arg0[2];
670 result[3] = arg0[3];
671 }
672
673 static void do_MUL( struct arb_vp_machine *m, union instruction op )
674 {
675 GLfloat *result = m->reg[op.vec.dst];
676 const GLfloat *arg0 = m->reg[op.vec.arg0];
677 const GLfloat *arg1 = m->reg[op.vec.arg1];
678
679 result[0] = arg0[0] * arg1[0];
680 result[1] = arg0[1] * arg1[1];
681 result[2] = arg0[2] * arg1[2];
682 result[3] = arg0[3] * arg1[3];
683 }
684
685
686 static void do_POW( struct arb_vp_machine *m, union instruction op )
687 {
688 GLfloat *result = m->reg[op.scl.dst];
689 const GLfloat *arg0 = m->reg[op.scl.arg0];
690 const GLfloat *arg1 = m->reg[op.scl.arg1];
691
692 result[0] = (GLfloat)RoughApproxPower(arg0[0], arg1[0]);
693 PUFF(result);
694 }
695
696 static void do_RCP( struct arb_vp_machine *m, union instruction op )
697 {
698 GLfloat *result = m->reg[op.scl.dst];
699 const GLfloat *arg0 = m->reg[op.scl.arg0];
700
701 result[0] = 1.0F / arg0[0];
702 PUFF(result);
703 }
704
705 static void do_RSQ( struct arb_vp_machine *m, union instruction op )
706 {
707 GLfloat *result = m->reg[op.scl.dst];
708 const GLfloat *arg0 = m->reg[op.scl.arg0];
709
710 result[0] = INV_SQRTF(FABSF(arg0[0]));
711 PUFF(result);
712 }
713
714
715 static void do_SGE( struct arb_vp_machine *m, union instruction op )
716 {
717 GLfloat *result = m->reg[op.vec.dst];
718 const GLfloat *arg0 = m->reg[op.vec.arg0];
719 const GLfloat *arg1 = m->reg[op.vec.arg1];
720
721 result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
722 result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
723 result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
724 result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
725 }
726
727
728 static void do_SLT( struct arb_vp_machine *m, union instruction op )
729 {
730 GLfloat *result = m->reg[op.vec.dst];
731 const GLfloat *arg0 = m->reg[op.vec.arg0];
732 const GLfloat *arg1 = m->reg[op.vec.arg1];
733
734 result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
735 result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
736 result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
737 result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
738 }
739
740 static void do_SWZ( struct arb_vp_machine *m, union instruction op )
741 {
742 GLfloat *result = m->reg[op.swz.dst];
743 const GLfloat *arg0 = m->reg[op.swz.arg0];
744 GLuint swz = op.swz.swz;
745 GLuint neg = op.swz.neg;
746 GLuint i;
747
748 for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
749 switch (swz & 0x7) {
750 case SWIZZLE_ZERO: result[i] = 0.0; break;
751 case SWIZZLE_ONE: result[i] = 1.0; break;
752 default: result[i] = arg0[swz & 0x7]; break;
753 }
754 if (neg & 0x1) result[i] = -result[i];
755 }
756 }
757
758 static void do_SUB( struct arb_vp_machine *m, union instruction op )
759 {
760 GLfloat *result = m->reg[op.vec.dst];
761 const GLfloat *arg0 = m->reg[op.vec.arg0];
762 const GLfloat *arg1 = m->reg[op.vec.arg1];
763
764 result[0] = arg0[0] - arg1[0];
765 result[1] = arg0[1] - arg1[1];
766 result[2] = arg0[2] - arg1[2];
767 result[3] = arg0[3] - arg1[3];
768 }
769
770
771 static void do_XPD( struct arb_vp_machine *m, union instruction op )
772 {
773 GLfloat *result = m->reg[op.vec.dst];
774 const GLfloat *arg0 = m->reg[op.vec.arg0];
775 const GLfloat *arg1 = m->reg[op.vec.arg1];
776
777 result[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
778 result[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
779 result[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];
780 }
781
782 static void do_NOP( struct arb_vp_machine *m, union instruction op )
783 {
784 }
785
786 /* Some useful debugging functions:
787 */
788 static void print_reg( GLuint reg )
789 {
790 if (reg == REG_RES)
791 _mesa_printf("RES");
792 else if (reg >= REG_ARG0 && reg <= REG_ARG2)
793 _mesa_printf("ARG%d", reg - REG_ARG0);
794 else if (reg >= REG_TMP0 && reg < REG_TMP_MAX)
795 _mesa_printf("TMP%d", reg - REG_TMP0);
796 else if (reg >= REG_PAR0 && reg < REG_PAR_MAX)
797 _mesa_printf("PAR%d", reg - REG_PAR0);
798 else
799 _mesa_printf("???");
800 }
801
802 static void print_mask( GLuint mask )
803 {
804 _mesa_printf(".");
805 if (mask&0x1) _mesa_printf("x");
806 if (mask&0x2) _mesa_printf("y");
807 if (mask&0x4) _mesa_printf("z");
808 if (mask&0x8) _mesa_printf("w");
809 }
810
811 static void print_extern( GLuint file, GLuint idx )
812 {
813 static const char *reg_file[] = {
814 "TEMPORARY",
815 "INPUT",
816 "OUTPUT",
817 "LOCAL_PARAM",
818 "ENV_PARAM",
819 "NAMED_PARAM",
820 "STATE_VAR",
821 "WRITE_ONLY",
822 "ADDRESS"
823 };
824
825 _mesa_printf("%s:%d", reg_file[file], idx);
826 }
827
828
829
830 static void print_SWZ( union instruction op, const struct opcode_info *info )
831 {
832 GLuint swz = op.swz.swz;
833 GLuint neg = op.swz.neg;
834 GLuint i;
835
836 _mesa_printf("%s ", info->string);
837 print_reg(op.swz.dst);
838 _mesa_printf(", ");
839 print_reg(op.swz.arg0);
840 _mesa_printf(".");
841 for (i = 0; i < 4; i++, swz >>= 3, neg >>= 1) {
842 const char *cswz = "xyzw01??";
843 if (neg & 0x1)
844 _mesa_printf("-");
845 _mesa_printf("%c", cswz[swz&0x7]);
846 }
847 _mesa_printf("\n");
848 }
849
850 static void print_RSW( union instruction op, const struct opcode_info *info )
851 {
852 GLuint swz = op.rsw.swz;
853 GLuint neg = op.rsw.neg;
854 GLuint i;
855
856 _mesa_printf("%s ", info->string);
857 print_reg(op.rsw.dst);
858 _mesa_printf(", ");
859 print_reg(op.rsw.arg0);
860 _mesa_printf(".");
861 for (i = 0; i < 4; i++, swz >>= 2) {
862 const char *cswz = "xyzw";
863 if (neg)
864 _mesa_printf("-");
865 _mesa_printf("%c", cswz[swz&0x3]);
866 }
867 _mesa_printf("\n");
868 }
869
870
871 static void print_SCL( union instruction op, const struct opcode_info *info )
872 {
873 _mesa_printf("%s ", info->string);
874 print_reg(op.scl.dst);
875 _mesa_printf(", ");
876 print_reg(op.scl.arg0);
877 if (info->nr_args > 1) {
878 _mesa_printf(", ");
879 print_reg(op.scl.arg1);
880 }
881 _mesa_printf("\n");
882 }
883
884
885 static void print_VEC( union instruction op, const struct opcode_info *info )
886 {
887 _mesa_printf("%s ", info->string);
888 print_reg(op.vec.dst);
889 _mesa_printf(", ");
890 print_reg(op.vec.arg0);
891 if (info->nr_args > 1) {
892 _mesa_printf(", ");
893 print_reg(op.vec.arg1);
894 }
895 if (info->nr_args > 2) {
896 _mesa_printf(", ");
897 print_reg(op.vec.arg2);
898 }
899 _mesa_printf("\n");
900 }
901
902 static void print_MSK( union instruction op, const struct opcode_info *info )
903 {
904 _mesa_printf("%s ", info->string);
905 print_reg(op.msk.dst);
906 print_mask(op.msk.mask);
907 _mesa_printf(", ");
908 print_reg(op.msk.arg0);
909 _mesa_printf("\n");
910 }
911
912 static void print_IN( union instruction op, const struct opcode_info *info )
913 {
914 _mesa_printf("%s ", info->string);
915 print_reg(op.inr.reg);
916 _mesa_printf(", ");
917 print_extern(op.inr.file, op.inr.idx);
918 _mesa_printf("\n");
919 }
920
921 static void print_OUT( union instruction op, const struct opcode_info *info )
922 {
923 _mesa_printf("%s ", info->string);
924 print_extern(op.out.file, op.out.idx);
925 if (op.out.opcode == OUM)
926 print_mask(op.out.mask);
927 _mesa_printf(", ");
928 print_reg(op.out.reg);
929 _mesa_printf("\n");
930 }
931
932 static void print_NOP( union instruction op, const struct opcode_info *info )
933 {
934 }
935
/* Instruction classes used in the 'type' column below.  The internal
 * opcode values OUT, IN4 and MSK are reused as further class tags.
 */
#define NOP 0
#define VEC 1
#define SCL 2
#define SWZ 3

/* One entry per opcode, indexed by opcode value: the ARB opcodes first,
 * then the internal ones starting at IN1.  cvp_emit_inst() switches on
 * the 'type' column; the last two columns are the execute and print
 * callbacks.
 */
static const struct opcode_info opcode_info[] =
{
   { VEC, 1, "ABS", do_ABS, print_VEC },
   { VEC, 2, "ADD", do_ADD, print_VEC },
   { OUT, 1, "ARL", do_ARL, print_OUT },
   { SCL, 2, "DP3", do_DP3, print_SCL },
   { SCL, 2, "DP4", do_DP4, print_SCL },
   { SCL, 2, "DPH", do_DPH, print_SCL },
   { VEC, 2, "DST", do_DST, print_VEC },
   { NOP, 0, "END", do_NOP, print_NOP },
   { SCL, 1, "EX2", do_EX2, print_VEC },
   { VEC, 1, "EXP", do_EXP, print_VEC },
   { VEC, 1, "FLR", do_FLR, print_VEC },
   { VEC, 1, "FRC", do_FRC, print_VEC },
   { SCL, 1, "LG2", do_LG2, print_VEC },
   { VEC, 1, "LIT", do_LIT, print_VEC },
   { VEC, 1, "LOG", do_LOG, print_VEC },
   { VEC, 3, "MAD", do_MAD, print_VEC },
   { VEC, 2, "MAX", do_MAX, print_VEC },
   { VEC, 2, "MIN", do_MIN, print_VEC },
   { VEC, 1, "MOV", do_MOV, print_VEC },
   { VEC, 2, "MUL", do_MUL, print_VEC },
   { SCL, 2, "POW", do_POW, print_VEC },
   { VEC, 1, "PRT", do_PRT, print_VEC }, /* PRINT */
   { NOP, 1, "RCC", do_NOP, print_NOP },
   { SCL, 1, "RCP", do_RCP, print_VEC },
   { SCL, 1, "RSQ", do_RSQ, print_VEC },
   { VEC, 2, "SGE", do_SGE, print_VEC },
   { VEC, 2, "SLT", do_SLT, print_VEC },
   { VEC, 2, "SUB", do_SUB, print_VEC },
   { SWZ, 1, "SWZ", do_SWZ, print_SWZ },
   { VEC, 2, "XPD", do_XPD, print_VEC },
   { IN4, 1, "IN1", do_IN1, print_IN }, /* Internals */
   { IN4, 1, "IN2", do_IN2, print_IN },
   { IN4, 1, "IN3", do_IN3, print_IN },
   { IN4, 1, "IN4", do_IN4, print_IN },
   { OUT, 1, "OUT", do_OUT, print_OUT },
   { OUT, 1, "OUM", do_OUM, print_OUT },
   { SWZ, 1, "RSW", do_RSW, print_RSW },
   { MSK, 1, "MSK", do_MSK, print_MSK },
   { IN4, 1, "PAR", do_PAR, print_IN },
   { IN4, 1, "PRL", do_PRL, print_IN },
};
984
985
986 static GLuint cvp_load_reg( struct compilation *cp,
987 GLuint file,
988 GLuint index,
989 GLuint rel )
990 {
991 GLuint i, op;
992
993 if (file == PROGRAM_TEMPORARY)
994 return index + REG_TMP0;
995
996 /* Don't try to cache relatively addressed values yet:
997 */
998 if (!rel) {
999 for (i = 0; i < REG_PAR_NR; i++) {
1000 if ((cp->par_active & (1<<i)) &&
1001 cp->reg[i].file == file &&
1002 cp->reg[i].idx == index) {
1003 cp->par_protected |= (1<<i);
1004 return i + REG_PAR0;
1005 }
1006 }
1007 }
1008
1009 /* Not already loaded, so identify a slot and load it.
1010 * TODO: preload these values once only!
1011 * TODO: better eviction strategy!
1012 */
1013 if (cp->par_active == ~0) {
1014 assert(cp->par_protected != ~0);
1015 cp->par_active = cp->par_protected;
1016 }
1017
1018 i = ffs(~cp->par_active);
1019 assert(i);
1020 i--;
1021
1022
1023 if (file == PROGRAM_INPUT)
1024 op = IN1 + cp->VB->AttribPtr[index]->size - 1;
1025 else if (rel)
1026 op = PRL;
1027 else
1028 op = PAR;
1029
1030 cp->csr->dword = 0;
1031 cp->csr->inr.opcode = op;
1032 cp->csr->inr.reg = i + REG_PAR0;
1033 cp->csr->inr.file = file;
1034 cp->csr->inr.idx = index;
1035 cp->csr++;
1036
1037 cp->reg[i].file = file;
1038 cp->reg[i].idx = index;
1039 cp->par_protected |= (1<<i);
1040 cp->par_active |= (1<<i);
1041 return i + REG_PAR0;
1042 }
1043
1044 static void cvp_release_regs( struct compilation *cp )
1045 {
1046 cp->par_protected = 0;
1047 }
1048
1049
1050
1051 static GLuint cvp_emit_arg( struct compilation *cp,
1052 const struct vp_src_register *src,
1053 GLuint arg )
1054 {
1055 GLuint reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr );
1056 union instruction rsw, noop;
1057
1058 /* Emit any necessary swizzling.
1059 */
1060 rsw.dword = 0;
1061 rsw.rsw.neg = src->Negate ? 1 : 0;
1062 rsw.rsw.swz = ((GET_SWZ(src->Swizzle, 0) << 0) |
1063 (GET_SWZ(src->Swizzle, 1) << 2) |
1064 (GET_SWZ(src->Swizzle, 2) << 4) |
1065 (GET_SWZ(src->Swizzle, 3) << 6));
1066
1067 noop.dword = 0;
1068 noop.rsw.neg = 0;
1069 noop.rsw.swz = ((0<<0) |
1070 (1<<2) |
1071 (2<<4) |
1072 (3<<6));
1073
1074 if (rsw.dword != noop.dword) {
1075 GLuint rsw_reg = arg;
1076 cp->csr->dword = rsw.dword;
1077 cp->csr->rsw.opcode = RSW;
1078 cp->csr->rsw.arg0 = reg;
1079 cp->csr->rsw.dst = rsw_reg;
1080 cp->csr++;
1081 return rsw_reg;
1082 }
1083 else
1084 return reg;
1085 }
1086
1087 static GLuint cvp_choose_result( struct compilation *cp,
1088 const struct vp_dst_register *dst,
1089 union instruction *fixup,
1090 GLuint maxreg)
1091 {
1092 GLuint mask = dst->WriteMask;
1093
1094 if (dst->File == PROGRAM_TEMPORARY) {
1095
1096 /* Optimization: When writing (with a writemask) to an undefined
1097 * value for the first time, the writemask may be ignored. In
1098 * practise this means that the MSK instruction to implement the
1099 * writemask can be dropped.
1100 */
1101 if (dst->Index < maxreg &&
1102 (mask == 0xf || !(cp->tmp_active & (1<<dst->Index)))) {
1103 fixup->dword = 0;
1104 cp->tmp_active |= (1<<dst->Index);
1105 return REG_TMP0 + dst->Index;
1106 }
1107 else if (mask != 0xf) {
1108 fixup->msk.opcode = MSK;
1109 fixup->msk.arg0 = REG_RES;
1110 fixup->msk.dst = REG_TMP0 + dst->Index;
1111 fixup->msk.mask = mask;
1112 cp->tmp_active |= (1<<dst->Index);
1113 return REG_RES;
1114 }
1115 else {
1116 fixup->vec.opcode = VP_OPCODE_MOV;
1117 fixup->vec.arg0 = REG_RES;
1118 fixup->vec.dst = REG_TMP0 + dst->Index;
1119 cp->tmp_active |= (1<<dst->Index);
1120 return REG_RES;
1121 }
1122 }
1123 else {
1124 assert(dst->File == PROGRAM_OUTPUT);
1125 fixup->out.opcode = (mask == 0xf) ? OUT : OUM;
1126 fixup->out.reg = REG_RES;
1127 fixup->out.file = dst->File;
1128 fixup->out.idx = dst->Index;
1129 fixup->out.mask = mask;
1130 return REG_RES;
1131 }
1132 }
1133
1134
1135 static void cvp_emit_inst( struct compilation *cp,
1136 const struct vp_instruction *inst )
1137 {
1138 const struct opcode_info *info = &opcode_info[inst->Opcode];
1139 union instruction fixup;
1140 GLuint reg[3];
1141 GLuint result, i;
1142
1143 /* Need to handle SWZ, ARL specially.
1144 */
1145 switch (info->type) {
1146 case OUT:
1147 assert(inst->Opcode == VP_OPCODE_ARL);
1148 reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
1149
1150 cp->csr->dword = 0;
1151 cp->csr->out.opcode = inst->Opcode;
1152 cp->csr->out.reg = reg[0];
1153 cp->csr->out.file = PROGRAM_ADDRESS;
1154 cp->csr->out.idx = 0;
1155 break;
1156 case SWZ:
1157 assert(inst->Opcode == VP_OPCODE_SWZ);
1158 result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_SWZDST_MAX );
1159
1160 reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
1161
1162 cp->csr->dword = 0;
1163 cp->csr->swz.opcode = VP_OPCODE_SWZ;
1164 cp->csr->swz.arg0 = reg[0];
1165 cp->csr->swz.dst = result;
1166 cp->csr->swz.neg = inst->SrcReg[0].Negate;
1167 cp->csr->swz.swz = inst->SrcReg[0].Swizzle;
1168 cp->csr++;
1169
1170 if (result == REG_RES) {
1171 cp->csr->dword = fixup.dword;
1172 cp->csr++;
1173 }
1174 break;
1175
1176 case VEC:
1177 case SCL: /* for now */
1178 result = cvp_choose_result( cp, &inst->DstReg, &fixup, REG_MAX );
1179
1180 reg[0] = reg[1] = reg[2] = 0;
1181
1182 for (i = 0; i < info->nr_args; i++)
1183 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
1184
1185 cp->csr->dword = 0;
1186 cp->csr->vec.opcode = inst->Opcode;
1187 cp->csr->vec.arg0 = reg[0];
1188 cp->csr->vec.arg1 = reg[1];
1189 cp->csr->vec.arg2 = reg[2];
1190 cp->csr->vec.dst = result;
1191 cp->csr++;
1192
1193 if (result == REG_RES) {
1194 cp->csr->dword = fixup.dword;
1195 cp->csr++;
1196 }
1197 break;
1198
1199
1200 case NOP:
1201 break;
1202
1203 default:
1204 assert(0);
1205 break;
1206 }
1207
1208 cvp_release_regs( cp );
1209 }
1210
1211
1212 static void compile_vertex_program( struct arb_vp_machine *m,
1213 const struct vertex_program *program )
1214 {
1215 struct compilation cp;
1216 GLuint i;
1217
1218 /* Initialize cp:
1219 */
1220 memset(&cp, 0, sizeof(cp));
1221 cp.VB = m->VB;
1222 cp.csr = m->store;
1223
1224 /* Compile instructions:
1225 */
1226 for (i = 0; i < program->Base.NumInstructions; i++) {
1227 cvp_emit_inst(&cp, &program->Instructions[i]);
1228 }
1229
1230 /* Finish up:
1231 */
1232 m->instructions = m->store;
1233 m->nr_instructions = cp.csr - m->store;
1234
1235
1236 /* Print/disassemble:
1237 */
1238 if (0) {
1239 for (i = 0; i < m->nr_instructions; i++) {
1240 union instruction insn = m->instructions[i];
1241 const struct opcode_info *info = &opcode_info[insn.vec.opcode];
1242 info->print( insn, info );
1243 }
1244 _mesa_printf("\n\n");
1245 }
1246 }
1247
1248
1249
1250
1251 /* ----------------------------------------------------------------------
1252 * Execution
1253 */
1254 static void userclip( GLcontext *ctx,
1255 GLvector4f *clip,
1256 GLubyte *clipmask,
1257 GLubyte *clipormask,
1258 GLubyte *clipandmask )
1259 {
1260 GLuint p;
1261
1262 for (p = 0; p < ctx->Const.MaxClipPlanes; p++)
1263 if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
1264 GLuint nr, i;
1265 const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
1266 const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
1267 const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
1268 const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
1269 GLfloat *coord = (GLfloat *)clip->data;
1270 GLuint stride = clip->stride;
1271 GLuint count = clip->count;
1272
1273 for (nr = 0, i = 0 ; i < count ; i++) {
1274 GLfloat dp = (coord[0] * a +
1275 coord[1] * b +
1276 coord[2] * c +
1277 coord[3] * d);
1278
1279 if (dp < 0) {
1280 nr++;
1281 clipmask[i] |= CLIP_USER_BIT;
1282 }
1283
1284 STRIDE_F(coord, stride);
1285 }
1286
1287 if (nr > 0) {
1288 *clipormask |= CLIP_USER_BIT;
1289 if (nr == count) {
1290 *clipandmask |= CLIP_USER_BIT;
1291 return;
1292 }
1293 }
1294 }
1295 }
1296
1297
1298 static GLboolean do_ndc_cliptest( struct arb_vp_machine *m )
1299 {
1300 GLcontext *ctx = m->ctx;
1301 TNLcontext *tnl = TNL_CONTEXT(ctx);
1302 struct vertex_buffer *VB = m->VB;
1303
1304 /* Cliptest and perspective divide. Clip functions must clear
1305 * the clipmask.
1306 */
1307 m->ormask = 0;
1308 m->andmask = CLIP_ALL_BITS;
1309
1310 if (tnl->NeedNdcCoords) {
1311 VB->NdcPtr =
1312 _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
1313 &m->ndcCoords,
1314 m->clipmask,
1315 &m->ormask,
1316 &m->andmask );
1317 }
1318 else {
1319 VB->NdcPtr = NULL;
1320 _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
1321 NULL,
1322 m->clipmask,
1323 &m->ormask,
1324 &m->andmask );
1325 }
1326
1327 if (m->andmask) {
1328 /* All vertices are outside the frustum */
1329 return GL_FALSE;
1330 }
1331
1332 /* Test userclip planes. This contributes to VB->ClipMask.
1333 */
1334 if (ctx->Transform.ClipPlanesEnabled && !ctx->VertexProgram._Enabled) {
1335 userclip( ctx,
1336 VB->ClipPtr,
1337 m->clipmask,
1338 &m->ormask,
1339 &m->andmask );
1340
1341 if (m->andmask) {
1342 return GL_FALSE;
1343 }
1344 }
1345
1346 VB->ClipAndMask = m->andmask;
1347 VB->ClipOrMask = m->ormask;
1348 VB->ClipMask = m->clipmask;
1349
1350 return GL_TRUE;
1351 }
1352
1353
1354
1355
1356 /**
1357 * Execute the given vertex program.
1358 *
1359 * TODO: Integrate the t_vertex.c code here, to build machine vertices
1360 * directly at this point.
1361 *
1362 * TODO: Eliminate the VB struct entirely and just use
1363 * struct arb_vertex_machine.
1364 */
1365 static GLboolean
1366 run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
1367 {
1368 struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1369 ctx->VertexProgram.Current :
1370 &ctx->_TnlProgram);
1371 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1372 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1373 GLuint i, j, outputs = program->OutputsWritten;
1374
1375 if (program->Parameters) {
1376 _mesa_load_state_parameters(ctx, program->Parameters);
1377 m->File[PROGRAM_STATE_VAR] = program->Parameters->ParameterValues;
1378 }
1379
1380 /* Run the actual program:
1381 */
1382 for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
1383 for (j = 0; j < m->nr_instructions; j++) {
1384 union instruction inst = m->instructions[j];
1385 opcode_info[inst.vec.opcode].func( m, inst );
1386 }
1387 }
1388
1389 /* Setup the VB pointers so that the next pipeline stages get
1390 * their data from the right place (the program output arrays).
1391 *
1392 * TODO: 1) Have tnl use these RESULT values for outputs rather
1393 * than trying to shoe-horn inputs and outputs into one set of
1394 * values.
1395 *
1396 * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1397 * and build machine vertices here.
1398 */
1399 VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS];
1400 VB->ClipPtr->count = VB->Count;
1401
1402 if (outputs & (1<<VERT_RESULT_COL0)) {
1403 VB->ColorPtr[0] = &m->attribs[VERT_RESULT_COL0];
1404 VB->AttribPtr[VERT_ATTRIB_COLOR0] = VB->ColorPtr[0];
1405 }
1406
1407 if (outputs & (1<<VERT_RESULT_BFC0)) {
1408 VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0];
1409 }
1410
1411 if (outputs & (1<<VERT_RESULT_COL1)) {
1412 VB->SecondaryColorPtr[0] = &m->attribs[VERT_RESULT_COL1];
1413 VB->AttribPtr[VERT_ATTRIB_COLOR1] = VB->SecondaryColorPtr[0];
1414 }
1415
1416 if (outputs & (1<<VERT_RESULT_BFC1)) {
1417 VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1];
1418 }
1419
1420 if (outputs & (1<<VERT_RESULT_FOGC)) {
1421 VB->FogCoordPtr = &m->attribs[VERT_RESULT_FOGC];
1422 VB->AttribPtr[VERT_ATTRIB_FOG] = VB->FogCoordPtr;
1423 }
1424
1425 if (outputs & (1<<VERT_RESULT_PSIZ)) {
1426 VB->PointSizePtr = &m->attribs[VERT_RESULT_PSIZ];
1427 VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ];
1428 }
1429
1430 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
1431 if (outputs & (1<<(VERT_RESULT_TEX0+i))) {
1432 VB->TexCoordPtr[i] = &m->attribs[VERT_RESULT_TEX0 + i];
1433 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = VB->TexCoordPtr[i];
1434 }
1435 }
1436
1437 #if 0
1438 for (i = 0; i < VB->Count; i++) {
1439 printf("Out %d: %f %f %f %f %f %f %f %f\n", i,
1440 VEC_ELT(VB->ClipPtr, GLfloat, i)[0],
1441 VEC_ELT(VB->ClipPtr, GLfloat, i)[1],
1442 VEC_ELT(VB->ClipPtr, GLfloat, i)[2],
1443 VEC_ELT(VB->ClipPtr, GLfloat, i)[3],
1444 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[0],
1445 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[1],
1446 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[2],
1447 VEC_ELT(VB->ColorPtr[0], GLfloat, i)[3]);
1448 }
1449 #endif
1450
1451 /* Perform NDC and cliptest operations:
1452 */
1453 return do_ndc_cliptest(m);
1454 }
1455
1456
1457 static void
1458 validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
1459 {
1460 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1461 struct vertex_program *program = (ctx->VertexProgram._Enabled ?
1462 ctx->VertexProgram.Current :
1463 &ctx->_TnlProgram);
1464
1465 compile_vertex_program( m, program );
1466
1467 /* Grab the state GL state and put into registers:
1468 */
1469 m->File[PROGRAM_LOCAL_PARAM] = program->Base.LocalParams;
1470 m->File[PROGRAM_ENV_PARAM] = ctx->VertexProgram.Parameters;
1471 m->File[PROGRAM_STATE_VAR] = 0;
1472 }
1473
1474
1475
1476
1477
1478
1479
1480 /**
1481 * Called the first time stage->run is called. In effect, don't
1482 * allocate data until the first time the stage is run.
1483 */
1484 static void init_vertex_program( GLcontext *ctx,
1485 struct tnl_pipeline_stage *stage )
1486 {
1487 TNLcontext *tnl = TNL_CONTEXT(ctx);
1488 struct vertex_buffer *VB = &(tnl->vb);
1489 struct arb_vp_machine *m;
1490 const GLuint size = VB->Size;
1491 GLuint i;
1492
1493 stage->privatePtr = MALLOC(sizeof(*m));
1494 m = ARB_VP_MACHINE(stage);
1495 if (!m)
1496 return;
1497
1498 /* arb_vertex_machine struct should subsume the VB:
1499 */
1500 m->VB = VB;
1501 m->ctx = ctx;
1502
1503 /* Allocate arrays of vertex output values */
1504 for (i = 0; i < VERT_RESULT_MAX; i++) {
1505 _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 );
1506 m->attribs[i].size = 4;
1507 }
1508
1509 /* a few other misc allocations */
1510 _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 );
1511 m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 );
1512 }
1513
1514
1515
1516
1517 /**
1518 * Destructor for this pipeline stage.
1519 */
1520 static void dtr( struct tnl_pipeline_stage *stage )
1521 {
1522 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1523
1524 if (m) {
1525 GLuint i;
1526
1527 /* free the vertex program result arrays */
1528 for (i = 0; i < VERT_RESULT_MAX; i++)
1529 _mesa_vector4f_free( &m->attribs[i] );
1530
1531 /* free misc arrays */
1532 _mesa_vector4f_free( &m->ndcCoords );
1533 ALIGN_FREE( m->clipmask );
1534
1535 FREE( m );
1536 stage->privatePtr = NULL;
1537 }
1538 }
1539
1540 /**
1541 * Public description of this pipeline stage.
1542 */
1543 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage =
1544 {
1545 "vertex-program",
1546 NULL, /* private_data */
1547 init_vertex_program, /* create */
1548 dtr, /* destroy */
1549 validate_vertex_program, /* validate */
1550 run_arb_vertex_program /* run */
1551 };