2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * \file t_arb_program.c
27 * Compile vertex programs to an intermediate representation.
28 * Execute vertex programs over a buffer of vertices.
29 * \author Keith Whitwell, Brian Paul
37 #include "arbprogparse.h"
39 #include "math/m_matrix.h"
40 #include "math/m_translate.h"
41 #include "t_context.h"
42 #include "t_pipeline.h"
43 #include "t_vp_build.h"
45 /* Define to see the compiled program on stderr:
50 /* New, internal instructions:
52 #define IN1 (VP_OPCODE_XPD+1)
53 #define IN2 (IN1+1) /* intput-to-reg MOV */
56 #define OUT (IN1+4) /* reg-to-output MOV */
57 #define OUM (IN1+5) /* reg-to-output MOV with mask */
59 #define MSK (IN1+7) /* reg-to-reg MOV with mask */
60 #define PAR (IN1+8) /* parameter-to-reg MOV */
61 #define PRL (IN1+9) /* parameter-to-reg MOV */
64 /* Layout of register file:
70 4 -- Program Temporary 0
72 31 -- Program Temporary 27
73 32 -- State/Input/Const shadow 0
75 63 -- State/Input/Const shadow 31
86 #define REG_TMP_MAX 32
87 #define REG_TMP_NR (REG_TMP_MAX-REG_TMP0)
89 #define REG_PAR_MAX 64
90 #define REG_PAR_NR (REG_PAR_MAX-REG_PAR0)
93 #define REG_SWZDST_MAX 16
95 /* ARB_vp instructions are broken down into one or more of the
96 * following micro-instructions, each representable in a 32 bit packed
107 GLuint elt
:2; /* x,y,z or w */
123 GLuint dst
:4; /* NOTE! REG 0..16 only! */
133 GLuint neg
:1; /* 1 bit only */
134 GLuint swz
:8; /* xyzw only */
142 GLuint idx
:8; /* plenty? */
152 GLuint idx
:8; /* plenty? */
177 GLuint par_protected
;
180 union instruction
*csr
;
182 struct vertex_buffer
*VB
; /* for input sizes! */
185 /*--------------------------------------------------------------------------- */
188 * Private storage for the vertex program pipeline stage.
190 struct arb_vp_machine
{
191 GLfloat reg
[REG_MAX
][4]; /* Program temporaries, shadowed parameters and inputs,
192 plus some internal values */
194 GLfloat (*File
[8])[4]; /* Src/Dest for PAR/PRL instructions. */
197 union instruction store
[1024];
198 union instruction
*instructions
;
199 GLint nr_instructions
;
201 GLvector4f attribs
[VERT_RESULT_MAX
]; /**< result vectors. */
202 GLvector4f ndcCoords
; /**< normalized device coords */
203 GLubyte
*clipmask
; /**< clip flags */
204 GLubyte ormask
, andmask
; /**< for clipping */
206 GLuint vtx_nr
; /**< loop counter */
208 struct vertex_buffer
*VB
;
213 /*--------------------------------------------------------------------------- */
219 void (*func
)( struct arb_vp_machine
*, union instruction
);
220 void (*print
)( union instruction
, const struct opcode_info
* );
224 #define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
229 * Set x to positive or negative infinity.
231 * XXX: FIXME - type punning.
233 #if defined(USE_IEEE) || defined(_WIN32)
234 #define SET_POS_INFINITY(x) ( *((GLuint *) (void *)&x) = 0x7F800000 )
235 #define SET_NEG_INFINITY(x) ( *((GLuint *) (void *)&x) = 0xFF800000 )
237 #define SET_POS_INFINITY(x) x = __MAXFLOAT
238 #define SET_NEG_INFINITY(x) x = -__MAXFLOAT
239 #define IS_INF_OR_NAN(t) ((t) == __MAXFLOAT)
241 #define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
242 #define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
245 #define FREXPF(a,b) frexpf(a,b)
247 #define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])
249 /* FIXME: more type punning (despite use of fi_type...)
251 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
254 static GLfloat
RoughApproxLog2(GLfloat t
)
259 static GLfloat
RoughApproxPow2(GLfloat t
)
263 GLint ii
= (GLint
) t
;
264 ii
= (ii
< 23) + 0x3f800000;
265 SET_FLOAT_BITS(q
, ii
);
266 q
= *((GLfloat
*) (void *)&ii
);
268 q
= (GLfloat
) pow(2.0, floor_t0
);
273 static GLfloat
RoughApproxPower(GLfloat x
, GLfloat y
)
276 return (GLfloat
) exp(y
* log(x
));
278 return (GLfloat
) _mesa_pow(x
, y
);
283 static const GLfloat ZeroVec
[4] = { 0.0F
, 0.0F
, 0.0F
, 0.0F
};
289 * This is probably the least-optimal part of the process, have to
290 * multiply out the stride to access each incoming input value.
292 static GLfloat
*get_input( struct arb_vp_machine
*m
, GLuint index
)
294 return VEC_ELT(m
->VB
->AttribPtr
[index
], GLfloat
, m
->vtx_nr
);
299 * Fetch a 4-element float vector from the given source register.
300 * Deal with the possibility that not all elements are present.
302 static void do_IN1( struct arb_vp_machine
*m
, union instruction op
)
304 GLfloat
*result
= m
->reg
[op
.inr
.reg
];
305 const GLfloat
*src
= get_input(m
, op
.inr
.idx
);
313 static void do_IN2( struct arb_vp_machine
*m
, union instruction op
)
315 GLfloat
*result
= m
->reg
[op
.inr
.reg
];
316 const GLfloat
*src
= get_input(m
, op
.inr
.idx
);
324 static void do_IN3( struct arb_vp_machine
*m
, union instruction op
)
326 GLfloat
*result
= m
->reg
[op
.inr
.reg
];
327 const GLfloat
*src
= get_input(m
, op
.inr
.idx
);
335 static void do_IN4( struct arb_vp_machine
*m
, union instruction op
)
337 GLfloat
*result
= m
->reg
[op
.inr
.reg
];
338 const GLfloat
*src
= get_input(m
, op
.inr
.idx
);
347 * Perform a reduced swizzle:
349 static void do_RSW( struct arb_vp_machine
*m
, union instruction op
)
351 GLfloat
*result
= m
->reg
[op
.rsw
.dst
];
352 const GLfloat
*arg0
= m
->reg
[op
.rsw
.arg0
];
353 GLuint swz
= op
.rsw
.swz
;
354 GLuint neg
= op
.rsw
.neg
;
358 for (i
= 0; i
< 4; i
++, swz
>>= 2)
359 result
[i
] = -arg0
[swz
& 0x3];
361 for (i
= 0; i
< 4; i
++, swz
>>= 2)
362 result
[i
] = arg0
[swz
& 0x3];
368 * Store 4 floats into an external address.
370 static void do_OUM( struct arb_vp_machine
*m
, union instruction op
)
372 GLfloat
*dst
= m
->attribs
[op
.out
.idx
].data
[m
->vtx_nr
];
373 const GLfloat
*value
= m
->reg
[op
.out
.reg
];
375 if (op
.out
.mask
& 0x1) dst
[0] = value
[0];
376 if (op
.out
.mask
& 0x2) dst
[1] = value
[1];
377 if (op
.out
.mask
& 0x4) dst
[2] = value
[2];
378 if (op
.out
.mask
& 0x8) dst
[3] = value
[3];
381 static void do_OUT( struct arb_vp_machine
*m
, union instruction op
)
383 GLfloat
*dst
= m
->attribs
[op
.out
.idx
].data
[m
->vtx_nr
];
384 const GLfloat
*value
= m
->reg
[op
.out
.reg
];
392 /* Register-to-register MOV with writemask.
394 static void do_MSK( struct arb_vp_machine
*m
, union instruction op
)
396 GLfloat
*dst
= m
->reg
[op
.msk
.dst
];
397 const GLfloat
*arg0
= m
->reg
[op
.msk
.arg0
];
399 if (op
.msk
.mask
& 0x1) dst
[0] = arg0
[0];
400 if (op
.msk
.mask
& 0x2) dst
[1] = arg0
[1];
401 if (op
.msk
.mask
& 0x4) dst
[2] = arg0
[2];
402 if (op
.msk
.mask
& 0x8) dst
[3] = arg0
[3];
406 /* Retrieve parameters and other constant values:
408 static void do_PAR( struct arb_vp_machine
*m
, union instruction op
)
410 GLfloat
*result
= m
->reg
[op
.inr
.reg
];
411 const GLfloat
*src
= m
->File
[op
.inr
.file
][op
.inr
.idx
];
420 #define RELADDR_MASK (MAX_NV_VERTEX_PROGRAM_PARAMS-1)
422 static void do_PRL( struct arb_vp_machine
*m
, union instruction op
)
424 GLfloat
*result
= m
->reg
[op
.inr
.reg
];
425 GLuint index
= (op
.inr
.idx
+ m
->AddressReg
) & RELADDR_MASK
;
426 const GLfloat
*src
= m
->File
[op
.inr
.file
][index
];
434 static void do_PRT( struct arb_vp_machine
*m
, union instruction op
)
436 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
438 _mesa_printf("%d: %f %f %f %f\n", m
->vtx_nr
,
439 arg0
[0], arg0
[1], arg0
[2], arg0
[3]);
444 * The traditional ALU and texturing instructions. All operate on
445 * internal registers and ignore write masks and swizzling issues.
448 static void do_ABS( struct arb_vp_machine
*m
, union instruction op
)
450 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
451 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
453 result
[0] = (arg0
[0] < 0.0) ? -arg0
[0] : arg0
[0];
454 result
[1] = (arg0
[1] < 0.0) ? -arg0
[1] : arg0
[1];
455 result
[2] = (arg0
[2] < 0.0) ? -arg0
[2] : arg0
[2];
456 result
[3] = (arg0
[3] < 0.0) ? -arg0
[3] : arg0
[3];
459 static void do_ADD( struct arb_vp_machine
*m
, union instruction op
)
461 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
462 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
463 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
465 result
[0] = arg0
[0] + arg1
[0];
466 result
[1] = arg0
[1] + arg1
[1];
467 result
[2] = arg0
[2] + arg1
[2];
468 result
[3] = arg0
[3] + arg1
[3];
472 static void do_ARL( struct arb_vp_machine
*m
, union instruction op
)
474 const GLfloat
*arg0
= m
->reg
[op
.out
.reg
];
475 m
->AddressReg
= (GLint
) floor(arg0
[0]);
479 static void do_DP3( struct arb_vp_machine
*m
, union instruction op
)
481 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
482 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
483 const GLfloat
*arg1
= m
->reg
[op
.scl
.arg1
];
485 result
[0] = (arg0
[0] * arg1
[0] +
493 static void do_MAT4( struct arb_vp_machine
*m
, union instruction op
)
495 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
496 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
497 const GLfloat
*mat
[] = m
->reg
[op
.scl
.arg1
];
499 result
[0] = (arg0
[0] * mat0
[0] + arg0
[1] * mat0
[1] + arg0
[2] * mat0
[2] + arg0
[3] * mat0
[3]);
500 result
[1] = (arg0
[0] * mat1
[0] + arg0
[1] * mat1
[1] + arg0
[2] * mat1
[2] + arg0
[3] * mat1
[3]);
501 result
[2] = (arg0
[0] * mat2
[0] + arg0
[1] * mat2
[1] + arg0
[2] * mat2
[2] + arg0
[3] * mat2
[3]);
502 result
[3] = (arg0
[0] * mat3
[0] + arg0
[1] * mat3
[1] + arg0
[2] * mat3
[2] + arg0
[3] * mat3
[3]);
507 static void do_DP4( struct arb_vp_machine
*m
, union instruction op
)
509 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
510 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
511 const GLfloat
*arg1
= m
->reg
[op
.scl
.arg1
];
513 result
[0] = (arg0
[0] * arg1
[0] +
521 static void do_DPH( struct arb_vp_machine
*m
, union instruction op
)
523 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
524 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
525 const GLfloat
*arg1
= m
->reg
[op
.scl
.arg1
];
527 result
[0] = (arg0
[0] * arg1
[0] +
535 static void do_DST( struct arb_vp_machine
*m
, union instruction op
)
537 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
538 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
539 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
542 result
[1] = arg0
[1] * arg1
[1];
548 static void do_EX2( struct arb_vp_machine
*m
, union instruction op
)
550 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
551 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
553 result
[0] = (GLfloat
)RoughApproxPow2(arg0
[0]);
557 static void do_EXP( struct arb_vp_machine
*m
, union instruction op
)
559 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
560 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
561 GLfloat tmp
= arg0
[0];
562 GLfloat flr_tmp
= FLOORF(tmp
);
564 /* KW: nvvertexec has an optimized version of this which is pretty
565 * hard to understand/validate, but avoids the RoughApproxPow2.
567 result
[0] = (GLfloat
) (1 << (int)flr_tmp
);
568 result
[1] = tmp
- flr_tmp
;
569 result
[2] = RoughApproxPow2(tmp
);
573 static void do_FLR( struct arb_vp_machine
*m
, union instruction op
)
575 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
576 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
578 result
[0] = FLOORF(arg0
[0]);
579 result
[1] = FLOORF(arg0
[1]);
580 result
[2] = FLOORF(arg0
[2]);
581 result
[3] = FLOORF(arg0
[3]);
584 static void do_FRC( struct arb_vp_machine
*m
, union instruction op
)
586 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
587 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
589 result
[0] = arg0
[0] - FLOORF(arg0
[0]);
590 result
[1] = arg0
[1] - FLOORF(arg0
[1]);
591 result
[2] = arg0
[2] - FLOORF(arg0
[2]);
592 result
[3] = arg0
[3] - FLOORF(arg0
[3]);
595 static void do_LG2( struct arb_vp_machine
*m
, union instruction op
)
597 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
598 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
600 result
[0] = RoughApproxLog2(arg0
[0]);
606 static void do_LIT( struct arb_vp_machine
*m
, union instruction op
)
608 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
609 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
611 const GLfloat epsilon
= 1.0F
/ 256.0F
; /* per NV spec */
614 tmp
[0] = MAX2(arg0
[0], 0.0F
);
615 tmp
[1] = MAX2(arg0
[1], 0.0F
);
616 tmp
[3] = CLAMP(arg0
[3], -(128.0F
- epsilon
), (128.0F
- epsilon
));
620 result
[2] = (tmp
[0] > 0.0) ? RoughApproxPower(tmp
[1], tmp
[3]) : 0.0F
;
625 static void do_LOG( struct arb_vp_machine
*m
, union instruction op
)
627 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
628 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
629 GLfloat tmp
= FABSF(arg0
[0]);
631 GLfloat mantissa
= FREXPF(tmp
, &exponent
);
633 result
[0] = (GLfloat
) (exponent
- 1);
634 result
[1] = 2.0 * mantissa
; /* map [.5, 1) -> [1, 2) */
635 result
[2] = result
[0] + LOG2(result
[1]);
640 static void do_MAD( struct arb_vp_machine
*m
, union instruction op
)
642 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
643 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
644 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
645 const GLfloat
*arg2
= m
->reg
[op
.vec
.arg2
];
647 result
[0] = arg0
[0] * arg1
[0] + arg2
[0];
648 result
[1] = arg0
[1] * arg1
[1] + arg2
[1];
649 result
[2] = arg0
[2] * arg1
[2] + arg2
[2];
650 result
[3] = arg0
[3] * arg1
[3] + arg2
[3];
653 static void do_MAX( struct arb_vp_machine
*m
, union instruction op
)
655 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
656 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
657 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
659 result
[0] = (arg0
[0] > arg1
[0]) ? arg0
[0] : arg1
[0];
660 result
[1] = (arg0
[1] > arg1
[1]) ? arg0
[1] : arg1
[1];
661 result
[2] = (arg0
[2] > arg1
[2]) ? arg0
[2] : arg1
[2];
662 result
[3] = (arg0
[3] > arg1
[3]) ? arg0
[3] : arg1
[3];
666 static void do_MIN( struct arb_vp_machine
*m
, union instruction op
)
668 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
669 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
670 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
672 result
[0] = (arg0
[0] < arg1
[0]) ? arg0
[0] : arg1
[0];
673 result
[1] = (arg0
[1] < arg1
[1]) ? arg0
[1] : arg1
[1];
674 result
[2] = (arg0
[2] < arg1
[2]) ? arg0
[2] : arg1
[2];
675 result
[3] = (arg0
[3] < arg1
[3]) ? arg0
[3] : arg1
[3];
678 static void do_MOV( struct arb_vp_machine
*m
, union instruction op
)
680 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
681 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
689 static void do_MUL( struct arb_vp_machine
*m
, union instruction op
)
691 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
692 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
693 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
695 result
[0] = arg0
[0] * arg1
[0];
696 result
[1] = arg0
[1] * arg1
[1];
697 result
[2] = arg0
[2] * arg1
[2];
698 result
[3] = arg0
[3] * arg1
[3];
702 static void do_POW( struct arb_vp_machine
*m
, union instruction op
)
704 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
705 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
706 const GLfloat
*arg1
= m
->reg
[op
.scl
.arg1
];
708 result
[0] = (GLfloat
)RoughApproxPower(arg0
[0], arg1
[0]);
712 static void do_RCP( struct arb_vp_machine
*m
, union instruction op
)
714 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
715 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
717 result
[0] = 1.0F
/ arg0
[0];
721 static void do_RSQ( struct arb_vp_machine
*m
, union instruction op
)
723 GLfloat
*result
= m
->reg
[op
.scl
.dst
];
724 const GLfloat
*arg0
= m
->reg
[op
.scl
.arg0
];
726 result
[0] = INV_SQRTF(FABSF(arg0
[0]));
731 static void do_SGE( struct arb_vp_machine
*m
, union instruction op
)
733 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
734 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
735 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
737 result
[0] = (arg0
[0] >= arg1
[0]) ? 1.0F
: 0.0F
;
738 result
[1] = (arg0
[1] >= arg1
[1]) ? 1.0F
: 0.0F
;
739 result
[2] = (arg0
[2] >= arg1
[2]) ? 1.0F
: 0.0F
;
740 result
[3] = (arg0
[3] >= arg1
[3]) ? 1.0F
: 0.0F
;
744 static void do_SLT( struct arb_vp_machine
*m
, union instruction op
)
746 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
747 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
748 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
750 result
[0] = (arg0
[0] < arg1
[0]) ? 1.0F
: 0.0F
;
751 result
[1] = (arg0
[1] < arg1
[1]) ? 1.0F
: 0.0F
;
752 result
[2] = (arg0
[2] < arg1
[2]) ? 1.0F
: 0.0F
;
753 result
[3] = (arg0
[3] < arg1
[3]) ? 1.0F
: 0.0F
;
756 static void do_SWZ( struct arb_vp_machine
*m
, union instruction op
)
758 GLfloat
*result
= m
->reg
[op
.swz
.dst
];
759 const GLfloat
*arg0
= m
->reg
[op
.swz
.arg0
];
760 GLuint swz
= op
.swz
.swz
;
761 GLuint neg
= op
.swz
.neg
;
764 for (i
= 0; i
< 4; i
++, swz
>>= 3, neg
>>= 1) {
766 case SWIZZLE_ZERO
: result
[i
] = 0.0; break;
767 case SWIZZLE_ONE
: result
[i
] = 1.0; break;
768 default: result
[i
] = arg0
[swz
& 0x7]; break;
770 if (neg
& 0x1) result
[i
] = -result
[i
];
774 static void do_SUB( struct arb_vp_machine
*m
, union instruction op
)
776 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
777 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
778 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
780 result
[0] = arg0
[0] - arg1
[0];
781 result
[1] = arg0
[1] - arg1
[1];
782 result
[2] = arg0
[2] - arg1
[2];
783 result
[3] = arg0
[3] - arg1
[3];
787 static void do_XPD( struct arb_vp_machine
*m
, union instruction op
)
789 GLfloat
*result
= m
->reg
[op
.vec
.dst
];
790 const GLfloat
*arg0
= m
->reg
[op
.vec
.arg0
];
791 const GLfloat
*arg1
= m
->reg
[op
.vec
.arg1
];
793 result
[0] = arg0
[1] * arg1
[2] - arg0
[2] * arg1
[1];
794 result
[1] = arg0
[2] * arg1
[0] - arg0
[0] * arg1
[2];
795 result
[2] = arg0
[0] * arg1
[1] - arg0
[1] * arg1
[0];
798 static void do_NOP( struct arb_vp_machine
*m
, union instruction op
)
802 /* Some useful debugging functions:
804 static void print_reg( GLuint reg
)
808 else if (reg
>= REG_ARG0
&& reg
<= REG_ARG2
)
809 _mesa_printf("ARG%d", reg
- REG_ARG0
);
810 else if (reg
>= REG_TMP0
&& reg
< REG_TMP_MAX
)
811 _mesa_printf("TMP%d", reg
- REG_TMP0
);
812 else if (reg
>= REG_PAR0
&& reg
< REG_PAR_MAX
)
813 _mesa_printf("PAR%d", reg
- REG_PAR0
);
818 static void print_mask( GLuint mask
)
821 if (mask
&0x1) _mesa_printf("x");
822 if (mask
&0x2) _mesa_printf("y");
823 if (mask
&0x4) _mesa_printf("z");
824 if (mask
&0x8) _mesa_printf("w");
827 static void print_extern( GLuint file
, GLuint idx
)
829 static const char *reg_file
[] = {
841 _mesa_printf("%s:%d", reg_file
[file
], idx
);
846 static void print_SWZ( union instruction op
, const struct opcode_info
*info
)
848 GLuint swz
= op
.swz
.swz
;
849 GLuint neg
= op
.swz
.neg
;
852 _mesa_printf("%s ", info
->string
);
853 print_reg(op
.swz
.dst
);
855 print_reg(op
.swz
.arg0
);
857 for (i
= 0; i
< 4; i
++, swz
>>= 3, neg
>>= 1) {
858 const char *cswz
= "xyzw01??";
861 _mesa_printf("%c", cswz
[swz
&0x7]);
866 static void print_RSW( union instruction op
, const struct opcode_info
*info
)
868 GLuint swz
= op
.rsw
.swz
;
869 GLuint neg
= op
.rsw
.neg
;
872 _mesa_printf("%s ", info
->string
);
873 print_reg(op
.rsw
.dst
);
875 print_reg(op
.rsw
.arg0
);
877 for (i
= 0; i
< 4; i
++, swz
>>= 2) {
878 const char *cswz
= "xyzw";
881 _mesa_printf("%c", cswz
[swz
&0x3]);
887 static void print_SCL( union instruction op
, const struct opcode_info
*info
)
889 _mesa_printf("%s ", info
->string
);
890 print_reg(op
.scl
.dst
);
892 print_reg(op
.scl
.arg0
);
893 if (info
->nr_args
> 1) {
895 print_reg(op
.scl
.arg1
);
901 static void print_VEC( union instruction op
, const struct opcode_info
*info
)
903 _mesa_printf("%s ", info
->string
);
904 print_reg(op
.vec
.dst
);
906 print_reg(op
.vec
.arg0
);
907 if (info
->nr_args
> 1) {
909 print_reg(op
.vec
.arg1
);
911 if (info
->nr_args
> 2) {
913 print_reg(op
.vec
.arg2
);
918 static void print_MSK( union instruction op
, const struct opcode_info
*info
)
920 _mesa_printf("%s ", info
->string
);
921 print_reg(op
.msk
.dst
);
922 print_mask(op
.msk
.mask
);
924 print_reg(op
.msk
.arg0
);
928 static void print_IN( union instruction op
, const struct opcode_info
*info
)
930 _mesa_printf("%s ", info
->string
);
931 print_reg(op
.inr
.reg
);
933 print_extern(op
.inr
.file
, op
.inr
.idx
);
937 static void print_OUT( union instruction op
, const struct opcode_info
*info
)
939 _mesa_printf("%s ", info
->string
);
940 print_extern(op
.out
.file
, op
.out
.idx
);
941 if (op
.out
.opcode
== OUM
)
942 print_mask(op
.out
.mask
);
944 print_reg(op
.out
.reg
);
948 static void print_NOP( union instruction op
, const struct opcode_info
*info
)
957 static const struct opcode_info opcode_info
[] =
959 { VEC
, 1, "ABS", do_ABS
, print_VEC
},
960 { VEC
, 2, "ADD", do_ADD
, print_VEC
},
961 { OUT
, 1, "ARL", do_ARL
, print_OUT
},
962 { SCL
, 2, "DP3", do_DP3
, print_SCL
},
963 { SCL
, 2, "DP4", do_DP4
, print_SCL
},
964 { SCL
, 2, "DPH", do_DPH
, print_SCL
},
965 { VEC
, 2, "DST", do_DST
, print_VEC
},
966 { NOP
, 0, "END", do_NOP
, print_NOP
},
967 { SCL
, 1, "EX2", do_EX2
, print_VEC
},
968 { VEC
, 1, "EXP", do_EXP
, print_VEC
},
969 { VEC
, 1, "FLR", do_FLR
, print_VEC
},
970 { VEC
, 1, "FRC", do_FRC
, print_VEC
},
971 { SCL
, 1, "LG2", do_LG2
, print_VEC
},
972 { VEC
, 1, "LIT", do_LIT
, print_VEC
},
973 { VEC
, 1, "LOG", do_LOG
, print_VEC
},
974 { VEC
, 3, "MAD", do_MAD
, print_VEC
},
975 { VEC
, 2, "MAX", do_MAX
, print_VEC
},
976 { VEC
, 2, "MIN", do_MIN
, print_VEC
},
977 { VEC
, 1, "MOV", do_MOV
, print_VEC
},
978 { VEC
, 2, "MUL", do_MUL
, print_VEC
},
979 { SCL
, 2, "POW", do_POW
, print_VEC
},
980 { VEC
, 1, "PRT", do_PRT
, print_VEC
}, /* PRINT */
981 { NOP
, 1, "RCC", do_NOP
, print_NOP
},
982 { SCL
, 1, "RCP", do_RCP
, print_VEC
},
983 { SCL
, 1, "RSQ", do_RSQ
, print_VEC
},
984 { VEC
, 2, "SGE", do_SGE
, print_VEC
},
985 { VEC
, 2, "SLT", do_SLT
, print_VEC
},
986 { VEC
, 2, "SUB", do_SUB
, print_VEC
},
987 { SWZ
, 1, "SWZ", do_SWZ
, print_SWZ
},
988 { VEC
, 2, "XPD", do_XPD
, print_VEC
},
989 { IN4
, 1, "IN1", do_IN1
, print_IN
}, /* Internals */
990 { IN4
, 1, "IN2", do_IN2
, print_IN
},
991 { IN4
, 1, "IN3", do_IN3
, print_IN
},
992 { IN4
, 1, "IN4", do_IN4
, print_IN
},
993 { OUT
, 1, "OUT", do_OUT
, print_OUT
},
994 { OUT
, 1, "OUM", do_OUM
, print_OUT
},
995 { SWZ
, 1, "RSW", do_RSW
, print_RSW
},
996 { MSK
, 1, "MSK", do_MSK
, print_MSK
},
997 { IN4
, 1, "PAR", do_PAR
, print_IN
},
998 { IN4
, 1, "PRL", do_PRL
, print_IN
},
1002 static GLuint
cvp_load_reg( struct compilation
*cp
,
1009 if (file
== PROGRAM_TEMPORARY
)
1010 return index
+ REG_TMP0
;
1012 /* Don't try to cache relatively addressed values yet:
1015 for (i
= 0; i
< REG_PAR_NR
; i
++) {
1016 if ((cp
->par_active
& (1<<i
)) &&
1017 cp
->reg
[i
].file
== file
&&
1018 cp
->reg
[i
].idx
== index
) {
1019 cp
->par_protected
|= (1<<i
);
1020 return i
+ REG_PAR0
;
1025 /* Not already loaded, so identify a slot and load it.
1026 * TODO: preload these values once only!
1027 * TODO: better eviction strategy!
1029 if (cp
->par_active
== ~0) {
1030 assert(cp
->par_protected
!= ~0);
1031 cp
->par_active
= cp
->par_protected
;
1034 i
= ffs(~cp
->par_active
);
1039 if (file
== PROGRAM_INPUT
)
1040 op
= IN1
+ cp
->VB
->AttribPtr
[index
]->size
- 1;
1047 cp
->csr
->inr
.opcode
= op
;
1048 cp
->csr
->inr
.reg
= i
+ REG_PAR0
;
1049 cp
->csr
->inr
.file
= file
;
1050 cp
->csr
->inr
.idx
= index
;
1053 cp
->reg
[i
].file
= file
;
1054 cp
->reg
[i
].idx
= index
;
1055 cp
->par_protected
|= (1<<i
);
1056 cp
->par_active
|= (1<<i
);
1057 return i
+ REG_PAR0
;
1060 static void cvp_release_regs( struct compilation
*cp
)
1062 cp
->par_protected
= 0;
1067 static GLuint
cvp_emit_arg( struct compilation
*cp
,
1068 const struct vp_src_register
*src
,
1071 GLuint reg
= cvp_load_reg( cp
, src
->File
, src
->Index
, src
->RelAddr
);
1072 union instruction rsw
, noop
;
1074 /* Emit any necessary swizzling.
1077 rsw
.rsw
.neg
= src
->Negate
? 1 : 0;
1078 rsw
.rsw
.swz
= ((GET_SWZ(src
->Swizzle
, 0) << 0) |
1079 (GET_SWZ(src
->Swizzle
, 1) << 2) |
1080 (GET_SWZ(src
->Swizzle
, 2) << 4) |
1081 (GET_SWZ(src
->Swizzle
, 3) << 6));
1085 noop
.rsw
.swz
= ((0<<0) |
1090 if (rsw
.dword
!= noop
.dword
) {
1091 GLuint rsw_reg
= arg
;
1092 cp
->csr
->dword
= rsw
.dword
;
1093 cp
->csr
->rsw
.opcode
= RSW
;
1094 cp
->csr
->rsw
.arg0
= reg
;
1095 cp
->csr
->rsw
.dst
= rsw_reg
;
1103 static GLuint
cvp_choose_result( struct compilation
*cp
,
1104 const struct vp_dst_register
*dst
,
1105 union instruction
*fixup
,
1108 GLuint mask
= dst
->WriteMask
;
1110 if (dst
->File
== PROGRAM_TEMPORARY
) {
1112 /* Optimization: When writing (with a writemask) to an undefined
1113 * value for the first time, the writemask may be ignored. In
1114 * practice this means that the MSK instruction to implement the
1115 * writemask can be dropped.
1117 if (dst
->Index
< maxreg
&&
1118 (mask
== 0xf || !(cp
->tmp_active
& (1<<dst
->Index
)))) {
1120 cp
->tmp_active
|= (1<<dst
->Index
);
1121 return REG_TMP0
+ dst
->Index
;
1123 else if (mask
!= 0xf) {
1124 fixup
->msk
.opcode
= MSK
;
1125 fixup
->msk
.arg0
= REG_RES
;
1126 fixup
->msk
.dst
= REG_TMP0
+ dst
->Index
;
1127 fixup
->msk
.mask
= mask
;
1128 cp
->tmp_active
|= (1<<dst
->Index
);
1132 fixup
->vec
.opcode
= VP_OPCODE_MOV
;
1133 fixup
->vec
.arg0
= REG_RES
;
1134 fixup
->vec
.dst
= REG_TMP0
+ dst
->Index
;
1135 cp
->tmp_active
|= (1<<dst
->Index
);
1140 assert(dst
->File
== PROGRAM_OUTPUT
);
1141 fixup
->out
.opcode
= (mask
== 0xf) ? OUT
: OUM
;
1142 fixup
->out
.reg
= REG_RES
;
1143 fixup
->out
.file
= dst
->File
;
1144 fixup
->out
.idx
= dst
->Index
;
1145 fixup
->out
.mask
= mask
;
1151 static void cvp_emit_inst( struct compilation
*cp
,
1152 const struct vp_instruction
*inst
)
1154 const struct opcode_info
*info
= &opcode_info
[inst
->Opcode
];
1155 union instruction fixup
;
1159 /* Need to handle SWZ, ARL specially.
1161 switch (info
->type
) {
1163 assert(inst
->Opcode
== VP_OPCODE_ARL
);
1164 reg
[0] = cvp_emit_arg( cp
, &inst
->SrcReg
[0], REG_ARG0
);
1167 cp
->csr
->out
.opcode
= inst
->Opcode
;
1168 cp
->csr
->out
.reg
= reg
[0];
1169 cp
->csr
->out
.file
= PROGRAM_ADDRESS
;
1170 cp
->csr
->out
.idx
= 0;
1173 assert(inst
->Opcode
== VP_OPCODE_SWZ
);
1174 result
= cvp_choose_result( cp
, &inst
->DstReg
, &fixup
, REG_SWZDST_MAX
);
1176 reg
[0] = cvp_emit_arg( cp
, &inst
->SrcReg
[0], REG_ARG0
);
1179 cp
->csr
->swz
.opcode
= VP_OPCODE_SWZ
;
1180 cp
->csr
->swz
.arg0
= reg
[0];
1181 cp
->csr
->swz
.dst
= result
;
1182 cp
->csr
->swz
.neg
= inst
->SrcReg
[0].Negate
;
1183 cp
->csr
->swz
.swz
= inst
->SrcReg
[0].Swizzle
;
1186 if (result
== REG_RES
) {
1187 cp
->csr
->dword
= fixup
.dword
;
1193 case SCL
: /* for now */
1194 result
= cvp_choose_result( cp
, &inst
->DstReg
, &fixup
, REG_MAX
);
1196 reg
[0] = reg
[1] = reg
[2] = 0;
1198 for (i
= 0; i
< info
->nr_args
; i
++)
1199 reg
[i
] = cvp_emit_arg( cp
, &inst
->SrcReg
[i
], REG_ARG0
+ i
);
1202 cp
->csr
->vec
.opcode
= inst
->Opcode
;
1203 cp
->csr
->vec
.arg0
= reg
[0];
1204 cp
->csr
->vec
.arg1
= reg
[1];
1205 cp
->csr
->vec
.arg2
= reg
[2];
1206 cp
->csr
->vec
.dst
= result
;
1209 if (result
== REG_RES
) {
1210 cp
->csr
->dword
= fixup
.dword
;
1224 cvp_release_regs( cp
);
1228 static void compile_vertex_program( struct arb_vp_machine
*m
,
1229 const struct vertex_program
*program
)
1231 struct compilation cp
;
1236 memset(&cp
, 0, sizeof(cp
));
1240 /* Compile instructions:
1242 for (i
= 0; i
< program
->Base
.NumInstructions
; i
++) {
1243 cvp_emit_inst(&cp
, &program
->Instructions
[i
]);
1248 m
->instructions
= m
->store
;
1249 m
->nr_instructions
= cp
.csr
- m
->store
;
1252 /* Print/disassemble:
1255 for (i
= 0; i
< m
->nr_instructions
; i
++) {
1256 union instruction insn
= m
->instructions
[i
];
1257 const struct opcode_info
*info
= &opcode_info
[insn
.vec
.opcode
];
1258 info
->print( insn
, info
);
1260 _mesa_printf("\n\n");
1267 /* ----------------------------------------------------------------------
1270 static void userclip( GLcontext
*ctx
,
1273 GLubyte
*clipormask
,
1274 GLubyte
*clipandmask
)
1278 for (p
= 0; p
< ctx
->Const
.MaxClipPlanes
; p
++)
1279 if (ctx
->Transform
.ClipPlanesEnabled
& (1 << p
)) {
1281 const GLfloat a
= ctx
->Transform
._ClipUserPlane
[p
][0];
1282 const GLfloat b
= ctx
->Transform
._ClipUserPlane
[p
][1];
1283 const GLfloat c
= ctx
->Transform
._ClipUserPlane
[p
][2];
1284 const GLfloat d
= ctx
->Transform
._ClipUserPlane
[p
][3];
1285 GLfloat
*coord
= (GLfloat
*)clip
->data
;
1286 GLuint stride
= clip
->stride
;
1287 GLuint count
= clip
->count
;
1289 for (nr
= 0, i
= 0 ; i
< count
; i
++) {
1290 GLfloat dp
= (coord
[0] * a
+
1297 clipmask
[i
] |= CLIP_USER_BIT
;
1300 STRIDE_F(coord
, stride
);
1304 *clipormask
|= CLIP_USER_BIT
;
1306 *clipandmask
|= CLIP_USER_BIT
;
1314 static GLboolean
do_ndc_cliptest( struct arb_vp_machine
*m
)
1316 GLcontext
*ctx
= m
->ctx
;
1317 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
1318 struct vertex_buffer
*VB
= m
->VB
;
1320 /* Cliptest and perspective divide. Clip functions must clear
1324 m
->andmask
= CLIP_ALL_BITS
;
1326 if (tnl
->NeedNdcCoords
) {
1328 _mesa_clip_tab
[VB
->ClipPtr
->size
]( VB
->ClipPtr
,
1336 _mesa_clip_np_tab
[VB
->ClipPtr
->size
]( VB
->ClipPtr
,
1344 /* All vertices are outside the frustum */
1348 /* Test userclip planes. This contributes to VB->ClipMask.
1350 if (ctx
->Transform
.ClipPlanesEnabled
&& !ctx
->VertexProgram
._Enabled
) {
1362 VB
->ClipAndMask
= m
->andmask
;
1363 VB
->ClipOrMask
= m
->ormask
;
1364 VB
->ClipMask
= m
->clipmask
;
1373 * Execute the given vertex program.
1375 * TODO: Integrate the t_vertex.c code here, to build machine vertices
1376 * directly at this point.
1378 * TODO: Eliminate the VB struct entirely and just use
1379 * struct arb_vertex_machine.
1382 run_arb_vertex_program(GLcontext
*ctx
, struct tnl_pipeline_stage
*stage
)
1384 struct vertex_program
*program
= (ctx
->VertexProgram
._Enabled
?
1385 ctx
->VertexProgram
.Current
:
1387 struct vertex_buffer
*VB
= &TNL_CONTEXT(ctx
)->vb
;
1388 struct arb_vp_machine
*m
= ARB_VP_MACHINE(stage
);
1389 GLuint i
, j
, outputs
= program
->OutputsWritten
;
1391 if (program
->Parameters
) {
1392 _mesa_load_state_parameters(ctx
, program
->Parameters
);
1393 m
->File
[PROGRAM_STATE_VAR
] = program
->Parameters
->ParameterValues
;
1396 /* Run the actual program:
1398 for (m
->vtx_nr
= 0; m
->vtx_nr
< VB
->Count
; m
->vtx_nr
++) {
1399 for (j
= 0; j
< m
->nr_instructions
; j
++) {
1400 union instruction inst
= m
->instructions
[j
];
1401 opcode_info
[inst
.vec
.opcode
].func( m
, inst
);
1405 /* Setup the VB pointers so that the next pipeline stages get
1406 * their data from the right place (the program output arrays).
1408 * TODO: 1) Have tnl use these RESULT values for outputs rather
1409 * than trying to shoe-horn inputs and outputs into one set of
1412 * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1413 * and build machine vertices here.
1415 VB
->ClipPtr
= &m
->attribs
[VERT_RESULT_HPOS
];
1416 VB
->ClipPtr
->count
= VB
->Count
;
1418 if (outputs
& (1<<VERT_RESULT_COL0
)) {
1419 VB
->ColorPtr
[0] = &m
->attribs
[VERT_RESULT_COL0
];
1420 VB
->AttribPtr
[VERT_ATTRIB_COLOR0
] = VB
->ColorPtr
[0];
1423 if (outputs
& (1<<VERT_RESULT_BFC0
)) {
1424 VB
->ColorPtr
[1] = &m
->attribs
[VERT_RESULT_BFC0
];
1427 if (outputs
& (1<<VERT_RESULT_COL1
)) {
1428 VB
->SecondaryColorPtr
[0] = &m
->attribs
[VERT_RESULT_COL1
];
1429 VB
->AttribPtr
[VERT_ATTRIB_COLOR1
] = VB
->SecondaryColorPtr
[0];
1432 if (outputs
& (1<<VERT_RESULT_BFC1
)) {
1433 VB
->SecondaryColorPtr
[1] = &m
->attribs
[VERT_RESULT_BFC1
];
1436 if (outputs
& (1<<VERT_RESULT_FOGC
)) {
1437 VB
->FogCoordPtr
= &m
->attribs
[VERT_RESULT_FOGC
];
1438 VB
->AttribPtr
[VERT_ATTRIB_FOG
] = VB
->FogCoordPtr
;
1441 if (outputs
& (1<<VERT_RESULT_PSIZ
)) {
1442 VB
->PointSizePtr
= &m
->attribs
[VERT_RESULT_PSIZ
];
1443 VB
->AttribPtr
[_TNL_ATTRIB_POINTSIZE
] = &m
->attribs
[VERT_RESULT_PSIZ
];
1446 for (i
= 0; i
< ctx
->Const
.MaxTextureUnits
; i
++) {
1447 if (outputs
& (1<<(VERT_RESULT_TEX0
+i
))) {
1448 VB
->TexCoordPtr
[i
] = &m
->attribs
[VERT_RESULT_TEX0
+ i
];
1449 VB
->AttribPtr
[VERT_ATTRIB_TEX0
+i
] = VB
->TexCoordPtr
[i
];
1454 for (i
= 0; i
< VB
->Count
; i
++) {
1455 printf("Out %d: %f %f %f %f %f %f %f %f\n", i
,
1456 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[0],
1457 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[1],
1458 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[2],
1459 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[3],
1460 VEC_ELT(VB
->ColorPtr
[0], GLfloat
, i
)[0],
1461 VEC_ELT(VB
->ColorPtr
[0], GLfloat
, i
)[1],
1462 VEC_ELT(VB
->ColorPtr
[0], GLfloat
, i
)[2],
1463 VEC_ELT(VB
->ColorPtr
[0], GLfloat
, i
)[3]);
1467 /* Perform NDC and cliptest operations:
1469 return do_ndc_cliptest(m
);
1474 validate_vertex_program( GLcontext
*ctx
, struct tnl_pipeline_stage
*stage
)
1476 struct arb_vp_machine
*m
= ARB_VP_MACHINE(stage
);
1477 struct vertex_program
*program
=
1478 (ctx
->VertexProgram
._Enabled
? ctx
->VertexProgram
.Current
: 0);
1480 #if TNL_FIXED_FUNCTION_PROGRAM
1482 program
= ctx
->_TnlProgram
;
1487 compile_vertex_program( m
, program
);
1489 /* Grab the GL state and put it into registers:
1491 m
->File
[PROGRAM_LOCAL_PARAM
] = program
->Base
.LocalParams
;
1492 m
->File
[PROGRAM_ENV_PARAM
] = ctx
->VertexProgram
.Parameters
;
1493 m
->File
[PROGRAM_STATE_VAR
] = 0;
1504 * Called the first time stage->run is called. In effect, don't
1505 * allocate data until the first time the stage is run.
1507 static GLboolean
init_vertex_program( GLcontext
*ctx
,
1508 struct tnl_pipeline_stage
*stage
)
1510 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
1511 struct vertex_buffer
*VB
= &(tnl
->vb
);
1512 struct arb_vp_machine
*m
;
1513 const GLuint size
= VB
->Size
;
1516 stage
->privatePtr
= MALLOC(sizeof(*m
));
1517 m
= ARB_VP_MACHINE(stage
);
1521 /* arb_vertex_machine struct should subsume the VB:
1526 /* Allocate arrays of vertex output values */
1527 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
1528 _mesa_vector4f_alloc( &m
->attribs
[i
], 0, size
, 32 );
1529 m
->attribs
[i
].size
= 4;
1532 /* a few other misc allocations */
1533 _mesa_vector4f_alloc( &m
->ndcCoords
, 0, size
, 32 );
1534 m
->clipmask
= (GLubyte
*) ALIGN_MALLOC(sizeof(GLubyte
)*size
, 32 );
1537 #if TNL_FIXED_FUNCTION_PROGRAM
1538 _mesa_allow_light_in_model( ctx
, GL_FALSE
);
1549 * Destructor for this pipeline stage.
1551 static void dtr( struct tnl_pipeline_stage
*stage
)
1553 struct arb_vp_machine
*m
= ARB_VP_MACHINE(stage
);
1558 /* free the vertex program result arrays */
1559 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
1560 _mesa_vector4f_free( &m
->attribs
[i
] );
1562 /* free misc arrays */
1563 _mesa_vector4f_free( &m
->ndcCoords
);
1564 ALIGN_FREE( m
->clipmask
);
1567 stage
->privatePtr
= NULL
;
1572 * Public description of this pipeline stage.
1574 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage
=
1577 NULL
, /* private_data */
1578 init_vertex_program
, /* create */
1580 validate_vertex_program
, /* validate */
1581 run_arb_vertex_program
/* run */