2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * \file t_arb_program.c
27 * Compile vertex programs to an intermediate representation.
28 * Execute vertex programs over a buffer of vertices.
29 * \author Keith Whitwell, Brian Paul
37 #include "arbprogparse.h"
40 #include "math/m_matrix.h"
41 #include "t_context.h"
42 #include "t_pipeline.h"
43 #include "t_vb_arbprogram.h"
45 #include "program_instruction.h"
53 union instruction
*csr
;
57 #define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)(stage->privatePtr))
59 #define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])
63 /* Lower precision functions for the EXP, LOG and LIT opcodes. The
64 * LOG2() implementation is probably not accurate enough, and the
65 * attempted optimization for Exp2 is definitely not accurate
66 * enough - it discards all of t's fractional bits!
68 static GLfloat
RoughApproxLog2(GLfloat t
)
73 static GLfloat
RoughApproxExp2(GLfloat t
)
78 fi
.i
= (fi
.i
<< 23) + 0x3f800000;
81 return (GLfloat
) _mesa_pow(2.0, t
);
85 static GLfloat
RoughApproxPower(GLfloat x
, GLfloat y
)
87 if (x
== 0.0 && y
== 0.0)
88 return 1.0; /* spec requires this */
90 return RoughApproxExp2(y
* RoughApproxLog2(x
));
94 /* Higher precision functions for the EX2, LG2 and POW opcodes:
96 static GLfloat
ApproxLog2(GLfloat t
)
98 return (GLfloat
) (LOGF(t
) * 1.442695F
);
101 static GLfloat
ApproxExp2(GLfloat t
)
103 return (GLfloat
) _mesa_pow(2.0, t
);
106 static GLfloat
ApproxPower(GLfloat x
, GLfloat y
)
108 return (GLfloat
) _mesa_pow(x
, y
);
113 * Perform a reduced swizzle:
115 static void do_RSW( struct arb_vp_machine
*m
, union instruction op
)
117 GLfloat
*result
= m
->File
[0][op
.rsw
.dst
];
118 const GLfloat
*arg0
= m
->File
[op
.rsw
.file0
][op
.rsw
.idx0
];
119 const GLuint swz
= op
.rsw
.swz
;
120 const GLuint neg
= op
.rsw
.neg
;
123 /* Need a temporary to be correct in the case where result == arg0.
127 result
[0] = tmp
[GET_SWZ(swz
, 0)];
128 result
[1] = tmp
[GET_SWZ(swz
, 1)];
129 result
[2] = tmp
[GET_SWZ(swz
, 2)];
130 result
[3] = tmp
[GET_SWZ(swz
, 3)];
133 if (neg
& 0x1) result
[0] = -result
[0];
134 if (neg
& 0x2) result
[1] = -result
[1];
135 if (neg
& 0x4) result
[2] = -result
[2];
136 if (neg
& 0x8) result
[3] = -result
[3];
141 * Perform a full swizzle
143 static void do_SWZ( struct arb_vp_machine
*m
, union instruction op
)
145 GLfloat
*result
= m
->File
[0][op
.rsw
.dst
];
146 const GLfloat
*arg0
= m
->File
[op
.rsw
.file0
][op
.rsw
.idx0
];
147 const GLuint swz
= op
.rsw
.swz
;
148 const GLuint neg
= op
.rsw
.neg
;
153 /* Need a temporary to be correct in the case where result == arg0.
157 result
[0] = tmp
[GET_SWZ(swz
, 0)];
158 result
[1] = tmp
[GET_SWZ(swz
, 1)];
159 result
[2] = tmp
[GET_SWZ(swz
, 2)];
160 result
[3] = tmp
[GET_SWZ(swz
, 3)];
163 if (neg
& 0x1) result
[0] = -result
[0];
164 if (neg
& 0x2) result
[1] = -result
[1];
165 if (neg
& 0x4) result
[2] = -result
[2];
166 if (neg
& 0x8) result
[3] = -result
[3];
170 /* Used to implement write masking. To make things easier for the sse
171 * generator I've gone back to a 1 argument version of this function
172 * (dst.msk = arg), rather than the semantically cleaner (dst = SEL
175 * That means this is the only instruction which doesn't write a full
176 * 4 dwords out. This would make such a program harder to analyse,
177 * but it looks like analysis is going to take place on a higher level
180 static void do_MSK( struct arb_vp_machine
*m
, union instruction op
)
182 GLfloat
*dst
= m
->File
[0][op
.msk
.dst
];
183 const GLfloat
*arg
= m
->File
[op
.msk
.file
][op
.msk
.idx
];
185 if (op
.msk
.mask
& WRITEMASK_X
) dst
[0] = arg
[0];
186 if (op
.msk
.mask
& WRITEMASK_Y
) dst
[1] = arg
[1];
187 if (op
.msk
.mask
& WRITEMASK_Z
) dst
[2] = arg
[2];
188 if (op
.msk
.mask
& WRITEMASK_W
) dst
[3] = arg
[3];
192 static void do_PRT( struct arb_vp_machine
*m
, union instruction op
)
194 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
196 _mesa_printf("%d: %f %f %f %f\n", m
->vtx_nr
,
197 arg0
[0], arg0
[1], arg0
[2], arg0
[3]);
202 * The traditional ALU and texturing instructions. All operate on
203 * internal registers and ignore write masks and swizzling issues.
206 static void do_ABS( struct arb_vp_machine
*m
, union instruction op
)
208 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
209 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
211 result
[0] = (arg0
[0] < 0.0) ? -arg0
[0] : arg0
[0];
212 result
[1] = (arg0
[1] < 0.0) ? -arg0
[1] : arg0
[1];
213 result
[2] = (arg0
[2] < 0.0) ? -arg0
[2] : arg0
[2];
214 result
[3] = (arg0
[3] < 0.0) ? -arg0
[3] : arg0
[3];
217 static void do_ADD( struct arb_vp_machine
*m
, union instruction op
)
219 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
220 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
221 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
223 result
[0] = arg0
[0] + arg1
[0];
224 result
[1] = arg0
[1] + arg1
[1];
225 result
[2] = arg0
[2] + arg1
[2];
226 result
[3] = arg0
[3] + arg1
[3];
230 static void do_DP3( struct arb_vp_machine
*m
, union instruction op
)
232 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
233 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
234 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
236 result
[0] = (arg0
[0] * arg1
[0] +
245 static void do_DP4( struct arb_vp_machine
*m
, union instruction op
)
247 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
248 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
249 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
251 result
[0] = (arg0
[0] * arg1
[0] +
259 static void do_DPH( struct arb_vp_machine
*m
, union instruction op
)
261 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
262 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
263 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
265 result
[0] = (arg0
[0] * arg1
[0] +
273 static void do_DST( struct arb_vp_machine
*m
, union instruction op
)
275 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
276 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
277 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
279 /* This should be ok even if result == arg0 or result == arg1.
282 result
[1] = arg0
[1] * arg1
[1];
288 /* Intended to be high precision:
290 static void do_EX2( struct arb_vp_machine
*m
, union instruction op
)
292 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
293 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
295 result
[0] = (GLfloat
)ApproxExp2(arg0
[0]);
300 /* Allowed to be lower precision:
302 static void do_EXP( struct arb_vp_machine
*m
, union instruction op
)
304 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
305 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
306 const GLfloat tmp
= arg0
[0];
307 const GLfloat flr_tmp
= FLOORF(tmp
);
308 const GLfloat frac_tmp
= tmp
- flr_tmp
;
310 result
[0] = LDEXPF(1.0, (int)flr_tmp
);
311 result
[1] = frac_tmp
;
312 result
[2] = RoughApproxExp2(tmp
);
316 static void do_FLR( struct arb_vp_machine
*m
, union instruction op
)
318 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
319 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
321 result
[0] = FLOORF(arg0
[0]);
322 result
[1] = FLOORF(arg0
[1]);
323 result
[2] = FLOORF(arg0
[2]);
324 result
[3] = FLOORF(arg0
[3]);
327 static void do_FRC( struct arb_vp_machine
*m
, union instruction op
)
329 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
330 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
332 result
[0] = arg0
[0] - FLOORF(arg0
[0]);
333 result
[1] = arg0
[1] - FLOORF(arg0
[1]);
334 result
[2] = arg0
[2] - FLOORF(arg0
[2]);
335 result
[3] = arg0
[3] - FLOORF(arg0
[3]);
338 /* High precision log base 2:
340 static void do_LG2( struct arb_vp_machine
*m
, union instruction op
)
342 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
343 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
345 result
[0] = ApproxLog2(arg0
[0]);
351 static void do_LIT( struct arb_vp_machine
*m
, union instruction op
)
353 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
354 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
355 GLfloat tmp
[4]; /* use temp in case arg0 == result register */
360 tmp
[2] = RoughApproxPower(arg0
[1], arg0
[3]);
367 COPY_4V(result
, tmp
);
371 /* Intended to allow a lower precision than required for LG2 above.
373 static void do_LOG( struct arb_vp_machine
*m
, union instruction op
)
375 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
376 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
377 const GLfloat tmp
= FABSF(arg0
[0]);
379 const GLfloat mantissa
= FREXPF(tmp
, &exponent
);
381 result
[0] = (GLfloat
) (exponent
- 1);
382 result
[1] = 2.0 * mantissa
; /* map [.5, 1) -> [1, 2) */
383 result
[2] = exponent
+ LOG2(mantissa
);
387 static void do_MAX( struct arb_vp_machine
*m
, union instruction op
)
389 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
390 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
391 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
393 result
[0] = (arg0
[0] > arg1
[0]) ? arg0
[0] : arg1
[0];
394 result
[1] = (arg0
[1] > arg1
[1]) ? arg0
[1] : arg1
[1];
395 result
[2] = (arg0
[2] > arg1
[2]) ? arg0
[2] : arg1
[2];
396 result
[3] = (arg0
[3] > arg1
[3]) ? arg0
[3] : arg1
[3];
400 static void do_MIN( struct arb_vp_machine
*m
, union instruction op
)
402 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
403 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
404 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
406 result
[0] = (arg0
[0] < arg1
[0]) ? arg0
[0] : arg1
[0];
407 result
[1] = (arg0
[1] < arg1
[1]) ? arg0
[1] : arg1
[1];
408 result
[2] = (arg0
[2] < arg1
[2]) ? arg0
[2] : arg1
[2];
409 result
[3] = (arg0
[3] < arg1
[3]) ? arg0
[3] : arg1
[3];
412 static void do_MOV( struct arb_vp_machine
*m
, union instruction op
)
414 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
415 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
423 static void do_MUL( struct arb_vp_machine
*m
, union instruction op
)
425 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
426 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
427 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
429 result
[0] = arg0
[0] * arg1
[0];
430 result
[1] = arg0
[1] * arg1
[1];
431 result
[2] = arg0
[2] * arg1
[2];
432 result
[3] = arg0
[3] * arg1
[3];
436 /* Intended to be "high" precision
438 static void do_POW( struct arb_vp_machine
*m
, union instruction op
)
440 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
441 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
442 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
444 result
[0] = (GLfloat
)ApproxPower(arg0
[0], arg1
[0]);
448 static void do_REL( struct arb_vp_machine
*m
, union instruction op
)
450 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
451 const GLuint idx
= (op
.alu
.idx0
+ (GLint
)m
->File
[0][REG_ADDR
][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS
-1);
452 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][idx
];
460 static void do_RCP( struct arb_vp_machine
*m
, union instruction op
)
462 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
463 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
465 result
[0] = 1.0F
/ arg0
[0];
469 static void do_RSQ( struct arb_vp_machine
*m
, union instruction op
)
471 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
472 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
474 result
[0] = INV_SQRTF(FABSF(arg0
[0]));
479 static void do_SGE( struct arb_vp_machine
*m
, union instruction op
)
481 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
482 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
483 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
485 result
[0] = (arg0
[0] >= arg1
[0]) ? 1.0F
: 0.0F
;
486 result
[1] = (arg0
[1] >= arg1
[1]) ? 1.0F
: 0.0F
;
487 result
[2] = (arg0
[2] >= arg1
[2]) ? 1.0F
: 0.0F
;
488 result
[3] = (arg0
[3] >= arg1
[3]) ? 1.0F
: 0.0F
;
492 static void do_SLT( struct arb_vp_machine
*m
, union instruction op
)
494 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
495 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
496 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
498 result
[0] = (arg0
[0] < arg1
[0]) ? 1.0F
: 0.0F
;
499 result
[1] = (arg0
[1] < arg1
[1]) ? 1.0F
: 0.0F
;
500 result
[2] = (arg0
[2] < arg1
[2]) ? 1.0F
: 0.0F
;
501 result
[3] = (arg0
[3] < arg1
[3]) ? 1.0F
: 0.0F
;
504 static void do_SUB( struct arb_vp_machine
*m
, union instruction op
)
506 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
507 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
508 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
510 result
[0] = arg0
[0] - arg1
[0];
511 result
[1] = arg0
[1] - arg1
[1];
512 result
[2] = arg0
[2] - arg1
[2];
513 result
[3] = arg0
[3] - arg1
[3];
517 static void do_XPD( struct arb_vp_machine
*m
, union instruction op
)
519 GLfloat
*result
= m
->File
[0][op
.alu
.dst
];
520 const GLfloat
*arg0
= m
->File
[op
.alu
.file0
][op
.alu
.idx0
];
521 const GLfloat
*arg1
= m
->File
[op
.alu
.file1
][op
.alu
.idx1
];
524 tmp
[0] = arg0
[1] * arg1
[2] - arg0
[2] * arg1
[1];
525 tmp
[1] = arg0
[2] * arg1
[0] - arg0
[0] * arg1
[2];
526 tmp
[2] = arg0
[0] * arg1
[1] - arg0
[1] * arg1
[0];
528 /* Need a temporary to be correct in the case where result == arg0
536 static void do_NOP( struct arb_vp_machine
*m
, union instruction op
)
540 /* Some useful debugging functions:
542 static void print_mask( GLuint mask
)
545 if (mask
& WRITEMASK_X
) _mesa_printf("x");
546 if (mask
& WRITEMASK_Y
) _mesa_printf("y");
547 if (mask
& WRITEMASK_Z
) _mesa_printf("z");
548 if (mask
& WRITEMASK_W
) _mesa_printf("w");
551 static void print_reg( GLuint file
, GLuint reg
)
553 static const char *reg_file
[] = {
563 else if (reg
>= REG_ARG0
&& reg
<= REG_ARG1
)
564 _mesa_printf("ARG%d", reg
- REG_ARG0
);
565 else if (reg
>= REG_TMP0
&& reg
<= REG_TMP11
)
566 _mesa_printf("TMP%d", reg
- REG_TMP0
);
567 else if (reg
>= REG_IN0
&& reg
<= REG_IN31
)
568 _mesa_printf("IN%d", reg
- REG_IN0
);
569 else if (reg
>= REG_OUT0
&& reg
<= REG_OUT14
)
570 _mesa_printf("OUT%d", reg
- REG_OUT0
);
571 else if (reg
== REG_ADDR
)
572 _mesa_printf("ADDR");
573 else if (reg
== REG_ID
)
576 _mesa_printf("REG%d", reg
);
579 _mesa_printf("%s:%d", reg_file
[file
], reg
);
583 static void print_RSW( union instruction op
)
585 GLuint swz
= op
.rsw
.swz
;
586 GLuint neg
= op
.rsw
.neg
;
589 _mesa_printf("RSW ");
590 print_reg(0, op
.rsw
.dst
);
592 print_reg(op
.rsw
.file0
, op
.rsw
.idx0
);
594 for (i
= 0; i
< 4; i
++, swz
>>= 3) {
595 const char *cswz
= "xyzw01";
598 _mesa_printf("%c", cswz
[swz
&0x7]);
603 static void print_SWZ( union instruction op
)
605 GLuint swz
= op
.rsw
.swz
;
606 GLuint neg
= op
.rsw
.neg
;
609 _mesa_printf("SWZ ");
610 print_reg(0, op
.rsw
.dst
);
612 print_reg(op
.rsw
.file0
, op
.rsw
.idx0
);
614 for (i
= 0; i
< 4; i
++, swz
>>= 3) {
615 const char *cswz
= "xyzw01";
618 _mesa_printf("%c", cswz
[swz
&0x7]);
624 static void print_ALU( union instruction op
)
626 _mesa_printf("%s ", _mesa_opcode_string((enum prog_opcode
) op
.alu
.opcode
));
627 print_reg(0, op
.alu
.dst
);
629 print_reg(op
.alu
.file0
, op
.alu
.idx0
);
630 if (_mesa_num_inst_src_regs((enum prog_opcode
) op
.alu
.opcode
) > 1) {
632 print_reg(op
.alu
.file1
, op
.alu
.idx1
);
637 static void print_MSK( union instruction op
)
639 _mesa_printf("MSK ");
640 print_reg(0, op
.msk
.dst
);
641 print_mask(op
.msk
.mask
);
643 print_reg(op
.msk
.file
, op
.msk
.idx
);
647 static void print_NOP( union instruction op
)
652 _tnl_disassem_vba_insn( union instruction op
)
654 switch (op
.alu
.opcode
) {
710 _mesa_problem(NULL
, "Bad opcode in _tnl_disassem_vba_insn()");
715 static void (* const opcode_func
[MAX_OPCODE
+3])(struct arb_vp_machine
*, union instruction
) =
792 static union instruction
*cvp_next_instruction( struct compilation
*cp
)
794 union instruction
*op
= cp
->csr
++;
795 _mesa_bzero(op
, sizeof(*op
));
799 static struct reg
cvp_make_reg( GLuint file
, GLuint idx
)
807 static struct reg
cvp_emit_rel( struct compilation
*cp
,
811 union instruction
*op
= cvp_next_instruction(cp
);
812 op
->alu
.opcode
= REL
;
813 op
->alu
.file0
= reg
.file
;
814 op
->alu
.idx0
= reg
.idx
;
815 op
->alu
.dst
= tmpreg
.idx
;
820 static struct reg
cvp_load_reg( struct compilation
*cp
,
826 struct reg tmpreg
= cvp_make_reg(FILE_REG
, tmpidx
);
830 case PROGRAM_TEMPORARY
:
831 return cvp_make_reg(FILE_REG
, REG_TMP0
+ index
);
834 return cvp_make_reg(FILE_REG
, REG_IN0
+ index
);
837 return cvp_make_reg(FILE_REG
, REG_OUT0
+ index
);
839 /* These two aren't populated by the parser?
841 case PROGRAM_LOCAL_PARAM
:
842 reg
= cvp_make_reg(FILE_LOCAL_PARAM
, index
);
844 return cvp_emit_rel(cp
, reg
, tmpreg
);
848 case PROGRAM_ENV_PARAM
:
849 reg
= cvp_make_reg(FILE_ENV_PARAM
, index
);
851 return cvp_emit_rel(cp
, reg
, tmpreg
);
855 case PROGRAM_STATE_VAR
:
856 reg
= cvp_make_reg(FILE_STATE_PARAM
, index
);
858 return cvp_emit_rel(cp
, reg
, tmpreg
);
864 case PROGRAM_WRITE_ONLY
:
865 case PROGRAM_ADDRESS
:
867 _mesa_problem(NULL
, "Invalid register file %d in cvp_load_reg()");
869 return tmpreg
; /* can't happen */
873 static struct reg
cvp_emit_arg( struct compilation
*cp
,
874 const struct prog_src_register
*src
,
877 struct reg reg
= cvp_load_reg( cp
, src
->File
, src
->Index
, src
->RelAddr
, arg
);
878 union instruction rsw
, noop
;
880 /* Emit any necessary swizzling.
882 _mesa_bzero(&rsw
, sizeof(rsw
));
883 rsw
.rsw
.neg
= src
->NegateBase
? WRITEMASK_XYZW
: 0;
885 /* we're expecting 2-bit swizzles below... */
886 #if 1 /* XXX THESE ASSERTIONS CURRENTLY FAIL DURING GLEAN TESTS! */
887 /* hopefully no longer happens? */
888 ASSERT(GET_SWZ(src
->Swizzle
, 0) < 4);
889 ASSERT(GET_SWZ(src
->Swizzle
, 1) < 4);
890 ASSERT(GET_SWZ(src
->Swizzle
, 2) < 4);
891 ASSERT(GET_SWZ(src
->Swizzle
, 3) < 4);
893 rsw
.rsw
.swz
= src
->Swizzle
;
895 _mesa_bzero(&noop
, sizeof(noop
));
897 noop
.rsw
.swz
= SWIZZLE_NOOP
;
899 if (_mesa_memcmp(&rsw
, &noop
, sizeof(rsw
)) !=0) {
900 union instruction
*op
= cvp_next_instruction(cp
);
901 struct reg rsw_reg
= cvp_make_reg(FILE_REG
, REG_ARG0
+ arg
);
903 op
->rsw
.opcode
= RSW
;
904 op
->rsw
.file0
= reg
.file
;
905 op
->rsw
.idx0
= reg
.idx
;
906 op
->rsw
.dst
= rsw_reg
.idx
;
913 static GLuint
cvp_choose_result( struct compilation
*cp
,
914 const struct prog_dst_register
*dst
,
915 union instruction
*fixup
)
917 GLuint mask
= dst
->WriteMask
;
921 case PROGRAM_TEMPORARY
:
922 idx
= REG_TMP0
+ dst
->Index
;
925 idx
= REG_OUT0
+ dst
->Index
;
929 return REG_RES
; /* can't happen */
932 /* Optimization: When writing (with a writemask) to an undefined
933 * value for the first time, the writemask may be ignored.
935 if (mask
!= WRITEMASK_XYZW
&& (cp
->reg_active
& (1 << idx
))) {
936 fixup
->msk
.opcode
= MSK
;
937 fixup
->msk
.dst
= idx
;
938 fixup
->msk
.file
= FILE_REG
;
939 fixup
->msk
.idx
= REG_RES
;
940 fixup
->msk
.mask
= mask
;
941 cp
->reg_active
|= 1 << idx
;
945 _mesa_bzero(fixup
, sizeof(*fixup
));
946 cp
->reg_active
|= 1 << idx
;
952 static void cvp_emit_inst( struct compilation
*cp
,
953 const struct prog_instruction
*inst
)
955 union instruction
*op
;
956 union instruction fixup
;
958 GLuint result
, nr_args
, i
;
960 /* Need to handle SWZ, ARL specially.
962 switch (inst
->Opcode
) {
963 /* Split into mul and add:
966 result
= cvp_choose_result( cp
, &inst
->DstReg
, &fixup
);
967 for (i
= 0; i
< 3; i
++)
968 reg
[i
] = cvp_emit_arg( cp
, &inst
->SrcReg
[i
], REG_ARG0
+i
);
970 op
= cvp_next_instruction(cp
);
971 op
->alu
.opcode
= OPCODE_MUL
;
972 op
->alu
.file0
= reg
[0].file
;
973 op
->alu
.idx0
= reg
[0].idx
;
974 op
->alu
.file1
= reg
[1].file
;
975 op
->alu
.idx1
= reg
[1].idx
;
976 op
->alu
.dst
= REG_ARG0
;
978 op
= cvp_next_instruction(cp
);
979 op
->alu
.opcode
= OPCODE_ADD
;
980 op
->alu
.file0
= FILE_REG
;
981 op
->alu
.idx0
= REG_ARG0
;
982 op
->alu
.file1
= reg
[2].file
;
983 op
->alu
.idx1
= reg
[2].idx
;
984 op
->alu
.dst
= result
;
986 if (result
== REG_RES
) {
987 op
= cvp_next_instruction(cp
);
993 reg
[0] = cvp_emit_arg( cp
, &inst
->SrcReg
[0], REG_ARG0
);
995 op
= cvp_next_instruction(cp
);
996 op
->alu
.opcode
= OPCODE_FLR
;
997 op
->alu
.dst
= REG_ADDR
;
998 op
->alu
.file0
= reg
[0].file
;
999 op
->alu
.idx0
= reg
[0].idx
;
1006 result
= cvp_choose_result( cp
, &inst
->DstReg
, &fixup
);
1007 reg
[0] = cvp_load_reg( cp
, inst
->SrcReg
[0].File
,
1008 inst
->SrcReg
[0].Index
, inst
->SrcReg
[0].RelAddr
, REG_ARG0
);
1009 op
= cvp_next_instruction(cp
);
1010 op
->rsw
.opcode
= inst
->Opcode
;
1011 op
->rsw
.file0
= reg
[0].file
;
1012 op
->rsw
.idx0
= reg
[0].idx
;
1013 op
->rsw
.dst
= result
;
1014 op
->rsw
.swz
= inst
->SrcReg
[0].Swizzle
;
1015 op
->rsw
.neg
= inst
->SrcReg
[0].NegateBase
;
1017 if (result
== REG_RES
) {
1018 op
= cvp_next_instruction(cp
);
1024 result
= cvp_choose_result( cp
, &inst
->DstReg
, &fixup
);
1025 nr_args
= _mesa_num_inst_src_regs(inst
->Opcode
);
1026 for (i
= 0; i
< nr_args
; i
++)
1027 reg
[i
] = cvp_emit_arg( cp
, &inst
->SrcReg
[i
], REG_ARG0
+ i
);
1029 op
= cvp_next_instruction(cp
);
1030 op
->alu
.opcode
= inst
->Opcode
;
1031 op
->alu
.file0
= reg
[0].file
;
1032 op
->alu
.idx0
= reg
[0].idx
;
1033 op
->alu
.file1
= reg
[1].file
;
1034 op
->alu
.idx1
= reg
[1].idx
;
1035 op
->alu
.dst
= result
;
1037 if (result
== REG_RES
) {
1038 op
= cvp_next_instruction(cp
);
1045 static void free_tnl_data( struct gl_vertex_program
*program
)
1047 struct tnl_compiled_program
*p
= (struct tnl_compiled_program
*) program
->TnlData
;
1048 if (p
->compiled_func
)
1049 _mesa_free((void *)p
->compiled_func
);
1051 program
->TnlData
= NULL
;
1054 static void compile_vertex_program( struct gl_vertex_program
*program
,
1055 GLboolean try_codegen
)
1057 struct compilation cp
;
1058 struct tnl_compiled_program
*p
= CALLOC_STRUCT(tnl_compiled_program
);
1061 if (program
->TnlData
)
1062 free_tnl_data( program
);
1064 program
->TnlData
= p
;
1066 /* Initialize cp. Note that ctx and VB aren't used in compilation
1067 * so we don't have to worry about statechanges:
1069 _mesa_memset(&cp
, 0, sizeof(cp
));
1070 cp
.csr
= p
->instructions
;
1072 /* Compile instructions:
1074 for (i
= 0; i
< program
->Base
.NumInstructions
; i
++) {
1075 cvp_emit_inst(&cp
, &program
->Base
.Instructions
[i
]);
1080 p
->nr_instructions
= cp
.csr
- p
->instructions
;
1082 /* Print/disassemble:
1085 for (i
= 0; i
< p
->nr_instructions
; i
++) {
1086 _tnl_disassem_vba_insn(p
->instructions
[i
]);
1088 _mesa_printf("\n\n");
1093 _tnl_sse_codegen_vertex_program(p
);
1101 /* ----------------------------------------------------------------------
1104 static void userclip( GLcontext
*ctx
,
1107 GLubyte
*clipormask
,
1108 GLubyte
*clipandmask
)
1112 for (p
= 0; p
< ctx
->Const
.MaxClipPlanes
; p
++) {
1113 if (ctx
->Transform
.ClipPlanesEnabled
& (1 << p
)) {
1115 const GLfloat a
= ctx
->Transform
._ClipUserPlane
[p
][0];
1116 const GLfloat b
= ctx
->Transform
._ClipUserPlane
[p
][1];
1117 const GLfloat c
= ctx
->Transform
._ClipUserPlane
[p
][2];
1118 const GLfloat d
= ctx
->Transform
._ClipUserPlane
[p
][3];
1119 GLfloat
*coord
= (GLfloat
*)clip
->data
;
1120 GLuint stride
= clip
->stride
;
1121 GLuint count
= clip
->count
;
1123 for (nr
= 0, i
= 0 ; i
< count
; i
++) {
1124 GLfloat dp
= (coord
[0] * a
+
1131 clipmask
[i
] |= CLIP_USER_BIT
;
1134 STRIDE_F(coord
, stride
);
1138 *clipormask
|= CLIP_USER_BIT
;
1140 *clipandmask
|= CLIP_USER_BIT
;
1150 do_ndc_cliptest(GLcontext
*ctx
, struct arb_vp_machine
*m
)
1152 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
1153 struct vertex_buffer
*VB
= m
->VB
;
1155 /* Cliptest and perspective divide. Clip functions must clear
1159 m
->andmask
= CLIP_FRUSTUM_BITS
;
1161 if (tnl
->NeedNdcCoords
) {
1163 _mesa_clip_tab
[VB
->ClipPtr
->size
]( VB
->ClipPtr
,
1171 _mesa_clip_np_tab
[VB
->ClipPtr
->size
]( VB
->ClipPtr
,
1179 /* All vertices are outside the frustum */
1183 /* Test userclip planes. This contributes to VB->ClipMask.
1185 if (ctx
->Transform
.ClipPlanesEnabled
&& (!ctx
->VertexProgram
._Enabled
||
1186 ctx
->VertexProgram
.Current
->IsPositionInvariant
)) {
1198 VB
->ClipAndMask
= m
->andmask
;
1199 VB
->ClipOrMask
= m
->ormask
;
1200 VB
->ClipMask
= m
->clipmask
;
1206 static INLINE
void call_func( struct tnl_compiled_program
*p
,
1207 struct arb_vp_machine
*m
)
1209 p
->compiled_func(m
);
1213 * Execute the given vertex program.
1215 * TODO: Integrate the t_vertex.c code here, to build machine vertices
1216 * directly at this point.
1218 * TODO: Eliminate the VB struct entirely and just use
1219 * struct arb_vertex_machine.
1222 run_arb_vertex_program(GLcontext
*ctx
, struct tnl_pipeline_stage
*stage
)
1224 const struct gl_vertex_program
*program
;
1225 struct vertex_buffer
*VB
= &TNL_CONTEXT(ctx
)->vb
;
1226 struct arb_vp_machine
*m
= ARB_VP_MACHINE(stage
);
1227 struct tnl_compiled_program
*p
;
1231 if (ctx
->ShaderObjects
._VertexShaderPresent
)
1234 program
= ctx
->VertexProgram
._Enabled
? ctx
->VertexProgram
.Current
: NULL
;
1235 if (!program
&& ctx
->_MaintainTnlProgram
) {
1236 program
= ctx
->_TnlProgram
;
1238 if (!program
|| program
->IsNVProgram
)
1241 if (program
->Base
.Parameters
) {
1242 _mesa_load_state_parameters(ctx
, program
->Base
.Parameters
);
1245 p
= (struct tnl_compiled_program
*)program
->TnlData
;
1249 m
->nr_inputs
= m
->nr_outputs
= 0;
1251 for (i
= 0; i
< VERT_ATTRIB_MAX
; i
++) {
1252 if (program
->Base
.InputsRead
& (1<<i
) ||
1253 (i
== VERT_ATTRIB_POS
&& program
->IsPositionInvariant
)) {
1254 GLuint j
= m
->nr_inputs
++;
1255 m
->input
[j
].idx
= i
;
1256 m
->input
[j
].data
= (GLfloat
*)m
->VB
->AttribPtr
[i
]->data
;
1257 m
->input
[j
].stride
= m
->VB
->AttribPtr
[i
]->stride
;
1258 m
->input
[j
].size
= m
->VB
->AttribPtr
[i
]->size
;
1259 ASSIGN_4V(m
->File
[0][REG_IN0
+ i
], 0, 0, 0, 1);
1263 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
1264 if (program
->Base
.OutputsWritten
& (1 << i
) ||
1265 (i
== VERT_RESULT_HPOS
&& program
->IsPositionInvariant
)) {
1266 GLuint j
= m
->nr_outputs
++;
1267 m
->output
[j
].idx
= i
;
1268 m
->output
[j
].data
= (GLfloat
*)m
->attribs
[i
].data
;
1273 /* Run the actual program:
1275 for (m
->vtx_nr
= 0; m
->vtx_nr
< VB
->Count
; m
->vtx_nr
++) {
1276 for (j
= 0; j
< m
->nr_inputs
; j
++) {
1277 GLuint idx
= REG_IN0
+ m
->input
[j
].idx
;
1278 switch (m
->input
[j
].size
) {
1279 case 4: m
->File
[0][idx
][3] = m
->input
[j
].data
[3];
1280 case 3: m
->File
[0][idx
][2] = m
->input
[j
].data
[2];
1281 case 2: m
->File
[0][idx
][1] = m
->input
[j
].data
[1];
1282 case 1: m
->File
[0][idx
][0] = m
->input
[j
].data
[0];
1285 STRIDE_F(m
->input
[j
].data
, m
->input
[j
].stride
);
1289 if (p
->compiled_func
) {
1293 for (j
= 0; j
< p
->nr_instructions
; j
++) {
1294 union instruction inst
= p
->instructions
[j
];
1295 opcode_func
[inst
.alu
.opcode
]( m
, inst
);
1299 /* If the program is position invariant, multiply the input position
1300 * by the MVP matrix and store in the vertex position result register.
1302 if (program
->IsPositionInvariant
) {
1303 TRANSFORM_POINT( m
->File
[0][REG_OUT0
+0],
1304 ctx
->_ModelProjectMatrix
.m
,
1305 m
->File
[0][REG_IN0
+0]);
1308 for (j
= 0; j
< m
->nr_outputs
; j
++) {
1309 GLuint idx
= REG_OUT0
+ m
->output
[j
].idx
;
1310 m
->output
[j
].data
[0] = m
->File
[0][idx
][0];
1311 m
->output
[j
].data
[1] = m
->File
[0][idx
][1];
1312 m
->output
[j
].data
[2] = m
->File
[0][idx
][2];
1313 m
->output
[j
].data
[3] = m
->File
[0][idx
][3];
1314 m
->output
[j
].data
+= 4;
1319 /* Setup the VB pointers so that the next pipeline stages get
1320 * their data from the right place (the program output arrays).
1322 * TODO: 1) Have tnl use these RESULT values for outputs rather
1323 * than trying to shoe-horn inputs and outputs into one set of
1326 * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1327 * and build machine vertices here.
1329 VB
->ClipPtr
= &m
->attribs
[VERT_RESULT_HPOS
];
1330 VB
->ClipPtr
->count
= VB
->Count
;
1332 /* XXX There seems to be confusion between using the VERT_ATTRIB_*
1333 * values vs _TNL_ATTRIB_* tokens here:
1335 outputs
= program
->Base
.OutputsWritten
;
1336 if (program
->IsPositionInvariant
)
1337 outputs
|= (1<<VERT_RESULT_HPOS
);
1339 if (outputs
& (1<<VERT_RESULT_COL0
)) {
1341 VB
->AttribPtr
[VERT_ATTRIB_COLOR0
] = &m
->attribs
[VERT_RESULT_COL0
];
1344 if (outputs
& (1<<VERT_RESULT_BFC0
)) {
1345 VB
->ColorPtr
[1] = &m
->attribs
[VERT_RESULT_BFC0
];
1348 if (outputs
& (1<<VERT_RESULT_COL1
)) {
1349 VB
->SecondaryColorPtr
[0] =
1350 VB
->AttribPtr
[VERT_ATTRIB_COLOR1
] = &m
->attribs
[VERT_RESULT_COL1
];
1353 if (outputs
& (1<<VERT_RESULT_BFC1
)) {
1354 VB
->SecondaryColorPtr
[1] = &m
->attribs
[VERT_RESULT_BFC1
];
1357 if (outputs
& (1<<VERT_RESULT_FOGC
)) {
1359 VB
->AttribPtr
[VERT_ATTRIB_FOG
] = &m
->attribs
[VERT_RESULT_FOGC
];
1362 if (outputs
& (1<<VERT_RESULT_PSIZ
)) {
1363 VB
->AttribPtr
[_TNL_ATTRIB_POINTSIZE
] = &m
->attribs
[VERT_RESULT_PSIZ
];
1366 for (i
= 0; i
< ctx
->Const
.MaxTextureCoordUnits
; i
++) {
1367 if (outputs
& (1<<(VERT_RESULT_TEX0
+i
))) {
1368 VB
->TexCoordPtr
[i
] =
1369 VB
->AttribPtr
[VERT_ATTRIB_TEX0
+i
] = &m
->attribs
[VERT_RESULT_TEX0
+ i
];
1374 for (i
= 0; i
< VB
->Count
; i
++) {
1375 printf("Out %d: %f %f %f %f %f %f %f %f\n", i
,
1376 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[0],
1377 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[1],
1378 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[2],
1379 VEC_ELT(VB
->ClipPtr
, GLfloat
, i
)[3],
1380 VEC_ELT(VB
->AttribPtr
[VERT_ATTRIB_TEX0
], GLfloat
, i
)[0],
1381 VEC_ELT(VB
->AttribPtr
[VERT_ATTRIB_TEX0
], GLfloat
, i
)[1],
1382 VEC_ELT(VB
->AttribPtr
[VERT_ATTRIB_TEX0
], GLfloat
, i
)[2],
1383 VEC_ELT(VB
->AttribPtr
[VERT_ATTRIB_TEX0
], GLfloat
, i
)[3]);
1387 /* Perform NDC and cliptest operations:
1389 return do_ndc_cliptest(ctx
, m
);
1394 validate_vertex_program( GLcontext
*ctx
, struct tnl_pipeline_stage
*stage
)
1396 struct arb_vp_machine
*m
= ARB_VP_MACHINE(stage
);
1397 struct gl_vertex_program
*program
;
1399 if (ctx
->ShaderObjects
._VertexShaderPresent
)
1402 program
= (ctx
->VertexProgram
._Enabled
? ctx
->VertexProgram
.Current
: 0);
1403 if (!program
&& ctx
->_MaintainTnlProgram
) {
1404 program
= ctx
->_TnlProgram
;
1408 if (!program
->TnlData
)
1409 compile_vertex_program( program
, m
->try_codegen
);
1411 /* Grab the current GL state and put it into registers:
1413 m
->File
[FILE_LOCAL_PARAM
] = program
->Base
.LocalParams
;
1414 m
->File
[FILE_ENV_PARAM
] = ctx
->VertexProgram
.Parameters
;
1415 /* GL_NV_vertex_programs can't reference GL state */
1416 if (program
->Base
.Parameters
)
1417 m
->File
[FILE_STATE_PARAM
] = program
->Base
.Parameters
->ParameterValues
;
1419 m
->File
[FILE_STATE_PARAM
] = NULL
;
1430 * Called the first time stage->run is called. In effect, don't
1431 * allocate data until the first time the stage is run.
1433 static GLboolean
init_vertex_program( GLcontext
*ctx
,
1434 struct tnl_pipeline_stage
*stage
)
1436 TNLcontext
*tnl
= TNL_CONTEXT(ctx
);
1437 struct vertex_buffer
*VB
= &(tnl
->vb
);
1438 struct arb_vp_machine
*m
;
1439 const GLuint size
= VB
->Size
;
1442 stage
->privatePtr
= _mesa_calloc(sizeof(*m
));
1443 m
= ARB_VP_MACHINE(stage
);
1447 /* arb_vertex_machine struct should subsume the VB:
1451 m
->File
[0] = (GLfloat(*)[4])ALIGN_MALLOC(REG_MAX
* sizeof(GLfloat
) * 4, 16);
1453 /* Initialize regs where necessary:
1455 ASSIGN_4V(m
->File
[0][REG_ID
], 0, 0, 0, 1);
1456 ASSIGN_4V(m
->File
[0][REG_ONES
], 1, 1, 1, 1);
1457 ASSIGN_4V(m
->File
[0][REG_SWZ
], 1, -1, 0, 0);
1458 ASSIGN_4V(m
->File
[0][REG_NEG
], -1, -1, -1, -1);
1459 ASSIGN_4V(m
->File
[0][REG_LIT
], 1, 0, 0, 1);
1460 ASSIGN_4V(m
->File
[0][REG_LIT2
], 1, .5, .2, 1); /* debug value */
1462 if (_mesa_getenv("MESA_EXPERIMENTAL"))
1463 m
->try_codegen
= GL_TRUE
;
1465 /* Allocate arrays of vertex output values */
1466 for (i
= 0; i
< VERT_RESULT_MAX
; i
++) {
1467 _mesa_vector4f_alloc( &m
->attribs
[i
], 0, size
, 32 );
1468 m
->attribs
[i
].size
= 4;
1471 /* a few other misc allocations */
1472 _mesa_vector4f_alloc( &m
->ndcCoords
, 0, size
, 32 );
1473 m
->clipmask
= (GLubyte
*) ALIGN_MALLOC(sizeof(GLubyte
)*size
, 32 );
1475 if (ctx
->_MaintainTnlProgram
)
1476 _mesa_allow_light_in_model( ctx
, GL_FALSE
);
1478 m
->fpucntl_rnd_neg
= RND_NEG_FPU
; /* const value */
1479 m
->fpucntl_restore
= RESTORE_FPU
; /* const value */
1488 * Destructor for this pipeline stage.
1490 static void dtr( struct tnl_pipeline_stage
*stage
)
1492 struct arb_vp_machine
*m
= ARB_VP_MACHINE(stage
);
1497 /* free the vertex program result arrays */
1498 for (i
= 0; i
< VERT_RESULT_MAX
; i
++)
1499 _mesa_vector4f_free( &m
->attribs
[i
] );
1501 /* free misc arrays */
1502 _mesa_vector4f_free( &m
->ndcCoords
);
1503 ALIGN_FREE( m
->clipmask
);
1504 ALIGN_FREE( m
->File
[0] );
1507 stage
->privatePtr
= NULL
;
1512 * Public description of this pipeline stage.
1514 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage
=
1516 "arb-vertex-program",
1517 NULL
, /* private_data */
1518 init_vertex_program
, /* create */
1520 validate_vertex_program
, /* validate */
1521 run_arb_vertex_program
/* run */
1526 * Called via ctx->Driver.ProgramStringNotify() after a new vertex program
1527 * string has been parsed.
1530 _tnl_program_string(GLcontext
*ctx
, GLenum target
, struct gl_program
*program
)
1532 if (target
== GL_VERTEX_PROGRAM_ARB
) {
1533 /* free any existing tnl data hanging off the program */
1534 struct gl_vertex_program
*vprog
= (struct gl_vertex_program
*) program
;
1535 if (vprog
->TnlData
) {
1536 free_tnl_data(vprog
);