fixes for C++ warnings/errors
[mesa.git] / src / mesa / tnl / t_vb_arbprogram.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.1
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file t_vb_arbprogram.c
27 * Compile vertex programs to an intermediate representation.
28 * Execute vertex programs over a buffer of vertices.
29 * \author Keith Whitwell, Brian Paul
30 */
31
32 #include "glheader.h"
33 #include "context.h"
34 #include "imports.h"
35 #include "macros.h"
36 #include "mtypes.h"
37 #include "arbprogparse.h"
38 #include "light.h"
39 #include "program.h"
40 #include "math/m_matrix.h"
41 #include "t_context.h"
42 #include "t_pipeline.h"
43 #include "t_vb_arbprogram.h"
44 #include "tnl.h"
45 #include "program_instruction.h"
46
47
48 #define DISASSEM 0
49
50
51 struct compilation {
52 GLuint reg_active;
53 union instruction *csr;
54 };
55
56
/* Fetch a pipeline stage's private data as an arb_vp_machine pointer.
 * The argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) may be passed.
 */
#define ARB_VP_MACHINE(stage) ((struct arb_vp_machine *)((stage)->privatePtr))

/* Broadcast x[0] into x[1], x[2] and x[3]; used by the opcodes that
 * produce a single scalar replicated across all four components.
 */
#define PUFF(x) ((x)[1] = (x)[2] = (x)[3] = (x)[0])
60
61
62
63 /* Lower precision functions for the EXP, LOG and LIT opcodes. The
64 * LOG2() implementation is probably not accurate enough, and the
65 * attempted optimization for Exp2 is definitely not accurate
66 * enough - it discards all of t's fractional bits!
67 */
68 static GLfloat RoughApproxLog2(GLfloat t)
69 {
70 return LOG2(t);
71 }
72
73 static GLfloat RoughApproxExp2(GLfloat t)
74 {
75 #if 0
76 fi_type fi;
77 fi.i = (GLint) t;
78 fi.i = (fi.i << 23) + 0x3f800000;
79 return fi.f;
80 #else
81 return (GLfloat) _mesa_pow(2.0, t);
82 #endif
83 }
84
85 static GLfloat RoughApproxPower(GLfloat x, GLfloat y)
86 {
87 if (x == 0.0 && y == 0.0)
88 return 1.0; /* spec requires this */
89 else
90 return RoughApproxExp2(y * RoughApproxLog2(x));
91 }
92
93
94 /* Higher precision functions for the EX2, LG2 and POW opcodes:
95 */
96 static GLfloat ApproxLog2(GLfloat t)
97 {
98 return (GLfloat) (LOGF(t) * 1.442695F);
99 }
100
101 static GLfloat ApproxExp2(GLfloat t)
102 {
103 return (GLfloat) _mesa_pow(2.0, t);
104 }
105
106 static GLfloat ApproxPower(GLfloat x, GLfloat y)
107 {
108 return (GLfloat) _mesa_pow(x, y);
109 }
110
111
112 /**
113 * Perform a reduced swizzle:
114 */
115 static void do_RSW( struct arb_vp_machine *m, union instruction op )
116 {
117 GLfloat *result = m->File[0][op.rsw.dst];
118 const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
119 const GLuint swz = op.rsw.swz;
120 const GLuint neg = op.rsw.neg;
121 GLfloat tmp[4];
122
123 /* Need a temporary to be correct in the case where result == arg0.
124 */
125 COPY_4V(tmp, arg0);
126
127 result[0] = tmp[GET_SWZ(swz, 0)];
128 result[1] = tmp[GET_SWZ(swz, 1)];
129 result[2] = tmp[GET_SWZ(swz, 2)];
130 result[3] = tmp[GET_SWZ(swz, 3)];
131
132 if (neg) {
133 if (neg & 0x1) result[0] = -result[0];
134 if (neg & 0x2) result[1] = -result[1];
135 if (neg & 0x4) result[2] = -result[2];
136 if (neg & 0x8) result[3] = -result[3];
137 }
138 }
139
140 /**
141 * Perform a full swizzle
142 */
143 static void do_SWZ( struct arb_vp_machine *m, union instruction op )
144 {
145 GLfloat *result = m->File[0][op.rsw.dst];
146 const GLfloat *arg0 = m->File[op.rsw.file0][op.rsw.idx0];
147 const GLuint swz = op.rsw.swz;
148 const GLuint neg = op.rsw.neg;
149 GLfloat tmp[6];
150 tmp[4] = 0.0;
151 tmp[5] = 1.0;
152
153 /* Need a temporary to be correct in the case where result == arg0.
154 */
155 COPY_4V(tmp, arg0);
156
157 result[0] = tmp[GET_SWZ(swz, 0)];
158 result[1] = tmp[GET_SWZ(swz, 1)];
159 result[2] = tmp[GET_SWZ(swz, 2)];
160 result[3] = tmp[GET_SWZ(swz, 3)];
161
162 if (neg) {
163 if (neg & 0x1) result[0] = -result[0];
164 if (neg & 0x2) result[1] = -result[1];
165 if (neg & 0x4) result[2] = -result[2];
166 if (neg & 0x8) result[3] = -result[3];
167 }
168 }
169
170 /* Used to implement write masking. To make things easier for the sse
171 * generator I've gone back to a 1 argument version of this function
172 * (dst.msk = arg), rather than the semantically cleaner (dst = SEL
173 * arg0, arg1, msk)
174 *
175 * That means this is the only instruction which doesn't write a full
176 * 4 dwords out. This would make such a program harder to analyse,
177 * but it looks like analysis is going to take place on a higher level
178 * anyway.
179 */
180 static void do_MSK( struct arb_vp_machine *m, union instruction op )
181 {
182 GLfloat *dst = m->File[0][op.msk.dst];
183 const GLfloat *arg = m->File[op.msk.file][op.msk.idx];
184
185 if (op.msk.mask & WRITEMASK_X) dst[0] = arg[0];
186 if (op.msk.mask & WRITEMASK_Y) dst[1] = arg[1];
187 if (op.msk.mask & WRITEMASK_Z) dst[2] = arg[2];
188 if (op.msk.mask & WRITEMASK_W) dst[3] = arg[3];
189 }
190
191
192 static void do_PRT( struct arb_vp_machine *m, union instruction op )
193 {
194 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
195
196 _mesa_printf("%d: %f %f %f %f\n", m->vtx_nr,
197 arg0[0], arg0[1], arg0[2], arg0[3]);
198 }
199
200
201 /**
202 * The traditional ALU and texturing instructions. All operate on
203 * internal registers and ignore write masks and swizzling issues.
204 */
205
206 static void do_ABS( struct arb_vp_machine *m, union instruction op )
207 {
208 GLfloat *result = m->File[0][op.alu.dst];
209 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
210
211 result[0] = (arg0[0] < 0.0) ? -arg0[0] : arg0[0];
212 result[1] = (arg0[1] < 0.0) ? -arg0[1] : arg0[1];
213 result[2] = (arg0[2] < 0.0) ? -arg0[2] : arg0[2];
214 result[3] = (arg0[3] < 0.0) ? -arg0[3] : arg0[3];
215 }
216
217 static void do_ADD( struct arb_vp_machine *m, union instruction op )
218 {
219 GLfloat *result = m->File[0][op.alu.dst];
220 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
221 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
222
223 result[0] = arg0[0] + arg1[0];
224 result[1] = arg0[1] + arg1[1];
225 result[2] = arg0[2] + arg1[2];
226 result[3] = arg0[3] + arg1[3];
227 }
228
229
230 static void do_DP3( struct arb_vp_machine *m, union instruction op )
231 {
232 GLfloat *result = m->File[0][op.alu.dst];
233 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
234 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
235
236 result[0] = (arg0[0] * arg1[0] +
237 arg0[1] * arg1[1] +
238 arg0[2] * arg1[2]);
239
240 PUFF(result);
241 }
242
243
244
245 static void do_DP4( struct arb_vp_machine *m, union instruction op )
246 {
247 GLfloat *result = m->File[0][op.alu.dst];
248 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
249 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
250
251 result[0] = (arg0[0] * arg1[0] +
252 arg0[1] * arg1[1] +
253 arg0[2] * arg1[2] +
254 arg0[3] * arg1[3]);
255
256 PUFF(result);
257 }
258
259 static void do_DPH( struct arb_vp_machine *m, union instruction op )
260 {
261 GLfloat *result = m->File[0][op.alu.dst];
262 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
263 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
264
265 result[0] = (arg0[0] * arg1[0] +
266 arg0[1] * arg1[1] +
267 arg0[2] * arg1[2] +
268 1.0 * arg1[3]);
269
270 PUFF(result);
271 }
272
273 static void do_DST( struct arb_vp_machine *m, union instruction op )
274 {
275 GLfloat *result = m->File[0][op.alu.dst];
276 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
277 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
278
279 /* This should be ok even if result == arg0 or result == arg1.
280 */
281 result[0] = 1.0F;
282 result[1] = arg0[1] * arg1[1];
283 result[2] = arg0[2];
284 result[3] = arg1[3];
285 }
286
287
288 /* Intended to be high precision:
289 */
290 static void do_EX2( struct arb_vp_machine *m, union instruction op )
291 {
292 GLfloat *result = m->File[0][op.alu.dst];
293 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
294
295 result[0] = (GLfloat)ApproxExp2(arg0[0]);
296 PUFF(result);
297 }
298
299
300 /* Allowed to be lower precision:
301 */
302 static void do_EXP( struct arb_vp_machine *m, union instruction op )
303 {
304 GLfloat *result = m->File[0][op.alu.dst];
305 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
306 const GLfloat tmp = arg0[0];
307 const GLfloat flr_tmp = FLOORF(tmp);
308 const GLfloat frac_tmp = tmp - flr_tmp;
309
310 result[0] = LDEXPF(1.0, (int)flr_tmp);
311 result[1] = frac_tmp;
312 result[2] = RoughApproxExp2(tmp);
313 result[3] = 1.0F;
314 }
315
316 static void do_FLR( struct arb_vp_machine *m, union instruction op )
317 {
318 GLfloat *result = m->File[0][op.alu.dst];
319 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
320
321 result[0] = FLOORF(arg0[0]);
322 result[1] = FLOORF(arg0[1]);
323 result[2] = FLOORF(arg0[2]);
324 result[3] = FLOORF(arg0[3]);
325 }
326
327 static void do_FRC( struct arb_vp_machine *m, union instruction op )
328 {
329 GLfloat *result = m->File[0][op.alu.dst];
330 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
331
332 result[0] = arg0[0] - FLOORF(arg0[0]);
333 result[1] = arg0[1] - FLOORF(arg0[1]);
334 result[2] = arg0[2] - FLOORF(arg0[2]);
335 result[3] = arg0[3] - FLOORF(arg0[3]);
336 }
337
338 /* High precision log base 2:
339 */
340 static void do_LG2( struct arb_vp_machine *m, union instruction op )
341 {
342 GLfloat *result = m->File[0][op.alu.dst];
343 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
344
345 result[0] = ApproxLog2(arg0[0]);
346 PUFF(result);
347 }
348
349
350
351 static void do_LIT( struct arb_vp_machine *m, union instruction op )
352 {
353 GLfloat *result = m->File[0][op.alu.dst];
354 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
355 GLfloat tmp[4]; /* use temp in case arg0 == result register */
356
357 tmp[0] = 1.0;
358 tmp[1] = arg0[0];
359 if (arg0[0] > 0.0) {
360 tmp[2] = RoughApproxPower(arg0[1], arg0[3]);
361 }
362 else {
363 tmp[2] = 0.0;
364 }
365 tmp[3] = 1.0;
366
367 COPY_4V(result, tmp);
368 }
369
370
371 /* Intended to allow a lower precision than required for LG2 above.
372 */
373 static void do_LOG( struct arb_vp_machine *m, union instruction op )
374 {
375 GLfloat *result = m->File[0][op.alu.dst];
376 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
377 const GLfloat tmp = FABSF(arg0[0]);
378 int exponent;
379 const GLfloat mantissa = FREXPF(tmp, &exponent);
380
381 result[0] = (GLfloat) (exponent - 1);
382 result[1] = 2.0 * mantissa; /* map [.5, 1) -> [1, 2) */
383 result[2] = exponent + LOG2(mantissa);
384 result[3] = 1.0;
385 }
386
387 static void do_MAX( struct arb_vp_machine *m, union instruction op )
388 {
389 GLfloat *result = m->File[0][op.alu.dst];
390 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
391 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
392
393 result[0] = (arg0[0] > arg1[0]) ? arg0[0] : arg1[0];
394 result[1] = (arg0[1] > arg1[1]) ? arg0[1] : arg1[1];
395 result[2] = (arg0[2] > arg1[2]) ? arg0[2] : arg1[2];
396 result[3] = (arg0[3] > arg1[3]) ? arg0[3] : arg1[3];
397 }
398
399
400 static void do_MIN( struct arb_vp_machine *m, union instruction op )
401 {
402 GLfloat *result = m->File[0][op.alu.dst];
403 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
404 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
405
406 result[0] = (arg0[0] < arg1[0]) ? arg0[0] : arg1[0];
407 result[1] = (arg0[1] < arg1[1]) ? arg0[1] : arg1[1];
408 result[2] = (arg0[2] < arg1[2]) ? arg0[2] : arg1[2];
409 result[3] = (arg0[3] < arg1[3]) ? arg0[3] : arg1[3];
410 }
411
412 static void do_MOV( struct arb_vp_machine *m, union instruction op )
413 {
414 GLfloat *result = m->File[0][op.alu.dst];
415 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
416
417 result[0] = arg0[0];
418 result[1] = arg0[1];
419 result[2] = arg0[2];
420 result[3] = arg0[3];
421 }
422
423 static void do_MUL( struct arb_vp_machine *m, union instruction op )
424 {
425 GLfloat *result = m->File[0][op.alu.dst];
426 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
427 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
428
429 result[0] = arg0[0] * arg1[0];
430 result[1] = arg0[1] * arg1[1];
431 result[2] = arg0[2] * arg1[2];
432 result[3] = arg0[3] * arg1[3];
433 }
434
435
436 /* Intended to be "high" precision
437 */
438 static void do_POW( struct arb_vp_machine *m, union instruction op )
439 {
440 GLfloat *result = m->File[0][op.alu.dst];
441 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
442 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
443
444 result[0] = (GLfloat)ApproxPower(arg0[0], arg1[0]);
445 PUFF(result);
446 }
447
448 static void do_REL( struct arb_vp_machine *m, union instruction op )
449 {
450 GLfloat *result = m->File[0][op.alu.dst];
451 const GLuint idx = (op.alu.idx0 + (GLint)m->File[0][REG_ADDR][0]) & (MAX_NV_VERTEX_PROGRAM_PARAMS-1);
452 const GLfloat *arg0 = m->File[op.alu.file0][idx];
453
454 result[0] = arg0[0];
455 result[1] = arg0[1];
456 result[2] = arg0[2];
457 result[3] = arg0[3];
458 }
459
460 static void do_RCP( struct arb_vp_machine *m, union instruction op )
461 {
462 GLfloat *result = m->File[0][op.alu.dst];
463 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
464
465 result[0] = 1.0F / arg0[0];
466 PUFF(result);
467 }
468
469 static void do_RSQ( struct arb_vp_machine *m, union instruction op )
470 {
471 GLfloat *result = m->File[0][op.alu.dst];
472 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
473
474 result[0] = INV_SQRTF(FABSF(arg0[0]));
475 PUFF(result);
476 }
477
478
479 static void do_SGE( struct arb_vp_machine *m, union instruction op )
480 {
481 GLfloat *result = m->File[0][op.alu.dst];
482 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
483 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
484
485 result[0] = (arg0[0] >= arg1[0]) ? 1.0F : 0.0F;
486 result[1] = (arg0[1] >= arg1[1]) ? 1.0F : 0.0F;
487 result[2] = (arg0[2] >= arg1[2]) ? 1.0F : 0.0F;
488 result[3] = (arg0[3] >= arg1[3]) ? 1.0F : 0.0F;
489 }
490
491
492 static void do_SLT( struct arb_vp_machine *m, union instruction op )
493 {
494 GLfloat *result = m->File[0][op.alu.dst];
495 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
496 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
497
498 result[0] = (arg0[0] < arg1[0]) ? 1.0F : 0.0F;
499 result[1] = (arg0[1] < arg1[1]) ? 1.0F : 0.0F;
500 result[2] = (arg0[2] < arg1[2]) ? 1.0F : 0.0F;
501 result[3] = (arg0[3] < arg1[3]) ? 1.0F : 0.0F;
502 }
503
504 static void do_SUB( struct arb_vp_machine *m, union instruction op )
505 {
506 GLfloat *result = m->File[0][op.alu.dst];
507 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
508 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
509
510 result[0] = arg0[0] - arg1[0];
511 result[1] = arg0[1] - arg1[1];
512 result[2] = arg0[2] - arg1[2];
513 result[3] = arg0[3] - arg1[3];
514 }
515
516
517 static void do_XPD( struct arb_vp_machine *m, union instruction op )
518 {
519 GLfloat *result = m->File[0][op.alu.dst];
520 const GLfloat *arg0 = m->File[op.alu.file0][op.alu.idx0];
521 const GLfloat *arg1 = m->File[op.alu.file1][op.alu.idx1];
522 GLfloat tmp[3];
523
524 tmp[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1];
525 tmp[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2];
526 tmp[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0];
527
528 /* Need a temporary to be correct in the case where result == arg0
529 * or result == arg1.
530 */
531 result[0] = tmp[0];
532 result[1] = tmp[1];
533 result[2] = tmp[2];
534 }
535
536 static void do_NOP( struct arb_vp_machine *m, union instruction op )
537 {
538 }
539
540 /* Some useful debugging functions:
541 */
542 static void print_mask( GLuint mask )
543 {
544 _mesa_printf(".");
545 if (mask & WRITEMASK_X) _mesa_printf("x");
546 if (mask & WRITEMASK_Y) _mesa_printf("y");
547 if (mask & WRITEMASK_Z) _mesa_printf("z");
548 if (mask & WRITEMASK_W) _mesa_printf("w");
549 }
550
551 static void print_reg( GLuint file, GLuint reg )
552 {
553 static const char *reg_file[] = {
554 "REG",
555 "LOCAL_PARAM",
556 "ENV_PARAM",
557 "STATE_VAR",
558 };
559
560 if (file == 0) {
561 if (reg == REG_RES)
562 _mesa_printf("RES");
563 else if (reg >= REG_ARG0 && reg <= REG_ARG1)
564 _mesa_printf("ARG%d", reg - REG_ARG0);
565 else if (reg >= REG_TMP0 && reg <= REG_TMP11)
566 _mesa_printf("TMP%d", reg - REG_TMP0);
567 else if (reg >= REG_IN0 && reg <= REG_IN31)
568 _mesa_printf("IN%d", reg - REG_IN0);
569 else if (reg >= REG_OUT0 && reg <= REG_OUT14)
570 _mesa_printf("OUT%d", reg - REG_OUT0);
571 else if (reg == REG_ADDR)
572 _mesa_printf("ADDR");
573 else if (reg == REG_ID)
574 _mesa_printf("ID");
575 else
576 _mesa_printf("REG%d", reg);
577 }
578 else
579 _mesa_printf("%s:%d", reg_file[file], reg);
580 }
581
582
583 static void print_RSW( union instruction op )
584 {
585 GLuint swz = op.rsw.swz;
586 GLuint neg = op.rsw.neg;
587 GLuint i;
588
589 _mesa_printf("RSW ");
590 print_reg(0, op.rsw.dst);
591 _mesa_printf(", ");
592 print_reg(op.rsw.file0, op.rsw.idx0);
593 _mesa_printf(".");
594 for (i = 0; i < 4; i++, swz >>= 3) {
595 const char *cswz = "xyzw01";
596 if (neg & (1<<i))
597 _mesa_printf("-");
598 _mesa_printf("%c", cswz[swz&0x7]);
599 }
600 _mesa_printf("\n");
601 }
602
603 static void print_SWZ( union instruction op )
604 {
605 GLuint swz = op.rsw.swz;
606 GLuint neg = op.rsw.neg;
607 GLuint i;
608
609 _mesa_printf("SWZ ");
610 print_reg(0, op.rsw.dst);
611 _mesa_printf(", ");
612 print_reg(op.rsw.file0, op.rsw.idx0);
613 _mesa_printf(".");
614 for (i = 0; i < 4; i++, swz >>= 3) {
615 const char *cswz = "xyzw01";
616 if (neg & (1<<i))
617 _mesa_printf("-");
618 _mesa_printf("%c", cswz[swz&0x7]);
619 }
620 _mesa_printf("\n");
621 }
622
623
624 static void print_ALU( union instruction op )
625 {
626 _mesa_printf("%s ", _mesa_opcode_string((enum prog_opcode) op.alu.opcode));
627 print_reg(0, op.alu.dst);
628 _mesa_printf(", ");
629 print_reg(op.alu.file0, op.alu.idx0);
630 if (_mesa_num_inst_src_regs((enum prog_opcode) op.alu.opcode) > 1) {
631 _mesa_printf(", ");
632 print_reg(op.alu.file1, op.alu.idx1);
633 }
634 _mesa_printf("\n");
635 }
636
637 static void print_MSK( union instruction op )
638 {
639 _mesa_printf("MSK ");
640 print_reg(0, op.msk.dst);
641 print_mask(op.msk.mask);
642 _mesa_printf(", ");
643 print_reg(op.msk.file, op.msk.idx);
644 _mesa_printf("\n");
645 }
646
647 static void print_NOP( union instruction op )
648 {
649 }
650
651 void
652 _tnl_disassem_vba_insn( union instruction op )
653 {
654 switch (op.alu.opcode) {
655 case OPCODE_ABS:
656 case OPCODE_ADD:
657 case OPCODE_DP3:
658 case OPCODE_DP4:
659 case OPCODE_DPH:
660 case OPCODE_DST:
661 case OPCODE_EX2:
662 case OPCODE_EXP:
663 case OPCODE_FLR:
664 case OPCODE_FRC:
665 case OPCODE_LG2:
666 case OPCODE_LIT:
667 case OPCODE_LOG:
668 case OPCODE_MAX:
669 case OPCODE_MIN:
670 case OPCODE_MOV:
671 case OPCODE_MUL:
672 case OPCODE_POW:
673 case OPCODE_PRINT:
674 case OPCODE_RCP:
675 case OPCODE_RSQ:
676 case OPCODE_SGE:
677 case OPCODE_SLT:
678 case OPCODE_SUB:
679 case OPCODE_XPD:
680 print_ALU(op);
681 break;
682 case OPCODE_ARA:
683 case OPCODE_ARL:
684 case OPCODE_ARL_NV:
685 case OPCODE_ARR:
686 case OPCODE_BRA:
687 case OPCODE_CAL:
688 case OPCODE_END:
689 case OPCODE_MAD:
690 case OPCODE_POPA:
691 case OPCODE_PUSHA:
692 case OPCODE_RCC:
693 case OPCODE_RET:
694 case OPCODE_SSG:
695 print_NOP(op);
696 break;
697 case OPCODE_SWZ:
698 print_SWZ(op);
699 break;
700 case RSW:
701 print_RSW(op);
702 break;
703 case MSK:
704 print_MSK(op);
705 break;
706 case REL:
707 print_ALU(op);
708 break;
709 default:
710 _mesa_problem(NULL, "Bad opcode in _tnl_disassem_vba_insn()");
711 }
712 }
713
714
715 static void (* const opcode_func[MAX_OPCODE+3])(struct arb_vp_machine *, union instruction) =
716 {
717 do_ABS,
718 do_ADD,
719 do_NOP,/*ARA*/
720 do_NOP,/*ARL*/
721 do_NOP,/*ARL_NV*/
722 do_NOP,/*ARR*/
723 do_NOP,/*BRA*/
724 do_NOP,/*CAL*/
725 do_NOP,/*CMP*/
726 do_NOP,/*COS*/
727 do_NOP,/*DDX*/
728 do_NOP,/*DDY*/
729 do_DP3,
730 do_DP4,
731 do_DPH,
732 do_DST,
733 do_NOP,
734 do_EX2,
735 do_EXP,
736 do_FLR,
737 do_FRC,
738 do_NOP,/*KIL*/
739 do_NOP,/*KIL_NV*/
740 do_LG2,
741 do_LIT,
742 do_LOG,
743 do_NOP,/*LRP*/
744 do_NOP,/*MAD*/
745 do_MAX,
746 do_MIN,
747 do_MOV,
748 do_MUL,
749 do_NOP,/*PK2H*/
750 do_NOP,/*PK2US*/
751 do_NOP,/*PK4B*/
752 do_NOP,/*PK4UB*/
753 do_POW,
754 do_NOP,/*POPA*/
755 do_PRT,
756 do_NOP,/*PUSHA*/
757 do_NOP,/*RCC*/
758 do_RCP,/*RCP*/
759 do_NOP,/*RET*/
760 do_NOP,/*RFL*/
761 do_RSQ,
762 do_NOP,/*SCS*/
763 do_NOP,/*SEQ*/
764 do_NOP,/*SFL*/
765 do_SGE,
766 do_NOP,/*SGT*/
767 do_NOP,/*SIN*/
768 do_NOP,/*SLE*/
769 do_SLT,
770 do_NOP,/*SNE*/
771 do_NOP,/*SSG*/
772 do_NOP,/*STR*/
773 do_SUB,
774 do_SWZ,/*SWZ*/
775 do_NOP,/*TEX*/
776 do_NOP,/*TXB*/
777 do_NOP,/*TXD*/
778 do_NOP,/*TXL*/
779 do_NOP,/*TXP*/
780 do_NOP,/*TXP_NV*/
781 do_NOP,/*UP2H*/
782 do_NOP,/*UP2US*/
783 do_NOP,/*UP4B*/
784 do_NOP,/*UP4UB*/
785 do_NOP,/*X2D*/
786 do_XPD,
787 do_RSW,
788 do_MSK,
789 do_REL,
790 };
791
792 static union instruction *cvp_next_instruction( struct compilation *cp )
793 {
794 union instruction *op = cp->csr++;
795 _mesa_bzero(op, sizeof(*op));
796 return op;
797 }
798
799 static struct reg cvp_make_reg( GLuint file, GLuint idx )
800 {
801 struct reg reg;
802 reg.file = file;
803 reg.idx = idx;
804 return reg;
805 }
806
807 static struct reg cvp_emit_rel( struct compilation *cp,
808 struct reg reg,
809 struct reg tmpreg )
810 {
811 union instruction *op = cvp_next_instruction(cp);
812 op->alu.opcode = REL;
813 op->alu.file0 = reg.file;
814 op->alu.idx0 = reg.idx;
815 op->alu.dst = tmpreg.idx;
816 return tmpreg;
817 }
818
819
820 static struct reg cvp_load_reg( struct compilation *cp,
821 GLuint file,
822 GLuint index,
823 GLuint rel,
824 GLuint tmpidx )
825 {
826 struct reg tmpreg = cvp_make_reg(FILE_REG, tmpidx);
827 struct reg reg;
828
829 switch (file) {
830 case PROGRAM_TEMPORARY:
831 return cvp_make_reg(FILE_REG, REG_TMP0 + index);
832
833 case PROGRAM_INPUT:
834 return cvp_make_reg(FILE_REG, REG_IN0 + index);
835
836 case PROGRAM_OUTPUT:
837 return cvp_make_reg(FILE_REG, REG_OUT0 + index);
838
839 /* These two aren't populated by the parser?
840 */
841 case PROGRAM_LOCAL_PARAM:
842 reg = cvp_make_reg(FILE_LOCAL_PARAM, index);
843 if (rel)
844 return cvp_emit_rel(cp, reg, tmpreg);
845 else
846 return reg;
847
848 case PROGRAM_ENV_PARAM:
849 reg = cvp_make_reg(FILE_ENV_PARAM, index);
850 if (rel)
851 return cvp_emit_rel(cp, reg, tmpreg);
852 else
853 return reg;
854
855 case PROGRAM_STATE_VAR:
856 reg = cvp_make_reg(FILE_STATE_PARAM, index);
857 if (rel)
858 return cvp_emit_rel(cp, reg, tmpreg);
859 else
860 return reg;
861
862 /* Invalid values:
863 */
864 case PROGRAM_WRITE_ONLY:
865 case PROGRAM_ADDRESS:
866 default:
867 _mesa_problem(NULL, "Invalid register file %d in cvp_load_reg()");
868 assert(0);
869 return tmpreg; /* can't happen */
870 }
871 }
872
873 static struct reg cvp_emit_arg( struct compilation *cp,
874 const struct prog_src_register *src,
875 GLuint arg )
876 {
877 struct reg reg = cvp_load_reg( cp, src->File, src->Index, src->RelAddr, arg );
878 union instruction rsw, noop;
879
880 /* Emit any necessary swizzling.
881 */
882 _mesa_bzero(&rsw, sizeof(rsw));
883 rsw.rsw.neg = src->NegateBase ? WRITEMASK_XYZW : 0;
884
885 /* we're expecting 2-bit swizzles below... */
886 #if 1 /* XXX THESE ASSERTIONS CURRENTLY FAIL DURING GLEAN TESTS! */
887 /* hopefully no longer happens? */
888 ASSERT(GET_SWZ(src->Swizzle, 0) < 4);
889 ASSERT(GET_SWZ(src->Swizzle, 1) < 4);
890 ASSERT(GET_SWZ(src->Swizzle, 2) < 4);
891 ASSERT(GET_SWZ(src->Swizzle, 3) < 4);
892 #endif
893 rsw.rsw.swz = src->Swizzle;
894
895 _mesa_bzero(&noop, sizeof(noop));
896 noop.rsw.neg = 0;
897 noop.rsw.swz = SWIZZLE_NOOP;
898
899 if (_mesa_memcmp(&rsw, &noop, sizeof(rsw)) !=0) {
900 union instruction *op = cvp_next_instruction(cp);
901 struct reg rsw_reg = cvp_make_reg(FILE_REG, REG_ARG0 + arg);
902 *op = rsw;
903 op->rsw.opcode = RSW;
904 op->rsw.file0 = reg.file;
905 op->rsw.idx0 = reg.idx;
906 op->rsw.dst = rsw_reg.idx;
907 return rsw_reg;
908 }
909 else
910 return reg;
911 }
912
913 static GLuint cvp_choose_result( struct compilation *cp,
914 const struct prog_dst_register *dst,
915 union instruction *fixup )
916 {
917 GLuint mask = dst->WriteMask;
918 GLuint idx;
919
920 switch (dst->File) {
921 case PROGRAM_TEMPORARY:
922 idx = REG_TMP0 + dst->Index;
923 break;
924 case PROGRAM_OUTPUT:
925 idx = REG_OUT0 + dst->Index;
926 break;
927 default:
928 assert(0);
929 return REG_RES; /* can't happen */
930 }
931
932 /* Optimization: When writing (with a writemask) to an undefined
933 * value for the first time, the writemask may be ignored.
934 */
935 if (mask != WRITEMASK_XYZW && (cp->reg_active & (1 << idx))) {
936 fixup->msk.opcode = MSK;
937 fixup->msk.dst = idx;
938 fixup->msk.file = FILE_REG;
939 fixup->msk.idx = REG_RES;
940 fixup->msk.mask = mask;
941 cp->reg_active |= 1 << idx;
942 return REG_RES;
943 }
944 else {
945 _mesa_bzero(fixup, sizeof(*fixup));
946 cp->reg_active |= 1 << idx;
947 return idx;
948 }
949 }
950
951
952 static void cvp_emit_inst( struct compilation *cp,
953 const struct prog_instruction *inst )
954 {
955 union instruction *op;
956 union instruction fixup;
957 struct reg reg[3];
958 GLuint result, nr_args, i;
959
960 /* Need to handle SWZ, ARL specially.
961 */
962 switch (inst->Opcode) {
963 /* Split into mul and add:
964 */
965 case OPCODE_MAD:
966 result = cvp_choose_result( cp, &inst->DstReg, &fixup );
967 for (i = 0; i < 3; i++)
968 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0+i );
969
970 op = cvp_next_instruction(cp);
971 op->alu.opcode = OPCODE_MUL;
972 op->alu.file0 = reg[0].file;
973 op->alu.idx0 = reg[0].idx;
974 op->alu.file1 = reg[1].file;
975 op->alu.idx1 = reg[1].idx;
976 op->alu.dst = REG_ARG0;
977
978 op = cvp_next_instruction(cp);
979 op->alu.opcode = OPCODE_ADD;
980 op->alu.file0 = FILE_REG;
981 op->alu.idx0 = REG_ARG0;
982 op->alu.file1 = reg[2].file;
983 op->alu.idx1 = reg[2].idx;
984 op->alu.dst = result;
985
986 if (result == REG_RES) {
987 op = cvp_next_instruction(cp);
988 *op = fixup;
989 }
990 break;
991
992 case OPCODE_ARL:
993 reg[0] = cvp_emit_arg( cp, &inst->SrcReg[0], REG_ARG0 );
994
995 op = cvp_next_instruction(cp);
996 op->alu.opcode = OPCODE_FLR;
997 op->alu.dst = REG_ADDR;
998 op->alu.file0 = reg[0].file;
999 op->alu.idx0 = reg[0].idx;
1000 break;
1001
1002 case OPCODE_END:
1003 break;
1004
1005 case OPCODE_SWZ:
1006 result = cvp_choose_result( cp, &inst->DstReg, &fixup );
1007 reg[0] = cvp_load_reg( cp, inst->SrcReg[0].File,
1008 inst->SrcReg[0].Index, inst->SrcReg[0].RelAddr, REG_ARG0 );
1009 op = cvp_next_instruction(cp);
1010 op->rsw.opcode = inst->Opcode;
1011 op->rsw.file0 = reg[0].file;
1012 op->rsw.idx0 = reg[0].idx;
1013 op->rsw.dst = result;
1014 op->rsw.swz = inst->SrcReg[0].Swizzle;
1015 op->rsw.neg = inst->SrcReg[0].NegateBase;
1016
1017 if (result == REG_RES) {
1018 op = cvp_next_instruction(cp);
1019 *op = fixup;
1020 }
1021 break;
1022
1023 default:
1024 result = cvp_choose_result( cp, &inst->DstReg, &fixup );
1025 nr_args = _mesa_num_inst_src_regs(inst->Opcode);
1026 for (i = 0; i < nr_args; i++)
1027 reg[i] = cvp_emit_arg( cp, &inst->SrcReg[i], REG_ARG0 + i );
1028
1029 op = cvp_next_instruction(cp);
1030 op->alu.opcode = inst->Opcode;
1031 op->alu.file0 = reg[0].file;
1032 op->alu.idx0 = reg[0].idx;
1033 op->alu.file1 = reg[1].file;
1034 op->alu.idx1 = reg[1].idx;
1035 op->alu.dst = result;
1036
1037 if (result == REG_RES) {
1038 op = cvp_next_instruction(cp);
1039 *op = fixup;
1040 }
1041 break;
1042 }
1043 }
1044
1045 static void free_tnl_data( struct gl_vertex_program *program )
1046 {
1047 struct tnl_compiled_program *p = (struct tnl_compiled_program *) program->TnlData;
1048 if (p->compiled_func)
1049 _mesa_free((void *)p->compiled_func);
1050 _mesa_free(p);
1051 program->TnlData = NULL;
1052 }
1053
1054 static void compile_vertex_program( struct gl_vertex_program *program,
1055 GLboolean try_codegen )
1056 {
1057 struct compilation cp;
1058 struct tnl_compiled_program *p = CALLOC_STRUCT(tnl_compiled_program);
1059 GLint i;
1060
1061 if (program->TnlData)
1062 free_tnl_data( program );
1063
1064 program->TnlData = p;
1065
1066 /* Initialize cp. Note that ctx and VB aren't used in compilation
1067 * so we don't have to worry about statechanges:
1068 */
1069 _mesa_memset(&cp, 0, sizeof(cp));
1070 cp.csr = p->instructions;
1071
1072 /* Compile instructions:
1073 */
1074 for (i = 0; i < program->Base.NumInstructions; i++) {
1075 cvp_emit_inst(&cp, &program->Base.Instructions[i]);
1076 }
1077
1078 /* Finish up:
1079 */
1080 p->nr_instructions = cp.csr - p->instructions;
1081
1082 /* Print/disassemble:
1083 */
1084 if (DISASSEM) {
1085 for (i = 0; i < p->nr_instructions; i++) {
1086 _tnl_disassem_vba_insn(p->instructions[i]);
1087 }
1088 _mesa_printf("\n\n");
1089 }
1090
1091 #ifdef USE_SSE_ASM
1092 if (try_codegen)
1093 _tnl_sse_codegen_vertex_program(p);
1094 #endif
1095
1096 }
1097
1098
1099
1100
1101 /* ----------------------------------------------------------------------
1102 * Execution
1103 */
1104 static void userclip( GLcontext *ctx,
1105 GLvector4f *clip,
1106 GLubyte *clipmask,
1107 GLubyte *clipormask,
1108 GLubyte *clipandmask )
1109 {
1110 GLuint p;
1111
1112 for (p = 0; p < ctx->Const.MaxClipPlanes; p++) {
1113 if (ctx->Transform.ClipPlanesEnabled & (1 << p)) {
1114 GLuint nr, i;
1115 const GLfloat a = ctx->Transform._ClipUserPlane[p][0];
1116 const GLfloat b = ctx->Transform._ClipUserPlane[p][1];
1117 const GLfloat c = ctx->Transform._ClipUserPlane[p][2];
1118 const GLfloat d = ctx->Transform._ClipUserPlane[p][3];
1119 GLfloat *coord = (GLfloat *)clip->data;
1120 GLuint stride = clip->stride;
1121 GLuint count = clip->count;
1122
1123 for (nr = 0, i = 0 ; i < count ; i++) {
1124 GLfloat dp = (coord[0] * a +
1125 coord[1] * b +
1126 coord[2] * c +
1127 coord[3] * d);
1128
1129 if (dp < 0) {
1130 nr++;
1131 clipmask[i] |= CLIP_USER_BIT;
1132 }
1133
1134 STRIDE_F(coord, stride);
1135 }
1136
1137 if (nr > 0) {
1138 *clipormask |= CLIP_USER_BIT;
1139 if (nr == count) {
1140 *clipandmask |= CLIP_USER_BIT;
1141 return;
1142 }
1143 }
1144 }
1145 }
1146 }
1147
1148
1149 static GLboolean
1150 do_ndc_cliptest(GLcontext *ctx, struct arb_vp_machine *m)
1151 {
1152 TNLcontext *tnl = TNL_CONTEXT(ctx);
1153 struct vertex_buffer *VB = m->VB;
1154
1155 /* Cliptest and perspective divide. Clip functions must clear
1156 * the clipmask.
1157 */
1158 m->ormask = 0;
1159 m->andmask = CLIP_FRUSTUM_BITS;
1160
1161 if (tnl->NeedNdcCoords) {
1162 VB->NdcPtr =
1163 _mesa_clip_tab[VB->ClipPtr->size]( VB->ClipPtr,
1164 &m->ndcCoords,
1165 m->clipmask,
1166 &m->ormask,
1167 &m->andmask );
1168 }
1169 else {
1170 VB->NdcPtr = NULL;
1171 _mesa_clip_np_tab[VB->ClipPtr->size]( VB->ClipPtr,
1172 NULL,
1173 m->clipmask,
1174 &m->ormask,
1175 &m->andmask );
1176 }
1177
1178 if (m->andmask) {
1179 /* All vertices are outside the frustum */
1180 return GL_FALSE;
1181 }
1182
1183 /* Test userclip planes. This contributes to VB->ClipMask.
1184 */
1185 if (ctx->Transform.ClipPlanesEnabled && (!ctx->VertexProgram._Enabled ||
1186 ctx->VertexProgram.Current->IsPositionInvariant)) {
1187 userclip( ctx,
1188 VB->ClipPtr,
1189 m->clipmask,
1190 &m->ormask,
1191 &m->andmask );
1192
1193 if (m->andmask) {
1194 return GL_FALSE;
1195 }
1196 }
1197
1198 VB->ClipAndMask = m->andmask;
1199 VB->ClipOrMask = m->ormask;
1200 VB->ClipMask = m->clipmask;
1201
1202 return GL_TRUE;
1203 }
1204
1205
1206 static INLINE void call_func( struct tnl_compiled_program *p,
1207 struct arb_vp_machine *m )
1208 {
1209 p->compiled_func(m);
1210 }
1211
1212 /**
1213 * Execute the given vertex program.
1214 *
1215 * TODO: Integrate the t_vertex.c code here, to build machine vertices
1216 * directly at this point.
1217 *
1218 * TODO: Eliminate the VB struct entirely and just use
1219 * struct arb_vertex_machine.
1220 */
1221 static GLboolean
1222 run_arb_vertex_program(GLcontext *ctx, struct tnl_pipeline_stage *stage)
1223 {
1224 const struct gl_vertex_program *program;
1225 struct vertex_buffer *VB = &TNL_CONTEXT(ctx)->vb;
1226 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1227 struct tnl_compiled_program *p;
1228 GLuint i, j;
1229 GLbitfield outputs;
1230
1231 if (ctx->ShaderObjects._VertexShaderPresent)
1232 return GL_TRUE;
1233
1234 program = ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : NULL;
1235 if (!program && ctx->_MaintainTnlProgram) {
1236 program = ctx->_TnlProgram;
1237 }
1238 if (!program || program->IsNVProgram)
1239 return GL_TRUE;
1240
1241 if (program->Base.Parameters) {
1242 _mesa_load_state_parameters(ctx, program->Base.Parameters);
1243 }
1244
1245 p = (struct tnl_compiled_program *)program->TnlData;
1246 assert(p);
1247
1248
1249 m->nr_inputs = m->nr_outputs = 0;
1250
1251 for (i = 0; i < VERT_ATTRIB_MAX; i++) {
1252 if (program->Base.InputsRead & (1<<i) ||
1253 (i == VERT_ATTRIB_POS && program->IsPositionInvariant)) {
1254 GLuint j = m->nr_inputs++;
1255 m->input[j].idx = i;
1256 m->input[j].data = (GLfloat *)m->VB->AttribPtr[i]->data;
1257 m->input[j].stride = m->VB->AttribPtr[i]->stride;
1258 m->input[j].size = m->VB->AttribPtr[i]->size;
1259 ASSIGN_4V(m->File[0][REG_IN0 + i], 0, 0, 0, 1);
1260 }
1261 }
1262
1263 for (i = 0; i < VERT_RESULT_MAX; i++) {
1264 if (program->Base.OutputsWritten & (1 << i) ||
1265 (i == VERT_RESULT_HPOS && program->IsPositionInvariant)) {
1266 GLuint j = m->nr_outputs++;
1267 m->output[j].idx = i;
1268 m->output[j].data = (GLfloat *)m->attribs[i].data;
1269 }
1270 }
1271
1272
1273 /* Run the actual program:
1274 */
1275 for (m->vtx_nr = 0; m->vtx_nr < VB->Count; m->vtx_nr++) {
1276 for (j = 0; j < m->nr_inputs; j++) {
1277 GLuint idx = REG_IN0 + m->input[j].idx;
1278 switch (m->input[j].size) {
1279 case 4: m->File[0][idx][3] = m->input[j].data[3];
1280 case 3: m->File[0][idx][2] = m->input[j].data[2];
1281 case 2: m->File[0][idx][1] = m->input[j].data[1];
1282 case 1: m->File[0][idx][0] = m->input[j].data[0];
1283 }
1284
1285 STRIDE_F(m->input[j].data, m->input[j].stride);
1286 }
1287
1288
1289 if (p->compiled_func) {
1290 call_func( p, m );
1291 }
1292 else {
1293 GLint j;
1294 for (j = 0; j < p->nr_instructions; j++) {
1295 union instruction inst = p->instructions[j];
1296 opcode_func[inst.alu.opcode]( m, inst );
1297 }
1298 }
1299
1300 /* If the program is position invariant, multiply the input position
1301 * by the MVP matrix and store in the vertex position result register.
1302 */
1303 if (program->IsPositionInvariant) {
1304 TRANSFORM_POINT( m->File[0][REG_OUT0+0],
1305 ctx->_ModelProjectMatrix.m,
1306 m->File[0][REG_IN0+0]);
1307 }
1308
1309 for (j = 0; j < m->nr_outputs; j++) {
1310 GLuint idx = REG_OUT0 + m->output[j].idx;
1311 m->output[j].data[0] = m->File[0][idx][0];
1312 m->output[j].data[1] = m->File[0][idx][1];
1313 m->output[j].data[2] = m->File[0][idx][2];
1314 m->output[j].data[3] = m->File[0][idx][3];
1315 m->output[j].data += 4;
1316 }
1317
1318 }
1319
1320 /* Setup the VB pointers so that the next pipeline stages get
1321 * their data from the right place (the program output arrays).
1322 *
1323 * TODO: 1) Have tnl use these RESULT values for outputs rather
1324 * than trying to shoe-horn inputs and outputs into one set of
1325 * values.
1326 *
1327 * TODO: 2) Integrate t_vertex.c so that we just go straight ahead
1328 * and build machine vertices here.
1329 */
1330 VB->ClipPtr = &m->attribs[VERT_RESULT_HPOS];
1331 VB->ClipPtr->count = VB->Count;
1332
1333 /* XXX There seems to be confusion between using the VERT_ATTRIB_*
1334 * values vs _TNL_ATTRIB_* tokens here:
1335 */
1336 outputs = program->Base.OutputsWritten;
1337 if (program->IsPositionInvariant)
1338 outputs |= (1<<VERT_RESULT_HPOS);
1339
1340 if (outputs & (1<<VERT_RESULT_COL0)) {
1341 VB->ColorPtr[0] =
1342 VB->AttribPtr[VERT_ATTRIB_COLOR0] = &m->attribs[VERT_RESULT_COL0];
1343 }
1344
1345 if (outputs & (1<<VERT_RESULT_BFC0)) {
1346 VB->ColorPtr[1] = &m->attribs[VERT_RESULT_BFC0];
1347 }
1348
1349 if (outputs & (1<<VERT_RESULT_COL1)) {
1350 VB->SecondaryColorPtr[0] =
1351 VB->AttribPtr[VERT_ATTRIB_COLOR1] = &m->attribs[VERT_RESULT_COL1];
1352 }
1353
1354 if (outputs & (1<<VERT_RESULT_BFC1)) {
1355 VB->SecondaryColorPtr[1] = &m->attribs[VERT_RESULT_BFC1];
1356 }
1357
1358 if (outputs & (1<<VERT_RESULT_FOGC)) {
1359 VB->FogCoordPtr =
1360 VB->AttribPtr[VERT_ATTRIB_FOG] = &m->attribs[VERT_RESULT_FOGC];
1361 }
1362
1363 if (outputs & (1<<VERT_RESULT_PSIZ)) {
1364 VB->AttribPtr[_TNL_ATTRIB_POINTSIZE] = &m->attribs[VERT_RESULT_PSIZ];
1365 }
1366
1367 for (i = 0; i < ctx->Const.MaxTextureCoordUnits; i++) {
1368 if (outputs & (1<<(VERT_RESULT_TEX0+i))) {
1369 VB->TexCoordPtr[i] =
1370 VB->AttribPtr[VERT_ATTRIB_TEX0+i] = &m->attribs[VERT_RESULT_TEX0 + i];
1371 }
1372 }
1373
1374 #if 0
1375 for (i = 0; i < VB->Count; i++) {
1376 printf("Out %d: %f %f %f %f %f %f %f %f\n", i,
1377 VEC_ELT(VB->ClipPtr, GLfloat, i)[0],
1378 VEC_ELT(VB->ClipPtr, GLfloat, i)[1],
1379 VEC_ELT(VB->ClipPtr, GLfloat, i)[2],
1380 VEC_ELT(VB->ClipPtr, GLfloat, i)[3],
1381 VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[0],
1382 VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[1],
1383 VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[2],
1384 VEC_ELT(VB->AttribPtr[VERT_ATTRIB_TEX0], GLfloat, i)[3]);
1385 }
1386 #endif
1387
1388 /* Perform NDC and cliptest operations:
1389 */
1390 return do_ndc_cliptest(ctx, m);
1391 }
1392
1393
1394 static void
1395 validate_vertex_program( GLcontext *ctx, struct tnl_pipeline_stage *stage )
1396 {
1397 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1398 struct gl_vertex_program *program;
1399
1400 if (ctx->ShaderObjects._VertexShaderPresent)
1401 return;
1402
1403 program = (ctx->VertexProgram._Enabled ? ctx->VertexProgram.Current : 0);
1404 if (!program && ctx->_MaintainTnlProgram) {
1405 program = ctx->_TnlProgram;
1406 }
1407
1408 if (program) {
1409 if (!program->TnlData)
1410 compile_vertex_program( program, m->try_codegen );
1411
1412 /* Grab the state GL state and put into registers:
1413 */
1414 m->File[FILE_LOCAL_PARAM] = program->Base.LocalParams;
1415 m->File[FILE_ENV_PARAM] = ctx->VertexProgram.Parameters;
1416 /* GL_NV_vertex_programs can't reference GL state */
1417 if (program->Base.Parameters)
1418 m->File[FILE_STATE_PARAM] = program->Base.Parameters->ParameterValues;
1419 else
1420 m->File[FILE_STATE_PARAM] = NULL;
1421 }
1422 }
1423
1424
1425
1426
1427
1428
1429
1430 /**
1431 * Called the first time stage->run is called. In effect, don't
1432 * allocate data until the first time the stage is run.
1433 */
1434 static GLboolean init_vertex_program( GLcontext *ctx,
1435 struct tnl_pipeline_stage *stage )
1436 {
1437 TNLcontext *tnl = TNL_CONTEXT(ctx);
1438 struct vertex_buffer *VB = &(tnl->vb);
1439 struct arb_vp_machine *m;
1440 const GLuint size = VB->Size;
1441 GLuint i;
1442
1443 stage->privatePtr = _mesa_calloc(sizeof(*m));
1444 m = ARB_VP_MACHINE(stage);
1445 if (!m)
1446 return GL_FALSE;
1447
1448 /* arb_vertex_machine struct should subsume the VB:
1449 */
1450 m->VB = VB;
1451
1452 m->File[0] = (GLfloat(*)[4])ALIGN_MALLOC(REG_MAX * sizeof(GLfloat) * 4, 16);
1453
1454 /* Initialize regs where necessary:
1455 */
1456 ASSIGN_4V(m->File[0][REG_ID], 0, 0, 0, 1);
1457 ASSIGN_4V(m->File[0][REG_ONES], 1, 1, 1, 1);
1458 ASSIGN_4V(m->File[0][REG_SWZ], 1, -1, 0, 0);
1459 ASSIGN_4V(m->File[0][REG_NEG], -1, -1, -1, -1);
1460 ASSIGN_4V(m->File[0][REG_LIT], 1, 0, 0, 1);
1461 ASSIGN_4V(m->File[0][REG_LIT2], 1, .5, .2, 1); /* debug value */
1462
1463 if (_mesa_getenv("MESA_EXPERIMENTAL"))
1464 m->try_codegen = GL_TRUE;
1465
1466 /* Allocate arrays of vertex output values */
1467 for (i = 0; i < VERT_RESULT_MAX; i++) {
1468 _mesa_vector4f_alloc( &m->attribs[i], 0, size, 32 );
1469 m->attribs[i].size = 4;
1470 }
1471
1472 /* a few other misc allocations */
1473 _mesa_vector4f_alloc( &m->ndcCoords, 0, size, 32 );
1474 m->clipmask = (GLubyte *) ALIGN_MALLOC(sizeof(GLubyte)*size, 32 );
1475
1476 if (ctx->_MaintainTnlProgram)
1477 _mesa_allow_light_in_model( ctx, GL_FALSE );
1478
1479 m->fpucntl_rnd_neg = RND_NEG_FPU; /* const value */
1480 m->fpucntl_restore = RESTORE_FPU; /* const value */
1481
1482 return GL_TRUE;
1483 }
1484
1485
1486
1487
1488 /**
1489 * Destructor for this pipeline stage.
1490 */
1491 static void dtr( struct tnl_pipeline_stage *stage )
1492 {
1493 struct arb_vp_machine *m = ARB_VP_MACHINE(stage);
1494
1495 if (m) {
1496 GLuint i;
1497
1498 /* free the vertex program result arrays */
1499 for (i = 0; i < VERT_RESULT_MAX; i++)
1500 _mesa_vector4f_free( &m->attribs[i] );
1501
1502 /* free misc arrays */
1503 _mesa_vector4f_free( &m->ndcCoords );
1504 ALIGN_FREE( m->clipmask );
1505 ALIGN_FREE( m->File[0] );
1506
1507 _mesa_free( m );
1508 stage->privatePtr = NULL;
1509 }
1510 }
1511
1512 /**
1513 * Public description of this pipeline stage.
1514 */
1515 const struct tnl_pipeline_stage _tnl_arb_vertex_program_stage =
1516 {
1517 "arb-vertex-program",
1518 NULL, /* private_data */
1519 init_vertex_program, /* create */
1520 dtr, /* destroy */
1521 validate_vertex_program, /* validate */
1522 run_arb_vertex_program /* run */
1523 };
1524
1525
1526 /**
1527 * Called via ctx->Driver.ProgramStringNotify() after a new vertex program
1528 * string has been parsed.
1529 */
1530 void
1531 _tnl_program_string(GLcontext *ctx, GLenum target, struct gl_program *program)
1532 {
1533 if (target == GL_VERTEX_PROGRAM_ARB) {
1534 /* free any existing tnl data hanging off the program */
1535 struct gl_vertex_program *vprog = (struct gl_vertex_program *) program;
1536 if (vprog->TnlData) {
1537 free_tnl_data(vprog);
1538 }
1539 }
1540 }