mesa: Include macros.h in files that use symbols from macros.h.
[mesa.git] / src / mesa / program / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.3
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31 /*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38 #include "main/glheader.h"
39 #include "main/colormac.h"
40 #include "main/context.h"
41 #include "main/macros.h"
42 #include "prog_execute.h"
43 #include "prog_instruction.h"
44 #include "prog_parameter.h"
45 #include "prog_print.h"
46 #include "prog_noise.h"
47
48
/*
 * Debug predicate: set to a non-zero value to enable the printf() tracing
 * that this file guards with "if (DEBUG_PROG)" / "#if DEBUG_PROG".
 */
#define DEBUG_PROG 0
51
52
/**
 * Set x to positive or negative infinity.
 *
 * x must be a float lvalue.  Every variant expands to a single
 * do { ... } while (0) statement so that the macros have the same shape
 * on all platforms, and the argument is parenthesized.
 *
 * - IEEE / Win32: the infinity bit pattern is written through fi_type.
 * - VMS: __MAXFLOAT stands in for infinity.
 * - otherwise: HUGE_VAL converted to GLfloat.
 */
#if defined(USE_IEEE) || defined(_WIN32)
#define SET_POS_INFINITY(x)                  \
   do {                                      \
      fi_type fi;                            \
      fi.i = 0x7F800000;                     \
      (x) = fi.f;                            \
   } while (0)
#define SET_NEG_INFINITY(x)                  \
   do {                                      \
      fi_type fi;                            \
      fi.i = 0xFF800000;                     \
      (x) = fi.f;                            \
   } while (0)
#elif defined(VMS)
#define SET_POS_INFINITY(x)  do { (x) = __MAXFLOAT; } while (0)
#define SET_NEG_INFINITY(x)  do { (x) = -__MAXFLOAT; } while (0)
#else
#define SET_POS_INFINITY(x)  do { (x) = (GLfloat) HUGE_VAL; } while (0)
#define SET_NEG_INFINITY(x)  do { (x) = (GLfloat) -HUGE_VAL; } while (0)
#endif
76
/*
 * Overwrite the raw bits of the float lvalue x with the integer 'bits'
 * by viewing x through fi_type.  Both the argument and the whole
 * expansion are parenthesized so the macro is safe inside larger
 * expressions and with compound 'bits' arguments.
 */
#define SET_FLOAT_BITS(x, bits) (((fi_type *) (void *) &(x))->i = (bits))
78
79
/* All-zero vector returned by get_src_register_pointer() for out-of-range
 * source register indexes.
 */
static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
81
82
83
84 /**
85 * Return TRUE for +0 and other positive values, FALSE otherwise.
86 * Used for RCC opcode.
87 */
88 static INLINE GLboolean
89 positive(float x)
90 {
91 fi_type fi;
92 fi.f = x;
93 if (fi.i & 0x80000000)
94 return GL_FALSE;
95 return GL_TRUE;
96 }
97
98
99
100 /**
101 * Return a pointer to the 4-element float vector specified by the given
102 * source register.
103 */
104 static INLINE const GLfloat *
105 get_src_register_pointer(const struct prog_src_register *source,
106 const struct gl_program_machine *machine)
107 {
108 const struct gl_program *prog = machine->CurProgram;
109 GLint reg = source->Index;
110
111 if (source->RelAddr) {
112 /* add address register value to src index/offset */
113 reg += machine->AddressReg[0][0];
114 if (reg < 0) {
115 return ZeroVec;
116 }
117 }
118
119 switch (source->File) {
120 case PROGRAM_TEMPORARY:
121 if (reg >= MAX_PROGRAM_TEMPS)
122 return ZeroVec;
123 return machine->Temporaries[reg];
124
125 case PROGRAM_INPUT:
126 if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
127 if (reg >= VERT_ATTRIB_MAX)
128 return ZeroVec;
129 return machine->VertAttribs[reg];
130 }
131 else {
132 if (reg >= FRAG_ATTRIB_MAX)
133 return ZeroVec;
134 return machine->Attribs[reg][machine->CurElement];
135 }
136
137 case PROGRAM_OUTPUT:
138 if (reg >= MAX_PROGRAM_OUTPUTS)
139 return ZeroVec;
140 return machine->Outputs[reg];
141
142 case PROGRAM_LOCAL_PARAM:
143 if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
144 return ZeroVec;
145 return machine->CurProgram->LocalParams[reg];
146
147 case PROGRAM_ENV_PARAM:
148 if (reg >= MAX_PROGRAM_ENV_PARAMS)
149 return ZeroVec;
150 return machine->EnvParams[reg];
151
152 case PROGRAM_STATE_VAR:
153 /* Fallthrough */
154 case PROGRAM_CONSTANT:
155 /* Fallthrough */
156 case PROGRAM_UNIFORM:
157 /* Fallthrough */
158 case PROGRAM_NAMED_PARAM:
159 if (reg >= (GLint) prog->Parameters->NumParameters)
160 return ZeroVec;
161 return prog->Parameters->ParameterValues[reg];
162
163 default:
164 _mesa_problem(NULL,
165 "Invalid src register file %d in get_src_register_pointer()",
166 source->File);
167 return NULL;
168 }
169 }
170
171
172 /**
173 * Return a pointer to the 4-element float vector specified by the given
174 * destination register.
175 */
176 static INLINE GLfloat *
177 get_dst_register_pointer(const struct prog_dst_register *dest,
178 struct gl_program_machine *machine)
179 {
180 static GLfloat dummyReg[4];
181 GLint reg = dest->Index;
182
183 if (dest->RelAddr) {
184 /* add address register value to src index/offset */
185 reg += machine->AddressReg[0][0];
186 if (reg < 0) {
187 return dummyReg;
188 }
189 }
190
191 switch (dest->File) {
192 case PROGRAM_TEMPORARY:
193 if (reg >= MAX_PROGRAM_TEMPS)
194 return dummyReg;
195 return machine->Temporaries[reg];
196
197 case PROGRAM_OUTPUT:
198 if (reg >= MAX_PROGRAM_OUTPUTS)
199 return dummyReg;
200 return machine->Outputs[reg];
201
202 case PROGRAM_WRITE_ONLY:
203 return dummyReg;
204
205 default:
206 _mesa_problem(NULL,
207 "Invalid dest register file %d in get_dst_register_pointer()",
208 dest->File);
209 return NULL;
210 }
211 }
212
213
214
215 /**
216 * Fetch a 4-element float vector from the given source register.
217 * Apply swizzling and negating as needed.
218 */
219 static void
220 fetch_vector4(const struct prog_src_register *source,
221 const struct gl_program_machine *machine, GLfloat result[4])
222 {
223 const GLfloat *src = get_src_register_pointer(source, machine);
224 ASSERT(src);
225
226 if (source->Swizzle == SWIZZLE_NOOP) {
227 /* no swizzling */
228 COPY_4V(result, src);
229 }
230 else {
231 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
232 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
233 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
234 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
235 result[0] = src[GET_SWZ(source->Swizzle, 0)];
236 result[1] = src[GET_SWZ(source->Swizzle, 1)];
237 result[2] = src[GET_SWZ(source->Swizzle, 2)];
238 result[3] = src[GET_SWZ(source->Swizzle, 3)];
239 }
240
241 if (source->Abs) {
242 result[0] = FABSF(result[0]);
243 result[1] = FABSF(result[1]);
244 result[2] = FABSF(result[2]);
245 result[3] = FABSF(result[3]);
246 }
247 if (source->Negate) {
248 ASSERT(source->Negate == NEGATE_XYZW);
249 result[0] = -result[0];
250 result[1] = -result[1];
251 result[2] = -result[2];
252 result[3] = -result[3];
253 }
254
255 #ifdef NAN_CHECK
256 assert(!IS_INF_OR_NAN(result[0]));
257 assert(!IS_INF_OR_NAN(result[0]));
258 assert(!IS_INF_OR_NAN(result[0]));
259 assert(!IS_INF_OR_NAN(result[0]));
260 #endif
261 }
262
263
264 /**
265 * Fetch a 4-element uint vector from the given source register.
266 * Apply swizzling but not negation/abs.
267 */
268 static void
269 fetch_vector4ui(const struct prog_src_register *source,
270 const struct gl_program_machine *machine, GLuint result[4])
271 {
272 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
273 ASSERT(src);
274
275 if (source->Swizzle == SWIZZLE_NOOP) {
276 /* no swizzling */
277 COPY_4V(result, src);
278 }
279 else {
280 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
281 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
282 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
283 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
284 result[0] = src[GET_SWZ(source->Swizzle, 0)];
285 result[1] = src[GET_SWZ(source->Swizzle, 1)];
286 result[2] = src[GET_SWZ(source->Swizzle, 2)];
287 result[3] = src[GET_SWZ(source->Swizzle, 3)];
288 }
289
290 /* Note: no Negate or Abs here */
291 }
292
293
294
295 /**
296 * Fetch the derivative with respect to X or Y for the given register.
297 * XXX this currently only works for fragment program input attribs.
298 */
299 static void
300 fetch_vector4_deriv(GLcontext * ctx,
301 const struct prog_src_register *source,
302 const struct gl_program_machine *machine,
303 char xOrY, GLfloat result[4])
304 {
305 if (source->File == PROGRAM_INPUT &&
306 source->Index < (GLint) machine->NumDeriv) {
307 const GLint col = machine->CurElement;
308 const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
309 const GLfloat invQ = 1.0f / w;
310 GLfloat deriv[4];
311
312 if (xOrY == 'X') {
313 deriv[0] = machine->DerivX[source->Index][0] * invQ;
314 deriv[1] = machine->DerivX[source->Index][1] * invQ;
315 deriv[2] = machine->DerivX[source->Index][2] * invQ;
316 deriv[3] = machine->DerivX[source->Index][3] * invQ;
317 }
318 else {
319 deriv[0] = machine->DerivY[source->Index][0] * invQ;
320 deriv[1] = machine->DerivY[source->Index][1] * invQ;
321 deriv[2] = machine->DerivY[source->Index][2] * invQ;
322 deriv[3] = machine->DerivY[source->Index][3] * invQ;
323 }
324
325 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
326 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
327 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
328 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
329
330 if (source->Abs) {
331 result[0] = FABSF(result[0]);
332 result[1] = FABSF(result[1]);
333 result[2] = FABSF(result[2]);
334 result[3] = FABSF(result[3]);
335 }
336 if (source->Negate) {
337 ASSERT(source->Negate == NEGATE_XYZW);
338 result[0] = -result[0];
339 result[1] = -result[1];
340 result[2] = -result[2];
341 result[3] = -result[3];
342 }
343 }
344 else {
345 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
346 }
347 }
348
349
350 /**
351 * As above, but only return result[0] element.
352 */
353 static void
354 fetch_vector1(const struct prog_src_register *source,
355 const struct gl_program_machine *machine, GLfloat result[4])
356 {
357 const GLfloat *src = get_src_register_pointer(source, machine);
358 ASSERT(src);
359
360 result[0] = src[GET_SWZ(source->Swizzle, 0)];
361
362 if (source->Abs) {
363 result[0] = FABSF(result[0]);
364 }
365 if (source->Negate) {
366 result[0] = -result[0];
367 }
368 }
369
370
371 static GLuint
372 fetch_vector1ui(const struct prog_src_register *source,
373 const struct gl_program_machine *machine)
374 {
375 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
376 return src[GET_SWZ(source->Swizzle, 0)];
377 }
378
379
380 /**
381 * Fetch texel from texture. Use partial derivatives when possible.
382 */
383 static INLINE void
384 fetch_texel(GLcontext *ctx,
385 const struct gl_program_machine *machine,
386 const struct prog_instruction *inst,
387 const GLfloat texcoord[4], GLfloat lodBias,
388 GLfloat color[4])
389 {
390 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
391
392 /* Note: we only have the right derivatives for fragment input attribs.
393 */
394 if (machine->NumDeriv > 0 &&
395 inst->SrcReg[0].File == PROGRAM_INPUT &&
396 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
397 /* simple texture fetch for which we should have derivatives */
398 GLuint attr = inst->SrcReg[0].Index;
399 machine->FetchTexelDeriv(ctx, texcoord,
400 machine->DerivX[attr],
401 machine->DerivY[attr],
402 lodBias, unit, color);
403 }
404 else {
405 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
406 }
407 }
408
409
410 /**
411 * Test value against zero and return GT, LT, EQ or UN if NaN.
412 */
413 static INLINE GLuint
414 generate_cc(float value)
415 {
416 if (value != value)
417 return COND_UN; /* NaN */
418 if (value > 0.0F)
419 return COND_GT;
420 if (value < 0.0F)
421 return COND_LT;
422 return COND_EQ;
423 }
424
425
426 /**
427 * Test if the ccMaskRule is satisfied by the given condition code.
428 * Used to mask destination writes according to the current condition code.
429 */
430 static INLINE GLboolean
431 test_cc(GLuint condCode, GLuint ccMaskRule)
432 {
433 switch (ccMaskRule) {
434 case COND_EQ: return (condCode == COND_EQ);
435 case COND_NE: return (condCode != COND_EQ);
436 case COND_LT: return (condCode == COND_LT);
437 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
438 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
439 case COND_GT: return (condCode == COND_GT);
440 case COND_TR: return GL_TRUE;
441 case COND_FL: return GL_FALSE;
442 default: return GL_TRUE;
443 }
444 }
445
446
447 /**
448 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
449 * or GL_FALSE to indicate result.
450 */
451 static INLINE GLboolean
452 eval_condition(const struct gl_program_machine *machine,
453 const struct prog_instruction *inst)
454 {
455 const GLuint swizzle = inst->DstReg.CondSwizzle;
456 const GLuint condMask = inst->DstReg.CondMask;
457 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
458 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
459 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
460 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
461 return GL_TRUE;
462 }
463 else {
464 return GL_FALSE;
465 }
466 }
467
468
469
470 /**
471 * Store 4 floats into a register. Observe the instructions saturate and
472 * set-condition-code flags.
473 */
474 static void
475 store_vector4(const struct prog_instruction *inst,
476 struct gl_program_machine *machine, const GLfloat value[4])
477 {
478 const struct prog_dst_register *dstReg = &(inst->DstReg);
479 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
480 GLuint writeMask = dstReg->WriteMask;
481 GLfloat clampedValue[4];
482 GLfloat *dst = get_dst_register_pointer(dstReg, machine);
483
484 #if 0
485 if (value[0] > 1.0e10 ||
486 IS_INF_OR_NAN(value[0]) ||
487 IS_INF_OR_NAN(value[1]) ||
488 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
489 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
490 #endif
491
492 if (clamp) {
493 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
494 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
495 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
496 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
497 value = clampedValue;
498 }
499
500 if (dstReg->CondMask != COND_TR) {
501 /* condition codes may turn off some writes */
502 if (writeMask & WRITEMASK_X) {
503 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
504 dstReg->CondMask))
505 writeMask &= ~WRITEMASK_X;
506 }
507 if (writeMask & WRITEMASK_Y) {
508 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
509 dstReg->CondMask))
510 writeMask &= ~WRITEMASK_Y;
511 }
512 if (writeMask & WRITEMASK_Z) {
513 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
514 dstReg->CondMask))
515 writeMask &= ~WRITEMASK_Z;
516 }
517 if (writeMask & WRITEMASK_W) {
518 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
519 dstReg->CondMask))
520 writeMask &= ~WRITEMASK_W;
521 }
522 }
523
524 #ifdef NAN_CHECK
525 assert(!IS_INF_OR_NAN(value[0]));
526 assert(!IS_INF_OR_NAN(value[0]));
527 assert(!IS_INF_OR_NAN(value[0]));
528 assert(!IS_INF_OR_NAN(value[0]));
529 #endif
530
531 if (writeMask & WRITEMASK_X)
532 dst[0] = value[0];
533 if (writeMask & WRITEMASK_Y)
534 dst[1] = value[1];
535 if (writeMask & WRITEMASK_Z)
536 dst[2] = value[2];
537 if (writeMask & WRITEMASK_W)
538 dst[3] = value[3];
539
540 if (inst->CondUpdate) {
541 if (writeMask & WRITEMASK_X)
542 machine->CondCodes[0] = generate_cc(value[0]);
543 if (writeMask & WRITEMASK_Y)
544 machine->CondCodes[1] = generate_cc(value[1]);
545 if (writeMask & WRITEMASK_Z)
546 machine->CondCodes[2] = generate_cc(value[2]);
547 if (writeMask & WRITEMASK_W)
548 machine->CondCodes[3] = generate_cc(value[3]);
549 #if DEBUG_PROG
550 printf("CondCodes=(%s,%s,%s,%s) for:\n",
551 _mesa_condcode_string(machine->CondCodes[0]),
552 _mesa_condcode_string(machine->CondCodes[1]),
553 _mesa_condcode_string(machine->CondCodes[2]),
554 _mesa_condcode_string(machine->CondCodes[3]));
555 #endif
556 }
557 }
558
559
560 /**
561 * Store 4 uints into a register. Observe the set-condition-code flags.
562 */
563 static void
564 store_vector4ui(const struct prog_instruction *inst,
565 struct gl_program_machine *machine, const GLuint value[4])
566 {
567 const struct prog_dst_register *dstReg = &(inst->DstReg);
568 GLuint writeMask = dstReg->WriteMask;
569 GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
570
571 if (dstReg->CondMask != COND_TR) {
572 /* condition codes may turn off some writes */
573 if (writeMask & WRITEMASK_X) {
574 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
575 dstReg->CondMask))
576 writeMask &= ~WRITEMASK_X;
577 }
578 if (writeMask & WRITEMASK_Y) {
579 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
580 dstReg->CondMask))
581 writeMask &= ~WRITEMASK_Y;
582 }
583 if (writeMask & WRITEMASK_Z) {
584 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
585 dstReg->CondMask))
586 writeMask &= ~WRITEMASK_Z;
587 }
588 if (writeMask & WRITEMASK_W) {
589 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
590 dstReg->CondMask))
591 writeMask &= ~WRITEMASK_W;
592 }
593 }
594
595 if (writeMask & WRITEMASK_X)
596 dst[0] = value[0];
597 if (writeMask & WRITEMASK_Y)
598 dst[1] = value[1];
599 if (writeMask & WRITEMASK_Z)
600 dst[2] = value[2];
601 if (writeMask & WRITEMASK_W)
602 dst[3] = value[3];
603
604 if (inst->CondUpdate) {
605 if (writeMask & WRITEMASK_X)
606 machine->CondCodes[0] = generate_cc((float)value[0]);
607 if (writeMask & WRITEMASK_Y)
608 machine->CondCodes[1] = generate_cc((float)value[1]);
609 if (writeMask & WRITEMASK_Z)
610 machine->CondCodes[2] = generate_cc((float)value[2]);
611 if (writeMask & WRITEMASK_W)
612 machine->CondCodes[3] = generate_cc((float)value[3]);
613 #if DEBUG_PROG
614 printf("CondCodes=(%s,%s,%s,%s) for:\n",
615 _mesa_condcode_string(machine->CondCodes[0]),
616 _mesa_condcode_string(machine->CondCodes[1]),
617 _mesa_condcode_string(machine->CondCodes[2]),
618 _mesa_condcode_string(machine->CondCodes[3]));
619 #endif
620 }
621 }
622
623
624
625 /**
626 * Execute the given vertex/fragment program.
627 *
628 * \param ctx rendering context
629 * \param program the program to execute
630 * \param machine machine state (must be initialized)
631 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
632 */
633 GLboolean
634 _mesa_execute_program(GLcontext * ctx,
635 const struct gl_program *program,
636 struct gl_program_machine *machine)
637 {
638 const GLuint numInst = program->NumInstructions;
639 const GLuint maxExec = 10000;
640 GLuint pc, numExec = 0;
641
642 machine->CurProgram = program;
643
644 if (DEBUG_PROG) {
645 printf("execute program %u --------------------\n", program->Id);
646 }
647
648 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
649 machine->EnvParams = ctx->VertexProgram.Parameters;
650 }
651 else {
652 machine->EnvParams = ctx->FragmentProgram.Parameters;
653 }
654
655 for (pc = 0; pc < numInst; pc++) {
656 const struct prog_instruction *inst = program->Instructions + pc;
657
658 if (DEBUG_PROG) {
659 _mesa_print_instruction(inst);
660 }
661
662 switch (inst->Opcode) {
663 case OPCODE_ABS:
664 {
665 GLfloat a[4], result[4];
666 fetch_vector4(&inst->SrcReg[0], machine, a);
667 result[0] = FABSF(a[0]);
668 result[1] = FABSF(a[1]);
669 result[2] = FABSF(a[2]);
670 result[3] = FABSF(a[3]);
671 store_vector4(inst, machine, result);
672 }
673 break;
674 case OPCODE_ADD:
675 {
676 GLfloat a[4], b[4], result[4];
677 fetch_vector4(&inst->SrcReg[0], machine, a);
678 fetch_vector4(&inst->SrcReg[1], machine, b);
679 result[0] = a[0] + b[0];
680 result[1] = a[1] + b[1];
681 result[2] = a[2] + b[2];
682 result[3] = a[3] + b[3];
683 store_vector4(inst, machine, result);
684 if (DEBUG_PROG) {
685 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
686 result[0], result[1], result[2], result[3],
687 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
688 }
689 }
690 break;
691 case OPCODE_AND: /* bitwise AND */
692 {
693 GLuint a[4], b[4], result[4];
694 fetch_vector4ui(&inst->SrcReg[0], machine, a);
695 fetch_vector4ui(&inst->SrcReg[1], machine, b);
696 result[0] = a[0] & b[0];
697 result[1] = a[1] & b[1];
698 result[2] = a[2] & b[2];
699 result[3] = a[3] & b[3];
700 store_vector4ui(inst, machine, result);
701 }
702 break;
703 case OPCODE_ARL:
704 {
705 GLfloat t[4];
706 fetch_vector4(&inst->SrcReg[0], machine, t);
707 machine->AddressReg[0][0] = IFLOOR(t[0]);
708 if (DEBUG_PROG) {
709 printf("ARL %d\n", machine->AddressReg[0][0]);
710 }
711 }
712 break;
713 case OPCODE_BGNLOOP:
714 /* no-op */
715 ASSERT(program->Instructions[inst->BranchTarget].Opcode
716 == OPCODE_ENDLOOP);
717 break;
718 case OPCODE_ENDLOOP:
719 /* subtract 1 here since pc is incremented by for(pc) loop */
720 ASSERT(program->Instructions[inst->BranchTarget].Opcode
721 == OPCODE_BGNLOOP);
722 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
723 break;
724 case OPCODE_BGNSUB: /* begin subroutine */
725 break;
726 case OPCODE_ENDSUB: /* end subroutine */
727 break;
728 case OPCODE_BRA: /* branch (conditional) */
729 if (eval_condition(machine, inst)) {
730 /* take branch */
731 /* Subtract 1 here since we'll do pc++ below */
732 pc = inst->BranchTarget - 1;
733 }
734 break;
735 case OPCODE_BRK: /* break out of loop (conditional) */
736 ASSERT(program->Instructions[inst->BranchTarget].Opcode
737 == OPCODE_ENDLOOP);
738 if (eval_condition(machine, inst)) {
739 /* break out of loop */
740 /* pc++ at end of for-loop will put us after the ENDLOOP inst */
741 pc = inst->BranchTarget;
742 }
743 break;
744 case OPCODE_CONT: /* continue loop (conditional) */
745 ASSERT(program->Instructions[inst->BranchTarget].Opcode
746 == OPCODE_ENDLOOP);
747 if (eval_condition(machine, inst)) {
748 /* continue at ENDLOOP */
749 /* Subtract 1 here since we'll do pc++ at end of for-loop */
750 pc = inst->BranchTarget - 1;
751 }
752 break;
753 case OPCODE_CAL: /* Call subroutine (conditional) */
754 if (eval_condition(machine, inst)) {
755 /* call the subroutine */
756 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
757 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
758 }
759 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
760 /* Subtract 1 here since we'll do pc++ at end of for-loop */
761 pc = inst->BranchTarget - 1;
762 }
763 break;
764 case OPCODE_CMP:
765 {
766 GLfloat a[4], b[4], c[4], result[4];
767 fetch_vector4(&inst->SrcReg[0], machine, a);
768 fetch_vector4(&inst->SrcReg[1], machine, b);
769 fetch_vector4(&inst->SrcReg[2], machine, c);
770 result[0] = a[0] < 0.0F ? b[0] : c[0];
771 result[1] = a[1] < 0.0F ? b[1] : c[1];
772 result[2] = a[2] < 0.0F ? b[2] : c[2];
773 result[3] = a[3] < 0.0F ? b[3] : c[3];
774 store_vector4(inst, machine, result);
775 }
776 break;
777 case OPCODE_COS:
778 {
779 GLfloat a[4], result[4];
780 fetch_vector1(&inst->SrcReg[0], machine, a);
781 result[0] = result[1] = result[2] = result[3]
782 = (GLfloat) cos(a[0]);
783 store_vector4(inst, machine, result);
784 }
785 break;
786 case OPCODE_DDX: /* Partial derivative with respect to X */
787 {
788 GLfloat result[4];
789 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
790 'X', result);
791 store_vector4(inst, machine, result);
792 }
793 break;
794 case OPCODE_DDY: /* Partial derivative with respect to Y */
795 {
796 GLfloat result[4];
797 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
798 'Y', result);
799 store_vector4(inst, machine, result);
800 }
801 break;
802 case OPCODE_DP2:
803 {
804 GLfloat a[4], b[4], result[4];
805 fetch_vector4(&inst->SrcReg[0], machine, a);
806 fetch_vector4(&inst->SrcReg[1], machine, b);
807 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
808 store_vector4(inst, machine, result);
809 if (DEBUG_PROG) {
810 printf("DP2 %g = (%g %g) . (%g %g)\n",
811 result[0], a[0], a[1], b[0], b[1]);
812 }
813 }
814 break;
815 case OPCODE_DP2A:
816 {
817 GLfloat a[4], b[4], c, result[4];
818 fetch_vector4(&inst->SrcReg[0], machine, a);
819 fetch_vector4(&inst->SrcReg[1], machine, b);
820 fetch_vector1(&inst->SrcReg[1], machine, &c);
821 result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
822 store_vector4(inst, machine, result);
823 if (DEBUG_PROG) {
824 printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
825 result[0], a[0], a[1], b[0], b[1], c);
826 }
827 }
828 break;
829 case OPCODE_DP3:
830 {
831 GLfloat a[4], b[4], result[4];
832 fetch_vector4(&inst->SrcReg[0], machine, a);
833 fetch_vector4(&inst->SrcReg[1], machine, b);
834 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
835 store_vector4(inst, machine, result);
836 if (DEBUG_PROG) {
837 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
838 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
839 }
840 }
841 break;
842 case OPCODE_DP4:
843 {
844 GLfloat a[4], b[4], result[4];
845 fetch_vector4(&inst->SrcReg[0], machine, a);
846 fetch_vector4(&inst->SrcReg[1], machine, b);
847 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
848 store_vector4(inst, machine, result);
849 if (DEBUG_PROG) {
850 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
851 result[0], a[0], a[1], a[2], a[3],
852 b[0], b[1], b[2], b[3]);
853 }
854 }
855 break;
856 case OPCODE_DPH:
857 {
858 GLfloat a[4], b[4], result[4];
859 fetch_vector4(&inst->SrcReg[0], machine, a);
860 fetch_vector4(&inst->SrcReg[1], machine, b);
861 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
862 store_vector4(inst, machine, result);
863 }
864 break;
865 case OPCODE_DST: /* Distance vector */
866 {
867 GLfloat a[4], b[4], result[4];
868 fetch_vector4(&inst->SrcReg[0], machine, a);
869 fetch_vector4(&inst->SrcReg[1], machine, b);
870 result[0] = 1.0F;
871 result[1] = a[1] * b[1];
872 result[2] = a[2];
873 result[3] = b[3];
874 store_vector4(inst, machine, result);
875 }
876 break;
877 case OPCODE_EXP:
878 {
879 GLfloat t[4], q[4], floor_t0;
880 fetch_vector1(&inst->SrcReg[0], machine, t);
881 floor_t0 = FLOORF(t[0]);
882 if (floor_t0 > FLT_MAX_EXP) {
883 SET_POS_INFINITY(q[0]);
884 SET_POS_INFINITY(q[2]);
885 }
886 else if (floor_t0 < FLT_MIN_EXP) {
887 q[0] = 0.0F;
888 q[2] = 0.0F;
889 }
890 else {
891 q[0] = LDEXPF(1.0, (int) floor_t0);
892 /* Note: GL_NV_vertex_program expects
893 * result.z = result.x * APPX(result.y)
894 * We do what the ARB extension says.
895 */
896 q[2] = (GLfloat) pow(2.0, t[0]);
897 }
898 q[1] = t[0] - floor_t0;
899 q[3] = 1.0F;
900 store_vector4( inst, machine, q );
901 }
902 break;
903 case OPCODE_EX2: /* Exponential base 2 */
904 {
905 GLfloat a[4], result[4], val;
906 fetch_vector1(&inst->SrcReg[0], machine, a);
907 val = (GLfloat) pow(2.0, a[0]);
908 /*
909 if (IS_INF_OR_NAN(val))
910 val = 1.0e10;
911 */
912 result[0] = result[1] = result[2] = result[3] = val;
913 store_vector4(inst, machine, result);
914 }
915 break;
916 case OPCODE_FLR:
917 {
918 GLfloat a[4], result[4];
919 fetch_vector4(&inst->SrcReg[0], machine, a);
920 result[0] = FLOORF(a[0]);
921 result[1] = FLOORF(a[1]);
922 result[2] = FLOORF(a[2]);
923 result[3] = FLOORF(a[3]);
924 store_vector4(inst, machine, result);
925 }
926 break;
927 case OPCODE_FRC:
928 {
929 GLfloat a[4], result[4];
930 fetch_vector4(&inst->SrcReg[0], machine, a);
931 result[0] = a[0] - FLOORF(a[0]);
932 result[1] = a[1] - FLOORF(a[1]);
933 result[2] = a[2] - FLOORF(a[2]);
934 result[3] = a[3] - FLOORF(a[3]);
935 store_vector4(inst, machine, result);
936 }
937 break;
938 case OPCODE_IF:
939 {
940 GLboolean cond;
941 ASSERT(program->Instructions[inst->BranchTarget].Opcode
942 == OPCODE_ELSE ||
943 program->Instructions[inst->BranchTarget].Opcode
944 == OPCODE_ENDIF);
945 /* eval condition */
946 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
947 GLfloat a[4];
948 fetch_vector1(&inst->SrcReg[0], machine, a);
949 cond = (a[0] != 0.0);
950 }
951 else {
952 cond = eval_condition(machine, inst);
953 }
954 if (DEBUG_PROG) {
955 printf("IF: %d\n", cond);
956 }
957 /* do if/else */
958 if (cond) {
959 /* do if-clause (just continue execution) */
960 }
961 else {
962 /* go to the instruction after ELSE or ENDIF */
963 assert(inst->BranchTarget >= 0);
964 pc = inst->BranchTarget;
965 }
966 }
967 break;
968 case OPCODE_ELSE:
969 /* goto ENDIF */
970 ASSERT(program->Instructions[inst->BranchTarget].Opcode
971 == OPCODE_ENDIF);
972 assert(inst->BranchTarget >= 0);
973 pc = inst->BranchTarget;
974 break;
975 case OPCODE_ENDIF:
976 /* nothing */
977 break;
978 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
979 if (eval_condition(machine, inst)) {
980 return GL_FALSE;
981 }
982 break;
983 case OPCODE_KIL: /* ARB_f_p only */
984 {
985 GLfloat a[4];
986 fetch_vector4(&inst->SrcReg[0], machine, a);
987 if (DEBUG_PROG) {
988 printf("KIL if (%g %g %g %g) <= 0.0\n",
989 a[0], a[1], a[2], a[3]);
990 }
991
992 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
993 return GL_FALSE;
994 }
995 }
996 break;
997 case OPCODE_LG2: /* log base 2 */
998 {
999 GLfloat a[4], result[4], val;
1000 fetch_vector1(&inst->SrcReg[0], machine, a);
1001 /* The fast LOG2 macro doesn't meet the precision requirements.
1002 */
1003 if (a[0] == 0.0F) {
1004 val = -FLT_MAX;
1005 }
1006 else {
1007 val = (float)(log(a[0]) * 1.442695F);
1008 }
1009 result[0] = result[1] = result[2] = result[3] = val;
1010 store_vector4(inst, machine, result);
1011 }
1012 break;
1013 case OPCODE_LIT:
1014 {
1015 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
1016 GLfloat a[4], result[4];
1017 fetch_vector4(&inst->SrcReg[0], machine, a);
1018 a[0] = MAX2(a[0], 0.0F);
1019 a[1] = MAX2(a[1], 0.0F);
1020 /* XXX ARB version clamps a[3], NV version doesn't */
1021 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
1022 result[0] = 1.0F;
1023 result[1] = a[0];
1024 /* XXX we could probably just use pow() here */
1025 if (a[0] > 0.0F) {
1026 if (a[1] == 0.0 && a[3] == 0.0)
1027 result[2] = 1.0F;
1028 else
1029 result[2] = (GLfloat) pow(a[1], a[3]);
1030 }
1031 else {
1032 result[2] = 0.0F;
1033 }
1034 result[3] = 1.0F;
1035 store_vector4(inst, machine, result);
1036 if (DEBUG_PROG) {
1037 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
1038 result[0], result[1], result[2], result[3],
1039 a[0], a[1], a[2], a[3]);
1040 }
1041 }
1042 break;
1043 case OPCODE_LOG:
1044 {
1045 GLfloat t[4], q[4], abs_t0;
1046 fetch_vector1(&inst->SrcReg[0], machine, t);
1047 abs_t0 = FABSF(t[0]);
1048 if (abs_t0 != 0.0F) {
1049 /* Since we really can't handle infinite values on VMS
1050 * like other OSes we'll use __MAXFLOAT to represent
1051 * infinity. This may need some tweaking.
1052 */
1053 #ifdef VMS
1054 if (abs_t0 == __MAXFLOAT)
1055 #else
1056 if (IS_INF_OR_NAN(abs_t0))
1057 #endif
1058 {
1059 SET_POS_INFINITY(q[0]);
1060 q[1] = 1.0F;
1061 SET_POS_INFINITY(q[2]);
1062 }
1063 else {
1064 int exponent;
1065 GLfloat mantissa = FREXPF(t[0], &exponent);
1066 q[0] = (GLfloat) (exponent - 1);
1067 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1068
1069 /* The fast LOG2 macro doesn't meet the precision
1070 * requirements.
1071 */
1072 q[2] = (float)(log(t[0]) * 1.442695F);
1073 }
1074 }
1075 else {
1076 SET_NEG_INFINITY(q[0]);
1077 q[1] = 1.0F;
1078 SET_NEG_INFINITY(q[2]);
1079 }
1080 q[3] = 1.0;
1081 store_vector4(inst, machine, q);
1082 }
1083 break;
1084 case OPCODE_LRP:
1085 {
1086 GLfloat a[4], b[4], c[4], result[4];
1087 fetch_vector4(&inst->SrcReg[0], machine, a);
1088 fetch_vector4(&inst->SrcReg[1], machine, b);
1089 fetch_vector4(&inst->SrcReg[2], machine, c);
1090 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1091 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1092 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1093 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1094 store_vector4(inst, machine, result);
1095 if (DEBUG_PROG) {
1096 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1097 "(%g %g %g %g), (%g %g %g %g)\n",
1098 result[0], result[1], result[2], result[3],
1099 a[0], a[1], a[2], a[3],
1100 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1101 }
1102 }
1103 break;
1104 case OPCODE_MAD:
1105 {
1106 GLfloat a[4], b[4], c[4], result[4];
1107 fetch_vector4(&inst->SrcReg[0], machine, a);
1108 fetch_vector4(&inst->SrcReg[1], machine, b);
1109 fetch_vector4(&inst->SrcReg[2], machine, c);
1110 result[0] = a[0] * b[0] + c[0];
1111 result[1] = a[1] * b[1] + c[1];
1112 result[2] = a[2] * b[2] + c[2];
1113 result[3] = a[3] * b[3] + c[3];
1114 store_vector4(inst, machine, result);
1115 if (DEBUG_PROG) {
1116 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1117 "(%g %g %g %g) + (%g %g %g %g)\n",
1118 result[0], result[1], result[2], result[3],
1119 a[0], a[1], a[2], a[3],
1120 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1121 }
1122 }
1123 break;
1124 case OPCODE_MAX:
1125 {
1126 GLfloat a[4], b[4], result[4];
1127 fetch_vector4(&inst->SrcReg[0], machine, a);
1128 fetch_vector4(&inst->SrcReg[1], machine, b);
1129 result[0] = MAX2(a[0], b[0]);
1130 result[1] = MAX2(a[1], b[1]);
1131 result[2] = MAX2(a[2], b[2]);
1132 result[3] = MAX2(a[3], b[3]);
1133 store_vector4(inst, machine, result);
1134 if (DEBUG_PROG) {
1135 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1136 result[0], result[1], result[2], result[3],
1137 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1138 }
1139 }
1140 break;
1141 case OPCODE_MIN:
1142 {
1143 GLfloat a[4], b[4], result[4];
1144 fetch_vector4(&inst->SrcReg[0], machine, a);
1145 fetch_vector4(&inst->SrcReg[1], machine, b);
1146 result[0] = MIN2(a[0], b[0]);
1147 result[1] = MIN2(a[1], b[1]);
1148 result[2] = MIN2(a[2], b[2]);
1149 result[3] = MIN2(a[3], b[3]);
1150 store_vector4(inst, machine, result);
1151 }
1152 break;
1153 case OPCODE_MOV:
1154 {
1155 GLfloat result[4];
1156 fetch_vector4(&inst->SrcReg[0], machine, result);
1157 store_vector4(inst, machine, result);
1158 if (DEBUG_PROG) {
1159 printf("MOV (%g %g %g %g)\n",
1160 result[0], result[1], result[2], result[3]);
1161 }
1162 }
1163 break;
1164 case OPCODE_MUL:
1165 {
1166 GLfloat a[4], b[4], result[4];
1167 fetch_vector4(&inst->SrcReg[0], machine, a);
1168 fetch_vector4(&inst->SrcReg[1], machine, b);
1169 result[0] = a[0] * b[0];
1170 result[1] = a[1] * b[1];
1171 result[2] = a[2] * b[2];
1172 result[3] = a[3] * b[3];
1173 store_vector4(inst, machine, result);
1174 if (DEBUG_PROG) {
1175 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1176 result[0], result[1], result[2], result[3],
1177 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1178 }
1179 }
1180 break;
1181 case OPCODE_NOISE1:
1182 {
1183 GLfloat a[4], result[4];
1184 fetch_vector1(&inst->SrcReg[0], machine, a);
1185 result[0] =
1186 result[1] =
1187 result[2] =
1188 result[3] = _mesa_noise1(a[0]);
1189 store_vector4(inst, machine, result);
1190 }
1191 break;
1192 case OPCODE_NOISE2:
1193 {
1194 GLfloat a[4], result[4];
1195 fetch_vector4(&inst->SrcReg[0], machine, a);
1196 result[0] =
1197 result[1] =
1198 result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1199 store_vector4(inst, machine, result);
1200 }
1201 break;
1202 case OPCODE_NOISE3:
1203 {
1204 GLfloat a[4], result[4];
1205 fetch_vector4(&inst->SrcReg[0], machine, a);
1206 result[0] =
1207 result[1] =
1208 result[2] =
1209 result[3] = _mesa_noise3(a[0], a[1], a[2]);
1210 store_vector4(inst, machine, result);
1211 }
1212 break;
1213 case OPCODE_NOISE4:
1214 {
1215 GLfloat a[4], result[4];
1216 fetch_vector4(&inst->SrcReg[0], machine, a);
1217 result[0] =
1218 result[1] =
1219 result[2] =
1220 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1221 store_vector4(inst, machine, result);
1222 }
1223 break;
1224 case OPCODE_NOP:
1225 break;
1226 case OPCODE_NOT: /* bitwise NOT */
1227 {
1228 GLuint a[4], result[4];
1229 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1230 result[0] = ~a[0];
1231 result[1] = ~a[1];
1232 result[2] = ~a[2];
1233 result[3] = ~a[3];
1234 store_vector4ui(inst, machine, result);
1235 }
1236 break;
1237 case OPCODE_NRM3: /* 3-component normalization */
1238 {
1239 GLfloat a[4], result[4];
1240 GLfloat tmp;
1241 fetch_vector4(&inst->SrcReg[0], machine, a);
1242 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1243 if (tmp != 0.0F)
1244 tmp = INV_SQRTF(tmp);
1245 result[0] = tmp * a[0];
1246 result[1] = tmp * a[1];
1247 result[2] = tmp * a[2];
1248 result[3] = 0.0; /* undefined, but prevent valgrind warnings */
1249 store_vector4(inst, machine, result);
1250 }
1251 break;
1252 case OPCODE_NRM4: /* 4-component normalization */
1253 {
1254 GLfloat a[4], result[4];
1255 GLfloat tmp;
1256 fetch_vector4(&inst->SrcReg[0], machine, a);
1257 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1258 if (tmp != 0.0F)
1259 tmp = INV_SQRTF(tmp);
1260 result[0] = tmp * a[0];
1261 result[1] = tmp * a[1];
1262 result[2] = tmp * a[2];
1263 result[3] = tmp * a[3];
1264 store_vector4(inst, machine, result);
1265 }
1266 break;
1267 case OPCODE_OR: /* bitwise OR */
1268 {
1269 GLuint a[4], b[4], result[4];
1270 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1271 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1272 result[0] = a[0] | b[0];
1273 result[1] = a[1] | b[1];
1274 result[2] = a[2] | b[2];
1275 result[3] = a[3] | b[3];
1276 store_vector4ui(inst, machine, result);
1277 }
1278 break;
1279 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1280 {
1281 GLfloat a[4];
1282 GLuint result[4];
1283 GLhalfNV hx, hy;
1284 fetch_vector4(&inst->SrcReg[0], machine, a);
1285 hx = _mesa_float_to_half(a[0]);
1286 hy = _mesa_float_to_half(a[1]);
1287 result[0] =
1288 result[1] =
1289 result[2] =
1290 result[3] = hx | (hy << 16);
1291 store_vector4ui(inst, machine, result);
1292 }
1293 break;
1294 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1295 {
1296 GLfloat a[4];
1297 GLuint result[4], usx, usy;
1298 fetch_vector4(&inst->SrcReg[0], machine, a);
1299 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1300 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1301 usx = IROUND(a[0] * 65535.0F);
1302 usy = IROUND(a[1] * 65535.0F);
1303 result[0] =
1304 result[1] =
1305 result[2] =
1306 result[3] = usx | (usy << 16);
1307 store_vector4ui(inst, machine, result);
1308 }
1309 break;
1310 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1311 {
1312 GLfloat a[4];
1313 GLuint result[4], ubx, uby, ubz, ubw;
1314 fetch_vector4(&inst->SrcReg[0], machine, a);
1315 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1316 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1317 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1318 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1319 ubx = IROUND(127.0F * a[0] + 128.0F);
1320 uby = IROUND(127.0F * a[1] + 128.0F);
1321 ubz = IROUND(127.0F * a[2] + 128.0F);
1322 ubw = IROUND(127.0F * a[3] + 128.0F);
1323 result[0] =
1324 result[1] =
1325 result[2] =
1326 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1327 store_vector4ui(inst, machine, result);
1328 }
1329 break;
1330 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1331 {
1332 GLfloat a[4];
1333 GLuint result[4], ubx, uby, ubz, ubw;
1334 fetch_vector4(&inst->SrcReg[0], machine, a);
1335 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1336 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1337 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1338 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1339 ubx = IROUND(255.0F * a[0]);
1340 uby = IROUND(255.0F * a[1]);
1341 ubz = IROUND(255.0F * a[2]);
1342 ubw = IROUND(255.0F * a[3]);
1343 result[0] =
1344 result[1] =
1345 result[2] =
1346 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1347 store_vector4ui(inst, machine, result);
1348 }
1349 break;
1350 case OPCODE_POW:
1351 {
1352 GLfloat a[4], b[4], result[4];
1353 fetch_vector1(&inst->SrcReg[0], machine, a);
1354 fetch_vector1(&inst->SrcReg[1], machine, b);
1355 result[0] = result[1] = result[2] = result[3]
1356 = (GLfloat) pow(a[0], b[0]);
1357 store_vector4(inst, machine, result);
1358 }
1359 break;
1360 case OPCODE_RCC: /* clamped reciprocal */
1361 {
1362 const float largest = 1.884467e+19, smallest = 5.42101e-20;
1363 GLfloat a[4], r, result[4];
1364 fetch_vector1(&inst->SrcReg[0], machine, a);
1365 if (DEBUG_PROG) {
1366 if (a[0] == 0)
1367 printf("RCC(0)\n");
1368 else if (IS_INF_OR_NAN(a[0]))
1369 printf("RCC(inf)\n");
1370 }
1371 if (a[0] == 1.0F) {
1372 r = 1.0F;
1373 }
1374 else {
1375 r = 1.0F / a[0];
1376 }
1377 if (positive(r)) {
1378 if (r > largest) {
1379 r = largest;
1380 }
1381 else if (r < smallest) {
1382 r = smallest;
1383 }
1384 }
1385 else {
1386 if (r < -largest) {
1387 r = -largest;
1388 }
1389 else if (r > -smallest) {
1390 r = -smallest;
1391 }
1392 }
1393 result[0] = result[1] = result[2] = result[3] = r;
1394 store_vector4(inst, machine, result);
1395 }
1396 break;
1397
1398 case OPCODE_RCP:
1399 {
1400 GLfloat a[4], result[4];
1401 fetch_vector1(&inst->SrcReg[0], machine, a);
1402 if (DEBUG_PROG) {
1403 if (a[0] == 0)
1404 printf("RCP(0)\n");
1405 else if (IS_INF_OR_NAN(a[0]))
1406 printf("RCP(inf)\n");
1407 }
1408 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1409 store_vector4(inst, machine, result);
1410 }
1411 break;
1412 case OPCODE_RET: /* return from subroutine (conditional) */
1413 if (eval_condition(machine, inst)) {
1414 if (machine->StackDepth == 0) {
1415 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1416 }
1417 /* subtract one because of pc++ in the for loop */
1418 pc = machine->CallStack[--machine->StackDepth] - 1;
1419 }
1420 break;
1421 case OPCODE_RFL: /* reflection vector */
1422 {
1423 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1424 fetch_vector4(&inst->SrcReg[0], machine, axis);
1425 fetch_vector4(&inst->SrcReg[1], machine, dir);
1426 tmpW = DOT3(axis, axis);
1427 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1428 result[0] = tmpX * axis[0] - dir[0];
1429 result[1] = tmpX * axis[1] - dir[1];
1430 result[2] = tmpX * axis[2] - dir[2];
1431 /* result[3] is never written! XXX enforce in parser! */
1432 store_vector4(inst, machine, result);
1433 }
1434 break;
1435 case OPCODE_RSQ: /* 1 / sqrt() */
1436 {
1437 GLfloat a[4], result[4];
1438 fetch_vector1(&inst->SrcReg[0], machine, a);
1439 a[0] = FABSF(a[0]);
1440 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1441 store_vector4(inst, machine, result);
1442 if (DEBUG_PROG) {
1443 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1444 }
1445 }
1446 break;
1447 case OPCODE_SCS: /* sine and cos */
1448 {
1449 GLfloat a[4], result[4];
1450 fetch_vector1(&inst->SrcReg[0], machine, a);
1451 result[0] = (GLfloat) cos(a[0]);
1452 result[1] = (GLfloat) sin(a[0]);
1453 result[2] = 0.0; /* undefined! */
1454 result[3] = 0.0; /* undefined! */
1455 store_vector4(inst, machine, result);
1456 }
1457 break;
1458 case OPCODE_SEQ: /* set on equal */
1459 {
1460 GLfloat a[4], b[4], result[4];
1461 fetch_vector4(&inst->SrcReg[0], machine, a);
1462 fetch_vector4(&inst->SrcReg[1], machine, b);
1463 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1464 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1465 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1466 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1467 store_vector4(inst, machine, result);
1468 if (DEBUG_PROG) {
1469 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1470 result[0], result[1], result[2], result[3],
1471 a[0], a[1], a[2], a[3],
1472 b[0], b[1], b[2], b[3]);
1473 }
1474 }
1475 break;
1476 case OPCODE_SFL: /* set false, operands ignored */
1477 {
1478 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1479 store_vector4(inst, machine, result);
1480 }
1481 break;
1482 case OPCODE_SGE: /* set on greater or equal */
1483 {
1484 GLfloat a[4], b[4], result[4];
1485 fetch_vector4(&inst->SrcReg[0], machine, a);
1486 fetch_vector4(&inst->SrcReg[1], machine, b);
1487 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1488 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1489 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1490 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1491 store_vector4(inst, machine, result);
1492 if (DEBUG_PROG) {
1493 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1494 result[0], result[1], result[2], result[3],
1495 a[0], a[1], a[2], a[3],
1496 b[0], b[1], b[2], b[3]);
1497 }
1498 }
1499 break;
1500 case OPCODE_SGT: /* set on greater */
1501 {
1502 GLfloat a[4], b[4], result[4];
1503 fetch_vector4(&inst->SrcReg[0], machine, a);
1504 fetch_vector4(&inst->SrcReg[1], machine, b);
1505 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1506 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1507 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1508 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1509 store_vector4(inst, machine, result);
1510 if (DEBUG_PROG) {
1511 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1512 result[0], result[1], result[2], result[3],
1513 a[0], a[1], a[2], a[3],
1514 b[0], b[1], b[2], b[3]);
1515 }
1516 }
1517 break;
1518 case OPCODE_SIN:
1519 {
1520 GLfloat a[4], result[4];
1521 fetch_vector1(&inst->SrcReg[0], machine, a);
1522 result[0] = result[1] = result[2] = result[3]
1523 = (GLfloat) sin(a[0]);
1524 store_vector4(inst, machine, result);
1525 }
1526 break;
1527 case OPCODE_SLE: /* set on less or equal */
1528 {
1529 GLfloat a[4], b[4], result[4];
1530 fetch_vector4(&inst->SrcReg[0], machine, a);
1531 fetch_vector4(&inst->SrcReg[1], machine, b);
1532 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1533 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1534 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1535 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1536 store_vector4(inst, machine, result);
1537 if (DEBUG_PROG) {
1538 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1539 result[0], result[1], result[2], result[3],
1540 a[0], a[1], a[2], a[3],
1541 b[0], b[1], b[2], b[3]);
1542 }
1543 }
1544 break;
1545 case OPCODE_SLT: /* set on less */
1546 {
1547 GLfloat a[4], b[4], result[4];
1548 fetch_vector4(&inst->SrcReg[0], machine, a);
1549 fetch_vector4(&inst->SrcReg[1], machine, b);
1550 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1551 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1552 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1553 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1554 store_vector4(inst, machine, result);
1555 if (DEBUG_PROG) {
1556 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1557 result[0], result[1], result[2], result[3],
1558 a[0], a[1], a[2], a[3],
1559 b[0], b[1], b[2], b[3]);
1560 }
1561 }
1562 break;
1563 case OPCODE_SNE: /* set on not equal */
1564 {
1565 GLfloat a[4], b[4], result[4];
1566 fetch_vector4(&inst->SrcReg[0], machine, a);
1567 fetch_vector4(&inst->SrcReg[1], machine, b);
1568 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1569 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1570 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1571 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1572 store_vector4(inst, machine, result);
1573 if (DEBUG_PROG) {
1574 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1575 result[0], result[1], result[2], result[3],
1576 a[0], a[1], a[2], a[3],
1577 b[0], b[1], b[2], b[3]);
1578 }
1579 }
1580 break;
1581 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1582 {
1583 GLfloat a[4], result[4];
1584 fetch_vector4(&inst->SrcReg[0], machine, a);
1585 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1586 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1587 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1588 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1589 store_vector4(inst, machine, result);
1590 }
1591 break;
1592 case OPCODE_STR: /* set true, operands ignored */
1593 {
1594 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1595 store_vector4(inst, machine, result);
1596 }
1597 break;
1598 case OPCODE_SUB:
1599 {
1600 GLfloat a[4], b[4], result[4];
1601 fetch_vector4(&inst->SrcReg[0], machine, a);
1602 fetch_vector4(&inst->SrcReg[1], machine, b);
1603 result[0] = a[0] - b[0];
1604 result[1] = a[1] - b[1];
1605 result[2] = a[2] - b[2];
1606 result[3] = a[3] - b[3];
1607 store_vector4(inst, machine, result);
1608 if (DEBUG_PROG) {
1609 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1610 result[0], result[1], result[2], result[3],
1611 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1612 }
1613 }
1614 break;
1615 case OPCODE_SWZ: /* extended swizzle */
1616 {
1617 const struct prog_src_register *source = &inst->SrcReg[0];
1618 const GLfloat *src = get_src_register_pointer(source, machine);
1619 GLfloat result[4];
1620 GLuint i;
1621 for (i = 0; i < 4; i++) {
1622 const GLuint swz = GET_SWZ(source->Swizzle, i);
1623 if (swz == SWIZZLE_ZERO)
1624 result[i] = 0.0;
1625 else if (swz == SWIZZLE_ONE)
1626 result[i] = 1.0;
1627 else {
1628 ASSERT(swz >= 0);
1629 ASSERT(swz <= 3);
1630 result[i] = src[swz];
1631 }
1632 if (source->Negate & (1 << i))
1633 result[i] = -result[i];
1634 }
1635 store_vector4(inst, machine, result);
1636 }
1637 break;
1638 case OPCODE_TEX: /* Both ARB and NV frag prog */
1639 /* Simple texel lookup */
1640 {
1641 GLfloat texcoord[4], color[4];
1642 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1643
1644 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1645
1646 if (DEBUG_PROG) {
1647 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1648 color[0], color[1], color[2], color[3],
1649 inst->TexSrcUnit,
1650 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1651 }
1652 store_vector4(inst, machine, color);
1653 }
1654 break;
1655 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1656 /* Texel lookup with LOD bias */
1657 {
1658 GLfloat texcoord[4], color[4], lodBias;
1659
1660 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1661
1662 /* texcoord[3] is the bias to add to lambda */
1663 lodBias = texcoord[3];
1664
1665 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1666
1667 store_vector4(inst, machine, color);
1668 }
1669 break;
1670 case OPCODE_TXD: /* GL_NV_fragment_program only */
1671 /* Texture lookup w/ partial derivatives for LOD */
1672 {
1673 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1674 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1675 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1676 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1677 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1678 0.0, /* lodBias */
1679 inst->TexSrcUnit, color);
1680 store_vector4(inst, machine, color);
1681 }
1682 break;
1683 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1684 /* Texture lookup w/ projective divide */
1685 {
1686 GLfloat texcoord[4], color[4];
1687
1688 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1689 /* Not so sure about this test - if texcoord[3] is
1690 * zero, we'd probably be fine except for an ASSERT in
1691 * IROUND_POS() which gets triggered by the inf values created.
1692 */
1693 if (texcoord[3] != 0.0) {
1694 texcoord[0] /= texcoord[3];
1695 texcoord[1] /= texcoord[3];
1696 texcoord[2] /= texcoord[3];
1697 }
1698
1699 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1700
1701 store_vector4(inst, machine, color);
1702 }
1703 break;
1704 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1705 /* Texture lookup w/ projective divide, as above, but do not
1706 * do the divide by w if sampling from a cube map.
1707 */
1708 {
1709 GLfloat texcoord[4], color[4];
1710
1711 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1712 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1713 texcoord[3] != 0.0) {
1714 texcoord[0] /= texcoord[3];
1715 texcoord[1] /= texcoord[3];
1716 texcoord[2] /= texcoord[3];
1717 }
1718
1719 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1720
1721 store_vector4(inst, machine, color);
1722 }
1723 break;
1724 case OPCODE_TRUNC: /* truncate toward zero */
1725 {
1726 GLfloat a[4], result[4];
1727 fetch_vector4(&inst->SrcReg[0], machine, a);
1728 result[0] = (GLfloat) (GLint) a[0];
1729 result[1] = (GLfloat) (GLint) a[1];
1730 result[2] = (GLfloat) (GLint) a[2];
1731 result[3] = (GLfloat) (GLint) a[3];
1732 store_vector4(inst, machine, result);
1733 }
1734 break;
1735 case OPCODE_UP2H: /* unpack two 16-bit floats */
1736 {
1737 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1738 GLfloat result[4];
1739 GLushort hx, hy;
1740 hx = raw & 0xffff;
1741 hy = raw >> 16;
1742 result[0] = result[2] = _mesa_half_to_float(hx);
1743 result[1] = result[3] = _mesa_half_to_float(hy);
1744 store_vector4(inst, machine, result);
1745 }
1746 break;
1747 case OPCODE_UP2US: /* unpack two GLushorts */
1748 {
1749 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1750 GLfloat result[4];
1751 GLushort usx, usy;
1752 usx = raw & 0xffff;
1753 usy = raw >> 16;
1754 result[0] = result[2] = usx * (1.0f / 65535.0f);
1755 result[1] = result[3] = usy * (1.0f / 65535.0f);
1756 store_vector4(inst, machine, result);
1757 }
1758 break;
1759 case OPCODE_UP4B: /* unpack four GLbytes */
1760 {
1761 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1762 GLfloat result[4];
1763 result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1764 result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1765 result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1766 result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1767 store_vector4(inst, machine, result);
1768 }
1769 break;
1770 case OPCODE_UP4UB: /* unpack four GLubytes */
1771 {
1772 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1773 GLfloat result[4];
1774 result[0] = ((raw >> 0) & 0xff) / 255.0F;
1775 result[1] = ((raw >> 8) & 0xff) / 255.0F;
1776 result[2] = ((raw >> 16) & 0xff) / 255.0F;
1777 result[3] = ((raw >> 24) & 0xff) / 255.0F;
1778 store_vector4(inst, machine, result);
1779 }
1780 break;
1781 case OPCODE_XOR: /* bitwise XOR */
1782 {
1783 GLuint a[4], b[4], result[4];
1784 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1785 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1786 result[0] = a[0] ^ b[0];
1787 result[1] = a[1] ^ b[1];
1788 result[2] = a[2] ^ b[2];
1789 result[3] = a[3] ^ b[3];
1790 store_vector4ui(inst, machine, result);
1791 }
1792 break;
1793 case OPCODE_XPD: /* cross product */
1794 {
1795 GLfloat a[4], b[4], result[4];
1796 fetch_vector4(&inst->SrcReg[0], machine, a);
1797 fetch_vector4(&inst->SrcReg[1], machine, b);
1798 result[0] = a[1] * b[2] - a[2] * b[1];
1799 result[1] = a[2] * b[0] - a[0] * b[2];
1800 result[2] = a[0] * b[1] - a[1] * b[0];
1801 result[3] = 1.0;
1802 store_vector4(inst, machine, result);
1803 if (DEBUG_PROG) {
1804 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1805 result[0], result[1], result[2], result[3],
1806 a[0], a[1], a[2], b[0], b[1], b[2]);
1807 }
1808 }
1809 break;
1810 case OPCODE_X2D: /* 2-D matrix transform */
1811 {
1812 GLfloat a[4], b[4], c[4], result[4];
1813 fetch_vector4(&inst->SrcReg[0], machine, a);
1814 fetch_vector4(&inst->SrcReg[1], machine, b);
1815 fetch_vector4(&inst->SrcReg[2], machine, c);
1816 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1817 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1818 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1819 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1820 store_vector4(inst, machine, result);
1821 }
1822 break;
1823 case OPCODE_PRINT:
1824 {
1825 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
1826 GLfloat a[4];
1827 fetch_vector4(&inst->SrcReg[0], machine, a);
1828 printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1829 a[0], a[1], a[2], a[3]);
1830 }
1831 else {
1832 printf("%s\n", (const char *) inst->Data);
1833 }
1834 }
1835 break;
1836 case OPCODE_END:
1837 return GL_TRUE;
1838 default:
1839 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1840 inst->Opcode);
1841 return GL_TRUE; /* return value doesn't matter */
1842 }
1843
1844 numExec++;
1845 if (numExec > maxExec) {
1846 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1847 return GL_TRUE;
1848 }
1849
1850 } /* for pc */
1851
1852 return GL_TRUE;
1853 }