650c40f2a4e5dcf0e722a39b325dd011cb17a98d
[mesa.git] / src / mesa / program / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31 /*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38 #include "main/glheader.h"
39 #include "main/colormac.h"
40 #include "main/macros.h"
41 #include "prog_execute.h"
42 #include "prog_instruction.h"
43 #include "prog_parameter.h"
44 #include "prog_print.h"
45 #include "prog_noise.h"
46
47
48 /* debug predicate */
49 #define DEBUG_PROG 0
50
51
52 /**
53 * Set x to positive or negative infinity.
54 */
55 #define SET_POS_INFINITY(x) \
56 do { \
57 fi_type fi; \
58 fi.i = 0x7F800000; \
59 x = fi.f; \
60 } while (0)
61 #define SET_NEG_INFINITY(x) \
62 do { \
63 fi_type fi; \
64 fi.i = 0xFF800000; \
65 x = fi.f; \
66 } while (0)
67
68 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
69
70
71 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
72
73
74 /**
75 * Return a pointer to the 4-element float vector specified by the given
76 * source register.
77 */
78 static inline const GLfloat *
79 get_src_register_pointer(const struct prog_src_register *source,
80 const struct gl_program_machine *machine)
81 {
82 const struct gl_program *prog = machine->CurProgram;
83 GLint reg = source->Index;
84
85 if (source->RelAddr) {
86 /* add address register value to src index/offset */
87 reg += machine->AddressReg[0][0];
88 if (reg < 0) {
89 return ZeroVec;
90 }
91 }
92
93 switch (source->File) {
94 case PROGRAM_TEMPORARY:
95 if (reg >= MAX_PROGRAM_TEMPS)
96 return ZeroVec;
97 return machine->Temporaries[reg];
98
99 case PROGRAM_INPUT:
100 if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
101 if (reg >= VERT_ATTRIB_MAX)
102 return ZeroVec;
103 return machine->VertAttribs[reg];
104 }
105 else {
106 if (reg >= VARYING_SLOT_MAX)
107 return ZeroVec;
108 return machine->Attribs[reg][machine->CurElement];
109 }
110
111 case PROGRAM_OUTPUT:
112 if (reg >= MAX_PROGRAM_OUTPUTS)
113 return ZeroVec;
114 return machine->Outputs[reg];
115
116 case PROGRAM_STATE_VAR:
117 /* Fallthrough */
118 case PROGRAM_CONSTANT:
119 /* Fallthrough */
120 case PROGRAM_UNIFORM:
121 if (reg >= (GLint) prog->Parameters->NumParameters)
122 return ZeroVec;
123 return (GLfloat *) prog->Parameters->ParameterValues[reg];
124
125 case PROGRAM_SYSTEM_VALUE:
126 assert(reg < Elements(machine->SystemValues));
127 return machine->SystemValues[reg];
128
129 default:
130 _mesa_problem(NULL,
131 "Invalid src register file %d in get_src_register_pointer()",
132 source->File);
133 return ZeroVec;
134 }
135 }
136
137
138 /**
139 * Return a pointer to the 4-element float vector specified by the given
140 * destination register.
141 */
142 static inline GLfloat *
143 get_dst_register_pointer(const struct prog_dst_register *dest,
144 struct gl_program_machine *machine)
145 {
146 static GLfloat dummyReg[4];
147 GLint reg = dest->Index;
148
149 if (dest->RelAddr) {
150 /* add address register value to src index/offset */
151 reg += machine->AddressReg[0][0];
152 if (reg < 0) {
153 return dummyReg;
154 }
155 }
156
157 switch (dest->File) {
158 case PROGRAM_TEMPORARY:
159 if (reg >= MAX_PROGRAM_TEMPS)
160 return dummyReg;
161 return machine->Temporaries[reg];
162
163 case PROGRAM_OUTPUT:
164 if (reg >= MAX_PROGRAM_OUTPUTS)
165 return dummyReg;
166 return machine->Outputs[reg];
167
168 default:
169 _mesa_problem(NULL,
170 "Invalid dest register file %d in get_dst_register_pointer()",
171 dest->File);
172 return dummyReg;
173 }
174 }
175
176
177
178 /**
179 * Fetch a 4-element float vector from the given source register.
180 * Apply swizzling and negating as needed.
181 */
182 static void
183 fetch_vector4(const struct prog_src_register *source,
184 const struct gl_program_machine *machine, GLfloat result[4])
185 {
186 const GLfloat *src = get_src_register_pointer(source, machine);
187
188 if (source->Swizzle == SWIZZLE_NOOP) {
189 /* no swizzling */
190 COPY_4V(result, src);
191 }
192 else {
193 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
194 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
195 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
196 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
197 result[0] = src[GET_SWZ(source->Swizzle, 0)];
198 result[1] = src[GET_SWZ(source->Swizzle, 1)];
199 result[2] = src[GET_SWZ(source->Swizzle, 2)];
200 result[3] = src[GET_SWZ(source->Swizzle, 3)];
201 }
202
203 if (source->Abs) {
204 result[0] = FABSF(result[0]);
205 result[1] = FABSF(result[1]);
206 result[2] = FABSF(result[2]);
207 result[3] = FABSF(result[3]);
208 }
209 if (source->Negate) {
210 ASSERT(source->Negate == NEGATE_XYZW);
211 result[0] = -result[0];
212 result[1] = -result[1];
213 result[2] = -result[2];
214 result[3] = -result[3];
215 }
216
217 #ifdef NAN_CHECK
218 assert(!IS_INF_OR_NAN(result[0]));
219 assert(!IS_INF_OR_NAN(result[0]));
220 assert(!IS_INF_OR_NAN(result[0]));
221 assert(!IS_INF_OR_NAN(result[0]));
222 #endif
223 }
224
225
226 /**
227 * Fetch the derivative with respect to X or Y for the given register.
228 * XXX this currently only works for fragment program input attribs.
229 */
230 static void
231 fetch_vector4_deriv(struct gl_context * ctx,
232 const struct prog_src_register *source,
233 const struct gl_program_machine *machine,
234 char xOrY, GLfloat result[4])
235 {
236 if (source->File == PROGRAM_INPUT &&
237 source->Index < (GLint) machine->NumDeriv) {
238 const GLint col = machine->CurElement;
239 const GLfloat w = machine->Attribs[VARYING_SLOT_POS][col][3];
240 const GLfloat invQ = 1.0f / w;
241 GLfloat deriv[4];
242
243 if (xOrY == 'X') {
244 deriv[0] = machine->DerivX[source->Index][0] * invQ;
245 deriv[1] = machine->DerivX[source->Index][1] * invQ;
246 deriv[2] = machine->DerivX[source->Index][2] * invQ;
247 deriv[3] = machine->DerivX[source->Index][3] * invQ;
248 }
249 else {
250 deriv[0] = machine->DerivY[source->Index][0] * invQ;
251 deriv[1] = machine->DerivY[source->Index][1] * invQ;
252 deriv[2] = machine->DerivY[source->Index][2] * invQ;
253 deriv[3] = machine->DerivY[source->Index][3] * invQ;
254 }
255
256 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
257 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
258 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
259 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
260
261 if (source->Abs) {
262 result[0] = FABSF(result[0]);
263 result[1] = FABSF(result[1]);
264 result[2] = FABSF(result[2]);
265 result[3] = FABSF(result[3]);
266 }
267 if (source->Negate) {
268 ASSERT(source->Negate == NEGATE_XYZW);
269 result[0] = -result[0];
270 result[1] = -result[1];
271 result[2] = -result[2];
272 result[3] = -result[3];
273 }
274 }
275 else {
276 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
277 }
278 }
279
280
281 /**
282 * As above, but only return result[0] element.
283 */
284 static void
285 fetch_vector1(const struct prog_src_register *source,
286 const struct gl_program_machine *machine, GLfloat result[4])
287 {
288 const GLfloat *src = get_src_register_pointer(source, machine);
289
290 result[0] = src[GET_SWZ(source->Swizzle, 0)];
291
292 if (source->Abs) {
293 result[0] = FABSF(result[0]);
294 }
295 if (source->Negate) {
296 result[0] = -result[0];
297 }
298 }
299
300
301 static GLuint
302 fetch_vector1ui(const struct prog_src_register *source,
303 const struct gl_program_machine *machine)
304 {
305 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
306 return src[GET_SWZ(source->Swizzle, 0)];
307 }
308
309
310 /**
311 * Fetch texel from texture. Use partial derivatives when possible.
312 */
313 static inline void
314 fetch_texel(struct gl_context *ctx,
315 const struct gl_program_machine *machine,
316 const struct prog_instruction *inst,
317 const GLfloat texcoord[4], GLfloat lodBias,
318 GLfloat color[4])
319 {
320 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
321
322 /* Note: we only have the right derivatives for fragment input attribs.
323 */
324 if (machine->NumDeriv > 0 &&
325 inst->SrcReg[0].File == PROGRAM_INPUT &&
326 inst->SrcReg[0].Index == VARYING_SLOT_TEX0 + inst->TexSrcUnit) {
327 /* simple texture fetch for which we should have derivatives */
328 GLuint attr = inst->SrcReg[0].Index;
329 machine->FetchTexelDeriv(ctx, texcoord,
330 machine->DerivX[attr],
331 machine->DerivY[attr],
332 lodBias, unit, color);
333 }
334 else {
335 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
336 }
337 }
338
339
340 /**
341 * Test value against zero and return GT, LT, EQ or UN if NaN.
342 */
343 static inline GLuint
344 generate_cc(float value)
345 {
346 if (value != value)
347 return COND_UN; /* NaN */
348 if (value > 0.0F)
349 return COND_GT;
350 if (value < 0.0F)
351 return COND_LT;
352 return COND_EQ;
353 }
354
355
356 /**
357 * Test if the ccMaskRule is satisfied by the given condition code.
358 * Used to mask destination writes according to the current condition code.
359 */
360 static inline GLboolean
361 test_cc(GLuint condCode, GLuint ccMaskRule)
362 {
363 switch (ccMaskRule) {
364 case COND_EQ: return (condCode == COND_EQ);
365 case COND_NE: return (condCode != COND_EQ);
366 case COND_LT: return (condCode == COND_LT);
367 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
368 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
369 case COND_GT: return (condCode == COND_GT);
370 case COND_TR: return GL_TRUE;
371 case COND_FL: return GL_FALSE;
372 default: return GL_TRUE;
373 }
374 }
375
376
377 /**
378 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
379 * or GL_FALSE to indicate result.
380 */
381 static inline GLboolean
382 eval_condition(const struct gl_program_machine *machine,
383 const struct prog_instruction *inst)
384 {
385 const GLuint swizzle = inst->DstReg.CondSwizzle;
386 const GLuint condMask = inst->DstReg.CondMask;
387 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
388 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
389 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
390 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
391 return GL_TRUE;
392 }
393 else {
394 return GL_FALSE;
395 }
396 }
397
398
399
400 /**
401 * Store 4 floats into a register. Observe the instructions saturate and
402 * set-condition-code flags.
403 */
404 static void
405 store_vector4(const struct prog_instruction *inst,
406 struct gl_program_machine *machine, const GLfloat value[4])
407 {
408 const struct prog_dst_register *dstReg = &(inst->DstReg);
409 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
410 GLuint writeMask = dstReg->WriteMask;
411 GLfloat clampedValue[4];
412 GLfloat *dst = get_dst_register_pointer(dstReg, machine);
413
414 #if 0
415 if (value[0] > 1.0e10 ||
416 IS_INF_OR_NAN(value[0]) ||
417 IS_INF_OR_NAN(value[1]) ||
418 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
419 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
420 #endif
421
422 if (clamp) {
423 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
424 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
425 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
426 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
427 value = clampedValue;
428 }
429
430 if (dstReg->CondMask != COND_TR) {
431 /* condition codes may turn off some writes */
432 if (writeMask & WRITEMASK_X) {
433 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
434 dstReg->CondMask))
435 writeMask &= ~WRITEMASK_X;
436 }
437 if (writeMask & WRITEMASK_Y) {
438 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
439 dstReg->CondMask))
440 writeMask &= ~WRITEMASK_Y;
441 }
442 if (writeMask & WRITEMASK_Z) {
443 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
444 dstReg->CondMask))
445 writeMask &= ~WRITEMASK_Z;
446 }
447 if (writeMask & WRITEMASK_W) {
448 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
449 dstReg->CondMask))
450 writeMask &= ~WRITEMASK_W;
451 }
452 }
453
454 #ifdef NAN_CHECK
455 assert(!IS_INF_OR_NAN(value[0]));
456 assert(!IS_INF_OR_NAN(value[0]));
457 assert(!IS_INF_OR_NAN(value[0]));
458 assert(!IS_INF_OR_NAN(value[0]));
459 #endif
460
461 if (writeMask & WRITEMASK_X)
462 dst[0] = value[0];
463 if (writeMask & WRITEMASK_Y)
464 dst[1] = value[1];
465 if (writeMask & WRITEMASK_Z)
466 dst[2] = value[2];
467 if (writeMask & WRITEMASK_W)
468 dst[3] = value[3];
469
470 if (inst->CondUpdate) {
471 if (writeMask & WRITEMASK_X)
472 machine->CondCodes[0] = generate_cc(value[0]);
473 if (writeMask & WRITEMASK_Y)
474 machine->CondCodes[1] = generate_cc(value[1]);
475 if (writeMask & WRITEMASK_Z)
476 machine->CondCodes[2] = generate_cc(value[2]);
477 if (writeMask & WRITEMASK_W)
478 machine->CondCodes[3] = generate_cc(value[3]);
479 #if DEBUG_PROG
480 printf("CondCodes=(%s,%s,%s,%s) for:\n",
481 _mesa_condcode_string(machine->CondCodes[0]),
482 _mesa_condcode_string(machine->CondCodes[1]),
483 _mesa_condcode_string(machine->CondCodes[2]),
484 _mesa_condcode_string(machine->CondCodes[3]));
485 #endif
486 }
487 }
488
489
490 /**
491 * Store 4 uints into a register. Observe the set-condition-code flags.
492 */
493 static void
494 store_vector4ui(const struct prog_instruction *inst,
495 struct gl_program_machine *machine, const GLuint value[4])
496 {
497 const struct prog_dst_register *dstReg = &(inst->DstReg);
498 GLuint writeMask = dstReg->WriteMask;
499 GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
500
501 if (dstReg->CondMask != COND_TR) {
502 /* condition codes may turn off some writes */
503 if (writeMask & WRITEMASK_X) {
504 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
505 dstReg->CondMask))
506 writeMask &= ~WRITEMASK_X;
507 }
508 if (writeMask & WRITEMASK_Y) {
509 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
510 dstReg->CondMask))
511 writeMask &= ~WRITEMASK_Y;
512 }
513 if (writeMask & WRITEMASK_Z) {
514 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
515 dstReg->CondMask))
516 writeMask &= ~WRITEMASK_Z;
517 }
518 if (writeMask & WRITEMASK_W) {
519 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
520 dstReg->CondMask))
521 writeMask &= ~WRITEMASK_W;
522 }
523 }
524
525 if (writeMask & WRITEMASK_X)
526 dst[0] = value[0];
527 if (writeMask & WRITEMASK_Y)
528 dst[1] = value[1];
529 if (writeMask & WRITEMASK_Z)
530 dst[2] = value[2];
531 if (writeMask & WRITEMASK_W)
532 dst[3] = value[3];
533
534 if (inst->CondUpdate) {
535 if (writeMask & WRITEMASK_X)
536 machine->CondCodes[0] = generate_cc((float)value[0]);
537 if (writeMask & WRITEMASK_Y)
538 machine->CondCodes[1] = generate_cc((float)value[1]);
539 if (writeMask & WRITEMASK_Z)
540 machine->CondCodes[2] = generate_cc((float)value[2]);
541 if (writeMask & WRITEMASK_W)
542 machine->CondCodes[3] = generate_cc((float)value[3]);
543 #if DEBUG_PROG
544 printf("CondCodes=(%s,%s,%s,%s) for:\n",
545 _mesa_condcode_string(machine->CondCodes[0]),
546 _mesa_condcode_string(machine->CondCodes[1]),
547 _mesa_condcode_string(machine->CondCodes[2]),
548 _mesa_condcode_string(machine->CondCodes[3]));
549 #endif
550 }
551 }
552
553
554
555 /**
556 * Execute the given vertex/fragment program.
557 *
558 * \param ctx rendering context
559 * \param program the program to execute
560 * \param machine machine state (must be initialized)
561 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
562 */
563 GLboolean
564 _mesa_execute_program(struct gl_context * ctx,
565 const struct gl_program *program,
566 struct gl_program_machine *machine)
567 {
568 const GLuint numInst = program->NumInstructions;
569 const GLuint maxExec = 65536;
570 GLuint pc, numExec = 0;
571
572 machine->CurProgram = program;
573
574 if (DEBUG_PROG) {
575 printf("execute program %u --------------------\n", program->Id);
576 }
577
578 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
579 machine->EnvParams = ctx->VertexProgram.Parameters;
580 }
581 else {
582 machine->EnvParams = ctx->FragmentProgram.Parameters;
583 }
584
585 for (pc = 0; pc < numInst; pc++) {
586 const struct prog_instruction *inst = program->Instructions + pc;
587
588 if (DEBUG_PROG) {
589 _mesa_print_instruction(inst);
590 }
591
592 switch (inst->Opcode) {
593 case OPCODE_ABS:
594 {
595 GLfloat a[4], result[4];
596 fetch_vector4(&inst->SrcReg[0], machine, a);
597 result[0] = FABSF(a[0]);
598 result[1] = FABSF(a[1]);
599 result[2] = FABSF(a[2]);
600 result[3] = FABSF(a[3]);
601 store_vector4(inst, machine, result);
602 }
603 break;
604 case OPCODE_ADD:
605 {
606 GLfloat a[4], b[4], result[4];
607 fetch_vector4(&inst->SrcReg[0], machine, a);
608 fetch_vector4(&inst->SrcReg[1], machine, b);
609 result[0] = a[0] + b[0];
610 result[1] = a[1] + b[1];
611 result[2] = a[2] + b[2];
612 result[3] = a[3] + b[3];
613 store_vector4(inst, machine, result);
614 if (DEBUG_PROG) {
615 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
616 result[0], result[1], result[2], result[3],
617 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
618 }
619 }
620 break;
621 case OPCODE_ARL:
622 {
623 GLfloat t[4];
624 fetch_vector4(&inst->SrcReg[0], machine, t);
625 machine->AddressReg[0][0] = IFLOOR(t[0]);
626 if (DEBUG_PROG) {
627 printf("ARL %d\n", machine->AddressReg[0][0]);
628 }
629 }
630 break;
631 case OPCODE_BGNLOOP:
632 /* no-op */
633 ASSERT(program->Instructions[inst->BranchTarget].Opcode
634 == OPCODE_ENDLOOP);
635 break;
636 case OPCODE_ENDLOOP:
637 /* subtract 1 here since pc is incremented by for(pc) loop */
638 ASSERT(program->Instructions[inst->BranchTarget].Opcode
639 == OPCODE_BGNLOOP);
640 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
641 break;
642 case OPCODE_BGNSUB: /* begin subroutine */
643 break;
644 case OPCODE_ENDSUB: /* end subroutine */
645 break;
646 case OPCODE_BRK: /* break out of loop (conditional) */
647 ASSERT(program->Instructions[inst->BranchTarget].Opcode
648 == OPCODE_ENDLOOP);
649 if (eval_condition(machine, inst)) {
650 /* break out of loop */
651 /* pc++ at end of for-loop will put us after the ENDLOOP inst */
652 pc = inst->BranchTarget;
653 }
654 break;
655 case OPCODE_CONT: /* continue loop (conditional) */
656 ASSERT(program->Instructions[inst->BranchTarget].Opcode
657 == OPCODE_ENDLOOP);
658 if (eval_condition(machine, inst)) {
659 /* continue at ENDLOOP */
660 /* Subtract 1 here since we'll do pc++ at end of for-loop */
661 pc = inst->BranchTarget - 1;
662 }
663 break;
664 case OPCODE_CAL: /* Call subroutine (conditional) */
665 if (eval_condition(machine, inst)) {
666 /* call the subroutine */
667 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
668 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
669 }
670 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
671 /* Subtract 1 here since we'll do pc++ at end of for-loop */
672 pc = inst->BranchTarget - 1;
673 }
674 break;
675 case OPCODE_CMP:
676 {
677 GLfloat a[4], b[4], c[4], result[4];
678 fetch_vector4(&inst->SrcReg[0], machine, a);
679 fetch_vector4(&inst->SrcReg[1], machine, b);
680 fetch_vector4(&inst->SrcReg[2], machine, c);
681 result[0] = a[0] < 0.0F ? b[0] : c[0];
682 result[1] = a[1] < 0.0F ? b[1] : c[1];
683 result[2] = a[2] < 0.0F ? b[2] : c[2];
684 result[3] = a[3] < 0.0F ? b[3] : c[3];
685 store_vector4(inst, machine, result);
686 if (DEBUG_PROG) {
687 printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
688 result[0], result[1], result[2], result[3],
689 a[0], a[1], a[2], a[3],
690 b[0], b[1], b[2], b[3],
691 c[0], c[1], c[2], c[3]);
692 }
693 }
694 break;
695 case OPCODE_COS:
696 {
697 GLfloat a[4], result[4];
698 fetch_vector1(&inst->SrcReg[0], machine, a);
699 result[0] = result[1] = result[2] = result[3]
700 = (GLfloat) cos(a[0]);
701 store_vector4(inst, machine, result);
702 }
703 break;
704 case OPCODE_DDX: /* Partial derivative with respect to X */
705 {
706 GLfloat result[4];
707 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
708 'X', result);
709 store_vector4(inst, machine, result);
710 }
711 break;
712 case OPCODE_DDY: /* Partial derivative with respect to Y */
713 {
714 GLfloat result[4];
715 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
716 'Y', result);
717 store_vector4(inst, machine, result);
718 }
719 break;
720 case OPCODE_DP2:
721 {
722 GLfloat a[4], b[4], result[4];
723 fetch_vector4(&inst->SrcReg[0], machine, a);
724 fetch_vector4(&inst->SrcReg[1], machine, b);
725 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
726 store_vector4(inst, machine, result);
727 if (DEBUG_PROG) {
728 printf("DP2 %g = (%g %g) . (%g %g)\n",
729 result[0], a[0], a[1], b[0], b[1]);
730 }
731 }
732 break;
733 case OPCODE_DP3:
734 {
735 GLfloat a[4], b[4], result[4];
736 fetch_vector4(&inst->SrcReg[0], machine, a);
737 fetch_vector4(&inst->SrcReg[1], machine, b);
738 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
739 store_vector4(inst, machine, result);
740 if (DEBUG_PROG) {
741 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
742 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
743 }
744 }
745 break;
746 case OPCODE_DP4:
747 {
748 GLfloat a[4], b[4], result[4];
749 fetch_vector4(&inst->SrcReg[0], machine, a);
750 fetch_vector4(&inst->SrcReg[1], machine, b);
751 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
752 store_vector4(inst, machine, result);
753 if (DEBUG_PROG) {
754 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
755 result[0], a[0], a[1], a[2], a[3],
756 b[0], b[1], b[2], b[3]);
757 }
758 }
759 break;
760 case OPCODE_DPH:
761 {
762 GLfloat a[4], b[4], result[4];
763 fetch_vector4(&inst->SrcReg[0], machine, a);
764 fetch_vector4(&inst->SrcReg[1], machine, b);
765 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
766 store_vector4(inst, machine, result);
767 }
768 break;
769 case OPCODE_DST: /* Distance vector */
770 {
771 GLfloat a[4], b[4], result[4];
772 fetch_vector4(&inst->SrcReg[0], machine, a);
773 fetch_vector4(&inst->SrcReg[1], machine, b);
774 result[0] = 1.0F;
775 result[1] = a[1] * b[1];
776 result[2] = a[2];
777 result[3] = b[3];
778 store_vector4(inst, machine, result);
779 }
780 break;
781 case OPCODE_EXP:
782 {
783 GLfloat t[4], q[4], floor_t0;
784 fetch_vector1(&inst->SrcReg[0], machine, t);
785 floor_t0 = FLOORF(t[0]);
786 if (floor_t0 > FLT_MAX_EXP) {
787 SET_POS_INFINITY(q[0]);
788 SET_POS_INFINITY(q[2]);
789 }
790 else if (floor_t0 < FLT_MIN_EXP) {
791 q[0] = 0.0F;
792 q[2] = 0.0F;
793 }
794 else {
795 q[0] = LDEXPF(1.0, (int) floor_t0);
796 /* Note: GL_NV_vertex_program expects
797 * result.z = result.x * APPX(result.y)
798 * We do what the ARB extension says.
799 */
800 q[2] = (GLfloat) pow(2.0, t[0]);
801 }
802 q[1] = t[0] - floor_t0;
803 q[3] = 1.0F;
804 store_vector4( inst, machine, q );
805 }
806 break;
807 case OPCODE_EX2: /* Exponential base 2 */
808 {
809 GLfloat a[4], result[4], val;
810 fetch_vector1(&inst->SrcReg[0], machine, a);
811 val = (GLfloat) pow(2.0, a[0]);
812 /*
813 if (IS_INF_OR_NAN(val))
814 val = 1.0e10;
815 */
816 result[0] = result[1] = result[2] = result[3] = val;
817 store_vector4(inst, machine, result);
818 }
819 break;
820 case OPCODE_FLR:
821 {
822 GLfloat a[4], result[4];
823 fetch_vector4(&inst->SrcReg[0], machine, a);
824 result[0] = FLOORF(a[0]);
825 result[1] = FLOORF(a[1]);
826 result[2] = FLOORF(a[2]);
827 result[3] = FLOORF(a[3]);
828 store_vector4(inst, machine, result);
829 }
830 break;
831 case OPCODE_FRC:
832 {
833 GLfloat a[4], result[4];
834 fetch_vector4(&inst->SrcReg[0], machine, a);
835 result[0] = a[0] - FLOORF(a[0]);
836 result[1] = a[1] - FLOORF(a[1]);
837 result[2] = a[2] - FLOORF(a[2]);
838 result[3] = a[3] - FLOORF(a[3]);
839 store_vector4(inst, machine, result);
840 }
841 break;
842 case OPCODE_IF:
843 {
844 GLboolean cond;
845 ASSERT(program->Instructions[inst->BranchTarget].Opcode
846 == OPCODE_ELSE ||
847 program->Instructions[inst->BranchTarget].Opcode
848 == OPCODE_ENDIF);
849 /* eval condition */
850 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
851 GLfloat a[4];
852 fetch_vector1(&inst->SrcReg[0], machine, a);
853 cond = (a[0] != 0.0);
854 }
855 else {
856 cond = eval_condition(machine, inst);
857 }
858 if (DEBUG_PROG) {
859 printf("IF: %d\n", cond);
860 }
861 /* do if/else */
862 if (cond) {
863 /* do if-clause (just continue execution) */
864 }
865 else {
866 /* go to the instruction after ELSE or ENDIF */
867 assert(inst->BranchTarget >= 0);
868 pc = inst->BranchTarget;
869 }
870 }
871 break;
872 case OPCODE_ELSE:
873 /* goto ENDIF */
874 ASSERT(program->Instructions[inst->BranchTarget].Opcode
875 == OPCODE_ENDIF);
876 assert(inst->BranchTarget >= 0);
877 pc = inst->BranchTarget;
878 break;
879 case OPCODE_ENDIF:
880 /* nothing */
881 break;
882 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
883 if (eval_condition(machine, inst)) {
884 return GL_FALSE;
885 }
886 break;
887 case OPCODE_KIL: /* ARB_f_p only */
888 {
889 GLfloat a[4];
890 fetch_vector4(&inst->SrcReg[0], machine, a);
891 if (DEBUG_PROG) {
892 printf("KIL if (%g %g %g %g) <= 0.0\n",
893 a[0], a[1], a[2], a[3]);
894 }
895
896 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
897 return GL_FALSE;
898 }
899 }
900 break;
901 case OPCODE_LG2: /* log base 2 */
902 {
903 GLfloat a[4], result[4], val;
904 fetch_vector1(&inst->SrcReg[0], machine, a);
905 /* The fast LOG2 macro doesn't meet the precision requirements.
906 */
907 if (a[0] == 0.0F) {
908 val = -FLT_MAX;
909 }
910 else {
911 val = (float)(log(a[0]) * 1.442695F);
912 }
913 result[0] = result[1] = result[2] = result[3] = val;
914 store_vector4(inst, machine, result);
915 }
916 break;
917 case OPCODE_LIT:
918 {
919 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
920 GLfloat a[4], result[4];
921 fetch_vector4(&inst->SrcReg[0], machine, a);
922 a[0] = MAX2(a[0], 0.0F);
923 a[1] = MAX2(a[1], 0.0F);
924 /* XXX ARB version clamps a[3], NV version doesn't */
925 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
926 result[0] = 1.0F;
927 result[1] = a[0];
928 /* XXX we could probably just use pow() here */
929 if (a[0] > 0.0F) {
930 if (a[1] == 0.0 && a[3] == 0.0)
931 result[2] = 1.0F;
932 else
933 result[2] = (GLfloat) pow(a[1], a[3]);
934 }
935 else {
936 result[2] = 0.0F;
937 }
938 result[3] = 1.0F;
939 store_vector4(inst, machine, result);
940 if (DEBUG_PROG) {
941 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
942 result[0], result[1], result[2], result[3],
943 a[0], a[1], a[2], a[3]);
944 }
945 }
946 break;
947 case OPCODE_LOG:
948 {
949 GLfloat t[4], q[4], abs_t0;
950 fetch_vector1(&inst->SrcReg[0], machine, t);
951 abs_t0 = FABSF(t[0]);
952 if (abs_t0 != 0.0F) {
953 if (IS_INF_OR_NAN(abs_t0))
954 {
955 SET_POS_INFINITY(q[0]);
956 q[1] = 1.0F;
957 SET_POS_INFINITY(q[2]);
958 }
959 else {
960 int exponent;
961 GLfloat mantissa = FREXPF(t[0], &exponent);
962 q[0] = (GLfloat) (exponent - 1);
963 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
964
965 /* The fast LOG2 macro doesn't meet the precision
966 * requirements.
967 */
968 q[2] = (float)(log(t[0]) * 1.442695F);
969 }
970 }
971 else {
972 SET_NEG_INFINITY(q[0]);
973 q[1] = 1.0F;
974 SET_NEG_INFINITY(q[2]);
975 }
976 q[3] = 1.0;
977 store_vector4(inst, machine, q);
978 }
979 break;
980 case OPCODE_LRP:
981 {
982 GLfloat a[4], b[4], c[4], result[4];
983 fetch_vector4(&inst->SrcReg[0], machine, a);
984 fetch_vector4(&inst->SrcReg[1], machine, b);
985 fetch_vector4(&inst->SrcReg[2], machine, c);
986 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
987 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
988 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
989 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
990 store_vector4(inst, machine, result);
991 if (DEBUG_PROG) {
992 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
993 "(%g %g %g %g), (%g %g %g %g)\n",
994 result[0], result[1], result[2], result[3],
995 a[0], a[1], a[2], a[3],
996 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
997 }
998 }
999 break;
1000 case OPCODE_MAD:
1001 {
1002 GLfloat a[4], b[4], c[4], result[4];
1003 fetch_vector4(&inst->SrcReg[0], machine, a);
1004 fetch_vector4(&inst->SrcReg[1], machine, b);
1005 fetch_vector4(&inst->SrcReg[2], machine, c);
1006 result[0] = a[0] * b[0] + c[0];
1007 result[1] = a[1] * b[1] + c[1];
1008 result[2] = a[2] * b[2] + c[2];
1009 result[3] = a[3] * b[3] + c[3];
1010 store_vector4(inst, machine, result);
1011 if (DEBUG_PROG) {
1012 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1013 "(%g %g %g %g) + (%g %g %g %g)\n",
1014 result[0], result[1], result[2], result[3],
1015 a[0], a[1], a[2], a[3],
1016 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1017 }
1018 }
1019 break;
1020 case OPCODE_MAX:
1021 {
1022 GLfloat a[4], b[4], result[4];
1023 fetch_vector4(&inst->SrcReg[0], machine, a);
1024 fetch_vector4(&inst->SrcReg[1], machine, b);
1025 result[0] = MAX2(a[0], b[0]);
1026 result[1] = MAX2(a[1], b[1]);
1027 result[2] = MAX2(a[2], b[2]);
1028 result[3] = MAX2(a[3], b[3]);
1029 store_vector4(inst, machine, result);
1030 if (DEBUG_PROG) {
1031 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1032 result[0], result[1], result[2], result[3],
1033 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1034 }
1035 }
1036 break;
1037 case OPCODE_MIN:
1038 {
1039 GLfloat a[4], b[4], result[4];
1040 fetch_vector4(&inst->SrcReg[0], machine, a);
1041 fetch_vector4(&inst->SrcReg[1], machine, b);
1042 result[0] = MIN2(a[0], b[0]);
1043 result[1] = MIN2(a[1], b[1]);
1044 result[2] = MIN2(a[2], b[2]);
1045 result[3] = MIN2(a[3], b[3]);
1046 store_vector4(inst, machine, result);
1047 }
1048 break;
1049 case OPCODE_MOV:
1050 {
1051 GLfloat result[4];
1052 fetch_vector4(&inst->SrcReg[0], machine, result);
1053 store_vector4(inst, machine, result);
1054 if (DEBUG_PROG) {
1055 printf("MOV (%g %g %g %g)\n",
1056 result[0], result[1], result[2], result[3]);
1057 }
1058 }
1059 break;
1060 case OPCODE_MUL:
1061 {
1062 GLfloat a[4], b[4], result[4];
1063 fetch_vector4(&inst->SrcReg[0], machine, a);
1064 fetch_vector4(&inst->SrcReg[1], machine, b);
1065 result[0] = a[0] * b[0];
1066 result[1] = a[1] * b[1];
1067 result[2] = a[2] * b[2];
1068 result[3] = a[3] * b[3];
1069 store_vector4(inst, machine, result);
1070 if (DEBUG_PROG) {
1071 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1072 result[0], result[1], result[2], result[3],
1073 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1074 }
1075 }
1076 break;
1077 case OPCODE_NOISE1:
1078 {
1079 GLfloat a[4], result[4];
1080 fetch_vector1(&inst->SrcReg[0], machine, a);
1081 result[0] =
1082 result[1] =
1083 result[2] =
1084 result[3] = _mesa_noise1(a[0]);
1085 store_vector4(inst, machine, result);
1086 }
1087 break;
1088 case OPCODE_NOISE2:
1089 {
1090 GLfloat a[4], result[4];
1091 fetch_vector4(&inst->SrcReg[0], machine, a);
1092 result[0] =
1093 result[1] =
1094 result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1095 store_vector4(inst, machine, result);
1096 }
1097 break;
1098 case OPCODE_NOISE3:
1099 {
1100 GLfloat a[4], result[4];
1101 fetch_vector4(&inst->SrcReg[0], machine, a);
1102 result[0] =
1103 result[1] =
1104 result[2] =
1105 result[3] = _mesa_noise3(a[0], a[1], a[2]);
1106 store_vector4(inst, machine, result);
1107 }
1108 break;
1109 case OPCODE_NOISE4:
1110 {
1111 GLfloat a[4], result[4];
1112 fetch_vector4(&inst->SrcReg[0], machine, a);
1113 result[0] =
1114 result[1] =
1115 result[2] =
1116 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1117 store_vector4(inst, machine, result);
1118 }
1119 break;
1120 case OPCODE_NOP:
1121 break;
1122 case OPCODE_POW:
1123 {
1124 GLfloat a[4], b[4], result[4];
1125 fetch_vector1(&inst->SrcReg[0], machine, a);
1126 fetch_vector1(&inst->SrcReg[1], machine, b);
1127 result[0] = result[1] = result[2] = result[3]
1128 = (GLfloat) pow(a[0], b[0]);
1129 store_vector4(inst, machine, result);
1130 }
1131 break;
1132
1133 case OPCODE_RCP:
1134 {
1135 GLfloat a[4], result[4];
1136 fetch_vector1(&inst->SrcReg[0], machine, a);
1137 if (DEBUG_PROG) {
1138 if (a[0] == 0)
1139 printf("RCP(0)\n");
1140 else if (IS_INF_OR_NAN(a[0]))
1141 printf("RCP(inf)\n");
1142 }
1143 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1144 store_vector4(inst, machine, result);
1145 }
1146 break;
1147 case OPCODE_RET: /* return from subroutine (conditional) */
1148 if (eval_condition(machine, inst)) {
1149 if (machine->StackDepth == 0) {
1150 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1151 }
1152 /* subtract one because of pc++ in the for loop */
1153 pc = machine->CallStack[--machine->StackDepth] - 1;
1154 }
1155 break;
1156 case OPCODE_RSQ: /* 1 / sqrt() */
1157 {
1158 GLfloat a[4], result[4];
1159 fetch_vector1(&inst->SrcReg[0], machine, a);
1160 a[0] = FABSF(a[0]);
1161 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1162 store_vector4(inst, machine, result);
1163 if (DEBUG_PROG) {
1164 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1165 }
1166 }
1167 break;
1168 case OPCODE_SCS: /* sine and cos */
1169 {
1170 GLfloat a[4], result[4];
1171 fetch_vector1(&inst->SrcReg[0], machine, a);
1172 result[0] = (GLfloat) cos(a[0]);
1173 result[1] = (GLfloat) sin(a[0]);
1174 result[2] = 0.0; /* undefined! */
1175 result[3] = 0.0; /* undefined! */
1176 store_vector4(inst, machine, result);
1177 }
1178 break;
1179 case OPCODE_SEQ: /* set on equal */
1180 {
1181 GLfloat a[4], b[4], result[4];
1182 fetch_vector4(&inst->SrcReg[0], machine, a);
1183 fetch_vector4(&inst->SrcReg[1], machine, b);
1184 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1185 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1186 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1187 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1188 store_vector4(inst, machine, result);
1189 if (DEBUG_PROG) {
1190 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1191 result[0], result[1], result[2], result[3],
1192 a[0], a[1], a[2], a[3],
1193 b[0], b[1], b[2], b[3]);
1194 }
1195 }
1196 break;
1197 case OPCODE_SGE: /* set on greater or equal */
1198 {
1199 GLfloat a[4], b[4], result[4];
1200 fetch_vector4(&inst->SrcReg[0], machine, a);
1201 fetch_vector4(&inst->SrcReg[1], machine, b);
1202 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1203 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1204 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1205 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1206 store_vector4(inst, machine, result);
1207 if (DEBUG_PROG) {
1208 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1209 result[0], result[1], result[2], result[3],
1210 a[0], a[1], a[2], a[3],
1211 b[0], b[1], b[2], b[3]);
1212 }
1213 }
1214 break;
1215 case OPCODE_SGT: /* set on greater */
1216 {
1217 GLfloat a[4], b[4], result[4];
1218 fetch_vector4(&inst->SrcReg[0], machine, a);
1219 fetch_vector4(&inst->SrcReg[1], machine, b);
1220 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1221 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1222 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1223 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1224 store_vector4(inst, machine, result);
1225 if (DEBUG_PROG) {
1226 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1227 result[0], result[1], result[2], result[3],
1228 a[0], a[1], a[2], a[3],
1229 b[0], b[1], b[2], b[3]);
1230 }
1231 }
1232 break;
1233 case OPCODE_SIN:
1234 {
1235 GLfloat a[4], result[4];
1236 fetch_vector1(&inst->SrcReg[0], machine, a);
1237 result[0] = result[1] = result[2] = result[3]
1238 = (GLfloat) sin(a[0]);
1239 store_vector4(inst, machine, result);
1240 }
1241 break;
1242 case OPCODE_SLE: /* set on less or equal */
1243 {
1244 GLfloat a[4], b[4], result[4];
1245 fetch_vector4(&inst->SrcReg[0], machine, a);
1246 fetch_vector4(&inst->SrcReg[1], machine, b);
1247 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1248 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1249 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1250 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1251 store_vector4(inst, machine, result);
1252 if (DEBUG_PROG) {
1253 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1254 result[0], result[1], result[2], result[3],
1255 a[0], a[1], a[2], a[3],
1256 b[0], b[1], b[2], b[3]);
1257 }
1258 }
1259 break;
1260 case OPCODE_SLT: /* set on less */
1261 {
1262 GLfloat a[4], b[4], result[4];
1263 fetch_vector4(&inst->SrcReg[0], machine, a);
1264 fetch_vector4(&inst->SrcReg[1], machine, b);
1265 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1266 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1267 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1268 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1269 store_vector4(inst, machine, result);
1270 if (DEBUG_PROG) {
1271 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1272 result[0], result[1], result[2], result[3],
1273 a[0], a[1], a[2], a[3],
1274 b[0], b[1], b[2], b[3]);
1275 }
1276 }
1277 break;
1278 case OPCODE_SNE: /* set on not equal */
1279 {
1280 GLfloat a[4], b[4], result[4];
1281 fetch_vector4(&inst->SrcReg[0], machine, a);
1282 fetch_vector4(&inst->SrcReg[1], machine, b);
1283 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1284 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1285 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1286 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1287 store_vector4(inst, machine, result);
1288 if (DEBUG_PROG) {
1289 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1290 result[0], result[1], result[2], result[3],
1291 a[0], a[1], a[2], a[3],
1292 b[0], b[1], b[2], b[3]);
1293 }
1294 }
1295 break;
1296 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1297 {
1298 GLfloat a[4], result[4];
1299 fetch_vector4(&inst->SrcReg[0], machine, a);
1300 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1301 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1302 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1303 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1304 store_vector4(inst, machine, result);
1305 }
1306 break;
1307 case OPCODE_SUB:
1308 {
1309 GLfloat a[4], b[4], result[4];
1310 fetch_vector4(&inst->SrcReg[0], machine, a);
1311 fetch_vector4(&inst->SrcReg[1], machine, b);
1312 result[0] = a[0] - b[0];
1313 result[1] = a[1] - b[1];
1314 result[2] = a[2] - b[2];
1315 result[3] = a[3] - b[3];
1316 store_vector4(inst, machine, result);
1317 if (DEBUG_PROG) {
1318 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1319 result[0], result[1], result[2], result[3],
1320 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1321 }
1322 }
1323 break;
1324 case OPCODE_SWZ: /* extended swizzle */
1325 {
1326 const struct prog_src_register *source = &inst->SrcReg[0];
1327 const GLfloat *src = get_src_register_pointer(source, machine);
1328 GLfloat result[4];
1329 GLuint i;
1330 for (i = 0; i < 4; i++) {
1331 const GLuint swz = GET_SWZ(source->Swizzle, i);
1332 if (swz == SWIZZLE_ZERO)
1333 result[i] = 0.0;
1334 else if (swz == SWIZZLE_ONE)
1335 result[i] = 1.0;
1336 else {
1337 ASSERT(swz >= 0);
1338 ASSERT(swz <= 3);
1339 result[i] = src[swz];
1340 }
1341 if (source->Negate & (1 << i))
1342 result[i] = -result[i];
1343 }
1344 store_vector4(inst, machine, result);
1345 }
1346 break;
1347 case OPCODE_TEX: /* Both ARB and NV frag prog */
1348 /* Simple texel lookup */
1349 {
1350 GLfloat texcoord[4], color[4];
1351 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1352
1353 /* For TEX, texcoord.Q should not be used and its value should not
1354 * matter (at most, we pass coord.xyz to texture3D() in GLSL).
1355 * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
1356 * which is effectively what happens when the texcoord swizzle
1357 * is .xyzz
1358 */
1359 texcoord[3] = 1.0f;
1360
1361 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1362
1363 if (DEBUG_PROG) {
1364 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1365 color[0], color[1], color[2], color[3],
1366 inst->TexSrcUnit,
1367 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1368 }
1369 store_vector4(inst, machine, color);
1370 }
1371 break;
1372 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1373 /* Texel lookup with LOD bias */
1374 {
1375 GLfloat texcoord[4], color[4], lodBias;
1376
1377 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1378
1379 /* texcoord[3] is the bias to add to lambda */
1380 lodBias = texcoord[3];
1381
1382 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1383
1384 if (DEBUG_PROG) {
1385 printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1386 " bias %g\n",
1387 color[0], color[1], color[2], color[3],
1388 inst->TexSrcUnit,
1389 texcoord[0],
1390 texcoord[1],
1391 texcoord[2],
1392 texcoord[3],
1393 lodBias);
1394 }
1395
1396 store_vector4(inst, machine, color);
1397 }
1398 break;
1399 case OPCODE_TXD: /* GL_NV_fragment_program only */
1400 /* Texture lookup w/ partial derivatives for LOD */
1401 {
1402 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1403 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1404 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1405 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1406 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1407 0.0, /* lodBias */
1408 inst->TexSrcUnit, color);
1409 store_vector4(inst, machine, color);
1410 }
1411 break;
1412 case OPCODE_TXL:
1413 /* Texel lookup with explicit LOD */
1414 {
1415 GLfloat texcoord[4], color[4], lod;
1416
1417 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1418
1419 /* texcoord[3] is the LOD */
1420 lod = texcoord[3];
1421
1422 machine->FetchTexelLod(ctx, texcoord, lod,
1423 machine->Samplers[inst->TexSrcUnit], color);
1424
1425 store_vector4(inst, machine, color);
1426 }
1427 break;
1428 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1429 /* Texture lookup w/ projective divide */
1430 {
1431 GLfloat texcoord[4], color[4];
1432
1433 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1434 /* Not so sure about this test - if texcoord[3] is
1435 * zero, we'd probably be fine except for an ASSERT in
1436 * IROUND_POS() which gets triggered by the inf values created.
1437 */
1438 if (texcoord[3] != 0.0) {
1439 texcoord[0] /= texcoord[3];
1440 texcoord[1] /= texcoord[3];
1441 texcoord[2] /= texcoord[3];
1442 }
1443
1444 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1445
1446 store_vector4(inst, machine, color);
1447 }
1448 break;
1449 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1450 /* Texture lookup w/ projective divide, as above, but do not
1451 * do the divide by w if sampling from a cube map.
1452 */
1453 {
1454 GLfloat texcoord[4], color[4];
1455
1456 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1457 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1458 texcoord[3] != 0.0) {
1459 texcoord[0] /= texcoord[3];
1460 texcoord[1] /= texcoord[3];
1461 texcoord[2] /= texcoord[3];
1462 }
1463
1464 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1465
1466 store_vector4(inst, machine, color);
1467 }
1468 break;
1469 case OPCODE_TRUNC: /* truncate toward zero */
1470 {
1471 GLfloat a[4], result[4];
1472 fetch_vector4(&inst->SrcReg[0], machine, a);
1473 result[0] = (GLfloat) (GLint) a[0];
1474 result[1] = (GLfloat) (GLint) a[1];
1475 result[2] = (GLfloat) (GLint) a[2];
1476 result[3] = (GLfloat) (GLint) a[3];
1477 store_vector4(inst, machine, result);
1478 }
1479 break;
1480 case OPCODE_XPD: /* cross product */
1481 {
1482 GLfloat a[4], b[4], result[4];
1483 fetch_vector4(&inst->SrcReg[0], machine, a);
1484 fetch_vector4(&inst->SrcReg[1], machine, b);
1485 result[0] = a[1] * b[2] - a[2] * b[1];
1486 result[1] = a[2] * b[0] - a[0] * b[2];
1487 result[2] = a[0] * b[1] - a[1] * b[0];
1488 result[3] = 1.0;
1489 store_vector4(inst, machine, result);
1490 if (DEBUG_PROG) {
1491 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1492 result[0], result[1], result[2], result[3],
1493 a[0], a[1], a[2], b[0], b[1], b[2]);
1494 }
1495 }
1496 break;
1497 case OPCODE_END:
1498 return GL_TRUE;
1499 default:
1500 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1501 inst->Opcode);
1502 return GL_TRUE; /* return value doesn't matter */
1503 }
1504
1505 numExec++;
1506 if (numExec > maxExec) {
1507 static GLboolean reported = GL_FALSE;
1508 if (!reported) {
1509 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1510 reported = GL_TRUE;
1511 }
1512 return GL_TRUE;
1513 }
1514
1515 } /* for pc */
1516
1517 return GL_TRUE;
1518 }