mesa: Restore 78-column wrapping of license text in C-style comments.
[mesa.git] / src / mesa / program / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.3
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
21 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
22 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23 * OTHER DEALINGS IN THE SOFTWARE.
24 */
25
26 /**
27 * \file prog_execute.c
28 * Software interpreter for vertex/fragment programs.
29 * \author Brian Paul
30 */
31
32 /*
33 * NOTE: we do everything in single-precision floating point; we don't
34 * currently observe the single/half/fixed-precision qualifiers.
35 *
36 */
37
38
39 #include "main/glheader.h"
40 #include "main/colormac.h"
41 #include "main/macros.h"
42 #include "prog_execute.h"
43 #include "prog_instruction.h"
44 #include "prog_parameter.h"
45 #include "prog_print.h"
46 #include "prog_noise.h"
47
48
49 /* debug predicate */
50 #define DEBUG_PROG 0
51
52
53 /**
54 * Set x to positive or negative infinity.
55 */
56 #if defined(USE_IEEE) || defined(_WIN32)
57 #define SET_POS_INFINITY(x) \
58 do { \
59 fi_type fi; \
60 fi.i = 0x7F800000; \
61 x = fi.f; \
62 } while (0)
63 #define SET_NEG_INFINITY(x) \
64 do { \
65 fi_type fi; \
66 fi.i = 0xFF800000; \
67 x = fi.f; \
68 } while (0)
69 #else
70 #define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
71 #define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
72 #endif
73
74 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
75
76
77 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
78
79
80 /**
81 * Return a pointer to the 4-element float vector specified by the given
82 * source register.
83 */
84 static inline const GLfloat *
85 get_src_register_pointer(const struct prog_src_register *source,
86 const struct gl_program_machine *machine)
87 {
88 const struct gl_program *prog = machine->CurProgram;
89 GLint reg = source->Index;
90
91 if (source->RelAddr) {
92 /* add address register value to src index/offset */
93 reg += machine->AddressReg[0][0];
94 if (reg < 0) {
95 return ZeroVec;
96 }
97 }
98
99 switch (source->File) {
100 case PROGRAM_TEMPORARY:
101 if (reg >= MAX_PROGRAM_TEMPS)
102 return ZeroVec;
103 return machine->Temporaries[reg];
104
105 case PROGRAM_INPUT:
106 if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
107 if (reg >= VERT_ATTRIB_MAX)
108 return ZeroVec;
109 return machine->VertAttribs[reg];
110 }
111 else {
112 if (reg >= VARYING_SLOT_MAX)
113 return ZeroVec;
114 return machine->Attribs[reg][machine->CurElement];
115 }
116
117 case PROGRAM_OUTPUT:
118 if (reg >= MAX_PROGRAM_OUTPUTS)
119 return ZeroVec;
120 return machine->Outputs[reg];
121
122 case PROGRAM_LOCAL_PARAM:
123 if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
124 return ZeroVec;
125 return machine->CurProgram->LocalParams[reg];
126
127 case PROGRAM_ENV_PARAM:
128 if (reg >= MAX_PROGRAM_ENV_PARAMS)
129 return ZeroVec;
130 return machine->EnvParams[reg];
131
132 case PROGRAM_STATE_VAR:
133 /* Fallthrough */
134 case PROGRAM_CONSTANT:
135 /* Fallthrough */
136 case PROGRAM_UNIFORM:
137 if (reg >= (GLint) prog->Parameters->NumParameters)
138 return ZeroVec;
139 return (GLfloat *) prog->Parameters->ParameterValues[reg];
140
141 case PROGRAM_SYSTEM_VALUE:
142 assert(reg < Elements(machine->SystemValues));
143 return machine->SystemValues[reg];
144
145 default:
146 _mesa_problem(NULL,
147 "Invalid src register file %d in get_src_register_pointer()",
148 source->File);
149 return NULL;
150 }
151 }
152
153
154 /**
155 * Return a pointer to the 4-element float vector specified by the given
156 * destination register.
157 */
158 static inline GLfloat *
159 get_dst_register_pointer(const struct prog_dst_register *dest,
160 struct gl_program_machine *machine)
161 {
162 static GLfloat dummyReg[4];
163 GLint reg = dest->Index;
164
165 if (dest->RelAddr) {
166 /* add address register value to src index/offset */
167 reg += machine->AddressReg[0][0];
168 if (reg < 0) {
169 return dummyReg;
170 }
171 }
172
173 switch (dest->File) {
174 case PROGRAM_TEMPORARY:
175 if (reg >= MAX_PROGRAM_TEMPS)
176 return dummyReg;
177 return machine->Temporaries[reg];
178
179 case PROGRAM_OUTPUT:
180 if (reg >= MAX_PROGRAM_OUTPUTS)
181 return dummyReg;
182 return machine->Outputs[reg];
183
184 default:
185 _mesa_problem(NULL,
186 "Invalid dest register file %d in get_dst_register_pointer()",
187 dest->File);
188 return NULL;
189 }
190 }
191
192
193
194 /**
195 * Fetch a 4-element float vector from the given source register.
196 * Apply swizzling and negating as needed.
197 */
198 static void
199 fetch_vector4(const struct prog_src_register *source,
200 const struct gl_program_machine *machine, GLfloat result[4])
201 {
202 const GLfloat *src = get_src_register_pointer(source, machine);
203 ASSERT(src);
204
205 if (source->Swizzle == SWIZZLE_NOOP) {
206 /* no swizzling */
207 COPY_4V(result, src);
208 }
209 else {
210 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
211 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
212 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
213 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
214 result[0] = src[GET_SWZ(source->Swizzle, 0)];
215 result[1] = src[GET_SWZ(source->Swizzle, 1)];
216 result[2] = src[GET_SWZ(source->Swizzle, 2)];
217 result[3] = src[GET_SWZ(source->Swizzle, 3)];
218 }
219
220 if (source->Abs) {
221 result[0] = FABSF(result[0]);
222 result[1] = FABSF(result[1]);
223 result[2] = FABSF(result[2]);
224 result[3] = FABSF(result[3]);
225 }
226 if (source->Negate) {
227 ASSERT(source->Negate == NEGATE_XYZW);
228 result[0] = -result[0];
229 result[1] = -result[1];
230 result[2] = -result[2];
231 result[3] = -result[3];
232 }
233
234 #ifdef NAN_CHECK
235 assert(!IS_INF_OR_NAN(result[0]));
236 assert(!IS_INF_OR_NAN(result[0]));
237 assert(!IS_INF_OR_NAN(result[0]));
238 assert(!IS_INF_OR_NAN(result[0]));
239 #endif
240 }
241
242
243 /**
244 * Fetch the derivative with respect to X or Y for the given register.
245 * XXX this currently only works for fragment program input attribs.
246 */
247 static void
248 fetch_vector4_deriv(struct gl_context * ctx,
249 const struct prog_src_register *source,
250 const struct gl_program_machine *machine,
251 char xOrY, GLfloat result[4])
252 {
253 if (source->File == PROGRAM_INPUT &&
254 source->Index < (GLint) machine->NumDeriv) {
255 const GLint col = machine->CurElement;
256 const GLfloat w = machine->Attribs[VARYING_SLOT_POS][col][3];
257 const GLfloat invQ = 1.0f / w;
258 GLfloat deriv[4];
259
260 if (xOrY == 'X') {
261 deriv[0] = machine->DerivX[source->Index][0] * invQ;
262 deriv[1] = machine->DerivX[source->Index][1] * invQ;
263 deriv[2] = machine->DerivX[source->Index][2] * invQ;
264 deriv[3] = machine->DerivX[source->Index][3] * invQ;
265 }
266 else {
267 deriv[0] = machine->DerivY[source->Index][0] * invQ;
268 deriv[1] = machine->DerivY[source->Index][1] * invQ;
269 deriv[2] = machine->DerivY[source->Index][2] * invQ;
270 deriv[3] = machine->DerivY[source->Index][3] * invQ;
271 }
272
273 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
274 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
275 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
276 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
277
278 if (source->Abs) {
279 result[0] = FABSF(result[0]);
280 result[1] = FABSF(result[1]);
281 result[2] = FABSF(result[2]);
282 result[3] = FABSF(result[3]);
283 }
284 if (source->Negate) {
285 ASSERT(source->Negate == NEGATE_XYZW);
286 result[0] = -result[0];
287 result[1] = -result[1];
288 result[2] = -result[2];
289 result[3] = -result[3];
290 }
291 }
292 else {
293 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
294 }
295 }
296
297
298 /**
299 * As above, but only return result[0] element.
300 */
301 static void
302 fetch_vector1(const struct prog_src_register *source,
303 const struct gl_program_machine *machine, GLfloat result[4])
304 {
305 const GLfloat *src = get_src_register_pointer(source, machine);
306 ASSERT(src);
307
308 result[0] = src[GET_SWZ(source->Swizzle, 0)];
309
310 if (source->Abs) {
311 result[0] = FABSF(result[0]);
312 }
313 if (source->Negate) {
314 result[0] = -result[0];
315 }
316 }
317
318
319 static GLuint
320 fetch_vector1ui(const struct prog_src_register *source,
321 const struct gl_program_machine *machine)
322 {
323 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
324 return src[GET_SWZ(source->Swizzle, 0)];
325 }
326
327
328 /**
329 * Fetch texel from texture. Use partial derivatives when possible.
330 */
331 static inline void
332 fetch_texel(struct gl_context *ctx,
333 const struct gl_program_machine *machine,
334 const struct prog_instruction *inst,
335 const GLfloat texcoord[4], GLfloat lodBias,
336 GLfloat color[4])
337 {
338 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
339
340 /* Note: we only have the right derivatives for fragment input attribs.
341 */
342 if (machine->NumDeriv > 0 &&
343 inst->SrcReg[0].File == PROGRAM_INPUT &&
344 inst->SrcReg[0].Index == VARYING_SLOT_TEX0 + inst->TexSrcUnit) {
345 /* simple texture fetch for which we should have derivatives */
346 GLuint attr = inst->SrcReg[0].Index;
347 machine->FetchTexelDeriv(ctx, texcoord,
348 machine->DerivX[attr],
349 machine->DerivY[attr],
350 lodBias, unit, color);
351 }
352 else {
353 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
354 }
355 }
356
357
358 /**
359 * Test value against zero and return GT, LT, EQ or UN if NaN.
360 */
361 static inline GLuint
362 generate_cc(float value)
363 {
364 if (value != value)
365 return COND_UN; /* NaN */
366 if (value > 0.0F)
367 return COND_GT;
368 if (value < 0.0F)
369 return COND_LT;
370 return COND_EQ;
371 }
372
373
374 /**
375 * Test if the ccMaskRule is satisfied by the given condition code.
376 * Used to mask destination writes according to the current condition code.
377 */
378 static inline GLboolean
379 test_cc(GLuint condCode, GLuint ccMaskRule)
380 {
381 switch (ccMaskRule) {
382 case COND_EQ: return (condCode == COND_EQ);
383 case COND_NE: return (condCode != COND_EQ);
384 case COND_LT: return (condCode == COND_LT);
385 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
386 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
387 case COND_GT: return (condCode == COND_GT);
388 case COND_TR: return GL_TRUE;
389 case COND_FL: return GL_FALSE;
390 default: return GL_TRUE;
391 }
392 }
393
394
395 /**
396 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
397 * or GL_FALSE to indicate result.
398 */
399 static inline GLboolean
400 eval_condition(const struct gl_program_machine *machine,
401 const struct prog_instruction *inst)
402 {
403 const GLuint swizzle = inst->DstReg.CondSwizzle;
404 const GLuint condMask = inst->DstReg.CondMask;
405 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
406 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
407 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
408 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
409 return GL_TRUE;
410 }
411 else {
412 return GL_FALSE;
413 }
414 }
415
416
417
418 /**
419 * Store 4 floats into a register. Observe the instructions saturate and
420 * set-condition-code flags.
421 */
422 static void
423 store_vector4(const struct prog_instruction *inst,
424 struct gl_program_machine *machine, const GLfloat value[4])
425 {
426 const struct prog_dst_register *dstReg = &(inst->DstReg);
427 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
428 GLuint writeMask = dstReg->WriteMask;
429 GLfloat clampedValue[4];
430 GLfloat *dst = get_dst_register_pointer(dstReg, machine);
431
432 #if 0
433 if (value[0] > 1.0e10 ||
434 IS_INF_OR_NAN(value[0]) ||
435 IS_INF_OR_NAN(value[1]) ||
436 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
437 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
438 #endif
439
440 if (clamp) {
441 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
442 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
443 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
444 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
445 value = clampedValue;
446 }
447
448 if (dstReg->CondMask != COND_TR) {
449 /* condition codes may turn off some writes */
450 if (writeMask & WRITEMASK_X) {
451 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
452 dstReg->CondMask))
453 writeMask &= ~WRITEMASK_X;
454 }
455 if (writeMask & WRITEMASK_Y) {
456 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
457 dstReg->CondMask))
458 writeMask &= ~WRITEMASK_Y;
459 }
460 if (writeMask & WRITEMASK_Z) {
461 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
462 dstReg->CondMask))
463 writeMask &= ~WRITEMASK_Z;
464 }
465 if (writeMask & WRITEMASK_W) {
466 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
467 dstReg->CondMask))
468 writeMask &= ~WRITEMASK_W;
469 }
470 }
471
472 #ifdef NAN_CHECK
473 assert(!IS_INF_OR_NAN(value[0]));
474 assert(!IS_INF_OR_NAN(value[0]));
475 assert(!IS_INF_OR_NAN(value[0]));
476 assert(!IS_INF_OR_NAN(value[0]));
477 #endif
478
479 if (writeMask & WRITEMASK_X)
480 dst[0] = value[0];
481 if (writeMask & WRITEMASK_Y)
482 dst[1] = value[1];
483 if (writeMask & WRITEMASK_Z)
484 dst[2] = value[2];
485 if (writeMask & WRITEMASK_W)
486 dst[3] = value[3];
487
488 if (inst->CondUpdate) {
489 if (writeMask & WRITEMASK_X)
490 machine->CondCodes[0] = generate_cc(value[0]);
491 if (writeMask & WRITEMASK_Y)
492 machine->CondCodes[1] = generate_cc(value[1]);
493 if (writeMask & WRITEMASK_Z)
494 machine->CondCodes[2] = generate_cc(value[2]);
495 if (writeMask & WRITEMASK_W)
496 machine->CondCodes[3] = generate_cc(value[3]);
497 #if DEBUG_PROG
498 printf("CondCodes=(%s,%s,%s,%s) for:\n",
499 _mesa_condcode_string(machine->CondCodes[0]),
500 _mesa_condcode_string(machine->CondCodes[1]),
501 _mesa_condcode_string(machine->CondCodes[2]),
502 _mesa_condcode_string(machine->CondCodes[3]));
503 #endif
504 }
505 }
506
507
508 /**
509 * Store 4 uints into a register. Observe the set-condition-code flags.
510 */
511 static void
512 store_vector4ui(const struct prog_instruction *inst,
513 struct gl_program_machine *machine, const GLuint value[4])
514 {
515 const struct prog_dst_register *dstReg = &(inst->DstReg);
516 GLuint writeMask = dstReg->WriteMask;
517 GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
518
519 if (dstReg->CondMask != COND_TR) {
520 /* condition codes may turn off some writes */
521 if (writeMask & WRITEMASK_X) {
522 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
523 dstReg->CondMask))
524 writeMask &= ~WRITEMASK_X;
525 }
526 if (writeMask & WRITEMASK_Y) {
527 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
528 dstReg->CondMask))
529 writeMask &= ~WRITEMASK_Y;
530 }
531 if (writeMask & WRITEMASK_Z) {
532 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
533 dstReg->CondMask))
534 writeMask &= ~WRITEMASK_Z;
535 }
536 if (writeMask & WRITEMASK_W) {
537 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
538 dstReg->CondMask))
539 writeMask &= ~WRITEMASK_W;
540 }
541 }
542
543 if (writeMask & WRITEMASK_X)
544 dst[0] = value[0];
545 if (writeMask & WRITEMASK_Y)
546 dst[1] = value[1];
547 if (writeMask & WRITEMASK_Z)
548 dst[2] = value[2];
549 if (writeMask & WRITEMASK_W)
550 dst[3] = value[3];
551
552 if (inst->CondUpdate) {
553 if (writeMask & WRITEMASK_X)
554 machine->CondCodes[0] = generate_cc((float)value[0]);
555 if (writeMask & WRITEMASK_Y)
556 machine->CondCodes[1] = generate_cc((float)value[1]);
557 if (writeMask & WRITEMASK_Z)
558 machine->CondCodes[2] = generate_cc((float)value[2]);
559 if (writeMask & WRITEMASK_W)
560 machine->CondCodes[3] = generate_cc((float)value[3]);
561 #if DEBUG_PROG
562 printf("CondCodes=(%s,%s,%s,%s) for:\n",
563 _mesa_condcode_string(machine->CondCodes[0]),
564 _mesa_condcode_string(machine->CondCodes[1]),
565 _mesa_condcode_string(machine->CondCodes[2]),
566 _mesa_condcode_string(machine->CondCodes[3]));
567 #endif
568 }
569 }
570
571
572
573 /**
574 * Execute the given vertex/fragment program.
575 *
576 * \param ctx rendering context
577 * \param program the program to execute
578 * \param machine machine state (must be initialized)
579 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
580 */
581 GLboolean
582 _mesa_execute_program(struct gl_context * ctx,
583 const struct gl_program *program,
584 struct gl_program_machine *machine)
585 {
586 const GLuint numInst = program->NumInstructions;
587 const GLuint maxExec = 65536;
588 GLuint pc, numExec = 0;
589
590 machine->CurProgram = program;
591
592 if (DEBUG_PROG) {
593 printf("execute program %u --------------------\n", program->Id);
594 }
595
596 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
597 machine->EnvParams = ctx->VertexProgram.Parameters;
598 }
599 else {
600 machine->EnvParams = ctx->FragmentProgram.Parameters;
601 }
602
603 for (pc = 0; pc < numInst; pc++) {
604 const struct prog_instruction *inst = program->Instructions + pc;
605
606 if (DEBUG_PROG) {
607 _mesa_print_instruction(inst);
608 }
609
610 switch (inst->Opcode) {
611 case OPCODE_ABS:
612 {
613 GLfloat a[4], result[4];
614 fetch_vector4(&inst->SrcReg[0], machine, a);
615 result[0] = FABSF(a[0]);
616 result[1] = FABSF(a[1]);
617 result[2] = FABSF(a[2]);
618 result[3] = FABSF(a[3]);
619 store_vector4(inst, machine, result);
620 }
621 break;
622 case OPCODE_ADD:
623 {
624 GLfloat a[4], b[4], result[4];
625 fetch_vector4(&inst->SrcReg[0], machine, a);
626 fetch_vector4(&inst->SrcReg[1], machine, b);
627 result[0] = a[0] + b[0];
628 result[1] = a[1] + b[1];
629 result[2] = a[2] + b[2];
630 result[3] = a[3] + b[3];
631 store_vector4(inst, machine, result);
632 if (DEBUG_PROG) {
633 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
634 result[0], result[1], result[2], result[3],
635 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
636 }
637 }
638 break;
639 case OPCODE_ARL:
640 {
641 GLfloat t[4];
642 fetch_vector4(&inst->SrcReg[0], machine, t);
643 machine->AddressReg[0][0] = IFLOOR(t[0]);
644 if (DEBUG_PROG) {
645 printf("ARL %d\n", machine->AddressReg[0][0]);
646 }
647 }
648 break;
649 case OPCODE_BGNLOOP:
650 /* no-op */
651 ASSERT(program->Instructions[inst->BranchTarget].Opcode
652 == OPCODE_ENDLOOP);
653 break;
654 case OPCODE_ENDLOOP:
655 /* subtract 1 here since pc is incremented by for(pc) loop */
656 ASSERT(program->Instructions[inst->BranchTarget].Opcode
657 == OPCODE_BGNLOOP);
658 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
659 break;
660 case OPCODE_BGNSUB: /* begin subroutine */
661 break;
662 case OPCODE_ENDSUB: /* end subroutine */
663 break;
664 case OPCODE_BRK: /* break out of loop (conditional) */
665 ASSERT(program->Instructions[inst->BranchTarget].Opcode
666 == OPCODE_ENDLOOP);
667 if (eval_condition(machine, inst)) {
668 /* break out of loop */
669 /* pc++ at end of for-loop will put us after the ENDLOOP inst */
670 pc = inst->BranchTarget;
671 }
672 break;
673 case OPCODE_CONT: /* continue loop (conditional) */
674 ASSERT(program->Instructions[inst->BranchTarget].Opcode
675 == OPCODE_ENDLOOP);
676 if (eval_condition(machine, inst)) {
677 /* continue at ENDLOOP */
678 /* Subtract 1 here since we'll do pc++ at end of for-loop */
679 pc = inst->BranchTarget - 1;
680 }
681 break;
682 case OPCODE_CAL: /* Call subroutine (conditional) */
683 if (eval_condition(machine, inst)) {
684 /* call the subroutine */
685 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
686 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
687 }
688 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
689 /* Subtract 1 here since we'll do pc++ at end of for-loop */
690 pc = inst->BranchTarget - 1;
691 }
692 break;
693 case OPCODE_CMP:
694 {
695 GLfloat a[4], b[4], c[4], result[4];
696 fetch_vector4(&inst->SrcReg[0], machine, a);
697 fetch_vector4(&inst->SrcReg[1], machine, b);
698 fetch_vector4(&inst->SrcReg[2], machine, c);
699 result[0] = a[0] < 0.0F ? b[0] : c[0];
700 result[1] = a[1] < 0.0F ? b[1] : c[1];
701 result[2] = a[2] < 0.0F ? b[2] : c[2];
702 result[3] = a[3] < 0.0F ? b[3] : c[3];
703 store_vector4(inst, machine, result);
704 if (DEBUG_PROG) {
705 printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
706 result[0], result[1], result[2], result[3],
707 a[0], a[1], a[2], a[3],
708 b[0], b[1], b[2], b[3],
709 c[0], c[1], c[2], c[3]);
710 }
711 }
712 break;
713 case OPCODE_COS:
714 {
715 GLfloat a[4], result[4];
716 fetch_vector1(&inst->SrcReg[0], machine, a);
717 result[0] = result[1] = result[2] = result[3]
718 = (GLfloat) cos(a[0]);
719 store_vector4(inst, machine, result);
720 }
721 break;
722 case OPCODE_DDX: /* Partial derivative with respect to X */
723 {
724 GLfloat result[4];
725 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
726 'X', result);
727 store_vector4(inst, machine, result);
728 }
729 break;
730 case OPCODE_DDY: /* Partial derivative with respect to Y */
731 {
732 GLfloat result[4];
733 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
734 'Y', result);
735 store_vector4(inst, machine, result);
736 }
737 break;
738 case OPCODE_DP2:
739 {
740 GLfloat a[4], b[4], result[4];
741 fetch_vector4(&inst->SrcReg[0], machine, a);
742 fetch_vector4(&inst->SrcReg[1], machine, b);
743 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
744 store_vector4(inst, machine, result);
745 if (DEBUG_PROG) {
746 printf("DP2 %g = (%g %g) . (%g %g)\n",
747 result[0], a[0], a[1], b[0], b[1]);
748 }
749 }
750 break;
751 case OPCODE_DP3:
752 {
753 GLfloat a[4], b[4], result[4];
754 fetch_vector4(&inst->SrcReg[0], machine, a);
755 fetch_vector4(&inst->SrcReg[1], machine, b);
756 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
757 store_vector4(inst, machine, result);
758 if (DEBUG_PROG) {
759 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
760 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
761 }
762 }
763 break;
764 case OPCODE_DP4:
765 {
766 GLfloat a[4], b[4], result[4];
767 fetch_vector4(&inst->SrcReg[0], machine, a);
768 fetch_vector4(&inst->SrcReg[1], machine, b);
769 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
770 store_vector4(inst, machine, result);
771 if (DEBUG_PROG) {
772 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
773 result[0], a[0], a[1], a[2], a[3],
774 b[0], b[1], b[2], b[3]);
775 }
776 }
777 break;
778 case OPCODE_DPH:
779 {
780 GLfloat a[4], b[4], result[4];
781 fetch_vector4(&inst->SrcReg[0], machine, a);
782 fetch_vector4(&inst->SrcReg[1], machine, b);
783 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
784 store_vector4(inst, machine, result);
785 }
786 break;
787 case OPCODE_DST: /* Distance vector */
788 {
789 GLfloat a[4], b[4], result[4];
790 fetch_vector4(&inst->SrcReg[0], machine, a);
791 fetch_vector4(&inst->SrcReg[1], machine, b);
792 result[0] = 1.0F;
793 result[1] = a[1] * b[1];
794 result[2] = a[2];
795 result[3] = b[3];
796 store_vector4(inst, machine, result);
797 }
798 break;
799 case OPCODE_EXP:
800 {
801 GLfloat t[4], q[4], floor_t0;
802 fetch_vector1(&inst->SrcReg[0], machine, t);
803 floor_t0 = FLOORF(t[0]);
804 if (floor_t0 > FLT_MAX_EXP) {
805 SET_POS_INFINITY(q[0]);
806 SET_POS_INFINITY(q[2]);
807 }
808 else if (floor_t0 < FLT_MIN_EXP) {
809 q[0] = 0.0F;
810 q[2] = 0.0F;
811 }
812 else {
813 q[0] = LDEXPF(1.0, (int) floor_t0);
814 /* Note: GL_NV_vertex_program expects
815 * result.z = result.x * APPX(result.y)
816 * We do what the ARB extension says.
817 */
818 q[2] = (GLfloat) pow(2.0, t[0]);
819 }
820 q[1] = t[0] - floor_t0;
821 q[3] = 1.0F;
822 store_vector4( inst, machine, q );
823 }
824 break;
825 case OPCODE_EX2: /* Exponential base 2 */
826 {
827 GLfloat a[4], result[4], val;
828 fetch_vector1(&inst->SrcReg[0], machine, a);
829 val = (GLfloat) pow(2.0, a[0]);
830 /*
831 if (IS_INF_OR_NAN(val))
832 val = 1.0e10;
833 */
834 result[0] = result[1] = result[2] = result[3] = val;
835 store_vector4(inst, machine, result);
836 }
837 break;
838 case OPCODE_FLR:
839 {
840 GLfloat a[4], result[4];
841 fetch_vector4(&inst->SrcReg[0], machine, a);
842 result[0] = FLOORF(a[0]);
843 result[1] = FLOORF(a[1]);
844 result[2] = FLOORF(a[2]);
845 result[3] = FLOORF(a[3]);
846 store_vector4(inst, machine, result);
847 }
848 break;
849 case OPCODE_FRC:
850 {
851 GLfloat a[4], result[4];
852 fetch_vector4(&inst->SrcReg[0], machine, a);
853 result[0] = a[0] - FLOORF(a[0]);
854 result[1] = a[1] - FLOORF(a[1]);
855 result[2] = a[2] - FLOORF(a[2]);
856 result[3] = a[3] - FLOORF(a[3]);
857 store_vector4(inst, machine, result);
858 }
859 break;
860 case OPCODE_IF:
861 {
862 GLboolean cond;
863 ASSERT(program->Instructions[inst->BranchTarget].Opcode
864 == OPCODE_ELSE ||
865 program->Instructions[inst->BranchTarget].Opcode
866 == OPCODE_ENDIF);
867 /* eval condition */
868 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
869 GLfloat a[4];
870 fetch_vector1(&inst->SrcReg[0], machine, a);
871 cond = (a[0] != 0.0);
872 }
873 else {
874 cond = eval_condition(machine, inst);
875 }
876 if (DEBUG_PROG) {
877 printf("IF: %d\n", cond);
878 }
879 /* do if/else */
880 if (cond) {
881 /* do if-clause (just continue execution) */
882 }
883 else {
884 /* go to the instruction after ELSE or ENDIF */
885 assert(inst->BranchTarget >= 0);
886 pc = inst->BranchTarget;
887 }
888 }
889 break;
890 case OPCODE_ELSE:
891 /* goto ENDIF */
892 ASSERT(program->Instructions[inst->BranchTarget].Opcode
893 == OPCODE_ENDIF);
894 assert(inst->BranchTarget >= 0);
895 pc = inst->BranchTarget;
896 break;
897 case OPCODE_ENDIF:
898 /* nothing */
899 break;
900 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
901 if (eval_condition(machine, inst)) {
902 return GL_FALSE;
903 }
904 break;
905 case OPCODE_KIL: /* ARB_f_p only */
906 {
907 GLfloat a[4];
908 fetch_vector4(&inst->SrcReg[0], machine, a);
909 if (DEBUG_PROG) {
910 printf("KIL if (%g %g %g %g) <= 0.0\n",
911 a[0], a[1], a[2], a[3]);
912 }
913
914 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
915 return GL_FALSE;
916 }
917 }
918 break;
919 case OPCODE_LG2: /* log base 2 */
920 {
921 GLfloat a[4], result[4], val;
922 fetch_vector1(&inst->SrcReg[0], machine, a);
923 /* The fast LOG2 macro doesn't meet the precision requirements.
924 */
925 if (a[0] == 0.0F) {
926 val = -FLT_MAX;
927 }
928 else {
929 val = (float)(log(a[0]) * 1.442695F);
930 }
931 result[0] = result[1] = result[2] = result[3] = val;
932 store_vector4(inst, machine, result);
933 }
934 break;
935 case OPCODE_LIT:
936 {
937 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
938 GLfloat a[4], result[4];
939 fetch_vector4(&inst->SrcReg[0], machine, a);
940 a[0] = MAX2(a[0], 0.0F);
941 a[1] = MAX2(a[1], 0.0F);
942 /* XXX ARB version clamps a[3], NV version doesn't */
943 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
944 result[0] = 1.0F;
945 result[1] = a[0];
946 /* XXX we could probably just use pow() here */
947 if (a[0] > 0.0F) {
948 if (a[1] == 0.0 && a[3] == 0.0)
949 result[2] = 1.0F;
950 else
951 result[2] = (GLfloat) pow(a[1], a[3]);
952 }
953 else {
954 result[2] = 0.0F;
955 }
956 result[3] = 1.0F;
957 store_vector4(inst, machine, result);
958 if (DEBUG_PROG) {
959 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
960 result[0], result[1], result[2], result[3],
961 a[0], a[1], a[2], a[3]);
962 }
963 }
964 break;
965 case OPCODE_LOG:
966 {
967 GLfloat t[4], q[4], abs_t0;
968 fetch_vector1(&inst->SrcReg[0], machine, t);
969 abs_t0 = FABSF(t[0]);
970 if (abs_t0 != 0.0F) {
971 if (IS_INF_OR_NAN(abs_t0))
972 {
973 SET_POS_INFINITY(q[0]);
974 q[1] = 1.0F;
975 SET_POS_INFINITY(q[2]);
976 }
977 else {
978 int exponent;
979 GLfloat mantissa = FREXPF(t[0], &exponent);
980 q[0] = (GLfloat) (exponent - 1);
981 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
982
983 /* The fast LOG2 macro doesn't meet the precision
984 * requirements.
985 */
986 q[2] = (float)(log(t[0]) * 1.442695F);
987 }
988 }
989 else {
990 SET_NEG_INFINITY(q[0]);
991 q[1] = 1.0F;
992 SET_NEG_INFINITY(q[2]);
993 }
994 q[3] = 1.0;
995 store_vector4(inst, machine, q);
996 }
997 break;
998 case OPCODE_LRP:
999 {
1000 GLfloat a[4], b[4], c[4], result[4];
1001 fetch_vector4(&inst->SrcReg[0], machine, a);
1002 fetch_vector4(&inst->SrcReg[1], machine, b);
1003 fetch_vector4(&inst->SrcReg[2], machine, c);
1004 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1005 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1006 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1007 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1008 store_vector4(inst, machine, result);
1009 if (DEBUG_PROG) {
1010 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1011 "(%g %g %g %g), (%g %g %g %g)\n",
1012 result[0], result[1], result[2], result[3],
1013 a[0], a[1], a[2], a[3],
1014 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1015 }
1016 }
1017 break;
1018 case OPCODE_MAD:
1019 {
1020 GLfloat a[4], b[4], c[4], result[4];
1021 fetch_vector4(&inst->SrcReg[0], machine, a);
1022 fetch_vector4(&inst->SrcReg[1], machine, b);
1023 fetch_vector4(&inst->SrcReg[2], machine, c);
1024 result[0] = a[0] * b[0] + c[0];
1025 result[1] = a[1] * b[1] + c[1];
1026 result[2] = a[2] * b[2] + c[2];
1027 result[3] = a[3] * b[3] + c[3];
1028 store_vector4(inst, machine, result);
1029 if (DEBUG_PROG) {
1030 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1031 "(%g %g %g %g) + (%g %g %g %g)\n",
1032 result[0], result[1], result[2], result[3],
1033 a[0], a[1], a[2], a[3],
1034 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1035 }
1036 }
1037 break;
1038 case OPCODE_MAX:
1039 {
1040 GLfloat a[4], b[4], result[4];
1041 fetch_vector4(&inst->SrcReg[0], machine, a);
1042 fetch_vector4(&inst->SrcReg[1], machine, b);
1043 result[0] = MAX2(a[0], b[0]);
1044 result[1] = MAX2(a[1], b[1]);
1045 result[2] = MAX2(a[2], b[2]);
1046 result[3] = MAX2(a[3], b[3]);
1047 store_vector4(inst, machine, result);
1048 if (DEBUG_PROG) {
1049 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1050 result[0], result[1], result[2], result[3],
1051 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1052 }
1053 }
1054 break;
1055 case OPCODE_MIN:
1056 {
1057 GLfloat a[4], b[4], result[4];
1058 fetch_vector4(&inst->SrcReg[0], machine, a);
1059 fetch_vector4(&inst->SrcReg[1], machine, b);
1060 result[0] = MIN2(a[0], b[0]);
1061 result[1] = MIN2(a[1], b[1]);
1062 result[2] = MIN2(a[2], b[2]);
1063 result[3] = MIN2(a[3], b[3]);
1064 store_vector4(inst, machine, result);
1065 }
1066 break;
1067 case OPCODE_MOV:
1068 {
1069 GLfloat result[4];
1070 fetch_vector4(&inst->SrcReg[0], machine, result);
1071 store_vector4(inst, machine, result);
1072 if (DEBUG_PROG) {
1073 printf("MOV (%g %g %g %g)\n",
1074 result[0], result[1], result[2], result[3]);
1075 }
1076 }
1077 break;
1078 case OPCODE_MUL:
1079 {
1080 GLfloat a[4], b[4], result[4];
1081 fetch_vector4(&inst->SrcReg[0], machine, a);
1082 fetch_vector4(&inst->SrcReg[1], machine, b);
1083 result[0] = a[0] * b[0];
1084 result[1] = a[1] * b[1];
1085 result[2] = a[2] * b[2];
1086 result[3] = a[3] * b[3];
1087 store_vector4(inst, machine, result);
1088 if (DEBUG_PROG) {
1089 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1090 result[0], result[1], result[2], result[3],
1091 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1092 }
1093 }
1094 break;
1095 case OPCODE_NOISE1:
1096 {
1097 GLfloat a[4], result[4];
1098 fetch_vector1(&inst->SrcReg[0], machine, a);
1099 result[0] =
1100 result[1] =
1101 result[2] =
1102 result[3] = _mesa_noise1(a[0]);
1103 store_vector4(inst, machine, result);
1104 }
1105 break;
1106 case OPCODE_NOISE2:
1107 {
1108 GLfloat a[4], result[4];
1109 fetch_vector4(&inst->SrcReg[0], machine, a);
1110 result[0] =
1111 result[1] =
1112 result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1113 store_vector4(inst, machine, result);
1114 }
1115 break;
1116 case OPCODE_NOISE3:
1117 {
1118 GLfloat a[4], result[4];
1119 fetch_vector4(&inst->SrcReg[0], machine, a);
1120 result[0] =
1121 result[1] =
1122 result[2] =
1123 result[3] = _mesa_noise3(a[0], a[1], a[2]);
1124 store_vector4(inst, machine, result);
1125 }
1126 break;
1127 case OPCODE_NOISE4:
1128 {
1129 GLfloat a[4], result[4];
1130 fetch_vector4(&inst->SrcReg[0], machine, a);
1131 result[0] =
1132 result[1] =
1133 result[2] =
1134 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1135 store_vector4(inst, machine, result);
1136 }
1137 break;
1138 case OPCODE_NOP:
1139 break;
1140 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1141 {
1142 GLfloat a[4];
1143 GLuint result[4];
1144 GLhalfNV hx, hy;
1145 fetch_vector4(&inst->SrcReg[0], machine, a);
1146 hx = _mesa_float_to_half(a[0]);
1147 hy = _mesa_float_to_half(a[1]);
1148 result[0] =
1149 result[1] =
1150 result[2] =
1151 result[3] = hx | (hy << 16);
1152 store_vector4ui(inst, machine, result);
1153 }
1154 break;
1155 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1156 {
1157 GLfloat a[4];
1158 GLuint result[4], usx, usy;
1159 fetch_vector4(&inst->SrcReg[0], machine, a);
1160 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1161 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1162 usx = F_TO_I(a[0] * 65535.0F);
1163 usy = F_TO_I(a[1] * 65535.0F);
1164 result[0] =
1165 result[1] =
1166 result[2] =
1167 result[3] = usx | (usy << 16);
1168 store_vector4ui(inst, machine, result);
1169 }
1170 break;
1171 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1172 {
1173 GLfloat a[4];
1174 GLuint result[4], ubx, uby, ubz, ubw;
1175 fetch_vector4(&inst->SrcReg[0], machine, a);
1176 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1177 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1178 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1179 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1180 ubx = F_TO_I(127.0F * a[0] + 128.0F);
1181 uby = F_TO_I(127.0F * a[1] + 128.0F);
1182 ubz = F_TO_I(127.0F * a[2] + 128.0F);
1183 ubw = F_TO_I(127.0F * a[3] + 128.0F);
1184 result[0] =
1185 result[1] =
1186 result[2] =
1187 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1188 store_vector4ui(inst, machine, result);
1189 }
1190 break;
1191 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1192 {
1193 GLfloat a[4];
1194 GLuint result[4], ubx, uby, ubz, ubw;
1195 fetch_vector4(&inst->SrcReg[0], machine, a);
1196 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1197 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1198 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1199 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1200 ubx = F_TO_I(255.0F * a[0]);
1201 uby = F_TO_I(255.0F * a[1]);
1202 ubz = F_TO_I(255.0F * a[2]);
1203 ubw = F_TO_I(255.0F * a[3]);
1204 result[0] =
1205 result[1] =
1206 result[2] =
1207 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1208 store_vector4ui(inst, machine, result);
1209 }
1210 break;
1211 case OPCODE_POW:
1212 {
1213 GLfloat a[4], b[4], result[4];
1214 fetch_vector1(&inst->SrcReg[0], machine, a);
1215 fetch_vector1(&inst->SrcReg[1], machine, b);
1216 result[0] = result[1] = result[2] = result[3]
1217 = (GLfloat) pow(a[0], b[0]);
1218 store_vector4(inst, machine, result);
1219 }
1220 break;
1221
1222 case OPCODE_RCP:
1223 {
1224 GLfloat a[4], result[4];
1225 fetch_vector1(&inst->SrcReg[0], machine, a);
1226 if (DEBUG_PROG) {
1227 if (a[0] == 0)
1228 printf("RCP(0)\n");
1229 else if (IS_INF_OR_NAN(a[0]))
1230 printf("RCP(inf)\n");
1231 }
1232 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1233 store_vector4(inst, machine, result);
1234 }
1235 break;
1236 case OPCODE_RET: /* return from subroutine (conditional) */
1237 if (eval_condition(machine, inst)) {
1238 if (machine->StackDepth == 0) {
1239 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1240 }
1241 /* subtract one because of pc++ in the for loop */
1242 pc = machine->CallStack[--machine->StackDepth] - 1;
1243 }
1244 break;
1245 case OPCODE_RFL: /* reflection vector */
1246 {
1247 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1248 fetch_vector4(&inst->SrcReg[0], machine, axis);
1249 fetch_vector4(&inst->SrcReg[1], machine, dir);
1250 tmpW = DOT3(axis, axis);
1251 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1252 result[0] = tmpX * axis[0] - dir[0];
1253 result[1] = tmpX * axis[1] - dir[1];
1254 result[2] = tmpX * axis[2] - dir[2];
1255 /* result[3] is never written! XXX enforce in parser! */
1256 store_vector4(inst, machine, result);
1257 }
1258 break;
1259 case OPCODE_RSQ: /* 1 / sqrt() */
1260 {
1261 GLfloat a[4], result[4];
1262 fetch_vector1(&inst->SrcReg[0], machine, a);
1263 a[0] = FABSF(a[0]);
1264 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1265 store_vector4(inst, machine, result);
1266 if (DEBUG_PROG) {
1267 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1268 }
1269 }
1270 break;
1271 case OPCODE_SCS: /* sine and cos */
1272 {
1273 GLfloat a[4], result[4];
1274 fetch_vector1(&inst->SrcReg[0], machine, a);
1275 result[0] = (GLfloat) cos(a[0]);
1276 result[1] = (GLfloat) sin(a[0]);
1277 result[2] = 0.0; /* undefined! */
1278 result[3] = 0.0; /* undefined! */
1279 store_vector4(inst, machine, result);
1280 }
1281 break;
1282 case OPCODE_SEQ: /* set on equal */
1283 {
1284 GLfloat a[4], b[4], result[4];
1285 fetch_vector4(&inst->SrcReg[0], machine, a);
1286 fetch_vector4(&inst->SrcReg[1], machine, b);
1287 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1288 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1289 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1290 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1291 store_vector4(inst, machine, result);
1292 if (DEBUG_PROG) {
1293 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1294 result[0], result[1], result[2], result[3],
1295 a[0], a[1], a[2], a[3],
1296 b[0], b[1], b[2], b[3]);
1297 }
1298 }
1299 break;
1300 case OPCODE_SFL: /* set false, operands ignored */
1301 {
1302 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1303 store_vector4(inst, machine, result);
1304 }
1305 break;
1306 case OPCODE_SGE: /* set on greater or equal */
1307 {
1308 GLfloat a[4], b[4], result[4];
1309 fetch_vector4(&inst->SrcReg[0], machine, a);
1310 fetch_vector4(&inst->SrcReg[1], machine, b);
1311 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1312 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1313 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1314 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1315 store_vector4(inst, machine, result);
1316 if (DEBUG_PROG) {
1317 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1318 result[0], result[1], result[2], result[3],
1319 a[0], a[1], a[2], a[3],
1320 b[0], b[1], b[2], b[3]);
1321 }
1322 }
1323 break;
1324 case OPCODE_SGT: /* set on greater */
1325 {
1326 GLfloat a[4], b[4], result[4];
1327 fetch_vector4(&inst->SrcReg[0], machine, a);
1328 fetch_vector4(&inst->SrcReg[1], machine, b);
1329 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1330 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1331 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1332 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1333 store_vector4(inst, machine, result);
1334 if (DEBUG_PROG) {
1335 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1336 result[0], result[1], result[2], result[3],
1337 a[0], a[1], a[2], a[3],
1338 b[0], b[1], b[2], b[3]);
1339 }
1340 }
1341 break;
1342 case OPCODE_SIN:
1343 {
1344 GLfloat a[4], result[4];
1345 fetch_vector1(&inst->SrcReg[0], machine, a);
1346 result[0] = result[1] = result[2] = result[3]
1347 = (GLfloat) sin(a[0]);
1348 store_vector4(inst, machine, result);
1349 }
1350 break;
1351 case OPCODE_SLE: /* set on less or equal */
1352 {
1353 GLfloat a[4], b[4], result[4];
1354 fetch_vector4(&inst->SrcReg[0], machine, a);
1355 fetch_vector4(&inst->SrcReg[1], machine, b);
1356 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1357 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1358 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1359 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1360 store_vector4(inst, machine, result);
1361 if (DEBUG_PROG) {
1362 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1363 result[0], result[1], result[2], result[3],
1364 a[0], a[1], a[2], a[3],
1365 b[0], b[1], b[2], b[3]);
1366 }
1367 }
1368 break;
1369 case OPCODE_SLT: /* set on less */
1370 {
1371 GLfloat a[4], b[4], result[4];
1372 fetch_vector4(&inst->SrcReg[0], machine, a);
1373 fetch_vector4(&inst->SrcReg[1], machine, b);
1374 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1375 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1376 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1377 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1378 store_vector4(inst, machine, result);
1379 if (DEBUG_PROG) {
1380 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1381 result[0], result[1], result[2], result[3],
1382 a[0], a[1], a[2], a[3],
1383 b[0], b[1], b[2], b[3]);
1384 }
1385 }
1386 break;
1387 case OPCODE_SNE: /* set on not equal */
1388 {
1389 GLfloat a[4], b[4], result[4];
1390 fetch_vector4(&inst->SrcReg[0], machine, a);
1391 fetch_vector4(&inst->SrcReg[1], machine, b);
1392 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1393 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1394 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1395 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1396 store_vector4(inst, machine, result);
1397 if (DEBUG_PROG) {
1398 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1399 result[0], result[1], result[2], result[3],
1400 a[0], a[1], a[2], a[3],
1401 b[0], b[1], b[2], b[3]);
1402 }
1403 }
1404 break;
1405 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1406 {
1407 GLfloat a[4], result[4];
1408 fetch_vector4(&inst->SrcReg[0], machine, a);
1409 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1410 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1411 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1412 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1413 store_vector4(inst, machine, result);
1414 }
1415 break;
1416 case OPCODE_STR: /* set true, operands ignored */
1417 {
1418 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1419 store_vector4(inst, machine, result);
1420 }
1421 break;
1422 case OPCODE_SUB:
1423 {
1424 GLfloat a[4], b[4], result[4];
1425 fetch_vector4(&inst->SrcReg[0], machine, a);
1426 fetch_vector4(&inst->SrcReg[1], machine, b);
1427 result[0] = a[0] - b[0];
1428 result[1] = a[1] - b[1];
1429 result[2] = a[2] - b[2];
1430 result[3] = a[3] - b[3];
1431 store_vector4(inst, machine, result);
1432 if (DEBUG_PROG) {
1433 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1434 result[0], result[1], result[2], result[3],
1435 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1436 }
1437 }
1438 break;
1439 case OPCODE_SWZ: /* extended swizzle */
1440 {
1441 const struct prog_src_register *source = &inst->SrcReg[0];
1442 const GLfloat *src = get_src_register_pointer(source, machine);
1443 GLfloat result[4];
1444 GLuint i;
1445 for (i = 0; i < 4; i++) {
1446 const GLuint swz = GET_SWZ(source->Swizzle, i);
1447 if (swz == SWIZZLE_ZERO)
1448 result[i] = 0.0;
1449 else if (swz == SWIZZLE_ONE)
1450 result[i] = 1.0;
1451 else {
1452 ASSERT(swz >= 0);
1453 ASSERT(swz <= 3);
1454 result[i] = src[swz];
1455 }
1456 if (source->Negate & (1 << i))
1457 result[i] = -result[i];
1458 }
1459 store_vector4(inst, machine, result);
1460 }
1461 break;
1462 case OPCODE_TEX: /* Both ARB and NV frag prog */
1463 /* Simple texel lookup */
1464 {
1465 GLfloat texcoord[4], color[4];
1466 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1467
1468 /* For TEX, texcoord.Q should not be used and its value should not
1469 * matter (at most, we pass coord.xyz to texture3D() in GLSL).
1470 * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
1471 * which is effectively what happens when the texcoord swizzle
1472 * is .xyzz
1473 */
1474 texcoord[3] = 1.0f;
1475
1476 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1477
1478 if (DEBUG_PROG) {
1479 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1480 color[0], color[1], color[2], color[3],
1481 inst->TexSrcUnit,
1482 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1483 }
1484 store_vector4(inst, machine, color);
1485 }
1486 break;
1487 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1488 /* Texel lookup with LOD bias */
1489 {
1490 GLfloat texcoord[4], color[4], lodBias;
1491
1492 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1493
1494 /* texcoord[3] is the bias to add to lambda */
1495 lodBias = texcoord[3];
1496
1497 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1498
1499 if (DEBUG_PROG) {
1500 printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1501 " bias %g\n",
1502 color[0], color[1], color[2], color[3],
1503 inst->TexSrcUnit,
1504 texcoord[0],
1505 texcoord[1],
1506 texcoord[2],
1507 texcoord[3],
1508 lodBias);
1509 }
1510
1511 store_vector4(inst, machine, color);
1512 }
1513 break;
1514 case OPCODE_TXD: /* GL_NV_fragment_program only */
1515 /* Texture lookup w/ partial derivatives for LOD */
1516 {
1517 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1518 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1519 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1520 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1521 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1522 0.0, /* lodBias */
1523 inst->TexSrcUnit, color);
1524 store_vector4(inst, machine, color);
1525 }
1526 break;
1527 case OPCODE_TXL:
1528 /* Texel lookup with explicit LOD */
1529 {
1530 GLfloat texcoord[4], color[4], lod;
1531
1532 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1533
1534 /* texcoord[3] is the LOD */
1535 lod = texcoord[3];
1536
1537 machine->FetchTexelLod(ctx, texcoord, lod,
1538 machine->Samplers[inst->TexSrcUnit], color);
1539
1540 store_vector4(inst, machine, color);
1541 }
1542 break;
1543 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1544 /* Texture lookup w/ projective divide */
1545 {
1546 GLfloat texcoord[4], color[4];
1547
1548 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1549 /* Not so sure about this test - if texcoord[3] is
1550 * zero, we'd probably be fine except for an ASSERT in
1551 * IROUND_POS() which gets triggered by the inf values created.
1552 */
1553 if (texcoord[3] != 0.0) {
1554 texcoord[0] /= texcoord[3];
1555 texcoord[1] /= texcoord[3];
1556 texcoord[2] /= texcoord[3];
1557 }
1558
1559 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1560
1561 store_vector4(inst, machine, color);
1562 }
1563 break;
1564 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1565 /* Texture lookup w/ projective divide, as above, but do not
1566 * do the divide by w if sampling from a cube map.
1567 */
1568 {
1569 GLfloat texcoord[4], color[4];
1570
1571 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1572 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1573 texcoord[3] != 0.0) {
1574 texcoord[0] /= texcoord[3];
1575 texcoord[1] /= texcoord[3];
1576 texcoord[2] /= texcoord[3];
1577 }
1578
1579 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1580
1581 store_vector4(inst, machine, color);
1582 }
1583 break;
1584 case OPCODE_TRUNC: /* truncate toward zero */
1585 {
1586 GLfloat a[4], result[4];
1587 fetch_vector4(&inst->SrcReg[0], machine, a);
1588 result[0] = (GLfloat) (GLint) a[0];
1589 result[1] = (GLfloat) (GLint) a[1];
1590 result[2] = (GLfloat) (GLint) a[2];
1591 result[3] = (GLfloat) (GLint) a[3];
1592 store_vector4(inst, machine, result);
1593 }
1594 break;
1595 case OPCODE_UP2H: /* unpack two 16-bit floats */
1596 {
1597 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1598 GLfloat result[4];
1599 GLushort hx, hy;
1600 hx = raw & 0xffff;
1601 hy = raw >> 16;
1602 result[0] = result[2] = _mesa_half_to_float(hx);
1603 result[1] = result[3] = _mesa_half_to_float(hy);
1604 store_vector4(inst, machine, result);
1605 }
1606 break;
1607 case OPCODE_UP2US: /* unpack two GLushorts */
1608 {
1609 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1610 GLfloat result[4];
1611 GLushort usx, usy;
1612 usx = raw & 0xffff;
1613 usy = raw >> 16;
1614 result[0] = result[2] = usx * (1.0f / 65535.0f);
1615 result[1] = result[3] = usy * (1.0f / 65535.0f);
1616 store_vector4(inst, machine, result);
1617 }
1618 break;
1619 case OPCODE_UP4B: /* unpack four GLbytes */
1620 {
1621 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1622 GLfloat result[4];
1623 result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1624 result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1625 result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1626 result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1627 store_vector4(inst, machine, result);
1628 }
1629 break;
1630 case OPCODE_UP4UB: /* unpack four GLubytes */
1631 {
1632 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1633 GLfloat result[4];
1634 result[0] = ((raw >> 0) & 0xff) / 255.0F;
1635 result[1] = ((raw >> 8) & 0xff) / 255.0F;
1636 result[2] = ((raw >> 16) & 0xff) / 255.0F;
1637 result[3] = ((raw >> 24) & 0xff) / 255.0F;
1638 store_vector4(inst, machine, result);
1639 }
1640 break;
1641 case OPCODE_XPD: /* cross product */
1642 {
1643 GLfloat a[4], b[4], result[4];
1644 fetch_vector4(&inst->SrcReg[0], machine, a);
1645 fetch_vector4(&inst->SrcReg[1], machine, b);
1646 result[0] = a[1] * b[2] - a[2] * b[1];
1647 result[1] = a[2] * b[0] - a[0] * b[2];
1648 result[2] = a[0] * b[1] - a[1] * b[0];
1649 result[3] = 1.0;
1650 store_vector4(inst, machine, result);
1651 if (DEBUG_PROG) {
1652 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1653 result[0], result[1], result[2], result[3],
1654 a[0], a[1], a[2], b[0], b[1], b[2]);
1655 }
1656 }
1657 break;
1658 case OPCODE_X2D: /* 2-D matrix transform */
1659 {
1660 GLfloat a[4], b[4], c[4], result[4];
1661 fetch_vector4(&inst->SrcReg[0], machine, a);
1662 fetch_vector4(&inst->SrcReg[1], machine, b);
1663 fetch_vector4(&inst->SrcReg[2], machine, c);
1664 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1665 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1666 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1667 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1668 store_vector4(inst, machine, result);
1669 }
1670 break;
1671 case OPCODE_END:
1672 return GL_TRUE;
1673 default:
1674 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1675 inst->Opcode);
1676 return GL_TRUE; /* return value doesn't matter */
1677 }
1678
1679 numExec++;
1680 if (numExec > maxExec) {
1681 static GLboolean reported = GL_FALSE;
1682 if (!reported) {
1683 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1684 reported = GL_TRUE;
1685 }
1686 return GL_TRUE;
1687 }
1688
1689 } /* for pc */
1690
1691 return GL_TRUE;
1692 }