mesa: re-implement _mesa_ProgramEnvParameter4fvARB() with memcpy()
[mesa.git] / src / mesa / shader / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.3
4 *
5 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31 /*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38 #include "main/glheader.h"
39 #include "main/colormac.h"
40 #include "main/context.h"
41 #include "prog_execute.h"
42 #include "prog_instruction.h"
43 #include "prog_parameter.h"
44 #include "prog_print.h"
45 #include "prog_noise.h"
46
47
48 /* debug predicate */
49 #define DEBUG_PROG 0
50
51
52 /**
53 * Set x to positive or negative infinity.
54 */
55 #if defined(USE_IEEE) || defined(_WIN32)
56 #define SET_POS_INFINITY(x) ( *((GLuint *) (void *)&x) = 0x7F800000 )
57 #define SET_NEG_INFINITY(x) ( *((GLuint *) (void *)&x) = 0xFF800000 )
58 #elif defined(VMS)
59 #define SET_POS_INFINITY(x) x = __MAXFLOAT
60 #define SET_NEG_INFINITY(x) x = -__MAXFLOAT
61 #else
62 #define SET_POS_INFINITY(x) x = (GLfloat) HUGE_VAL
63 #define SET_NEG_INFINITY(x) x = (GLfloat) -HUGE_VAL
64 #endif
65
66 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
67
68
69 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
70
71
72
73 /**
74 * Return a pointer to the 4-element float vector specified by the given
75 * source register.
76 */
77 static INLINE const GLfloat *
78 get_src_register_pointer(const struct prog_src_register *source,
79 const struct gl_program_machine *machine)
80 {
81 const struct gl_program *prog = machine->CurProgram;
82 GLint reg = source->Index;
83
84 if (source->RelAddr) {
85 /* add address register value to src index/offset */
86 reg += machine->AddressReg[0][0];
87 if (reg < 0) {
88 return ZeroVec;
89 }
90 }
91
92 switch (source->File) {
93 case PROGRAM_TEMPORARY:
94 if (reg >= MAX_PROGRAM_TEMPS)
95 return ZeroVec;
96 return machine->Temporaries[reg];
97
98 case PROGRAM_INPUT:
99 if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
100 if (reg >= VERT_ATTRIB_MAX)
101 return ZeroVec;
102 return machine->VertAttribs[reg];
103 }
104 else {
105 if (reg >= FRAG_ATTRIB_MAX)
106 return ZeroVec;
107 return machine->Attribs[reg][machine->CurElement];
108 }
109
110 case PROGRAM_OUTPUT:
111 if (reg >= MAX_PROGRAM_OUTPUTS)
112 return ZeroVec;
113 return machine->Outputs[reg];
114
115 case PROGRAM_LOCAL_PARAM:
116 if (reg >= MAX_PROGRAM_LOCAL_PARAMS)
117 return ZeroVec;
118 return machine->CurProgram->LocalParams[reg];
119
120 case PROGRAM_ENV_PARAM:
121 if (reg >= MAX_PROGRAM_ENV_PARAMS)
122 return ZeroVec;
123 return machine->EnvParams[reg];
124
125 case PROGRAM_STATE_VAR:
126 /* Fallthrough */
127 case PROGRAM_CONSTANT:
128 /* Fallthrough */
129 case PROGRAM_UNIFORM:
130 /* Fallthrough */
131 case PROGRAM_NAMED_PARAM:
132 if (reg >= (GLint) prog->Parameters->NumParameters)
133 return ZeroVec;
134 return prog->Parameters->ParameterValues[reg];
135
136 default:
137 _mesa_problem(NULL,
138 "Invalid src register file %d in get_src_register_pointer()",
139 source->File);
140 return NULL;
141 }
142 }
143
144
145 /**
146 * Return a pointer to the 4-element float vector specified by the given
147 * destination register.
148 */
149 static INLINE GLfloat *
150 get_dst_register_pointer(const struct prog_dst_register *dest,
151 struct gl_program_machine *machine)
152 {
153 static GLfloat dummyReg[4];
154 GLint reg = dest->Index;
155
156 if (dest->RelAddr) {
157 /* add address register value to src index/offset */
158 reg += machine->AddressReg[0][0];
159 if (reg < 0) {
160 return dummyReg;
161 }
162 }
163
164 switch (dest->File) {
165 case PROGRAM_TEMPORARY:
166 if (reg >= MAX_PROGRAM_TEMPS)
167 return dummyReg;
168 return machine->Temporaries[reg];
169
170 case PROGRAM_OUTPUT:
171 if (reg >= MAX_PROGRAM_OUTPUTS)
172 return dummyReg;
173 return machine->Outputs[reg];
174
175 case PROGRAM_WRITE_ONLY:
176 return dummyReg;
177
178 default:
179 _mesa_problem(NULL,
180 "Invalid dest register file %d in get_dst_register_pointer()",
181 dest->File);
182 return NULL;
183 }
184 }
185
186
187
188 /**
189 * Fetch a 4-element float vector from the given source register.
190 * Apply swizzling and negating as needed.
191 */
192 static void
193 fetch_vector4(const struct prog_src_register *source,
194 const struct gl_program_machine *machine, GLfloat result[4])
195 {
196 const GLfloat *src = get_src_register_pointer(source, machine);
197 ASSERT(src);
198
199 if (source->Swizzle == SWIZZLE_NOOP) {
200 /* no swizzling */
201 COPY_4V(result, src);
202 }
203 else {
204 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
205 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
206 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
207 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
208 result[0] = src[GET_SWZ(source->Swizzle, 0)];
209 result[1] = src[GET_SWZ(source->Swizzle, 1)];
210 result[2] = src[GET_SWZ(source->Swizzle, 2)];
211 result[3] = src[GET_SWZ(source->Swizzle, 3)];
212 }
213
214 if (source->Abs) {
215 result[0] = FABSF(result[0]);
216 result[1] = FABSF(result[1]);
217 result[2] = FABSF(result[2]);
218 result[3] = FABSF(result[3]);
219 }
220 if (source->Negate) {
221 ASSERT(source->Negate == NEGATE_XYZW);
222 result[0] = -result[0];
223 result[1] = -result[1];
224 result[2] = -result[2];
225 result[3] = -result[3];
226 }
227
228 #ifdef NAN_CHECK
229 assert(!IS_INF_OR_NAN(result[0]));
230 assert(!IS_INF_OR_NAN(result[0]));
231 assert(!IS_INF_OR_NAN(result[0]));
232 assert(!IS_INF_OR_NAN(result[0]));
233 #endif
234 }
235
236
237 /**
238 * Fetch a 4-element uint vector from the given source register.
239 * Apply swizzling but not negation/abs.
240 */
241 static void
242 fetch_vector4ui(const struct prog_src_register *source,
243 const struct gl_program_machine *machine, GLuint result[4])
244 {
245 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
246 ASSERT(src);
247
248 if (source->Swizzle == SWIZZLE_NOOP) {
249 /* no swizzling */
250 COPY_4V(result, src);
251 }
252 else {
253 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
254 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
255 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
256 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
257 result[0] = src[GET_SWZ(source->Swizzle, 0)];
258 result[1] = src[GET_SWZ(source->Swizzle, 1)];
259 result[2] = src[GET_SWZ(source->Swizzle, 2)];
260 result[3] = src[GET_SWZ(source->Swizzle, 3)];
261 }
262
263 /* Note: no Negate or Abs here */
264 }
265
266
267
268 /**
269 * Fetch the derivative with respect to X or Y for the given register.
270 * XXX this currently only works for fragment program input attribs.
271 */
272 static void
273 fetch_vector4_deriv(GLcontext * ctx,
274 const struct prog_src_register *source,
275 const struct gl_program_machine *machine,
276 char xOrY, GLfloat result[4])
277 {
278 if (source->File == PROGRAM_INPUT &&
279 source->Index < (GLint) machine->NumDeriv) {
280 const GLint col = machine->CurElement;
281 const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
282 const GLfloat invQ = 1.0f / w;
283 GLfloat deriv[4];
284
285 if (xOrY == 'X') {
286 deriv[0] = machine->DerivX[source->Index][0] * invQ;
287 deriv[1] = machine->DerivX[source->Index][1] * invQ;
288 deriv[2] = machine->DerivX[source->Index][2] * invQ;
289 deriv[3] = machine->DerivX[source->Index][3] * invQ;
290 }
291 else {
292 deriv[0] = machine->DerivY[source->Index][0] * invQ;
293 deriv[1] = machine->DerivY[source->Index][1] * invQ;
294 deriv[2] = machine->DerivY[source->Index][2] * invQ;
295 deriv[3] = machine->DerivY[source->Index][3] * invQ;
296 }
297
298 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
299 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
300 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
301 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
302
303 if (source->Abs) {
304 result[0] = FABSF(result[0]);
305 result[1] = FABSF(result[1]);
306 result[2] = FABSF(result[2]);
307 result[3] = FABSF(result[3]);
308 }
309 if (source->Negate) {
310 ASSERT(source->Negate == NEGATE_XYZW);
311 result[0] = -result[0];
312 result[1] = -result[1];
313 result[2] = -result[2];
314 result[3] = -result[3];
315 }
316 }
317 else {
318 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
319 }
320 }
321
322
323 /**
324 * As above, but only return result[0] element.
325 */
326 static void
327 fetch_vector1(const struct prog_src_register *source,
328 const struct gl_program_machine *machine, GLfloat result[4])
329 {
330 const GLfloat *src = get_src_register_pointer(source, machine);
331 ASSERT(src);
332
333 result[0] = src[GET_SWZ(source->Swizzle, 0)];
334
335 if (source->Abs) {
336 result[0] = FABSF(result[0]);
337 }
338 if (source->Negate) {
339 result[0] = -result[0];
340 }
341 }
342
343
344 static GLuint
345 fetch_vector1ui(const struct prog_src_register *source,
346 const struct gl_program_machine *machine)
347 {
348 const GLuint *src = (GLuint *) get_src_register_pointer(source, machine);
349 GLuint result;
350
351 ASSERT(src);
352
353 result = src[GET_SWZ(source->Swizzle, 0)];
354
355 if (source->Abs) {
356 result = FABSF(result);
357 }
358 if (source->Negate) {
359 result = -result;
360 }
361
362 return result;
363 }
364
365
366 /**
367 * Fetch texel from texture. Use partial derivatives when possible.
368 */
369 static INLINE void
370 fetch_texel(GLcontext *ctx,
371 const struct gl_program_machine *machine,
372 const struct prog_instruction *inst,
373 const GLfloat texcoord[4], GLfloat lodBias,
374 GLfloat color[4])
375 {
376 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
377
378 /* Note: we only have the right derivatives for fragment input attribs.
379 */
380 if (machine->NumDeriv > 0 &&
381 inst->SrcReg[0].File == PROGRAM_INPUT &&
382 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
383 /* simple texture fetch for which we should have derivatives */
384 GLuint attr = inst->SrcReg[0].Index;
385 machine->FetchTexelDeriv(ctx, texcoord,
386 machine->DerivX[attr],
387 machine->DerivY[attr],
388 lodBias, unit, color);
389 }
390 else {
391 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
392 }
393 }
394
395
396 /**
397 * Test value against zero and return GT, LT, EQ or UN if NaN.
398 */
399 static INLINE GLuint
400 generate_cc(float value)
401 {
402 if (value != value)
403 return COND_UN; /* NaN */
404 if (value > 0.0F)
405 return COND_GT;
406 if (value < 0.0F)
407 return COND_LT;
408 return COND_EQ;
409 }
410
411
412 /**
413 * Test if the ccMaskRule is satisfied by the given condition code.
414 * Used to mask destination writes according to the current condition code.
415 */
416 static INLINE GLboolean
417 test_cc(GLuint condCode, GLuint ccMaskRule)
418 {
419 switch (ccMaskRule) {
420 case COND_EQ: return (condCode == COND_EQ);
421 case COND_NE: return (condCode != COND_EQ);
422 case COND_LT: return (condCode == COND_LT);
423 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
424 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
425 case COND_GT: return (condCode == COND_GT);
426 case COND_TR: return GL_TRUE;
427 case COND_FL: return GL_FALSE;
428 default: return GL_TRUE;
429 }
430 }
431
432
433 /**
434 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
435 * or GL_FALSE to indicate result.
436 */
437 static INLINE GLboolean
438 eval_condition(const struct gl_program_machine *machine,
439 const struct prog_instruction *inst)
440 {
441 const GLuint swizzle = inst->DstReg.CondSwizzle;
442 const GLuint condMask = inst->DstReg.CondMask;
443 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
444 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
445 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
446 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
447 return GL_TRUE;
448 }
449 else {
450 return GL_FALSE;
451 }
452 }
453
454
455
456 /**
457 * Store 4 floats into a register. Observe the instructions saturate and
458 * set-condition-code flags.
459 */
460 static void
461 store_vector4(const struct prog_instruction *inst,
462 struct gl_program_machine *machine, const GLfloat value[4])
463 {
464 const struct prog_dst_register *dstReg = &(inst->DstReg);
465 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
466 GLuint writeMask = dstReg->WriteMask;
467 GLfloat clampedValue[4];
468 GLfloat *dst = get_dst_register_pointer(dstReg, machine);
469
470 #if 0
471 if (value[0] > 1.0e10 ||
472 IS_INF_OR_NAN(value[0]) ||
473 IS_INF_OR_NAN(value[1]) ||
474 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
475 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
476 #endif
477
478 if (clamp) {
479 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
480 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
481 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
482 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
483 value = clampedValue;
484 }
485
486 if (dstReg->CondMask != COND_TR) {
487 /* condition codes may turn off some writes */
488 if (writeMask & WRITEMASK_X) {
489 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
490 dstReg->CondMask))
491 writeMask &= ~WRITEMASK_X;
492 }
493 if (writeMask & WRITEMASK_Y) {
494 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
495 dstReg->CondMask))
496 writeMask &= ~WRITEMASK_Y;
497 }
498 if (writeMask & WRITEMASK_Z) {
499 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
500 dstReg->CondMask))
501 writeMask &= ~WRITEMASK_Z;
502 }
503 if (writeMask & WRITEMASK_W) {
504 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
505 dstReg->CondMask))
506 writeMask &= ~WRITEMASK_W;
507 }
508 }
509
510 #ifdef NAN_CHECK
511 assert(!IS_INF_OR_NAN(value[0]));
512 assert(!IS_INF_OR_NAN(value[0]));
513 assert(!IS_INF_OR_NAN(value[0]));
514 assert(!IS_INF_OR_NAN(value[0]));
515 #endif
516
517 if (writeMask & WRITEMASK_X)
518 dst[0] = value[0];
519 if (writeMask & WRITEMASK_Y)
520 dst[1] = value[1];
521 if (writeMask & WRITEMASK_Z)
522 dst[2] = value[2];
523 if (writeMask & WRITEMASK_W)
524 dst[3] = value[3];
525
526 if (inst->CondUpdate) {
527 if (writeMask & WRITEMASK_X)
528 machine->CondCodes[0] = generate_cc(value[0]);
529 if (writeMask & WRITEMASK_Y)
530 machine->CondCodes[1] = generate_cc(value[1]);
531 if (writeMask & WRITEMASK_Z)
532 machine->CondCodes[2] = generate_cc(value[2]);
533 if (writeMask & WRITEMASK_W)
534 machine->CondCodes[3] = generate_cc(value[3]);
535 #if DEBUG_PROG
536 printf("CondCodes=(%s,%s,%s,%s) for:\n",
537 _mesa_condcode_string(machine->CondCodes[0]),
538 _mesa_condcode_string(machine->CondCodes[1]),
539 _mesa_condcode_string(machine->CondCodes[2]),
540 _mesa_condcode_string(machine->CondCodes[3]));
541 #endif
542 }
543 }
544
545
546 /**
547 * Store 4 uints into a register. Observe the set-condition-code flags.
548 */
549 static void
550 store_vector4ui(const struct prog_instruction *inst,
551 struct gl_program_machine *machine, const GLuint value[4])
552 {
553 const struct prog_dst_register *dstReg = &(inst->DstReg);
554 GLuint writeMask = dstReg->WriteMask;
555 GLuint *dst = (GLuint *) get_dst_register_pointer(dstReg, machine);
556
557 if (dstReg->CondMask != COND_TR) {
558 /* condition codes may turn off some writes */
559 if (writeMask & WRITEMASK_X) {
560 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 0)],
561 dstReg->CondMask))
562 writeMask &= ~WRITEMASK_X;
563 }
564 if (writeMask & WRITEMASK_Y) {
565 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 1)],
566 dstReg->CondMask))
567 writeMask &= ~WRITEMASK_Y;
568 }
569 if (writeMask & WRITEMASK_Z) {
570 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 2)],
571 dstReg->CondMask))
572 writeMask &= ~WRITEMASK_Z;
573 }
574 if (writeMask & WRITEMASK_W) {
575 if (!test_cc(machine->CondCodes[GET_SWZ(dstReg->CondSwizzle, 3)],
576 dstReg->CondMask))
577 writeMask &= ~WRITEMASK_W;
578 }
579 }
580
581 if (writeMask & WRITEMASK_X)
582 dst[0] = value[0];
583 if (writeMask & WRITEMASK_Y)
584 dst[1] = value[1];
585 if (writeMask & WRITEMASK_Z)
586 dst[2] = value[2];
587 if (writeMask & WRITEMASK_W)
588 dst[3] = value[3];
589
590 if (inst->CondUpdate) {
591 if (writeMask & WRITEMASK_X)
592 machine->CondCodes[0] = generate_cc(value[0]);
593 if (writeMask & WRITEMASK_Y)
594 machine->CondCodes[1] = generate_cc(value[1]);
595 if (writeMask & WRITEMASK_Z)
596 machine->CondCodes[2] = generate_cc(value[2]);
597 if (writeMask & WRITEMASK_W)
598 machine->CondCodes[3] = generate_cc(value[3]);
599 #if DEBUG_PROG
600 printf("CondCodes=(%s,%s,%s,%s) for:\n",
601 _mesa_condcode_string(machine->CondCodes[0]),
602 _mesa_condcode_string(machine->CondCodes[1]),
603 _mesa_condcode_string(machine->CondCodes[2]),
604 _mesa_condcode_string(machine->CondCodes[3]));
605 #endif
606 }
607 }
608
609
610
611 /**
612 * Execute the given vertex/fragment program.
613 *
614 * \param ctx rendering context
615 * \param program the program to execute
616 * \param machine machine state (must be initialized)
617 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
618 */
619 GLboolean
620 _mesa_execute_program(GLcontext * ctx,
621 const struct gl_program *program,
622 struct gl_program_machine *machine)
623 {
624 const GLuint numInst = program->NumInstructions;
625 const GLuint maxExec = 10000;
626 GLuint pc, numExec = 0;
627
628 machine->CurProgram = program;
629
630 if (DEBUG_PROG) {
631 printf("execute program %u --------------------\n", program->Id);
632 }
633
634 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
635 machine->EnvParams = ctx->VertexProgram.Parameters;
636 }
637 else {
638 machine->EnvParams = ctx->FragmentProgram.Parameters;
639 }
640
641 for (pc = 0; pc < numInst; pc++) {
642 const struct prog_instruction *inst = program->Instructions + pc;
643
644 if (DEBUG_PROG) {
645 _mesa_print_instruction(inst);
646 }
647
648 switch (inst->Opcode) {
649 case OPCODE_ABS:
650 {
651 GLfloat a[4], result[4];
652 fetch_vector4(&inst->SrcReg[0], machine, a);
653 result[0] = FABSF(a[0]);
654 result[1] = FABSF(a[1]);
655 result[2] = FABSF(a[2]);
656 result[3] = FABSF(a[3]);
657 store_vector4(inst, machine, result);
658 }
659 break;
660 case OPCODE_ADD:
661 {
662 GLfloat a[4], b[4], result[4];
663 fetch_vector4(&inst->SrcReg[0], machine, a);
664 fetch_vector4(&inst->SrcReg[1], machine, b);
665 result[0] = a[0] + b[0];
666 result[1] = a[1] + b[1];
667 result[2] = a[2] + b[2];
668 result[3] = a[3] + b[3];
669 store_vector4(inst, machine, result);
670 if (DEBUG_PROG) {
671 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
672 result[0], result[1], result[2], result[3],
673 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
674 }
675 }
676 break;
677 case OPCODE_AND: /* bitwise AND */
678 {
679 GLuint a[4], b[4], result[4];
680 fetch_vector4ui(&inst->SrcReg[0], machine, a);
681 fetch_vector4ui(&inst->SrcReg[1], machine, b);
682 result[0] = a[0] & b[0];
683 result[1] = a[1] & b[1];
684 result[2] = a[2] & b[2];
685 result[3] = a[3] & b[3];
686 store_vector4ui(inst, machine, result);
687 }
688 break;
689 case OPCODE_ARL:
690 {
691 GLfloat t[4];
692 fetch_vector4(&inst->SrcReg[0], machine, t);
693 machine->AddressReg[0][0] = IFLOOR(t[0]);
694 }
695 break;
696 case OPCODE_BGNLOOP:
697 /* no-op */
698 break;
699 case OPCODE_ENDLOOP:
700 /* subtract 1 here since pc is incremented by for(pc) loop */
701 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
702 break;
703 case OPCODE_BGNSUB: /* begin subroutine */
704 break;
705 case OPCODE_ENDSUB: /* end subroutine */
706 break;
707 case OPCODE_BRA: /* branch (conditional) */
708 /* fall-through */
709 case OPCODE_BRK: /* break out of loop (conditional) */
710 /* fall-through */
711 case OPCODE_CONT: /* continue loop (conditional) */
712 if (eval_condition(machine, inst)) {
713 /* take branch */
714 /* Subtract 1 here since we'll do pc++ at end of for-loop */
715 pc = inst->BranchTarget - 1;
716 }
717 break;
718 case OPCODE_CAL: /* Call subroutine (conditional) */
719 if (eval_condition(machine, inst)) {
720 /* call the subroutine */
721 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
722 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
723 }
724 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
725 /* Subtract 1 here since we'll do pc++ at end of for-loop */
726 pc = inst->BranchTarget - 1;
727 }
728 break;
729 case OPCODE_CMP:
730 {
731 GLfloat a[4], b[4], c[4], result[4];
732 fetch_vector4(&inst->SrcReg[0], machine, a);
733 fetch_vector4(&inst->SrcReg[1], machine, b);
734 fetch_vector4(&inst->SrcReg[2], machine, c);
735 result[0] = a[0] < 0.0F ? b[0] : c[0];
736 result[1] = a[1] < 0.0F ? b[1] : c[1];
737 result[2] = a[2] < 0.0F ? b[2] : c[2];
738 result[3] = a[3] < 0.0F ? b[3] : c[3];
739 store_vector4(inst, machine, result);
740 }
741 break;
742 case OPCODE_COS:
743 {
744 GLfloat a[4], result[4];
745 fetch_vector1(&inst->SrcReg[0], machine, a);
746 result[0] = result[1] = result[2] = result[3]
747 = (GLfloat) _mesa_cos(a[0]);
748 store_vector4(inst, machine, result);
749 }
750 break;
751 case OPCODE_DDX: /* Partial derivative with respect to X */
752 {
753 GLfloat result[4];
754 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
755 'X', result);
756 store_vector4(inst, machine, result);
757 }
758 break;
759 case OPCODE_DDY: /* Partial derivative with respect to Y */
760 {
761 GLfloat result[4];
762 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
763 'Y', result);
764 store_vector4(inst, machine, result);
765 }
766 break;
767 case OPCODE_DP2:
768 {
769 GLfloat a[4], b[4], result[4];
770 fetch_vector4(&inst->SrcReg[0], machine, a);
771 fetch_vector4(&inst->SrcReg[1], machine, b);
772 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
773 store_vector4(inst, machine, result);
774 if (DEBUG_PROG) {
775 printf("DP2 %g = (%g %g) . (%g %g)\n",
776 result[0], a[0], a[1], b[0], b[1]);
777 }
778 }
779 break;
780 case OPCODE_DP2A:
781 {
782 GLfloat a[4], b[4], c, result[4];
783 fetch_vector4(&inst->SrcReg[0], machine, a);
784 fetch_vector4(&inst->SrcReg[1], machine, b);
785 fetch_vector1(&inst->SrcReg[1], machine, &c);
786 result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
787 store_vector4(inst, machine, result);
788 if (DEBUG_PROG) {
789 printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
790 result[0], a[0], a[1], b[0], b[1], c);
791 }
792 }
793 break;
794 case OPCODE_DP3:
795 {
796 GLfloat a[4], b[4], result[4];
797 fetch_vector4(&inst->SrcReg[0], machine, a);
798 fetch_vector4(&inst->SrcReg[1], machine, b);
799 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
800 store_vector4(inst, machine, result);
801 if (DEBUG_PROG) {
802 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
803 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
804 }
805 }
806 break;
807 case OPCODE_DP4:
808 {
809 GLfloat a[4], b[4], result[4];
810 fetch_vector4(&inst->SrcReg[0], machine, a);
811 fetch_vector4(&inst->SrcReg[1], machine, b);
812 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
813 store_vector4(inst, machine, result);
814 if (DEBUG_PROG) {
815 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
816 result[0], a[0], a[1], a[2], a[3],
817 b[0], b[1], b[2], b[3]);
818 }
819 }
820 break;
821 case OPCODE_DPH:
822 {
823 GLfloat a[4], b[4], result[4];
824 fetch_vector4(&inst->SrcReg[0], machine, a);
825 fetch_vector4(&inst->SrcReg[1], machine, b);
826 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
827 store_vector4(inst, machine, result);
828 }
829 break;
830 case OPCODE_DST: /* Distance vector */
831 {
832 GLfloat a[4], b[4], result[4];
833 fetch_vector4(&inst->SrcReg[0], machine, a);
834 fetch_vector4(&inst->SrcReg[1], machine, b);
835 result[0] = 1.0F;
836 result[1] = a[1] * b[1];
837 result[2] = a[2];
838 result[3] = b[3];
839 store_vector4(inst, machine, result);
840 }
841 break;
842 case OPCODE_EXP:
843 {
844 GLfloat t[4], q[4], floor_t0;
845 fetch_vector1(&inst->SrcReg[0], machine, t);
846 floor_t0 = FLOORF(t[0]);
847 if (floor_t0 > FLT_MAX_EXP) {
848 SET_POS_INFINITY(q[0]);
849 SET_POS_INFINITY(q[2]);
850 }
851 else if (floor_t0 < FLT_MIN_EXP) {
852 q[0] = 0.0F;
853 q[2] = 0.0F;
854 }
855 else {
856 q[0] = LDEXPF(1.0, (int) floor_t0);
857 /* Note: GL_NV_vertex_program expects
858 * result.z = result.x * APPX(result.y)
859 * We do what the ARB extension says.
860 */
861 q[2] = (GLfloat) _mesa_pow(2.0, t[0]);
862 }
863 q[1] = t[0] - floor_t0;
864 q[3] = 1.0F;
865 store_vector4( inst, machine, q );
866 }
867 break;
868 case OPCODE_EX2: /* Exponential base 2 */
869 {
870 GLfloat a[4], result[4], val;
871 fetch_vector1(&inst->SrcReg[0], machine, a);
872 val = (GLfloat) _mesa_pow(2.0, a[0]);
873 /*
874 if (IS_INF_OR_NAN(val))
875 val = 1.0e10;
876 */
877 result[0] = result[1] = result[2] = result[3] = val;
878 store_vector4(inst, machine, result);
879 }
880 break;
881 case OPCODE_FLR:
882 {
883 GLfloat a[4], result[4];
884 fetch_vector4(&inst->SrcReg[0], machine, a);
885 result[0] = FLOORF(a[0]);
886 result[1] = FLOORF(a[1]);
887 result[2] = FLOORF(a[2]);
888 result[3] = FLOORF(a[3]);
889 store_vector4(inst, machine, result);
890 }
891 break;
892 case OPCODE_FRC:
893 {
894 GLfloat a[4], result[4];
895 fetch_vector4(&inst->SrcReg[0], machine, a);
896 result[0] = a[0] - FLOORF(a[0]);
897 result[1] = a[1] - FLOORF(a[1]);
898 result[2] = a[2] - FLOORF(a[2]);
899 result[3] = a[3] - FLOORF(a[3]);
900 store_vector4(inst, machine, result);
901 }
902 break;
903 case OPCODE_IF:
904 {
905 GLboolean cond;
906 /* eval condition */
907 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
908 GLfloat a[4];
909 fetch_vector1(&inst->SrcReg[0], machine, a);
910 cond = (a[0] != 0.0);
911 }
912 else {
913 cond = eval_condition(machine, inst);
914 }
915 if (DEBUG_PROG) {
916 printf("IF: %d\n", cond);
917 }
918 /* do if/else */
919 if (cond) {
920 /* do if-clause (just continue execution) */
921 }
922 else {
923 /* go to the instruction after ELSE or ENDIF */
924 assert(inst->BranchTarget >= 0);
925 pc = inst->BranchTarget - 1;
926 }
927 }
928 break;
929 case OPCODE_ELSE:
930 /* goto ENDIF */
931 assert(inst->BranchTarget >= 0);
932 pc = inst->BranchTarget - 1;
933 break;
934 case OPCODE_ENDIF:
935 /* nothing */
936 break;
937 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
938 if (eval_condition(machine, inst)) {
939 return GL_FALSE;
940 }
941 break;
942 case OPCODE_KIL: /* ARB_f_p only */
943 {
944 GLfloat a[4];
945 fetch_vector4(&inst->SrcReg[0], machine, a);
946 if (DEBUG_PROG) {
947 printf("KIL if (%g %g %g %g) <= 0.0\n",
948 a[0], a[1], a[2], a[3]);
949 }
950
951 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
952 return GL_FALSE;
953 }
954 }
955 break;
956 case OPCODE_LG2: /* log base 2 */
957 {
958 GLfloat a[4], result[4], val;
959 fetch_vector1(&inst->SrcReg[0], machine, a);
960 /* The fast LOG2 macro doesn't meet the precision requirements.
961 */
962 if (a[0] == 0.0F) {
963 val = -FLT_MAX;
964 }
965 else {
966 val = log(a[0]) * 1.442695F;
967 }
968 result[0] = result[1] = result[2] = result[3] = val;
969 store_vector4(inst, machine, result);
970 }
971 break;
972 case OPCODE_LIT:
973 {
974 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
975 GLfloat a[4], result[4];
976 fetch_vector4(&inst->SrcReg[0], machine, a);
977 a[0] = MAX2(a[0], 0.0F);
978 a[1] = MAX2(a[1], 0.0F);
979 /* XXX ARB version clamps a[3], NV version doesn't */
980 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
981 result[0] = 1.0F;
982 result[1] = a[0];
983 /* XXX we could probably just use pow() here */
984 if (a[0] > 0.0F) {
985 if (a[1] == 0.0 && a[3] == 0.0)
986 result[2] = 1.0;
987 else
988 result[2] = (GLfloat) _mesa_pow(a[1], a[3]);
989 }
990 else {
991 result[2] = 0.0;
992 }
993 result[3] = 1.0F;
994 store_vector4(inst, machine, result);
995 if (DEBUG_PROG) {
996 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
997 result[0], result[1], result[2], result[3],
998 a[0], a[1], a[2], a[3]);
999 }
1000 }
1001 break;
1002 case OPCODE_LOG:
1003 {
1004 GLfloat t[4], q[4], abs_t0;
1005 fetch_vector1(&inst->SrcReg[0], machine, t);
1006 abs_t0 = FABSF(t[0]);
1007 if (abs_t0 != 0.0F) {
1008 /* Since we really can't handle infinite values on VMS
1009 * like other OSes we'll use __MAXFLOAT to represent
1010 * infinity. This may need some tweaking.
1011 */
1012 #ifdef VMS
1013 if (abs_t0 == __MAXFLOAT)
1014 #else
1015 if (IS_INF_OR_NAN(abs_t0))
1016 #endif
1017 {
1018 SET_POS_INFINITY(q[0]);
1019 q[1] = 1.0F;
1020 SET_POS_INFINITY(q[2]);
1021 }
1022 else {
1023 int exponent;
1024 GLfloat mantissa = FREXPF(t[0], &exponent);
1025 q[0] = (GLfloat) (exponent - 1);
1026 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1027
1028 /* The fast LOG2 macro doesn't meet the precision
1029 * requirements.
1030 */
1031 q[2] = (log(t[0]) * 1.442695F);
1032 }
1033 }
1034 else {
1035 SET_NEG_INFINITY(q[0]);
1036 q[1] = 1.0F;
1037 SET_NEG_INFINITY(q[2]);
1038 }
1039 q[3] = 1.0;
1040 store_vector4(inst, machine, q);
1041 }
1042 break;
1043 case OPCODE_LRP:
1044 {
1045 GLfloat a[4], b[4], c[4], result[4];
1046 fetch_vector4(&inst->SrcReg[0], machine, a);
1047 fetch_vector4(&inst->SrcReg[1], machine, b);
1048 fetch_vector4(&inst->SrcReg[2], machine, c);
1049 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1050 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1051 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1052 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1053 store_vector4(inst, machine, result);
1054 if (DEBUG_PROG) {
1055 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1056 "(%g %g %g %g), (%g %g %g %g)\n",
1057 result[0], result[1], result[2], result[3],
1058 a[0], a[1], a[2], a[3],
1059 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1060 }
1061 }
1062 break;
1063 case OPCODE_MAD:
1064 {
1065 GLfloat a[4], b[4], c[4], result[4];
1066 fetch_vector4(&inst->SrcReg[0], machine, a);
1067 fetch_vector4(&inst->SrcReg[1], machine, b);
1068 fetch_vector4(&inst->SrcReg[2], machine, c);
1069 result[0] = a[0] * b[0] + c[0];
1070 result[1] = a[1] * b[1] + c[1];
1071 result[2] = a[2] * b[2] + c[2];
1072 result[3] = a[3] * b[3] + c[3];
1073 store_vector4(inst, machine, result);
1074 if (DEBUG_PROG) {
1075 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1076 "(%g %g %g %g) + (%g %g %g %g)\n",
1077 result[0], result[1], result[2], result[3],
1078 a[0], a[1], a[2], a[3],
1079 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1080 }
1081 }
1082 break;
1083 case OPCODE_MAX:
1084 {
1085 GLfloat a[4], b[4], result[4];
1086 fetch_vector4(&inst->SrcReg[0], machine, a);
1087 fetch_vector4(&inst->SrcReg[1], machine, b);
1088 result[0] = MAX2(a[0], b[0]);
1089 result[1] = MAX2(a[1], b[1]);
1090 result[2] = MAX2(a[2], b[2]);
1091 result[3] = MAX2(a[3], b[3]);
1092 store_vector4(inst, machine, result);
1093 if (DEBUG_PROG) {
1094 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1095 result[0], result[1], result[2], result[3],
1096 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1097 }
1098 }
1099 break;
1100 case OPCODE_MIN:
1101 {
1102 GLfloat a[4], b[4], result[4];
1103 fetch_vector4(&inst->SrcReg[0], machine, a);
1104 fetch_vector4(&inst->SrcReg[1], machine, b);
1105 result[0] = MIN2(a[0], b[0]);
1106 result[1] = MIN2(a[1], b[1]);
1107 result[2] = MIN2(a[2], b[2]);
1108 result[3] = MIN2(a[3], b[3]);
1109 store_vector4(inst, machine, result);
1110 }
1111 break;
1112 case OPCODE_MOV:
1113 {
1114 GLfloat result[4];
1115 fetch_vector4(&inst->SrcReg[0], machine, result);
1116 store_vector4(inst, machine, result);
1117 if (DEBUG_PROG) {
1118 printf("MOV (%g %g %g %g)\n",
1119 result[0], result[1], result[2], result[3]);
1120 }
1121 }
1122 break;
1123 case OPCODE_MUL:
1124 {
1125 GLfloat a[4], b[4], result[4];
1126 fetch_vector4(&inst->SrcReg[0], machine, a);
1127 fetch_vector4(&inst->SrcReg[1], machine, b);
1128 result[0] = a[0] * b[0];
1129 result[1] = a[1] * b[1];
1130 result[2] = a[2] * b[2];
1131 result[3] = a[3] * b[3];
1132 store_vector4(inst, machine, result);
1133 if (DEBUG_PROG) {
1134 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1135 result[0], result[1], result[2], result[3],
1136 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1137 }
1138 }
1139 break;
1140 case OPCODE_NOISE1:
1141 {
1142 GLfloat a[4], result[4];
1143 fetch_vector1(&inst->SrcReg[0], machine, a);
1144 result[0] =
1145 result[1] =
1146 result[2] =
1147 result[3] = _mesa_noise1(a[0]);
1148 store_vector4(inst, machine, result);
1149 }
1150 break;
1151 case OPCODE_NOISE2:
1152 {
1153 GLfloat a[4], result[4];
1154 fetch_vector4(&inst->SrcReg[0], machine, a);
1155 result[0] =
1156 result[1] =
1157 result[2] = result[3] = _mesa_noise2(a[0], a[1]);
1158 store_vector4(inst, machine, result);
1159 }
1160 break;
1161 case OPCODE_NOISE3:
1162 {
1163 GLfloat a[4], result[4];
1164 fetch_vector4(&inst->SrcReg[0], machine, a);
1165 result[0] =
1166 result[1] =
1167 result[2] =
1168 result[3] = _mesa_noise3(a[0], a[1], a[2]);
1169 store_vector4(inst, machine, result);
1170 }
1171 break;
1172 case OPCODE_NOISE4:
1173 {
1174 GLfloat a[4], result[4];
1175 fetch_vector4(&inst->SrcReg[0], machine, a);
1176 result[0] =
1177 result[1] =
1178 result[2] =
1179 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
1180 store_vector4(inst, machine, result);
1181 }
1182 break;
1183 case OPCODE_NOP:
1184 break;
1185 case OPCODE_NOT: /* bitwise NOT */
1186 {
1187 GLuint a[4], result[4];
1188 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1189 result[0] = ~a[0];
1190 result[1] = ~a[1];
1191 result[2] = ~a[2];
1192 result[3] = ~a[3];
1193 store_vector4ui(inst, machine, result);
1194 }
1195 break;
1196 case OPCODE_NRM3: /* 3-component normalization */
1197 {
1198 GLfloat a[4], result[4];
1199 GLfloat tmp;
1200 fetch_vector4(&inst->SrcReg[0], machine, a);
1201 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1202 if (tmp != 0.0F)
1203 tmp = INV_SQRTF(tmp);
1204 result[0] = tmp * a[0];
1205 result[1] = tmp * a[1];
1206 result[2] = tmp * a[2];
1207 result[3] = 0.0; /* undefined, but prevent valgrind warnings */
1208 store_vector4(inst, machine, result);
1209 }
1210 break;
1211 case OPCODE_NRM4: /* 4-component normalization */
1212 {
1213 GLfloat a[4], result[4];
1214 GLfloat tmp;
1215 fetch_vector4(&inst->SrcReg[0], machine, a);
1216 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1217 if (tmp != 0.0F)
1218 tmp = INV_SQRTF(tmp);
1219 result[0] = tmp * a[0];
1220 result[1] = tmp * a[1];
1221 result[2] = tmp * a[2];
1222 result[3] = tmp * a[3];
1223 store_vector4(inst, machine, result);
1224 }
1225 break;
1226 case OPCODE_OR: /* bitwise OR */
1227 {
1228 GLuint a[4], b[4], result[4];
1229 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1230 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1231 result[0] = a[0] | b[0];
1232 result[1] = a[1] | b[1];
1233 result[2] = a[2] | b[2];
1234 result[3] = a[3] | b[3];
1235 store_vector4ui(inst, machine, result);
1236 }
1237 break;
1238 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1239 {
1240 GLfloat a[4];
1241 GLuint result[4];
1242 GLhalfNV hx, hy;
1243 fetch_vector4(&inst->SrcReg[0], machine, a);
1244 hx = _mesa_float_to_half(a[0]);
1245 hy = _mesa_float_to_half(a[1]);
1246 result[0] =
1247 result[1] =
1248 result[2] =
1249 result[3] = hx | (hy << 16);
1250 store_vector4ui(inst, machine, result);
1251 }
1252 break;
1253 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1254 {
1255 GLfloat a[4];
1256 GLuint result[4], usx, usy;
1257 fetch_vector4(&inst->SrcReg[0], machine, a);
1258 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1259 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1260 usx = IROUND(a[0] * 65535.0F);
1261 usy = IROUND(a[1] * 65535.0F);
1262 result[0] =
1263 result[1] =
1264 result[2] =
1265 result[3] = usx | (usy << 16);
1266 store_vector4ui(inst, machine, result);
1267 }
1268 break;
1269 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1270 {
1271 GLfloat a[4];
1272 GLuint result[4], ubx, uby, ubz, ubw;
1273 fetch_vector4(&inst->SrcReg[0], machine, a);
1274 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1275 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1276 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1277 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1278 ubx = IROUND(127.0F * a[0] + 128.0F);
1279 uby = IROUND(127.0F * a[1] + 128.0F);
1280 ubz = IROUND(127.0F * a[2] + 128.0F);
1281 ubw = IROUND(127.0F * a[3] + 128.0F);
1282 result[0] =
1283 result[1] =
1284 result[2] =
1285 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1286 store_vector4ui(inst, machine, result);
1287 }
1288 break;
1289 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1290 {
1291 GLfloat a[4];
1292 GLuint result[4], ubx, uby, ubz, ubw;
1293 fetch_vector4(&inst->SrcReg[0], machine, a);
1294 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1295 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1296 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1297 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1298 ubx = IROUND(255.0F * a[0]);
1299 uby = IROUND(255.0F * a[1]);
1300 ubz = IROUND(255.0F * a[2]);
1301 ubw = IROUND(255.0F * a[3]);
1302 result[0] =
1303 result[1] =
1304 result[2] =
1305 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1306 store_vector4ui(inst, machine, result);
1307 }
1308 break;
1309 case OPCODE_POW:
1310 {
1311 GLfloat a[4], b[4], result[4];
1312 fetch_vector1(&inst->SrcReg[0], machine, a);
1313 fetch_vector1(&inst->SrcReg[1], machine, b);
1314 result[0] = result[1] = result[2] = result[3]
1315 = (GLfloat) _mesa_pow(a[0], b[0]);
1316 store_vector4(inst, machine, result);
1317 }
1318 break;
1319 case OPCODE_RCP:
1320 {
1321 GLfloat a[4], result[4];
1322 fetch_vector1(&inst->SrcReg[0], machine, a);
1323 if (DEBUG_PROG) {
1324 if (a[0] == 0)
1325 printf("RCP(0)\n");
1326 else if (IS_INF_OR_NAN(a[0]))
1327 printf("RCP(inf)\n");
1328 }
1329 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1330 store_vector4(inst, machine, result);
1331 }
1332 break;
1333 case OPCODE_RET: /* return from subroutine (conditional) */
1334 if (eval_condition(machine, inst)) {
1335 if (machine->StackDepth == 0) {
1336 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1337 }
1338 /* subtract one because of pc++ in the for loop */
1339 pc = machine->CallStack[--machine->StackDepth] - 1;
1340 }
1341 break;
1342 case OPCODE_RFL: /* reflection vector */
1343 {
1344 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1345 fetch_vector4(&inst->SrcReg[0], machine, axis);
1346 fetch_vector4(&inst->SrcReg[1], machine, dir);
1347 tmpW = DOT3(axis, axis);
1348 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1349 result[0] = tmpX * axis[0] - dir[0];
1350 result[1] = tmpX * axis[1] - dir[1];
1351 result[2] = tmpX * axis[2] - dir[2];
1352 /* result[3] is never written! XXX enforce in parser! */
1353 store_vector4(inst, machine, result);
1354 }
1355 break;
1356 case OPCODE_RSQ: /* 1 / sqrt() */
1357 {
1358 GLfloat a[4], result[4];
1359 fetch_vector1(&inst->SrcReg[0], machine, a);
1360 a[0] = FABSF(a[0]);
1361 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1362 store_vector4(inst, machine, result);
1363 if (DEBUG_PROG) {
1364 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1365 }
1366 }
1367 break;
1368 case OPCODE_SCS: /* sine and cos */
1369 {
1370 GLfloat a[4], result[4];
1371 fetch_vector1(&inst->SrcReg[0], machine, a);
1372 result[0] = (GLfloat) _mesa_cos(a[0]);
1373 result[1] = (GLfloat) _mesa_sin(a[0]);
1374 result[2] = 0.0; /* undefined! */
1375 result[3] = 0.0; /* undefined! */
1376 store_vector4(inst, machine, result);
1377 }
1378 break;
1379 case OPCODE_SEQ: /* set on equal */
1380 {
1381 GLfloat a[4], b[4], result[4];
1382 fetch_vector4(&inst->SrcReg[0], machine, a);
1383 fetch_vector4(&inst->SrcReg[1], machine, b);
1384 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1385 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1386 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1387 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1388 store_vector4(inst, machine, result);
1389 if (DEBUG_PROG) {
1390 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1391 result[0], result[1], result[2], result[3],
1392 a[0], a[1], a[2], a[3],
1393 b[0], b[1], b[2], b[3]);
1394 }
1395 }
1396 break;
1397 case OPCODE_SFL: /* set false, operands ignored */
1398 {
1399 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1400 store_vector4(inst, machine, result);
1401 }
1402 break;
1403 case OPCODE_SGE: /* set on greater or equal */
1404 {
1405 GLfloat a[4], b[4], result[4];
1406 fetch_vector4(&inst->SrcReg[0], machine, a);
1407 fetch_vector4(&inst->SrcReg[1], machine, b);
1408 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1409 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1410 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1411 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1412 store_vector4(inst, machine, result);
1413 if (DEBUG_PROG) {
1414 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1415 result[0], result[1], result[2], result[3],
1416 a[0], a[1], a[2], a[3],
1417 b[0], b[1], b[2], b[3]);
1418 }
1419 }
1420 break;
1421 case OPCODE_SGT: /* set on greater */
1422 {
1423 GLfloat a[4], b[4], result[4];
1424 fetch_vector4(&inst->SrcReg[0], machine, a);
1425 fetch_vector4(&inst->SrcReg[1], machine, b);
1426 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1427 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1428 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1429 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1430 store_vector4(inst, machine, result);
1431 if (DEBUG_PROG) {
1432 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1433 result[0], result[1], result[2], result[3],
1434 a[0], a[1], a[2], a[3],
1435 b[0], b[1], b[2], b[3]);
1436 }
1437 }
1438 break;
1439 case OPCODE_SIN:
1440 {
1441 GLfloat a[4], result[4];
1442 fetch_vector1(&inst->SrcReg[0], machine, a);
1443 result[0] = result[1] = result[2] = result[3]
1444 = (GLfloat) _mesa_sin(a[0]);
1445 store_vector4(inst, machine, result);
1446 }
1447 break;
1448 case OPCODE_SLE: /* set on less or equal */
1449 {
1450 GLfloat a[4], b[4], result[4];
1451 fetch_vector4(&inst->SrcReg[0], machine, a);
1452 fetch_vector4(&inst->SrcReg[1], machine, b);
1453 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1454 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1455 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1456 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1457 store_vector4(inst, machine, result);
1458 if (DEBUG_PROG) {
1459 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1460 result[0], result[1], result[2], result[3],
1461 a[0], a[1], a[2], a[3],
1462 b[0], b[1], b[2], b[3]);
1463 }
1464 }
1465 break;
1466 case OPCODE_SLT: /* set on less */
1467 {
1468 GLfloat a[4], b[4], result[4];
1469 fetch_vector4(&inst->SrcReg[0], machine, a);
1470 fetch_vector4(&inst->SrcReg[1], machine, b);
1471 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1472 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1473 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1474 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1475 store_vector4(inst, machine, result);
1476 if (DEBUG_PROG) {
1477 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1478 result[0], result[1], result[2], result[3],
1479 a[0], a[1], a[2], a[3],
1480 b[0], b[1], b[2], b[3]);
1481 }
1482 }
1483 break;
1484 case OPCODE_SNE: /* set on not equal */
1485 {
1486 GLfloat a[4], b[4], result[4];
1487 fetch_vector4(&inst->SrcReg[0], machine, a);
1488 fetch_vector4(&inst->SrcReg[1], machine, b);
1489 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1490 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1491 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1492 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1493 store_vector4(inst, machine, result);
1494 if (DEBUG_PROG) {
1495 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1496 result[0], result[1], result[2], result[3],
1497 a[0], a[1], a[2], a[3],
1498 b[0], b[1], b[2], b[3]);
1499 }
1500 }
1501 break;
1502 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1503 {
1504 GLfloat a[4], result[4];
1505 fetch_vector4(&inst->SrcReg[0], machine, a);
1506 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1507 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1508 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1509 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1510 store_vector4(inst, machine, result);
1511 }
1512 break;
1513 case OPCODE_STR: /* set true, operands ignored */
1514 {
1515 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1516 store_vector4(inst, machine, result);
1517 }
1518 break;
1519 case OPCODE_SUB:
1520 {
1521 GLfloat a[4], b[4], result[4];
1522 fetch_vector4(&inst->SrcReg[0], machine, a);
1523 fetch_vector4(&inst->SrcReg[1], machine, b);
1524 result[0] = a[0] - b[0];
1525 result[1] = a[1] - b[1];
1526 result[2] = a[2] - b[2];
1527 result[3] = a[3] - b[3];
1528 store_vector4(inst, machine, result);
1529 if (DEBUG_PROG) {
1530 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1531 result[0], result[1], result[2], result[3],
1532 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1533 }
1534 }
1535 break;
1536 case OPCODE_SWZ: /* extended swizzle */
1537 {
1538 const struct prog_src_register *source = &inst->SrcReg[0];
1539 const GLfloat *src = get_src_register_pointer(source, machine);
1540 GLfloat result[4];
1541 GLuint i;
1542 for (i = 0; i < 4; i++) {
1543 const GLuint swz = GET_SWZ(source->Swizzle, i);
1544 if (swz == SWIZZLE_ZERO)
1545 result[i] = 0.0;
1546 else if (swz == SWIZZLE_ONE)
1547 result[i] = 1.0;
1548 else {
1549 ASSERT(swz >= 0);
1550 ASSERT(swz <= 3);
1551 result[i] = src[swz];
1552 }
1553 if (source->Negate & (1 << i))
1554 result[i] = -result[i];
1555 }
1556 store_vector4(inst, machine, result);
1557 }
1558 break;
1559 case OPCODE_TEX: /* Both ARB and NV frag prog */
1560 /* Simple texel lookup */
1561 {
1562 GLfloat texcoord[4], color[4];
1563 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1564
1565 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1566
1567 if (DEBUG_PROG) {
1568 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1569 color[0], color[1], color[2], color[3],
1570 inst->TexSrcUnit,
1571 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1572 }
1573 store_vector4(inst, machine, color);
1574 }
1575 break;
1576 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1577 /* Texel lookup with LOD bias */
1578 {
1579 GLfloat texcoord[4], color[4], lodBias;
1580
1581 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1582
1583 /* texcoord[3] is the bias to add to lambda */
1584 lodBias = texcoord[3];
1585
1586 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1587
1588 store_vector4(inst, machine, color);
1589 }
1590 break;
1591 case OPCODE_TXD: /* GL_NV_fragment_program only */
1592 /* Texture lookup w/ partial derivatives for LOD */
1593 {
1594 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1595 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1596 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1597 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1598 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1599 0.0, /* lodBias */
1600 inst->TexSrcUnit, color);
1601 store_vector4(inst, machine, color);
1602 }
1603 break;
1604 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1605 /* Texture lookup w/ projective divide */
1606 {
1607 GLfloat texcoord[4], color[4];
1608
1609 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1610 /* Not so sure about this test - if texcoord[3] is
1611 * zero, we'd probably be fine except for an ASSERT in
1612 * IROUND_POS() which gets triggered by the inf values created.
1613 */
1614 if (texcoord[3] != 0.0) {
1615 texcoord[0] /= texcoord[3];
1616 texcoord[1] /= texcoord[3];
1617 texcoord[2] /= texcoord[3];
1618 }
1619
1620 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1621
1622 store_vector4(inst, machine, color);
1623 }
1624 break;
1625 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1626 /* Texture lookup w/ projective divide, as above, but do not
1627 * do the divide by w if sampling from a cube map.
1628 */
1629 {
1630 GLfloat texcoord[4], color[4];
1631
1632 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1633 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1634 texcoord[3] != 0.0) {
1635 texcoord[0] /= texcoord[3];
1636 texcoord[1] /= texcoord[3];
1637 texcoord[2] /= texcoord[3];
1638 }
1639
1640 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1641
1642 store_vector4(inst, machine, color);
1643 }
1644 break;
1645 case OPCODE_TRUNC: /* truncate toward zero */
1646 {
1647 GLfloat a[4], result[4];
1648 fetch_vector4(&inst->SrcReg[0], machine, a);
1649 result[0] = (GLfloat) (GLint) a[0];
1650 result[1] = (GLfloat) (GLint) a[1];
1651 result[2] = (GLfloat) (GLint) a[2];
1652 result[3] = (GLfloat) (GLint) a[3];
1653 store_vector4(inst, machine, result);
1654 }
1655 break;
1656 case OPCODE_UP2H: /* unpack two 16-bit floats */
1657 {
1658 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1659 GLfloat result[4];
1660 GLhalfNV hx, hy;
1661 hx = raw & 0xffff;
1662 hy = raw >> 16;
1663 result[0] = result[2] = _mesa_half_to_float(hx);
1664 result[1] = result[3] = _mesa_half_to_float(hy);
1665 store_vector4(inst, machine, result);
1666 }
1667 break;
1668 case OPCODE_UP2US: /* unpack two GLushorts */
1669 {
1670 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1671 GLfloat result[4];
1672 GLushort usx, usy;
1673 usx = raw & 0xffff;
1674 usy = raw >> 16;
1675 result[0] = result[2] = usx * (1.0f / 65535.0f);
1676 result[1] = result[3] = usy * (1.0f / 65535.0f);
1677 store_vector4(inst, machine, result);
1678 }
1679 break;
1680 case OPCODE_UP4B: /* unpack four GLbytes */
1681 {
1682 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1683 GLfloat result[4];
1684 result[0] = (((raw >> 0) & 0xff) - 128) / 127.0F;
1685 result[1] = (((raw >> 8) & 0xff) - 128) / 127.0F;
1686 result[2] = (((raw >> 16) & 0xff) - 128) / 127.0F;
1687 result[3] = (((raw >> 24) & 0xff) - 128) / 127.0F;
1688 store_vector4(inst, machine, result);
1689 }
1690 break;
1691 case OPCODE_UP4UB: /* unpack four GLubytes */
1692 {
1693 const GLuint raw = fetch_vector1ui(&inst->SrcReg[0], machine);
1694 GLfloat result[4];
1695 result[0] = ((raw >> 0) & 0xff) / 255.0F;
1696 result[1] = ((raw >> 8) & 0xff) / 255.0F;
1697 result[2] = ((raw >> 16) & 0xff) / 255.0F;
1698 result[3] = ((raw >> 24) & 0xff) / 255.0F;
1699 store_vector4(inst, machine, result);
1700 }
1701 break;
1702 case OPCODE_XOR: /* bitwise XOR */
1703 {
1704 GLuint a[4], b[4], result[4];
1705 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1706 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1707 result[0] = a[0] ^ b[0];
1708 result[1] = a[1] ^ b[1];
1709 result[2] = a[2] ^ b[2];
1710 result[3] = a[3] ^ b[3];
1711 store_vector4ui(inst, machine, result);
1712 }
1713 break;
1714 case OPCODE_XPD: /* cross product */
1715 {
1716 GLfloat a[4], b[4], result[4];
1717 fetch_vector4(&inst->SrcReg[0], machine, a);
1718 fetch_vector4(&inst->SrcReg[1], machine, b);
1719 result[0] = a[1] * b[2] - a[2] * b[1];
1720 result[1] = a[2] * b[0] - a[0] * b[2];
1721 result[2] = a[0] * b[1] - a[1] * b[0];
1722 result[3] = 1.0;
1723 store_vector4(inst, machine, result);
1724 if (DEBUG_PROG) {
1725 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1726 result[0], result[1], result[2], result[3],
1727 a[0], a[1], a[2], b[0], b[1], b[2]);
1728 }
1729 }
1730 break;
1731 case OPCODE_X2D: /* 2-D matrix transform */
1732 {
1733 GLfloat a[4], b[4], c[4], result[4];
1734 fetch_vector4(&inst->SrcReg[0], machine, a);
1735 fetch_vector4(&inst->SrcReg[1], machine, b);
1736 fetch_vector4(&inst->SrcReg[2], machine, c);
1737 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1738 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1739 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1740 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1741 store_vector4(inst, machine, result);
1742 }
1743 break;
1744 case OPCODE_PRINT:
1745 {
1746 if (inst->SrcReg[0].File != -1) {
1747 GLfloat a[4];
1748 fetch_vector4(&inst->SrcReg[0], machine, a);
1749 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1750 a[0], a[1], a[2], a[3]);
1751 }
1752 else {
1753 _mesa_printf("%s\n", (const char *) inst->Data);
1754 }
1755 }
1756 break;
1757 case OPCODE_END:
1758 return GL_TRUE;
1759 default:
1760 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1761 inst->Opcode);
1762 return GL_TRUE; /* return value doesn't matter */
1763 }
1764
1765 numExec++;
1766 if (numExec > maxExec) {
1767 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1768 return GL_TRUE;
1769 }
1770
1771 } /* for pc */
1772
1773 return GL_TRUE;
1774 }