mesa: add support for 'centroid' qualifier in GLSL 1.20
[mesa.git] / src / mesa / shader / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 7.0.3
4 *
5 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31 /*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38 #include "main/glheader.h"
39 #include "main/colormac.h"
40 #include "main/context.h"
41 #include "program.h"
42 #include "prog_execute.h"
43 #include "prog_instruction.h"
44 #include "prog_parameter.h"
45 #include "prog_print.h"
46 #include "shader/slang/slang_library_noise.h"
47
48
/* debug predicate: set to 1 to enable the tracing printf()s in this file */
#define DEBUG_PROG 0


/**
 * Set x to positive or negative infinity.
 *
 * x must be an lvalue of type GLfloat.  Every variant expands to a single
 * fully parenthesized expression, so the macros behave the same way on all
 * platforms when used as a statement or as an operand of a larger
 * expression.
 */
#if defined(USE_IEEE) || defined(_WIN32)
/* Write the IEEE-754 single precision bit patterns for +/- infinity. */
#define SET_POS_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0x7F800000 )
#define SET_NEG_INFINITY(x)  ( *((GLuint *) (void *) &(x)) = 0xFF800000 )
#elif defined(VMS)
/* This branch substitutes __MAXFLOAT for infinity. */
#define SET_POS_INFINITY(x)  ( (x) = __MAXFLOAT )
#define SET_NEG_INFINITY(x)  ( (x) = -__MAXFLOAT )
#else
#define SET_POS_INFINITY(x)  ( (x) = (GLfloat) HUGE_VAL )
#define SET_NEG_INFINITY(x)  ( (x) = (GLfloat) -HUGE_VAL )
#endif

/**
 * Overwrite the storage of x with the integer value 'bits' by accessing x
 * through the 'i' member of fi_type.
 */
#define SET_FLOAT_BITS(x, bits)  ( ((fi_type *) (void *) &(x))->i = (bits) )
68
69
70 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
71
72
73
74 /**
75 * Return a pointer to the 4-element float vector specified by the given
76 * source register.
77 */
78 static INLINE const GLfloat *
79 get_register_pointer(const struct prog_src_register *source,
80 const struct gl_program_machine *machine)
81 {
82 if (source->RelAddr) {
83 const GLint reg = source->Index + machine->AddressReg[0][0];
84 if (source->File == PROGRAM_ENV_PARAM)
85 if (reg < 0 || reg >= MAX_PROGRAM_ENV_PARAMS)
86 return ZeroVec;
87 else
88 return machine->EnvParams[reg];
89 else {
90 const struct gl_program_parameter_list *params;
91 ASSERT(source->File == PROGRAM_LOCAL_PARAM ||
92 source->File == PROGRAM_CONSTANT ||
93 source->File == PROGRAM_STATE_VAR ||
94 source->File == PROGRAM_UNIFORM);
95 params = machine->CurProgram->Parameters;
96 if (reg < 0 || reg >= (GLint)params->NumParameters)
97 return ZeroVec;
98 else
99 return params->ParameterValues[reg];
100 }
101 }
102
103 switch (source->File) {
104 case PROGRAM_TEMPORARY:
105 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
106 return machine->Temporaries[source->Index];
107
108 case PROGRAM_INPUT:
109 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
110 ASSERT(source->Index < VERT_ATTRIB_MAX);
111 return machine->VertAttribs[source->Index];
112 }
113 else {
114 ASSERT(source->Index < FRAG_ATTRIB_MAX);
115 return machine->Attribs[source->Index][machine->CurElement];
116 }
117
118 case PROGRAM_OUTPUT:
119 ASSERT(source->Index < MAX_PROGRAM_OUTPUTS);
120 return machine->Outputs[source->Index];
121
122 case PROGRAM_LOCAL_PARAM:
123 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
124 return machine->CurProgram->LocalParams[source->Index];
125
126 case PROGRAM_ENV_PARAM:
127 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
128 return machine->EnvParams[source->Index];
129
130 case PROGRAM_STATE_VAR:
131 /* Fallthrough */
132 case PROGRAM_CONSTANT:
133 /* Fallthrough */
134 case PROGRAM_UNIFORM:
135 /* Fallthrough */
136 case PROGRAM_NAMED_PARAM:
137 ASSERT(source->Index <
138 (GLint) machine->CurProgram->Parameters->NumParameters);
139 return machine->CurProgram->Parameters->ParameterValues[source->Index];
140
141 default:
142 _mesa_problem(NULL,
143 "Invalid input register file %d in get_register_pointer()",
144 source->File);
145 return NULL;
146 }
147 }
148
149
#if FEATURE_MESA_program_debug
/**
 * Machine state used by _mesa_get_program_register().  NULL until
 * _mesa_execute_program() stores the machine it is running here.
 */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetProgramRegister().
 *
 * \param ctx    rendering context (not used by this function)
 * \param file   register file to read from
 * \param index  register index within that file
 * \param val    receives the register's four values.  Left untouched if no
 *               machine has been recorded yet; set to zeros if the register
 *               file is not one get_register_pointer() can resolve.
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   (void) ctx;

   if (CurrentMachine) {
      /* Zero-initialize: get_register_pointer() inspects src.RelAddr, which
       * must be clear so the register is looked up by its plain index.
       */
      struct prog_src_register src = { 0 };
      const GLfloat *reg;
      src.File = file;
      src.Index = index;
      reg = get_register_pointer(&src, CurrentMachine);
      if (!reg) {
         /* unhandled register file: don't dereference NULL */
         reg = ZeroVec;
      }
      COPY_4V(val, reg);
   }
}
#endif /* FEATURE_MESA_program_debug */
172
173
174 /**
175 * Fetch a 4-element float vector from the given source register.
176 * Apply swizzling and negating as needed.
177 */
178 static void
179 fetch_vector4(const struct prog_src_register *source,
180 const struct gl_program_machine *machine, GLfloat result[4])
181 {
182 const GLfloat *src = get_register_pointer(source, machine);
183 ASSERT(src);
184
185 if (source->Swizzle == SWIZZLE_NOOP) {
186 /* no swizzling */
187 COPY_4V(result, src);
188 }
189 else {
190 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
191 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
192 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
193 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
194 result[0] = src[GET_SWZ(source->Swizzle, 0)];
195 result[1] = src[GET_SWZ(source->Swizzle, 1)];
196 result[2] = src[GET_SWZ(source->Swizzle, 2)];
197 result[3] = src[GET_SWZ(source->Swizzle, 3)];
198 }
199
200 if (source->NegateBase) {
201 result[0] = -result[0];
202 result[1] = -result[1];
203 result[2] = -result[2];
204 result[3] = -result[3];
205 }
206 if (source->Abs) {
207 result[0] = FABSF(result[0]);
208 result[1] = FABSF(result[1]);
209 result[2] = FABSF(result[2]);
210 result[3] = FABSF(result[3]);
211 }
212 if (source->NegateAbs) {
213 result[0] = -result[0];
214 result[1] = -result[1];
215 result[2] = -result[2];
216 result[3] = -result[3];
217 }
218 }
219
220
221 /**
222 * Fetch a 4-element uint vector from the given source register.
223 * Apply swizzling but not negation/abs.
224 */
225 static void
226 fetch_vector4ui(const struct prog_src_register *source,
227 const struct gl_program_machine *machine, GLuint result[4])
228 {
229 const GLuint *src = (GLuint *) get_register_pointer(source, machine);
230 ASSERT(src);
231
232 if (source->Swizzle == SWIZZLE_NOOP) {
233 /* no swizzling */
234 COPY_4V(result, src);
235 }
236 else {
237 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
238 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
239 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
240 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
241 result[0] = src[GET_SWZ(source->Swizzle, 0)];
242 result[1] = src[GET_SWZ(source->Swizzle, 1)];
243 result[2] = src[GET_SWZ(source->Swizzle, 2)];
244 result[3] = src[GET_SWZ(source->Swizzle, 3)];
245 }
246
247 /* Note: no NegateBase, Abs, NegateAbs here */
248 }
249
250
251
252 /**
253 * Fetch the derivative with respect to X or Y for the given register.
254 * XXX this currently only works for fragment program input attribs.
255 */
256 static void
257 fetch_vector4_deriv(GLcontext * ctx,
258 const struct prog_src_register *source,
259 const struct gl_program_machine *machine,
260 char xOrY, GLfloat result[4])
261 {
262 if (source->File == PROGRAM_INPUT &&
263 source->Index < (GLint) machine->NumDeriv) {
264 const GLint col = machine->CurElement;
265 const GLfloat w = machine->Attribs[FRAG_ATTRIB_WPOS][col][3];
266 const GLfloat invQ = 1.0f / w;
267 GLfloat deriv[4];
268
269 if (xOrY == 'X') {
270 deriv[0] = machine->DerivX[source->Index][0] * invQ;
271 deriv[1] = machine->DerivX[source->Index][1] * invQ;
272 deriv[2] = machine->DerivX[source->Index][2] * invQ;
273 deriv[3] = machine->DerivX[source->Index][3] * invQ;
274 }
275 else {
276 deriv[0] = machine->DerivY[source->Index][0] * invQ;
277 deriv[1] = machine->DerivY[source->Index][1] * invQ;
278 deriv[2] = machine->DerivY[source->Index][2] * invQ;
279 deriv[3] = machine->DerivY[source->Index][3] * invQ;
280 }
281
282 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
283 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
284 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
285 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
286
287 if (source->NegateBase) {
288 result[0] = -result[0];
289 result[1] = -result[1];
290 result[2] = -result[2];
291 result[3] = -result[3];
292 }
293 if (source->Abs) {
294 result[0] = FABSF(result[0]);
295 result[1] = FABSF(result[1]);
296 result[2] = FABSF(result[2]);
297 result[3] = FABSF(result[3]);
298 }
299 if (source->NegateAbs) {
300 result[0] = -result[0];
301 result[1] = -result[1];
302 result[2] = -result[2];
303 result[3] = -result[3];
304 }
305 }
306 else {
307 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
308 }
309 }
310
311
312 /**
313 * As above, but only return result[0] element.
314 */
315 static void
316 fetch_vector1(const struct prog_src_register *source,
317 const struct gl_program_machine *machine, GLfloat result[4])
318 {
319 const GLfloat *src = get_register_pointer(source, machine);
320 ASSERT(src);
321
322 result[0] = src[GET_SWZ(source->Swizzle, 0)];
323
324 if (source->NegateBase) {
325 result[0] = -result[0];
326 }
327 if (source->Abs) {
328 result[0] = FABSF(result[0]);
329 }
330 if (source->NegateAbs) {
331 result[0] = -result[0];
332 }
333 }
334
335
336 /**
337 * Fetch texel from texture. Use partial derivatives when possible.
338 */
339 static INLINE void
340 fetch_texel(GLcontext *ctx,
341 const struct gl_program_machine *machine,
342 const struct prog_instruction *inst,
343 const GLfloat texcoord[4], GLfloat lodBias,
344 GLfloat color[4])
345 {
346 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
347
348 /* Note: we only have the right derivatives for fragment input attribs.
349 */
350 if (machine->NumDeriv > 0 &&
351 inst->SrcReg[0].File == PROGRAM_INPUT &&
352 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0 + inst->TexSrcUnit) {
353 /* simple texture fetch for which we should have derivatives */
354 GLuint attr = inst->SrcReg[0].Index;
355 machine->FetchTexelDeriv(ctx, texcoord,
356 machine->DerivX[attr],
357 machine->DerivY[attr],
358 lodBias, unit, color);
359 }
360 else {
361 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
362 }
363 }
364
365
366 /**
367 * Test value against zero and return GT, LT, EQ or UN if NaN.
368 */
369 static INLINE GLuint
370 generate_cc(float value)
371 {
372 if (value != value)
373 return COND_UN; /* NaN */
374 if (value > 0.0F)
375 return COND_GT;
376 if (value < 0.0F)
377 return COND_LT;
378 return COND_EQ;
379 }
380
381
382 /**
383 * Test if the ccMaskRule is satisfied by the given condition code.
384 * Used to mask destination writes according to the current condition code.
385 */
386 static INLINE GLboolean
387 test_cc(GLuint condCode, GLuint ccMaskRule)
388 {
389 switch (ccMaskRule) {
390 case COND_EQ: return (condCode == COND_EQ);
391 case COND_NE: return (condCode != COND_EQ);
392 case COND_LT: return (condCode == COND_LT);
393 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
394 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
395 case COND_GT: return (condCode == COND_GT);
396 case COND_TR: return GL_TRUE;
397 case COND_FL: return GL_FALSE;
398 default: return GL_TRUE;
399 }
400 }
401
402
403 /**
404 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
405 * or GL_FALSE to indicate result.
406 */
407 static INLINE GLboolean
408 eval_condition(const struct gl_program_machine *machine,
409 const struct prog_instruction *inst)
410 {
411 const GLuint swizzle = inst->DstReg.CondSwizzle;
412 const GLuint condMask = inst->DstReg.CondMask;
413 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
414 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
415 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
416 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
417 return GL_TRUE;
418 }
419 else {
420 return GL_FALSE;
421 }
422 }
423
424
425
426 /**
427 * Store 4 floats into a register. Observe the instructions saturate and
428 * set-condition-code flags.
429 */
430 static void
431 store_vector4(const struct prog_instruction *inst,
432 struct gl_program_machine *machine, const GLfloat value[4])
433 {
434 const struct prog_dst_register *dest = &(inst->DstReg);
435 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
436 GLfloat *dstReg;
437 GLfloat dummyReg[4];
438 GLfloat clampedValue[4];
439 GLuint writeMask = dest->WriteMask;
440
441 switch (dest->File) {
442 case PROGRAM_OUTPUT:
443 ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
444 dstReg = machine->Outputs[dest->Index];
445 break;
446 case PROGRAM_TEMPORARY:
447 ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
448 dstReg = machine->Temporaries[dest->Index];
449 break;
450 case PROGRAM_WRITE_ONLY:
451 dstReg = dummyReg;
452 return;
453 default:
454 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
455 return;
456 }
457
458 #if 0
459 if (value[0] > 1.0e10 ||
460 IS_INF_OR_NAN(value[0]) ||
461 IS_INF_OR_NAN(value[1]) ||
462 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
463 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
464 #endif
465
466 if (clamp) {
467 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
468 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
469 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
470 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
471 value = clampedValue;
472 }
473
474 if (dest->CondMask != COND_TR) {
475 /* condition codes may turn off some writes */
476 if (writeMask & WRITEMASK_X) {
477 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
478 dest->CondMask))
479 writeMask &= ~WRITEMASK_X;
480 }
481 if (writeMask & WRITEMASK_Y) {
482 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
483 dest->CondMask))
484 writeMask &= ~WRITEMASK_Y;
485 }
486 if (writeMask & WRITEMASK_Z) {
487 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
488 dest->CondMask))
489 writeMask &= ~WRITEMASK_Z;
490 }
491 if (writeMask & WRITEMASK_W) {
492 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
493 dest->CondMask))
494 writeMask &= ~WRITEMASK_W;
495 }
496 }
497
498 if (writeMask & WRITEMASK_X)
499 dstReg[0] = value[0];
500 if (writeMask & WRITEMASK_Y)
501 dstReg[1] = value[1];
502 if (writeMask & WRITEMASK_Z)
503 dstReg[2] = value[2];
504 if (writeMask & WRITEMASK_W)
505 dstReg[3] = value[3];
506
507 if (inst->CondUpdate) {
508 if (writeMask & WRITEMASK_X)
509 machine->CondCodes[0] = generate_cc(value[0]);
510 if (writeMask & WRITEMASK_Y)
511 machine->CondCodes[1] = generate_cc(value[1]);
512 if (writeMask & WRITEMASK_Z)
513 machine->CondCodes[2] = generate_cc(value[2]);
514 if (writeMask & WRITEMASK_W)
515 machine->CondCodes[3] = generate_cc(value[3]);
516 #if DEBUG_PROG
517 printf("CondCodes=(%s,%s,%s,%s) for:\n",
518 _mesa_condcode_string(machine->CondCodes[0]),
519 _mesa_condcode_string(machine->CondCodes[1]),
520 _mesa_condcode_string(machine->CondCodes[2]),
521 _mesa_condcode_string(machine->CondCodes[3]));
522 #endif
523 }
524 }
525
526
527 /**
528 * Store 4 uints into a register. Observe the set-condition-code flags.
529 */
530 static void
531 store_vector4ui(const struct prog_instruction *inst,
532 struct gl_program_machine *machine, const GLuint value[4])
533 {
534 const struct prog_dst_register *dest = &(inst->DstReg);
535 GLuint *dstReg;
536 GLuint dummyReg[4];
537 GLuint writeMask = dest->WriteMask;
538
539 switch (dest->File) {
540 case PROGRAM_OUTPUT:
541 ASSERT(dest->Index < MAX_PROGRAM_OUTPUTS);
542 dstReg = (GLuint *) machine->Outputs[dest->Index];
543 break;
544 case PROGRAM_TEMPORARY:
545 ASSERT(dest->Index < MAX_PROGRAM_TEMPS);
546 dstReg = (GLuint *) machine->Temporaries[dest->Index];
547 break;
548 case PROGRAM_WRITE_ONLY:
549 dstReg = dummyReg;
550 return;
551 default:
552 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
553 return;
554 }
555
556 if (dest->CondMask != COND_TR) {
557 /* condition codes may turn off some writes */
558 if (writeMask & WRITEMASK_X) {
559 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
560 dest->CondMask))
561 writeMask &= ~WRITEMASK_X;
562 }
563 if (writeMask & WRITEMASK_Y) {
564 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
565 dest->CondMask))
566 writeMask &= ~WRITEMASK_Y;
567 }
568 if (writeMask & WRITEMASK_Z) {
569 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
570 dest->CondMask))
571 writeMask &= ~WRITEMASK_Z;
572 }
573 if (writeMask & WRITEMASK_W) {
574 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
575 dest->CondMask))
576 writeMask &= ~WRITEMASK_W;
577 }
578 }
579
580 if (writeMask & WRITEMASK_X)
581 dstReg[0] = value[0];
582 if (writeMask & WRITEMASK_Y)
583 dstReg[1] = value[1];
584 if (writeMask & WRITEMASK_Z)
585 dstReg[2] = value[2];
586 if (writeMask & WRITEMASK_W)
587 dstReg[3] = value[3];
588
589 if (inst->CondUpdate) {
590 if (writeMask & WRITEMASK_X)
591 machine->CondCodes[0] = generate_cc(value[0]);
592 if (writeMask & WRITEMASK_Y)
593 machine->CondCodes[1] = generate_cc(value[1]);
594 if (writeMask & WRITEMASK_Z)
595 machine->CondCodes[2] = generate_cc(value[2]);
596 if (writeMask & WRITEMASK_W)
597 machine->CondCodes[3] = generate_cc(value[3]);
598 #if DEBUG_PROG
599 printf("CondCodes=(%s,%s,%s,%s) for:\n",
600 _mesa_condcode_string(machine->CondCodes[0]),
601 _mesa_condcode_string(machine->CondCodes[1]),
602 _mesa_condcode_string(machine->CondCodes[2]),
603 _mesa_condcode_string(machine->CondCodes[3]));
604 #endif
605 }
606 }
607
608
609
610 /**
611 * Execute the given vertex/fragment program.
612 *
613 * \param ctx rendering context
614 * \param program the program to execute
615 * \param machine machine state (must be initialized)
616 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
617 */
618 GLboolean
619 _mesa_execute_program(GLcontext * ctx,
620 const struct gl_program *program,
621 struct gl_program_machine *machine)
622 {
623 const GLuint numInst = program->NumInstructions;
624 const GLuint maxExec = 10000;
625 GLuint pc, numExec = 0;
626
627 machine->CurProgram = program;
628
629 if (DEBUG_PROG) {
630 printf("execute program %u --------------------\n", program->Id);
631 }
632
633 #if FEATURE_MESA_program_debug
634 CurrentMachine = machine;
635 #endif
636
637 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
638 machine->EnvParams = ctx->VertexProgram.Parameters;
639 }
640 else {
641 machine->EnvParams = ctx->FragmentProgram.Parameters;
642 }
643
644 for (pc = 0; pc < numInst; pc++) {
645 const struct prog_instruction *inst = program->Instructions + pc;
646
647 #if FEATURE_MESA_program_debug
648 if (ctx->FragmentProgram.CallbackEnabled &&
649 ctx->FragmentProgram.Callback) {
650 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
651 ctx->FragmentProgram.Callback(program->Target,
652 ctx->FragmentProgram.CallbackData);
653 }
654 #endif
655
656 if (DEBUG_PROG) {
657 _mesa_print_instruction(inst);
658 }
659
660 switch (inst->Opcode) {
661 case OPCODE_ABS:
662 {
663 GLfloat a[4], result[4];
664 fetch_vector4(&inst->SrcReg[0], machine, a);
665 result[0] = FABSF(a[0]);
666 result[1] = FABSF(a[1]);
667 result[2] = FABSF(a[2]);
668 result[3] = FABSF(a[3]);
669 store_vector4(inst, machine, result);
670 }
671 break;
672 case OPCODE_ADD:
673 {
674 GLfloat a[4], b[4], result[4];
675 fetch_vector4(&inst->SrcReg[0], machine, a);
676 fetch_vector4(&inst->SrcReg[1], machine, b);
677 result[0] = a[0] + b[0];
678 result[1] = a[1] + b[1];
679 result[2] = a[2] + b[2];
680 result[3] = a[3] + b[3];
681 store_vector4(inst, machine, result);
682 if (DEBUG_PROG) {
683 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
684 result[0], result[1], result[2], result[3],
685 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
686 }
687 }
688 break;
689 case OPCODE_AND: /* bitwise AND */
690 {
691 GLuint a[4], b[4], result[4];
692 fetch_vector4ui(&inst->SrcReg[0], machine, a);
693 fetch_vector4ui(&inst->SrcReg[1], machine, b);
694 result[0] = a[0] & b[0];
695 result[1] = a[1] & b[1];
696 result[2] = a[2] & b[2];
697 result[3] = a[3] & b[3];
698 store_vector4ui(inst, machine, result);
699 }
700 break;
701 case OPCODE_ARL:
702 {
703 GLfloat t[4];
704 fetch_vector4(&inst->SrcReg[0], machine, t);
705 machine->AddressReg[0][0] = (GLint) FLOORF(t[0]);
706 }
707 break;
708 case OPCODE_BGNLOOP:
709 /* no-op */
710 break;
711 case OPCODE_ENDLOOP:
712 /* subtract 1 here since pc is incremented by for(pc) loop */
713 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
714 break;
715 case OPCODE_BGNSUB: /* begin subroutine */
716 break;
717 case OPCODE_ENDSUB: /* end subroutine */
718 break;
719 case OPCODE_BRA: /* branch (conditional) */
720 /* fall-through */
721 case OPCODE_BRK: /* break out of loop (conditional) */
722 /* fall-through */
723 case OPCODE_CONT: /* continue loop (conditional) */
724 if (eval_condition(machine, inst)) {
725 /* take branch */
726 /* Subtract 1 here since we'll do pc++ at end of for-loop */
727 pc = inst->BranchTarget - 1;
728 }
729 break;
730 case OPCODE_CAL: /* Call subroutine (conditional) */
731 if (eval_condition(machine, inst)) {
732 /* call the subroutine */
733 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
734 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
735 }
736 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
737 /* Subtract 1 here since we'll do pc++ at end of for-loop */
738 pc = inst->BranchTarget - 1;
739 }
740 break;
741 case OPCODE_CMP:
742 {
743 GLfloat a[4], b[4], c[4], result[4];
744 fetch_vector4(&inst->SrcReg[0], machine, a);
745 fetch_vector4(&inst->SrcReg[1], machine, b);
746 fetch_vector4(&inst->SrcReg[2], machine, c);
747 result[0] = a[0] < 0.0F ? b[0] : c[0];
748 result[1] = a[1] < 0.0F ? b[1] : c[1];
749 result[2] = a[2] < 0.0F ? b[2] : c[2];
750 result[3] = a[3] < 0.0F ? b[3] : c[3];
751 store_vector4(inst, machine, result);
752 }
753 break;
754 case OPCODE_COS:
755 {
756 GLfloat a[4], result[4];
757 fetch_vector1(&inst->SrcReg[0], machine, a);
758 result[0] = result[1] = result[2] = result[3]
759 = (GLfloat) _mesa_cos(a[0]);
760 store_vector4(inst, machine, result);
761 }
762 break;
763 case OPCODE_DDX: /* Partial derivative with respect to X */
764 {
765 GLfloat result[4];
766 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
767 'X', result);
768 store_vector4(inst, machine, result);
769 }
770 break;
771 case OPCODE_DDY: /* Partial derivative with respect to Y */
772 {
773 GLfloat result[4];
774 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
775 'Y', result);
776 store_vector4(inst, machine, result);
777 }
778 break;
779 case OPCODE_DP2:
780 {
781 GLfloat a[4], b[4], result[4];
782 fetch_vector4(&inst->SrcReg[0], machine, a);
783 fetch_vector4(&inst->SrcReg[1], machine, b);
784 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
785 store_vector4(inst, machine, result);
786 if (DEBUG_PROG) {
787 printf("DP2 %g = (%g %g) . (%g %g)\n",
788 result[0], a[0], a[1], b[0], b[1]);
789 }
790 }
791 break;
792 case OPCODE_DP2A:
793 {
794 GLfloat a[4], b[4], c, result[4];
795 fetch_vector4(&inst->SrcReg[0], machine, a);
796 fetch_vector4(&inst->SrcReg[1], machine, b);
797 fetch_vector1(&inst->SrcReg[1], machine, &c);
798 result[0] = result[1] = result[2] = result[3] = DOT2(a, b) + c;
799 store_vector4(inst, machine, result);
800 if (DEBUG_PROG) {
801 printf("DP2A %g = (%g %g) . (%g %g) + %g\n",
802 result[0], a[0], a[1], b[0], b[1], c);
803 }
804 }
805 break;
806 case OPCODE_DP3:
807 {
808 GLfloat a[4], b[4], result[4];
809 fetch_vector4(&inst->SrcReg[0], machine, a);
810 fetch_vector4(&inst->SrcReg[1], machine, b);
811 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
812 store_vector4(inst, machine, result);
813 if (DEBUG_PROG) {
814 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
815 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
816 }
817 }
818 break;
819 case OPCODE_DP4:
820 {
821 GLfloat a[4], b[4], result[4];
822 fetch_vector4(&inst->SrcReg[0], machine, a);
823 fetch_vector4(&inst->SrcReg[1], machine, b);
824 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
825 store_vector4(inst, machine, result);
826 if (DEBUG_PROG) {
827 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
828 result[0], a[0], a[1], a[2], a[3],
829 b[0], b[1], b[2], b[3]);
830 }
831 }
832 break;
833 case OPCODE_DPH:
834 {
835 GLfloat a[4], b[4], result[4];
836 fetch_vector4(&inst->SrcReg[0], machine, a);
837 fetch_vector4(&inst->SrcReg[1], machine, b);
838 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
839 store_vector4(inst, machine, result);
840 }
841 break;
842 case OPCODE_DST: /* Distance vector */
843 {
844 GLfloat a[4], b[4], result[4];
845 fetch_vector4(&inst->SrcReg[0], machine, a);
846 fetch_vector4(&inst->SrcReg[1], machine, b);
847 result[0] = 1.0F;
848 result[1] = a[1] * b[1];
849 result[2] = a[2];
850 result[3] = b[3];
851 store_vector4(inst, machine, result);
852 }
853 break;
854 case OPCODE_EXP:
855 {
856 GLfloat t[4], q[4], floor_t0;
857 fetch_vector1(&inst->SrcReg[0], machine, t);
858 floor_t0 = FLOORF(t[0]);
859 if (floor_t0 > FLT_MAX_EXP) {
860 SET_POS_INFINITY(q[0]);
861 SET_POS_INFINITY(q[2]);
862 }
863 else if (floor_t0 < FLT_MIN_EXP) {
864 q[0] = 0.0F;
865 q[2] = 0.0F;
866 }
867 else {
868 q[0] = LDEXPF(1.0, (int) floor_t0);
869 /* Note: GL_NV_vertex_program expects
870 * result.z = result.x * APPX(result.y)
871 * We do what the ARB extension says.
872 */
873 q[2] = (GLfloat) pow(2.0, t[0]);
874 }
875 q[1] = t[0] - floor_t0;
876 q[3] = 1.0F;
877 store_vector4( inst, machine, q );
878 }
879 break;
880 case OPCODE_EX2: /* Exponential base 2 */
881 {
882 GLfloat a[4], result[4];
883 fetch_vector1(&inst->SrcReg[0], machine, a);
884 result[0] = result[1] = result[2] = result[3] =
885 (GLfloat) _mesa_pow(2.0, a[0]);
886 store_vector4(inst, machine, result);
887 }
888 break;
889 case OPCODE_FLR:
890 {
891 GLfloat a[4], result[4];
892 fetch_vector4(&inst->SrcReg[0], machine, a);
893 result[0] = FLOORF(a[0]);
894 result[1] = FLOORF(a[1]);
895 result[2] = FLOORF(a[2]);
896 result[3] = FLOORF(a[3]);
897 store_vector4(inst, machine, result);
898 }
899 break;
900 case OPCODE_FRC:
901 {
902 GLfloat a[4], result[4];
903 fetch_vector4(&inst->SrcReg[0], machine, a);
904 result[0] = a[0] - FLOORF(a[0]);
905 result[1] = a[1] - FLOORF(a[1]);
906 result[2] = a[2] - FLOORF(a[2]);
907 result[3] = a[3] - FLOORF(a[3]);
908 store_vector4(inst, machine, result);
909 }
910 break;
911 case OPCODE_IF:
912 {
913 GLboolean cond;
914 /* eval condition */
915 if (inst->SrcReg[0].File != PROGRAM_UNDEFINED) {
916 GLfloat a[4];
917 fetch_vector1(&inst->SrcReg[0], machine, a);
918 cond = (a[0] != 0.0);
919 }
920 else {
921 cond = eval_condition(machine, inst);
922 }
923 if (DEBUG_PROG) {
924 printf("IF: %d\n", cond);
925 }
926 /* do if/else */
927 if (cond) {
928 /* do if-clause (just continue execution) */
929 }
930 else {
931 /* go to the instruction after ELSE or ENDIF */
932 assert(inst->BranchTarget >= 0);
933 pc = inst->BranchTarget - 1;
934 }
935 }
936 break;
937 case OPCODE_ELSE:
938 /* goto ENDIF */
939 assert(inst->BranchTarget >= 0);
940 pc = inst->BranchTarget - 1;
941 break;
942 case OPCODE_ENDIF:
943 /* nothing */
944 break;
945 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
946 if (eval_condition(machine, inst)) {
947 return GL_FALSE;
948 }
949 break;
950 case OPCODE_KIL: /* ARB_f_p only */
951 {
952 GLfloat a[4];
953 fetch_vector4(&inst->SrcReg[0], machine, a);
954 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
955 return GL_FALSE;
956 }
957 }
958 break;
959 case OPCODE_LG2: /* log base 2 */
960 {
961 GLfloat a[4], result[4];
962 fetch_vector1(&inst->SrcReg[0], machine, a);
963 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
964 store_vector4(inst, machine, result);
965 }
966 break;
967 case OPCODE_LIT:
968 {
969 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
970 GLfloat a[4], result[4];
971 fetch_vector4(&inst->SrcReg[0], machine, a);
972 a[0] = MAX2(a[0], 0.0F);
973 a[1] = MAX2(a[1], 0.0F);
974 /* XXX ARB version clamps a[3], NV version doesn't */
975 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
976 result[0] = 1.0F;
977 result[1] = a[0];
978 /* XXX we could probably just use pow() here */
979 if (a[0] > 0.0F) {
980 if (a[1] == 0.0 && a[3] == 0.0)
981 result[2] = 1.0;
982 else
983 result[2] = EXPF(a[3] * LOGF(a[1]));
984 }
985 else {
986 result[2] = 0.0;
987 }
988 result[3] = 1.0F;
989 store_vector4(inst, machine, result);
990 if (DEBUG_PROG) {
991 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
992 result[0], result[1], result[2], result[3],
993 a[0], a[1], a[2], a[3]);
994 }
995 }
996 break;
997 case OPCODE_LOG:
998 {
999 GLfloat t[4], q[4], abs_t0;
1000 fetch_vector1(&inst->SrcReg[0], machine, t);
1001 abs_t0 = FABSF(t[0]);
1002 if (abs_t0 != 0.0F) {
1003 /* Since we really can't handle infinite values on VMS
1004 * like other OSes we'll use __MAXFLOAT to represent
1005 * infinity. This may need some tweaking.
1006 */
1007 #ifdef VMS
1008 if (abs_t0 == __MAXFLOAT)
1009 #else
1010 if (IS_INF_OR_NAN(abs_t0))
1011 #endif
1012 {
1013 SET_POS_INFINITY(q[0]);
1014 q[1] = 1.0F;
1015 SET_POS_INFINITY(q[2]);
1016 }
1017 else {
1018 int exponent;
1019 GLfloat mantissa = FREXPF(t[0], &exponent);
1020 q[0] = (GLfloat) (exponent - 1);
1021 q[1] = (GLfloat) (2.0 * mantissa); /* map [.5, 1) -> [1, 2) */
1022 q[2] = (GLfloat) (q[0] + LOG2(q[1]));
1023 }
1024 }
1025 else {
1026 SET_NEG_INFINITY(q[0]);
1027 q[1] = 1.0F;
1028 SET_NEG_INFINITY(q[2]);
1029 }
1030 q[3] = 1.0;
1031 store_vector4(inst, machine, q);
1032 }
1033 break;
1034 case OPCODE_LRP:
1035 {
1036 GLfloat a[4], b[4], c[4], result[4];
1037 fetch_vector4(&inst->SrcReg[0], machine, a);
1038 fetch_vector4(&inst->SrcReg[1], machine, b);
1039 fetch_vector4(&inst->SrcReg[2], machine, c);
1040 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
1041 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
1042 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
1043 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
1044 store_vector4(inst, machine, result);
1045 if (DEBUG_PROG) {
1046 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
1047 "(%g %g %g %g), (%g %g %g %g)\n",
1048 result[0], result[1], result[2], result[3],
1049 a[0], a[1], a[2], a[3],
1050 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1051 }
1052 }
1053 break;
1054 case OPCODE_MAD:
1055 {
1056 GLfloat a[4], b[4], c[4], result[4];
1057 fetch_vector4(&inst->SrcReg[0], machine, a);
1058 fetch_vector4(&inst->SrcReg[1], machine, b);
1059 fetch_vector4(&inst->SrcReg[2], machine, c);
1060 result[0] = a[0] * b[0] + c[0];
1061 result[1] = a[1] * b[1] + c[1];
1062 result[2] = a[2] * b[2] + c[2];
1063 result[3] = a[3] * b[3] + c[3];
1064 store_vector4(inst, machine, result);
1065 if (DEBUG_PROG) {
1066 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
1067 "(%g %g %g %g) + (%g %g %g %g)\n",
1068 result[0], result[1], result[2], result[3],
1069 a[0], a[1], a[2], a[3],
1070 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
1071 }
1072 }
1073 break;
1074 case OPCODE_MAX:
1075 {
1076 GLfloat a[4], b[4], result[4];
1077 fetch_vector4(&inst->SrcReg[0], machine, a);
1078 fetch_vector4(&inst->SrcReg[1], machine, b);
1079 result[0] = MAX2(a[0], b[0]);
1080 result[1] = MAX2(a[1], b[1]);
1081 result[2] = MAX2(a[2], b[2]);
1082 result[3] = MAX2(a[3], b[3]);
1083 store_vector4(inst, machine, result);
1084 if (DEBUG_PROG) {
1085 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
1086 result[0], result[1], result[2], result[3],
1087 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1088 }
1089 }
1090 break;
1091 case OPCODE_MIN:
1092 {
1093 GLfloat a[4], b[4], result[4];
1094 fetch_vector4(&inst->SrcReg[0], machine, a);
1095 fetch_vector4(&inst->SrcReg[1], machine, b);
1096 result[0] = MIN2(a[0], b[0]);
1097 result[1] = MIN2(a[1], b[1]);
1098 result[2] = MIN2(a[2], b[2]);
1099 result[3] = MIN2(a[3], b[3]);
1100 store_vector4(inst, machine, result);
1101 }
1102 break;
1103 case OPCODE_MOV:
1104 {
1105 GLfloat result[4];
1106 fetch_vector4(&inst->SrcReg[0], machine, result);
1107 store_vector4(inst, machine, result);
1108 if (DEBUG_PROG) {
1109 printf("MOV (%g %g %g %g)\n",
1110 result[0], result[1], result[2], result[3]);
1111 }
1112 }
1113 break;
1114 case OPCODE_MUL:
1115 {
1116 GLfloat a[4], b[4], result[4];
1117 fetch_vector4(&inst->SrcReg[0], machine, a);
1118 fetch_vector4(&inst->SrcReg[1], machine, b);
1119 result[0] = a[0] * b[0];
1120 result[1] = a[1] * b[1];
1121 result[2] = a[2] * b[2];
1122 result[3] = a[3] * b[3];
1123 store_vector4(inst, machine, result);
1124 if (DEBUG_PROG) {
1125 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1126 result[0], result[1], result[2], result[3],
1127 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1128 }
1129 }
1130 break;
1131 case OPCODE_NOISE1:
1132 {
1133 GLfloat a[4], result[4];
1134 fetch_vector1(&inst->SrcReg[0], machine, a);
1135 result[0] =
1136 result[1] =
1137 result[2] = result[3] = _slang_library_noise1(a[0]);
1138 store_vector4(inst, machine, result);
1139 }
1140 break;
1141 case OPCODE_NOISE2:
1142 {
1143 GLfloat a[4], result[4];
1144 fetch_vector4(&inst->SrcReg[0], machine, a);
1145 result[0] =
1146 result[1] =
1147 result[2] = result[3] = _slang_library_noise2(a[0], a[1]);
1148 store_vector4(inst, machine, result);
1149 }
1150 break;
1151 case OPCODE_NOISE3:
1152 {
1153 GLfloat a[4], result[4];
1154 fetch_vector4(&inst->SrcReg[0], machine, a);
1155 result[0] =
1156 result[1] =
1157 result[2] =
1158 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
1159 store_vector4(inst, machine, result);
1160 }
1161 break;
1162 case OPCODE_NOISE4:
1163 {
1164 GLfloat a[4], result[4];
1165 fetch_vector4(&inst->SrcReg[0], machine, a);
1166 result[0] =
1167 result[1] =
1168 result[2] =
1169 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
1170 store_vector4(inst, machine, result);
1171 }
1172 break;
1173 case OPCODE_NOP:
1174 break;
1175 case OPCODE_NOT: /* bitwise NOT */
1176 {
1177 GLuint a[4], result[4];
1178 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1179 result[0] = ~a[0];
1180 result[1] = ~a[1];
1181 result[2] = ~a[2];
1182 result[3] = ~a[3];
1183 store_vector4ui(inst, machine, result);
1184 }
1185 break;
1186 case OPCODE_NRM3: /* 3-component normalization */
1187 {
1188 GLfloat a[4], result[4];
1189 GLfloat tmp;
1190 fetch_vector4(&inst->SrcReg[0], machine, a);
1191 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2];
1192 if (tmp != 0.0F)
1193 tmp = INV_SQRTF(tmp);
1194 result[0] = tmp * a[0];
1195 result[1] = tmp * a[1];
1196 result[2] = tmp * a[2];
1197 result[3] = 0.0; /* undefined, but prevent valgrind warnings */
1198 store_vector4(inst, machine, result);
1199 }
1200 break;
1201 case OPCODE_NRM4: /* 4-component normalization */
1202 {
1203 GLfloat a[4], result[4];
1204 GLfloat tmp;
1205 fetch_vector4(&inst->SrcReg[0], machine, a);
1206 tmp = a[0] * a[0] + a[1] * a[1] + a[2] * a[2] + a[3] * a[3];
1207 if (tmp != 0.0F)
1208 tmp = INV_SQRTF(tmp);
1209 result[0] = tmp * a[0];
1210 result[1] = tmp * a[1];
1211 result[2] = tmp * a[2];
1212 result[3] = tmp * a[3];
1213 store_vector4(inst, machine, result);
1214 }
1215 break;
1216 case OPCODE_OR: /* bitwise OR */
1217 {
1218 GLuint a[4], b[4], result[4];
1219 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1220 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1221 result[0] = a[0] | b[0];
1222 result[1] = a[1] | b[1];
1223 result[2] = a[2] | b[2];
1224 result[3] = a[3] | b[3];
1225 store_vector4ui(inst, machine, result);
1226 }
1227 break;
1228 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1229 {
1230 GLfloat a[4];
1231 GLuint result[4];
1232 GLhalfNV hx, hy;
1233 fetch_vector4(&inst->SrcReg[0], machine, a);
1234 hx = _mesa_float_to_half(a[0]);
1235 hy = _mesa_float_to_half(a[1]);
1236 result[0] =
1237 result[1] =
1238 result[2] =
1239 result[3] = hx | (hy << 16);
1240 store_vector4ui(inst, machine, result);
1241 }
1242 break;
1243 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1244 {
1245 GLfloat a[4];
1246 GLuint result[4], usx, usy;
1247 fetch_vector4(&inst->SrcReg[0], machine, a);
1248 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1249 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1250 usx = IROUND(a[0] * 65535.0F);
1251 usy = IROUND(a[1] * 65535.0F);
1252 result[0] =
1253 result[1] =
1254 result[2] =
1255 result[3] = usx | (usy << 16);
1256 store_vector4ui(inst, machine, result);
1257 }
1258 break;
1259 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1260 {
1261 GLfloat a[4];
1262 GLuint result[4], ubx, uby, ubz, ubw;
1263 fetch_vector4(&inst->SrcReg[0], machine, a);
1264 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1265 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1266 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1267 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1268 ubx = IROUND(127.0F * a[0] + 128.0F);
1269 uby = IROUND(127.0F * a[1] + 128.0F);
1270 ubz = IROUND(127.0F * a[2] + 128.0F);
1271 ubw = IROUND(127.0F * a[3] + 128.0F);
1272 result[0] =
1273 result[1] =
1274 result[2] =
1275 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1276 store_vector4ui(inst, machine, result);
1277 }
1278 break;
1279 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1280 {
1281 GLfloat a[4];
1282 GLuint result[4], ubx, uby, ubz, ubw;
1283 fetch_vector4(&inst->SrcReg[0], machine, a);
1284 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1285 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1286 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1287 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1288 ubx = IROUND(255.0F * a[0]);
1289 uby = IROUND(255.0F * a[1]);
1290 ubz = IROUND(255.0F * a[2]);
1291 ubw = IROUND(255.0F * a[3]);
1292 result[0] =
1293 result[1] =
1294 result[2] =
1295 result[3] = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1296 store_vector4ui(inst, machine, result);
1297 }
1298 break;
1299 case OPCODE_POW:
1300 {
1301 GLfloat a[4], b[4], result[4];
1302 fetch_vector1(&inst->SrcReg[0], machine, a);
1303 fetch_vector1(&inst->SrcReg[1], machine, b);
1304 result[0] = result[1] = result[2] = result[3]
1305 = (GLfloat) _mesa_pow(a[0], b[0]);
1306 store_vector4(inst, machine, result);
1307 }
1308 break;
1309 case OPCODE_RCP:
1310 {
1311 GLfloat a[4], result[4];
1312 fetch_vector1(&inst->SrcReg[0], machine, a);
1313 if (DEBUG_PROG) {
1314 if (a[0] == 0)
1315 printf("RCP(0)\n");
1316 else if (IS_INF_OR_NAN(a[0]))
1317 printf("RCP(inf)\n");
1318 }
1319 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1320 store_vector4(inst, machine, result);
1321 }
1322 break;
1323 case OPCODE_RET: /* return from subroutine (conditional) */
1324 if (eval_condition(machine, inst)) {
1325 if (machine->StackDepth == 0) {
1326 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1327 }
1328 /* subtract one because of pc++ in the for loop */
1329 pc = machine->CallStack[--machine->StackDepth] - 1;
1330 }
1331 break;
1332 case OPCODE_RFL: /* reflection vector */
1333 {
1334 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1335 fetch_vector4(&inst->SrcReg[0], machine, axis);
1336 fetch_vector4(&inst->SrcReg[1], machine, dir);
1337 tmpW = DOT3(axis, axis);
1338 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1339 result[0] = tmpX * axis[0] - dir[0];
1340 result[1] = tmpX * axis[1] - dir[1];
1341 result[2] = tmpX * axis[2] - dir[2];
1342 /* result[3] is never written! XXX enforce in parser! */
1343 store_vector4(inst, machine, result);
1344 }
1345 break;
1346 case OPCODE_RSQ: /* 1 / sqrt() */
1347 {
1348 GLfloat a[4], result[4];
1349 fetch_vector1(&inst->SrcReg[0], machine, a);
1350 a[0] = FABSF(a[0]);
1351 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1352 store_vector4(inst, machine, result);
1353 if (DEBUG_PROG) {
1354 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1355 }
1356 }
1357 break;
1358 case OPCODE_SCS: /* sine and cos */
1359 {
1360 GLfloat a[4], result[4];
1361 fetch_vector1(&inst->SrcReg[0], machine, a);
1362 result[0] = (GLfloat) _mesa_cos(a[0]);
1363 result[1] = (GLfloat) _mesa_sin(a[0]);
1364 result[2] = 0.0; /* undefined! */
1365 result[3] = 0.0; /* undefined! */
1366 store_vector4(inst, machine, result);
1367 }
1368 break;
1369 case OPCODE_SEQ: /* set on equal */
1370 {
1371 GLfloat a[4], b[4], result[4];
1372 fetch_vector4(&inst->SrcReg[0], machine, a);
1373 fetch_vector4(&inst->SrcReg[1], machine, b);
1374 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1375 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1376 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1377 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1378 store_vector4(inst, machine, result);
1379 if (DEBUG_PROG) {
1380 printf("SEQ (%g %g %g %g) = (%g %g %g %g) == (%g %g %g %g)\n",
1381 result[0], result[1], result[2], result[3],
1382 a[0], a[1], a[2], a[3],
1383 b[0], b[1], b[2], b[3]);
1384 }
1385 }
1386 break;
1387 case OPCODE_SFL: /* set false, operands ignored */
1388 {
1389 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1390 store_vector4(inst, machine, result);
1391 }
1392 break;
1393 case OPCODE_SGE: /* set on greater or equal */
1394 {
1395 GLfloat a[4], b[4], result[4];
1396 fetch_vector4(&inst->SrcReg[0], machine, a);
1397 fetch_vector4(&inst->SrcReg[1], machine, b);
1398 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1399 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1400 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1401 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1402 store_vector4(inst, machine, result);
1403 if (DEBUG_PROG) {
1404 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
1405 result[0], result[1], result[2], result[3],
1406 a[0], a[1], a[2], a[3],
1407 b[0], b[1], b[2], b[3]);
1408 }
1409 }
1410 break;
1411 case OPCODE_SGT: /* set on greater */
1412 {
1413 GLfloat a[4], b[4], result[4];
1414 fetch_vector4(&inst->SrcReg[0], machine, a);
1415 fetch_vector4(&inst->SrcReg[1], machine, b);
1416 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1417 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1418 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1419 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1420 store_vector4(inst, machine, result);
1421 if (DEBUG_PROG) {
1422 printf("SGT (%g %g %g %g) = (%g %g %g %g) > (%g %g %g %g)\n",
1423 result[0], result[1], result[2], result[3],
1424 a[0], a[1], a[2], a[3],
1425 b[0], b[1], b[2], b[3]);
1426 }
1427 }
1428 break;
1429 case OPCODE_SIN:
1430 {
1431 GLfloat a[4], result[4];
1432 fetch_vector1(&inst->SrcReg[0], machine, a);
1433 result[0] = result[1] = result[2] = result[3]
1434 = (GLfloat) _mesa_sin(a[0]);
1435 store_vector4(inst, machine, result);
1436 }
1437 break;
1438 case OPCODE_SLE: /* set on less or equal */
1439 {
1440 GLfloat a[4], b[4], result[4];
1441 fetch_vector4(&inst->SrcReg[0], machine, a);
1442 fetch_vector4(&inst->SrcReg[1], machine, b);
1443 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1444 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1445 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1446 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1447 store_vector4(inst, machine, result);
1448 if (DEBUG_PROG) {
1449 printf("SLE (%g %g %g %g) = (%g %g %g %g) <= (%g %g %g %g)\n",
1450 result[0], result[1], result[2], result[3],
1451 a[0], a[1], a[2], a[3],
1452 b[0], b[1], b[2], b[3]);
1453 }
1454 }
1455 break;
1456 case OPCODE_SLT: /* set on less */
1457 {
1458 GLfloat a[4], b[4], result[4];
1459 fetch_vector4(&inst->SrcReg[0], machine, a);
1460 fetch_vector4(&inst->SrcReg[1], machine, b);
1461 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1462 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1463 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1464 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1465 store_vector4(inst, machine, result);
1466 if (DEBUG_PROG) {
1467 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1468 result[0], result[1], result[2], result[3],
1469 a[0], a[1], a[2], a[3],
1470 b[0], b[1], b[2], b[3]);
1471 }
1472 }
1473 break;
1474 case OPCODE_SNE: /* set on not equal */
1475 {
1476 GLfloat a[4], b[4], result[4];
1477 fetch_vector4(&inst->SrcReg[0], machine, a);
1478 fetch_vector4(&inst->SrcReg[1], machine, b);
1479 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1480 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1481 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1482 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1483 store_vector4(inst, machine, result);
1484 if (DEBUG_PROG) {
1485 printf("SNE (%g %g %g %g) = (%g %g %g %g) != (%g %g %g %g)\n",
1486 result[0], result[1], result[2], result[3],
1487 a[0], a[1], a[2], a[3],
1488 b[0], b[1], b[2], b[3]);
1489 }
1490 }
1491 break;
1492 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1493 {
1494 GLfloat a[4], result[4];
1495 fetch_vector4(&inst->SrcReg[0], machine, a);
1496 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1497 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1498 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1499 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1500 store_vector4(inst, machine, result);
1501 }
1502 break;
1503 case OPCODE_STR: /* set true, operands ignored */
1504 {
1505 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1506 store_vector4(inst, machine, result);
1507 }
1508 break;
1509 case OPCODE_SUB:
1510 {
1511 GLfloat a[4], b[4], result[4];
1512 fetch_vector4(&inst->SrcReg[0], machine, a);
1513 fetch_vector4(&inst->SrcReg[1], machine, b);
1514 result[0] = a[0] - b[0];
1515 result[1] = a[1] - b[1];
1516 result[2] = a[2] - b[2];
1517 result[3] = a[3] - b[3];
1518 store_vector4(inst, machine, result);
1519 if (DEBUG_PROG) {
1520 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1521 result[0], result[1], result[2], result[3],
1522 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1523 }
1524 }
1525 break;
1526 case OPCODE_SWZ: /* extended swizzle */
1527 {
1528 const struct prog_src_register *source = &inst->SrcReg[0];
1529 const GLfloat *src = get_register_pointer(source, machine);
1530 GLfloat result[4];
1531 GLuint i;
1532 for (i = 0; i < 4; i++) {
1533 const GLuint swz = GET_SWZ(source->Swizzle, i);
1534 if (swz == SWIZZLE_ZERO)
1535 result[i] = 0.0;
1536 else if (swz == SWIZZLE_ONE)
1537 result[i] = 1.0;
1538 else {
1539 ASSERT(swz >= 0);
1540 ASSERT(swz <= 3);
1541 result[i] = src[swz];
1542 }
1543 if (source->NegateBase & (1 << i))
1544 result[i] = -result[i];
1545 }
1546 store_vector4(inst, machine, result);
1547 }
1548 break;
1549 case OPCODE_TEX: /* Both ARB and NV frag prog */
1550 /* Simple texel lookup */
1551 {
1552 GLfloat texcoord[4], color[4];
1553 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1554
1555 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1556
1557 if (DEBUG_PROG) {
1558 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1559 color[0], color[1], color[2], color[3],
1560 inst->TexSrcUnit,
1561 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1562 }
1563 store_vector4(inst, machine, color);
1564 }
1565 break;
1566 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1567 /* Texel lookup with LOD bias */
1568 {
1569 const struct gl_texture_unit *texUnit
1570 = &ctx->Texture.Unit[inst->TexSrcUnit];
1571 GLfloat texcoord[4], color[4], lodBias;
1572
1573 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1574
1575 /* texcoord[3] is the bias to add to lambda */
1576 lodBias = texUnit->LodBias + texcoord[3];
1577 if (texUnit->_Current) {
1578 lodBias += texUnit->_Current->LodBias;
1579 }
1580
1581 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1582
1583 store_vector4(inst, machine, color);
1584 }
1585 break;
1586 case OPCODE_TXD: /* GL_NV_fragment_program only */
1587 /* Texture lookup w/ partial derivatives for LOD */
1588 {
1589 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1590 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1591 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1592 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1593 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1594 0.0, /* lodBias */
1595 inst->TexSrcUnit, color);
1596 store_vector4(inst, machine, color);
1597 }
1598 break;
1599 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1600 /* Texture lookup w/ projective divide */
1601 {
1602 GLfloat texcoord[4], color[4];
1603
1604 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1605 /* Not so sure about this test - if texcoord[3] is
1606 * zero, we'd probably be fine except for an ASSERT in
1607 * IROUND_POS() which gets triggered by the inf values created.
1608 */
1609 if (texcoord[3] != 0.0) {
1610 texcoord[0] /= texcoord[3];
1611 texcoord[1] /= texcoord[3];
1612 texcoord[2] /= texcoord[3];
1613 }
1614
1615 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1616
1617 store_vector4(inst, machine, color);
1618 }
1619 break;
1620 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1621 /* Texture lookup w/ projective divide, as above, but do not
1622 * do the divide by w if sampling from a cube map.
1623 */
1624 {
1625 GLfloat texcoord[4], color[4];
1626
1627 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1628 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1629 texcoord[3] != 0.0) {
1630 texcoord[0] /= texcoord[3];
1631 texcoord[1] /= texcoord[3];
1632 texcoord[2] /= texcoord[3];
1633 }
1634
1635 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1636
1637 store_vector4(inst, machine, color);
1638 }
1639 break;
1640 case OPCODE_TRUNC: /* truncate toward zero */
1641 {
1642 GLfloat a[4], result[4];
1643 fetch_vector4(&inst->SrcReg[0], machine, a);
1644 result[0] = (GLfloat) (GLint) a[0];
1645 result[1] = (GLfloat) (GLint) a[1];
1646 result[2] = (GLfloat) (GLint) a[2];
1647 result[3] = (GLfloat) (GLint) a[3];
1648 store_vector4(inst, machine, result);
1649 }
1650 break;
1651 case OPCODE_UP2H: /* unpack two 16-bit floats */
1652 {
1653 GLfloat a[4], result[4];
1654 const GLuint *rawBits = (const GLuint *) a;
1655 GLhalfNV hx, hy;
1656 fetch_vector1(&inst->SrcReg[0], machine, a);
1657 hx = rawBits[0] & 0xffff;
1658 hy = rawBits[0] >> 16;
1659 result[0] = result[2] = _mesa_half_to_float(hx);
1660 result[1] = result[3] = _mesa_half_to_float(hy);
1661 store_vector4(inst, machine, result);
1662 }
1663 break;
1664 case OPCODE_UP2US: /* unpack two GLushorts */
1665 {
1666 GLfloat a[4], result[4];
1667 const GLuint *rawBits = (const GLuint *) a;
1668 GLushort usx, usy;
1669 fetch_vector1(&inst->SrcReg[0], machine, a);
1670 usx = rawBits[0] & 0xffff;
1671 usy = rawBits[0] >> 16;
1672 result[0] = result[2] = usx * (1.0f / 65535.0f);
1673 result[1] = result[3] = usy * (1.0f / 65535.0f);
1674 store_vector4(inst, machine, result);
1675 }
1676 break;
1677 case OPCODE_UP4B: /* unpack four GLbytes */
1678 {
1679 GLfloat a[4], result[4];
1680 const GLuint *rawBits = (const GLuint *) a;
1681 fetch_vector1(&inst->SrcReg[0], machine, a);
1682 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1683 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1684 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1685 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1686 store_vector4(inst, machine, result);
1687 }
1688 break;
1689 case OPCODE_UP4UB: /* unpack four GLubytes */
1690 {
1691 GLfloat a[4], result[4];
1692 const GLuint *rawBits = (const GLuint *) a;
1693 fetch_vector1(&inst->SrcReg[0], machine, a);
1694 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1695 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1696 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1697 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1698 store_vector4(inst, machine, result);
1699 }
1700 break;
1701 case OPCODE_XOR: /* bitwise XOR */
1702 {
1703 GLuint a[4], b[4], result[4];
1704 fetch_vector4ui(&inst->SrcReg[0], machine, a);
1705 fetch_vector4ui(&inst->SrcReg[1], machine, b);
1706 result[0] = a[0] ^ b[0];
1707 result[1] = a[1] ^ b[1];
1708 result[2] = a[2] ^ b[2];
1709 result[3] = a[3] ^ b[3];
1710 store_vector4ui(inst, machine, result);
1711 }
1712 break;
1713 case OPCODE_XPD: /* cross product */
1714 {
1715 GLfloat a[4], b[4], result[4];
1716 fetch_vector4(&inst->SrcReg[0], machine, a);
1717 fetch_vector4(&inst->SrcReg[1], machine, b);
1718 result[0] = a[1] * b[2] - a[2] * b[1];
1719 result[1] = a[2] * b[0] - a[0] * b[2];
1720 result[2] = a[0] * b[1] - a[1] * b[0];
1721 result[3] = 1.0;
1722 store_vector4(inst, machine, result);
1723 if (DEBUG_PROG) {
1724 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1725 result[0], result[1], result[2], result[3],
1726 a[0], a[1], a[2], b[0], b[1], b[2]);
1727 }
1728 }
1729 break;
1730 case OPCODE_X2D: /* 2-D matrix transform */
1731 {
1732 GLfloat a[4], b[4], c[4], result[4];
1733 fetch_vector4(&inst->SrcReg[0], machine, a);
1734 fetch_vector4(&inst->SrcReg[1], machine, b);
1735 fetch_vector4(&inst->SrcReg[2], machine, c);
1736 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1737 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1738 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1739 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1740 store_vector4(inst, machine, result);
1741 }
1742 break;
1743 case OPCODE_PRINT:
1744 {
1745 if (inst->SrcReg[0].File != -1) {
1746 GLfloat a[4];
1747 fetch_vector4(&inst->SrcReg[0], machine, a);
1748 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1749 a[0], a[1], a[2], a[3]);
1750 }
1751 else {
1752 _mesa_printf("%s\n", (const char *) inst->Data);
1753 }
1754 }
1755 break;
1756 case OPCODE_END:
1757 return GL_TRUE;
1758 default:
1759 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1760 inst->Opcode);
1761 return GL_TRUE; /* return value doesn't matter */
1762 }
1763
1764 numExec++;
1765 if (numExec > maxExec) {
1766 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1767 return GL_TRUE;
1768 }
1769
1770 } /* for pc */
1771
1772 #if FEATURE_MESA_program_debug
1773 CurrentMachine = NULL;
1774 #endif
1775
1776 return GL_TRUE;
1777 }