st/mesa/r200/i915/i965: move ARB program fields into a union
[mesa.git] / src / mesa / program / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 *
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31 /*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38 #include "c99_math.h"
39 #include "main/glheader.h"
40 #include "main/macros.h"
41 #include "prog_execute.h"
42 #include "prog_instruction.h"
43 #include "prog_parameter.h"
44 #include "prog_print.h"
45 #include "prog_noise.h"
46
47
/* Set to 1 to print each instruction (and selected results) as it executes. */
#define DEBUG_PROG 0


/**
 * Store the single-precision bit pattern 0x7F800000 (+infinity) or
 * 0xFF800000 (-infinity) into the float lvalue x.  The bits are written
 * through the integer member of an fi_type and read back through its
 * float member.
 */
#define SET_POS_INFINITY(x)                  \
   do {                                      \
      fi_type fi;                            \
      fi.i = 0x7F800000;                     \
      (x) = fi.f;                            \
   } while (0)
#define SET_NEG_INFINITY(x)                  \
   do {                                      \
      fi_type fi;                            \
      fi.i = 0xFF800000;                     \
      (x) = fi.f;                            \
   } while (0)

/**
 * Overwrite the storage of the float lvalue x with the raw bit pattern bits.
 * The expansion and the bits argument are fully parenthesized so the macro
 * acts as one assignment expression even when it appears inside a larger
 * expression.
 */
#define SET_FLOAT_BITS(x, bits) (((fi_type *) (void *) &(x))->i = (bits))
69
70
71 static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
72
73
74 /**
75 * Return a pointer to the 4-element float vector specified by the given
76 * source register.
77 */
78 static inline const GLfloat *
79 get_src_register_pointer(const struct prog_src_register *source,
80 const struct gl_program_machine *machine)
81 {
82 const struct gl_program *prog = machine->CurProgram;
83 GLint reg = source->Index;
84
85 if (source->RelAddr) {
86 /* add address register value to src index/offset */
87 reg += machine->AddressReg[0][0];
88 if (reg < 0) {
89 return ZeroVec;
90 }
91 }
92
93 switch (source->File) {
94 case PROGRAM_TEMPORARY:
95 if (reg >= MAX_PROGRAM_TEMPS)
96 return ZeroVec;
97 return machine->Temporaries[reg];
98
99 case PROGRAM_INPUT:
100 if (prog->Target == GL_VERTEX_PROGRAM_ARB) {
101 if (reg >= VERT_ATTRIB_MAX)
102 return ZeroVec;
103 return machine->VertAttribs[reg];
104 }
105 else {
106 if (reg >= VARYING_SLOT_MAX)
107 return ZeroVec;
108 return machine->Attribs[reg][machine->CurElement];
109 }
110
111 case PROGRAM_OUTPUT:
112 if (reg >= MAX_PROGRAM_OUTPUTS)
113 return ZeroVec;
114 return machine->Outputs[reg];
115
116 case PROGRAM_STATE_VAR:
117 /* Fallthrough */
118 case PROGRAM_CONSTANT:
119 /* Fallthrough */
120 case PROGRAM_UNIFORM:
121 if (reg >= (GLint) prog->Parameters->NumParameters)
122 return ZeroVec;
123 return (GLfloat *) prog->Parameters->ParameterValues[reg];
124
125 case PROGRAM_SYSTEM_VALUE:
126 assert(reg < (GLint) ARRAY_SIZE(machine->SystemValues));
127 return machine->SystemValues[reg];
128
129 default:
130 _mesa_problem(NULL,
131 "Invalid src register file %d in get_src_register_pointer()",
132 source->File);
133 return ZeroVec;
134 }
135 }
136
137
138 /**
139 * Return a pointer to the 4-element float vector specified by the given
140 * destination register.
141 */
142 static inline GLfloat *
143 get_dst_register_pointer(const struct prog_dst_register *dest,
144 struct gl_program_machine *machine)
145 {
146 static GLfloat dummyReg[4];
147 GLint reg = dest->Index;
148
149 if (dest->RelAddr) {
150 /* add address register value to src index/offset */
151 reg += machine->AddressReg[0][0];
152 if (reg < 0) {
153 return dummyReg;
154 }
155 }
156
157 switch (dest->File) {
158 case PROGRAM_TEMPORARY:
159 if (reg >= MAX_PROGRAM_TEMPS)
160 return dummyReg;
161 return machine->Temporaries[reg];
162
163 case PROGRAM_OUTPUT:
164 if (reg >= MAX_PROGRAM_OUTPUTS)
165 return dummyReg;
166 return machine->Outputs[reg];
167
168 default:
169 _mesa_problem(NULL,
170 "Invalid dest register file %d in get_dst_register_pointer()",
171 dest->File);
172 return dummyReg;
173 }
174 }
175
176
177
178 /**
179 * Fetch a 4-element float vector from the given source register.
180 * Apply swizzling and negating as needed.
181 */
182 static void
183 fetch_vector4(const struct prog_src_register *source,
184 const struct gl_program_machine *machine, GLfloat result[4])
185 {
186 const GLfloat *src = get_src_register_pointer(source, machine);
187
188 if (source->Swizzle == SWIZZLE_NOOP) {
189 /* no swizzling */
190 COPY_4V(result, src);
191 }
192 else {
193 assert(GET_SWZ(source->Swizzle, 0) <= 3);
194 assert(GET_SWZ(source->Swizzle, 1) <= 3);
195 assert(GET_SWZ(source->Swizzle, 2) <= 3);
196 assert(GET_SWZ(source->Swizzle, 3) <= 3);
197 result[0] = src[GET_SWZ(source->Swizzle, 0)];
198 result[1] = src[GET_SWZ(source->Swizzle, 1)];
199 result[2] = src[GET_SWZ(source->Swizzle, 2)];
200 result[3] = src[GET_SWZ(source->Swizzle, 3)];
201 }
202
203 if (source->Negate) {
204 assert(source->Negate == NEGATE_XYZW);
205 result[0] = -result[0];
206 result[1] = -result[1];
207 result[2] = -result[2];
208 result[3] = -result[3];
209 }
210
211 #ifdef NAN_CHECK
212 assert(!IS_INF_OR_NAN(result[0]));
213 assert(!IS_INF_OR_NAN(result[0]));
214 assert(!IS_INF_OR_NAN(result[0]));
215 assert(!IS_INF_OR_NAN(result[0]));
216 #endif
217 }
218
219
220 /**
221 * Fetch the derivative with respect to X or Y for the given register.
222 * XXX this currently only works for fragment program input attribs.
223 */
224 static void
225 fetch_vector4_deriv(struct gl_context * ctx,
226 const struct prog_src_register *source,
227 const struct gl_program_machine *machine,
228 char xOrY, GLfloat result[4])
229 {
230 if (source->File == PROGRAM_INPUT &&
231 source->Index < (GLint) machine->NumDeriv) {
232 const GLint col = machine->CurElement;
233 const GLfloat w = machine->Attribs[VARYING_SLOT_POS][col][3];
234 const GLfloat invQ = 1.0f / w;
235 GLfloat deriv[4];
236
237 if (xOrY == 'X') {
238 deriv[0] = machine->DerivX[source->Index][0] * invQ;
239 deriv[1] = machine->DerivX[source->Index][1] * invQ;
240 deriv[2] = machine->DerivX[source->Index][2] * invQ;
241 deriv[3] = machine->DerivX[source->Index][3] * invQ;
242 }
243 else {
244 deriv[0] = machine->DerivY[source->Index][0] * invQ;
245 deriv[1] = machine->DerivY[source->Index][1] * invQ;
246 deriv[2] = machine->DerivY[source->Index][2] * invQ;
247 deriv[3] = machine->DerivY[source->Index][3] * invQ;
248 }
249
250 result[0] = deriv[GET_SWZ(source->Swizzle, 0)];
251 result[1] = deriv[GET_SWZ(source->Swizzle, 1)];
252 result[2] = deriv[GET_SWZ(source->Swizzle, 2)];
253 result[3] = deriv[GET_SWZ(source->Swizzle, 3)];
254
255 if (source->Negate) {
256 assert(source->Negate == NEGATE_XYZW);
257 result[0] = -result[0];
258 result[1] = -result[1];
259 result[2] = -result[2];
260 result[3] = -result[3];
261 }
262 }
263 else {
264 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0);
265 }
266 }
267
268
269 /**
270 * As above, but only return result[0] element.
271 */
272 static void
273 fetch_vector1(const struct prog_src_register *source,
274 const struct gl_program_machine *machine, GLfloat result[4])
275 {
276 const GLfloat *src = get_src_register_pointer(source, machine);
277
278 result[0] = src[GET_SWZ(source->Swizzle, 0)];
279
280 if (source->Negate) {
281 result[0] = -result[0];
282 }
283 }
284
285
286 /**
287 * Fetch texel from texture. Use partial derivatives when possible.
288 */
289 static inline void
290 fetch_texel(struct gl_context *ctx,
291 const struct gl_program_machine *machine,
292 const struct prog_instruction *inst,
293 const GLfloat texcoord[4], GLfloat lodBias,
294 GLfloat color[4])
295 {
296 const GLuint unit = machine->Samplers[inst->TexSrcUnit];
297
298 /* Note: we only have the right derivatives for fragment input attribs.
299 */
300 if (machine->NumDeriv > 0 &&
301 inst->SrcReg[0].File == PROGRAM_INPUT &&
302 inst->SrcReg[0].Index == VARYING_SLOT_TEX0 + inst->TexSrcUnit) {
303 /* simple texture fetch for which we should have derivatives */
304 GLuint attr = inst->SrcReg[0].Index;
305 machine->FetchTexelDeriv(ctx, texcoord,
306 machine->DerivX[attr],
307 machine->DerivY[attr],
308 lodBias, unit, color);
309 }
310 else {
311 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color);
312 }
313 }
314
315
316 /**
317 * Store 4 floats into a register. Observe the instructions saturate and
318 * set-condition-code flags.
319 */
320 static void
321 store_vector4(const struct prog_instruction *inst,
322 struct gl_program_machine *machine, const GLfloat value[4])
323 {
324 const struct prog_dst_register *dstReg = &(inst->DstReg);
325 const GLboolean clamp = inst->Saturate;
326 GLuint writeMask = dstReg->WriteMask;
327 GLfloat clampedValue[4];
328 GLfloat *dst = get_dst_register_pointer(dstReg, machine);
329
330 #if 0
331 if (value[0] > 1.0e10 ||
332 IS_INF_OR_NAN(value[0]) ||
333 IS_INF_OR_NAN(value[1]) ||
334 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3]))
335 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
336 #endif
337
338 if (clamp) {
339 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
340 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
341 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
342 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
343 value = clampedValue;
344 }
345
346 #ifdef NAN_CHECK
347 assert(!IS_INF_OR_NAN(value[0]));
348 assert(!IS_INF_OR_NAN(value[0]));
349 assert(!IS_INF_OR_NAN(value[0]));
350 assert(!IS_INF_OR_NAN(value[0]));
351 #endif
352
353 if (writeMask & WRITEMASK_X)
354 dst[0] = value[0];
355 if (writeMask & WRITEMASK_Y)
356 dst[1] = value[1];
357 if (writeMask & WRITEMASK_Z)
358 dst[2] = value[2];
359 if (writeMask & WRITEMASK_W)
360 dst[3] = value[3];
361 }
362
363
364 /**
365 * Execute the given vertex/fragment program.
366 *
367 * \param ctx rendering context
368 * \param program the program to execute
369 * \param machine machine state (must be initialized)
370 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
371 */
372 GLboolean
373 _mesa_execute_program(struct gl_context * ctx,
374 const struct gl_program *program,
375 struct gl_program_machine *machine)
376 {
377 const GLuint numInst = program->arb.NumInstructions;
378 const GLuint maxExec = 65536;
379 GLuint pc, numExec = 0;
380
381 machine->CurProgram = program;
382
383 if (DEBUG_PROG) {
384 printf("execute program %u --------------------\n", program->Id);
385 }
386
387 if (program->Target == GL_VERTEX_PROGRAM_ARB) {
388 machine->EnvParams = ctx->VertexProgram.Parameters;
389 }
390 else {
391 machine->EnvParams = ctx->FragmentProgram.Parameters;
392 }
393
394 for (pc = 0; pc < numInst; pc++) {
395 const struct prog_instruction *inst = program->arb.Instructions + pc;
396
397 if (DEBUG_PROG) {
398 _mesa_print_instruction(inst);
399 }
400
401 switch (inst->Opcode) {
402 case OPCODE_ABS:
403 {
404 GLfloat a[4], result[4];
405 fetch_vector4(&inst->SrcReg[0], machine, a);
406 result[0] = fabsf(a[0]);
407 result[1] = fabsf(a[1]);
408 result[2] = fabsf(a[2]);
409 result[3] = fabsf(a[3]);
410 store_vector4(inst, machine, result);
411 }
412 break;
413 case OPCODE_ADD:
414 {
415 GLfloat a[4], b[4], result[4];
416 fetch_vector4(&inst->SrcReg[0], machine, a);
417 fetch_vector4(&inst->SrcReg[1], machine, b);
418 result[0] = a[0] + b[0];
419 result[1] = a[1] + b[1];
420 result[2] = a[2] + b[2];
421 result[3] = a[3] + b[3];
422 store_vector4(inst, machine, result);
423 if (DEBUG_PROG) {
424 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
425 result[0], result[1], result[2], result[3],
426 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
427 }
428 }
429 break;
430 case OPCODE_ARL:
431 {
432 GLfloat t[4];
433 fetch_vector4(&inst->SrcReg[0], machine, t);
434 machine->AddressReg[0][0] = IFLOOR(t[0]);
435 if (DEBUG_PROG) {
436 printf("ARL %d\n", machine->AddressReg[0][0]);
437 }
438 }
439 break;
440 case OPCODE_BGNLOOP:
441 /* no-op */
442 assert(program->arb.Instructions[inst->BranchTarget].Opcode
443 == OPCODE_ENDLOOP);
444 break;
445 case OPCODE_ENDLOOP:
446 /* subtract 1 here since pc is incremented by for(pc) loop */
447 assert(program->arb.Instructions[inst->BranchTarget].Opcode
448 == OPCODE_BGNLOOP);
449 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
450 break;
451 case OPCODE_BGNSUB: /* begin subroutine */
452 break;
453 case OPCODE_ENDSUB: /* end subroutine */
454 break;
455 case OPCODE_BRK: /* break out of loop (conditional) */
456 assert(program->arb.Instructions[inst->BranchTarget].Opcode
457 == OPCODE_ENDLOOP);
458 /* break out of loop */
459 /* pc++ at end of for-loop will put us after the ENDLOOP inst */
460 pc = inst->BranchTarget;
461 break;
462 case OPCODE_CONT: /* continue loop (conditional) */
463 assert(program->arb.Instructions[inst->BranchTarget].Opcode
464 == OPCODE_ENDLOOP);
465 /* continue at ENDLOOP */
466 /* Subtract 1 here since we'll do pc++ at end of for-loop */
467 pc = inst->BranchTarget - 1;
468 break;
469 case OPCODE_CAL: /* Call subroutine (conditional) */
470 /* call the subroutine */
471 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
472 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
473 }
474 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */
475 /* Subtract 1 here since we'll do pc++ at end of for-loop */
476 pc = inst->BranchTarget - 1;
477 break;
478 case OPCODE_CMP:
479 {
480 GLfloat a[4], b[4], c[4], result[4];
481 fetch_vector4(&inst->SrcReg[0], machine, a);
482 fetch_vector4(&inst->SrcReg[1], machine, b);
483 fetch_vector4(&inst->SrcReg[2], machine, c);
484 result[0] = a[0] < 0.0F ? b[0] : c[0];
485 result[1] = a[1] < 0.0F ? b[1] : c[1];
486 result[2] = a[2] < 0.0F ? b[2] : c[2];
487 result[3] = a[3] < 0.0F ? b[3] : c[3];
488 store_vector4(inst, machine, result);
489 if (DEBUG_PROG) {
490 printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
491 result[0], result[1], result[2], result[3],
492 a[0], a[1], a[2], a[3],
493 b[0], b[1], b[2], b[3],
494 c[0], c[1], c[2], c[3]);
495 }
496 }
497 break;
498 case OPCODE_COS:
499 {
500 GLfloat a[4], result[4];
501 fetch_vector1(&inst->SrcReg[0], machine, a);
502 result[0] = result[1] = result[2] = result[3]
503 = cosf(a[0]);
504 store_vector4(inst, machine, result);
505 }
506 break;
507 case OPCODE_DDX: /* Partial derivative with respect to X */
508 {
509 GLfloat result[4];
510 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
511 'X', result);
512 store_vector4(inst, machine, result);
513 }
514 break;
515 case OPCODE_DDY: /* Partial derivative with respect to Y */
516 {
517 GLfloat result[4];
518 fetch_vector4_deriv(ctx, &inst->SrcReg[0], machine,
519 'Y', result);
520 store_vector4(inst, machine, result);
521 }
522 break;
523 case OPCODE_DP2:
524 {
525 GLfloat a[4], b[4], result[4];
526 fetch_vector4(&inst->SrcReg[0], machine, a);
527 fetch_vector4(&inst->SrcReg[1], machine, b);
528 result[0] = result[1] = result[2] = result[3] = DOT2(a, b);
529 store_vector4(inst, machine, result);
530 if (DEBUG_PROG) {
531 printf("DP2 %g = (%g %g) . (%g %g)\n",
532 result[0], a[0], a[1], b[0], b[1]);
533 }
534 }
535 break;
536 case OPCODE_DP3:
537 {
538 GLfloat a[4], b[4], result[4];
539 fetch_vector4(&inst->SrcReg[0], machine, a);
540 fetch_vector4(&inst->SrcReg[1], machine, b);
541 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
542 store_vector4(inst, machine, result);
543 if (DEBUG_PROG) {
544 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
545 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
546 }
547 }
548 break;
549 case OPCODE_DP4:
550 {
551 GLfloat a[4], b[4], result[4];
552 fetch_vector4(&inst->SrcReg[0], machine, a);
553 fetch_vector4(&inst->SrcReg[1], machine, b);
554 result[0] = result[1] = result[2] = result[3] = DOT4(a, b);
555 store_vector4(inst, machine, result);
556 if (DEBUG_PROG) {
557 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
558 result[0], a[0], a[1], a[2], a[3],
559 b[0], b[1], b[2], b[3]);
560 }
561 }
562 break;
563 case OPCODE_DPH:
564 {
565 GLfloat a[4], b[4], result[4];
566 fetch_vector4(&inst->SrcReg[0], machine, a);
567 fetch_vector4(&inst->SrcReg[1], machine, b);
568 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3];
569 store_vector4(inst, machine, result);
570 }
571 break;
572 case OPCODE_DST: /* Distance vector */
573 {
574 GLfloat a[4], b[4], result[4];
575 fetch_vector4(&inst->SrcReg[0], machine, a);
576 fetch_vector4(&inst->SrcReg[1], machine, b);
577 result[0] = 1.0F;
578 result[1] = a[1] * b[1];
579 result[2] = a[2];
580 result[3] = b[3];
581 store_vector4(inst, machine, result);
582 }
583 break;
584 case OPCODE_EXP:
585 {
586 GLfloat t[4], q[4], floor_t0;
587 fetch_vector1(&inst->SrcReg[0], machine, t);
588 floor_t0 = floorf(t[0]);
589 if (floor_t0 > FLT_MAX_EXP) {
590 SET_POS_INFINITY(q[0]);
591 SET_POS_INFINITY(q[2]);
592 }
593 else if (floor_t0 < FLT_MIN_EXP) {
594 q[0] = 0.0F;
595 q[2] = 0.0F;
596 }
597 else {
598 q[0] = ldexpf(1.0, (int) floor_t0);
599 /* Note: GL_NV_vertex_program expects
600 * result.z = result.x * APPX(result.y)
601 * We do what the ARB extension says.
602 */
603 q[2] = exp2f(t[0]);
604 }
605 q[1] = t[0] - floor_t0;
606 q[3] = 1.0F;
607 store_vector4( inst, machine, q );
608 }
609 break;
610 case OPCODE_EX2: /* Exponential base 2 */
611 {
612 GLfloat a[4], result[4], val;
613 fetch_vector1(&inst->SrcReg[0], machine, a);
614 val = exp2f(a[0]);
615 /*
616 if (IS_INF_OR_NAN(val))
617 val = 1.0e10;
618 */
619 result[0] = result[1] = result[2] = result[3] = val;
620 store_vector4(inst, machine, result);
621 }
622 break;
623 case OPCODE_FLR:
624 {
625 GLfloat a[4], result[4];
626 fetch_vector4(&inst->SrcReg[0], machine, a);
627 result[0] = floorf(a[0]);
628 result[1] = floorf(a[1]);
629 result[2] = floorf(a[2]);
630 result[3] = floorf(a[3]);
631 store_vector4(inst, machine, result);
632 }
633 break;
634 case OPCODE_FRC:
635 {
636 GLfloat a[4], result[4];
637 fetch_vector4(&inst->SrcReg[0], machine, a);
638 result[0] = a[0] - floorf(a[0]);
639 result[1] = a[1] - floorf(a[1]);
640 result[2] = a[2] - floorf(a[2]);
641 result[3] = a[3] - floorf(a[3]);
642 store_vector4(inst, machine, result);
643 }
644 break;
645 case OPCODE_IF:
646 {
647 GLboolean cond;
648 assert(program->arb.Instructions[inst->BranchTarget].Opcode
649 == OPCODE_ELSE ||
650 program->arb.Instructions[inst->BranchTarget].Opcode
651 == OPCODE_ENDIF);
652 /* eval condition */
653 GLfloat a[4];
654 fetch_vector1(&inst->SrcReg[0], machine, a);
655 cond = (a[0] != 0.0F);
656 if (DEBUG_PROG) {
657 printf("IF: %d\n", cond);
658 }
659 /* do if/else */
660 if (cond) {
661 /* do if-clause (just continue execution) */
662 }
663 else {
664 /* go to the instruction after ELSE or ENDIF */
665 assert(inst->BranchTarget >= 0);
666 pc = inst->BranchTarget;
667 }
668 }
669 break;
670 case OPCODE_ELSE:
671 /* goto ENDIF */
672 assert(program->arb.Instructions[inst->BranchTarget].Opcode
673 == OPCODE_ENDIF);
674 assert(inst->BranchTarget >= 0);
675 pc = inst->BranchTarget;
676 break;
677 case OPCODE_ENDIF:
678 /* nothing */
679 break;
680 case OPCODE_KIL: /* ARB_f_p only */
681 {
682 GLfloat a[4];
683 fetch_vector4(&inst->SrcReg[0], machine, a);
684 if (DEBUG_PROG) {
685 printf("KIL if (%g %g %g %g) <= 0.0\n",
686 a[0], a[1], a[2], a[3]);
687 }
688
689 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
690 return GL_FALSE;
691 }
692 }
693 break;
694 case OPCODE_LG2: /* log base 2 */
695 {
696 GLfloat a[4], result[4], val;
697 fetch_vector1(&inst->SrcReg[0], machine, a);
698 /* The fast LOG2 macro doesn't meet the precision requirements.
699 */
700 if (a[0] == 0.0F) {
701 val = -FLT_MAX;
702 }
703 else {
704 val = logf(a[0]) * 1.442695F;
705 }
706 result[0] = result[1] = result[2] = result[3] = val;
707 store_vector4(inst, machine, result);
708 }
709 break;
710 case OPCODE_LIT:
711 {
712 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
713 GLfloat a[4], result[4];
714 fetch_vector4(&inst->SrcReg[0], machine, a);
715 a[0] = MAX2(a[0], 0.0F);
716 a[1] = MAX2(a[1], 0.0F);
717 /* XXX ARB version clamps a[3], NV version doesn't */
718 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
719 result[0] = 1.0F;
720 result[1] = a[0];
721 /* XXX we could probably just use pow() here */
722 if (a[0] > 0.0F) {
723 if (a[1] == 0.0F && a[3] == 0.0F)
724 result[2] = 1.0F;
725 else
726 result[2] = powf(a[1], a[3]);
727 }
728 else {
729 result[2] = 0.0F;
730 }
731 result[3] = 1.0F;
732 store_vector4(inst, machine, result);
733 if (DEBUG_PROG) {
734 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
735 result[0], result[1], result[2], result[3],
736 a[0], a[1], a[2], a[3]);
737 }
738 }
739 break;
740 case OPCODE_LOG:
741 {
742 GLfloat t[4], q[4], abs_t0;
743 fetch_vector1(&inst->SrcReg[0], machine, t);
744 abs_t0 = fabsf(t[0]);
745 if (abs_t0 != 0.0F) {
746 if (IS_INF_OR_NAN(abs_t0))
747 {
748 SET_POS_INFINITY(q[0]);
749 q[1] = 1.0F;
750 SET_POS_INFINITY(q[2]);
751 }
752 else {
753 int exponent;
754 GLfloat mantissa = frexpf(t[0], &exponent);
755 q[0] = (GLfloat) (exponent - 1);
756 q[1] = 2.0F * mantissa; /* map [.5, 1) -> [1, 2) */
757
758 /* The fast LOG2 macro doesn't meet the precision
759 * requirements.
760 */
761 q[2] = logf(t[0]) * 1.442695F;
762 }
763 }
764 else {
765 SET_NEG_INFINITY(q[0]);
766 q[1] = 1.0F;
767 SET_NEG_INFINITY(q[2]);
768 }
769 q[3] = 1.0;
770 store_vector4(inst, machine, q);
771 }
772 break;
773 case OPCODE_LRP:
774 {
775 GLfloat a[4], b[4], c[4], result[4];
776 fetch_vector4(&inst->SrcReg[0], machine, a);
777 fetch_vector4(&inst->SrcReg[1], machine, b);
778 fetch_vector4(&inst->SrcReg[2], machine, c);
779 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
780 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
781 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
782 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
783 store_vector4(inst, machine, result);
784 if (DEBUG_PROG) {
785 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
786 "(%g %g %g %g), (%g %g %g %g)\n",
787 result[0], result[1], result[2], result[3],
788 a[0], a[1], a[2], a[3],
789 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
790 }
791 }
792 break;
793 case OPCODE_MAD:
794 {
795 GLfloat a[4], b[4], c[4], result[4];
796 fetch_vector4(&inst->SrcReg[0], machine, a);
797 fetch_vector4(&inst->SrcReg[1], machine, b);
798 fetch_vector4(&inst->SrcReg[2], machine, c);
799 result[0] = a[0] * b[0] + c[0];
800 result[1] = a[1] * b[1] + c[1];
801 result[2] = a[2] * b[2] + c[2];
802 result[3] = a[3] * b[3] + c[3];
803 store_vector4(inst, machine, result);
804 if (DEBUG_PROG) {
805 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
806 "(%g %g %g %g) + (%g %g %g %g)\n",
807 result[0], result[1], result[2], result[3],
808 a[0], a[1], a[2], a[3],
809 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]);
810 }
811 }
812 break;
813 case OPCODE_MAX:
814 {
815 GLfloat a[4], b[4], result[4];
816 fetch_vector4(&inst->SrcReg[0], machine, a);
817 fetch_vector4(&inst->SrcReg[1], machine, b);
818 result[0] = MAX2(a[0], b[0]);
819 result[1] = MAX2(a[1], b[1]);
820 result[2] = MAX2(a[2], b[2]);
821 result[3] = MAX2(a[3], b[3]);
822 store_vector4(inst, machine, result);
823 if (DEBUG_PROG) {
824 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
825 result[0], result[1], result[2], result[3],
826 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
827 }
828 }
829 break;
830 case OPCODE_MIN:
831 {
832 GLfloat a[4], b[4], result[4];
833 fetch_vector4(&inst->SrcReg[0], machine, a);
834 fetch_vector4(&inst->SrcReg[1], machine, b);
835 result[0] = MIN2(a[0], b[0]);
836 result[1] = MIN2(a[1], b[1]);
837 result[2] = MIN2(a[2], b[2]);
838 result[3] = MIN2(a[3], b[3]);
839 store_vector4(inst, machine, result);
840 }
841 break;
842 case OPCODE_MOV:
843 {
844 GLfloat result[4];
845 fetch_vector4(&inst->SrcReg[0], machine, result);
846 store_vector4(inst, machine, result);
847 if (DEBUG_PROG) {
848 printf("MOV (%g %g %g %g)\n",
849 result[0], result[1], result[2], result[3]);
850 }
851 }
852 break;
853 case OPCODE_MUL:
854 {
855 GLfloat a[4], b[4], result[4];
856 fetch_vector4(&inst->SrcReg[0], machine, a);
857 fetch_vector4(&inst->SrcReg[1], machine, b);
858 result[0] = a[0] * b[0];
859 result[1] = a[1] * b[1];
860 result[2] = a[2] * b[2];
861 result[3] = a[3] * b[3];
862 store_vector4(inst, machine, result);
863 if (DEBUG_PROG) {
864 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
865 result[0], result[1], result[2], result[3],
866 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
867 }
868 }
869 break;
870 case OPCODE_NOISE1:
871 {
872 GLfloat a[4], result[4];
873 fetch_vector1(&inst->SrcReg[0], machine, a);
874 result[0] =
875 result[1] =
876 result[2] =
877 result[3] = _mesa_noise1(a[0]);
878 store_vector4(inst, machine, result);
879 }
880 break;
881 case OPCODE_NOISE2:
882 {
883 GLfloat a[4], result[4];
884 fetch_vector4(&inst->SrcReg[0], machine, a);
885 result[0] =
886 result[1] =
887 result[2] = result[3] = _mesa_noise2(a[0], a[1]);
888 store_vector4(inst, machine, result);
889 }
890 break;
891 case OPCODE_NOISE3:
892 {
893 GLfloat a[4], result[4];
894 fetch_vector4(&inst->SrcReg[0], machine, a);
895 result[0] =
896 result[1] =
897 result[2] =
898 result[3] = _mesa_noise3(a[0], a[1], a[2]);
899 store_vector4(inst, machine, result);
900 }
901 break;
902 case OPCODE_NOISE4:
903 {
904 GLfloat a[4], result[4];
905 fetch_vector4(&inst->SrcReg[0], machine, a);
906 result[0] =
907 result[1] =
908 result[2] =
909 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]);
910 store_vector4(inst, machine, result);
911 }
912 break;
913 case OPCODE_NOP:
914 break;
915 case OPCODE_POW:
916 {
917 GLfloat a[4], b[4], result[4];
918 fetch_vector1(&inst->SrcReg[0], machine, a);
919 fetch_vector1(&inst->SrcReg[1], machine, b);
920 result[0] = result[1] = result[2] = result[3]
921 = powf(a[0], b[0]);
922 store_vector4(inst, machine, result);
923 }
924 break;
925
926 case OPCODE_RCP:
927 {
928 GLfloat a[4], result[4];
929 fetch_vector1(&inst->SrcReg[0], machine, a);
930 if (DEBUG_PROG) {
931 if (a[0] == 0)
932 printf("RCP(0)\n");
933 else if (IS_INF_OR_NAN(a[0]))
934 printf("RCP(inf)\n");
935 }
936 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
937 store_vector4(inst, machine, result);
938 }
939 break;
940 case OPCODE_RET: /* return from subroutine (conditional) */
941 if (machine->StackDepth == 0) {
942 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
943 }
944 /* subtract one because of pc++ in the for loop */
945 pc = machine->CallStack[--machine->StackDepth] - 1;
946 break;
947 case OPCODE_RSQ: /* 1 / sqrt() */
948 {
949 GLfloat a[4], result[4];
950 fetch_vector1(&inst->SrcReg[0], machine, a);
951 a[0] = fabsf(a[0]);
952 result[0] = result[1] = result[2] = result[3] = 1.0f / sqrtf(a[0]);
953 store_vector4(inst, machine, result);
954 if (DEBUG_PROG) {
955 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
956 }
957 }
958 break;
959 case OPCODE_SCS: /* sine and cos */
960 {
961 GLfloat a[4], result[4];
962 fetch_vector1(&inst->SrcReg[0], machine, a);
963 result[0] = cosf(a[0]);
964 result[1] = sinf(a[0]);
965 result[2] = 0.0F; /* undefined! */
966 result[3] = 0.0F; /* undefined! */
967 store_vector4(inst, machine, result);
968 }
969 break;
970 case OPCODE_SGE: /* set on greater or equal */
971 {
972 GLfloat a[4], b[4], result[4];
973 fetch_vector4(&inst->SrcReg[0], machine, a);
974 fetch_vector4(&inst->SrcReg[1], machine, b);
975 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
976 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
977 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
978 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
979 store_vector4(inst, machine, result);
980 if (DEBUG_PROG) {
981 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
982 result[0], result[1], result[2], result[3],
983 a[0], a[1], a[2], a[3],
984 b[0], b[1], b[2], b[3]);
985 }
986 }
987 break;
988 case OPCODE_SIN:
989 {
990 GLfloat a[4], result[4];
991 fetch_vector1(&inst->SrcReg[0], machine, a);
992 result[0] = result[1] = result[2] = result[3]
993 = sinf(a[0]);
994 store_vector4(inst, machine, result);
995 }
996 break;
997 case OPCODE_SLT: /* set on less */
998 {
999 GLfloat a[4], b[4], result[4];
1000 fetch_vector4(&inst->SrcReg[0], machine, a);
1001 fetch_vector4(&inst->SrcReg[1], machine, b);
1002 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1003 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1004 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1005 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1006 store_vector4(inst, machine, result);
1007 if (DEBUG_PROG) {
1008 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1009 result[0], result[1], result[2], result[3],
1010 a[0], a[1], a[2], a[3],
1011 b[0], b[1], b[2], b[3]);
1012 }
1013 }
1014 break;
1015 case OPCODE_SSG: /* set sign (-1, 0 or +1) */
1016 {
1017 GLfloat a[4], result[4];
1018 fetch_vector4(&inst->SrcReg[0], machine, a);
1019 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F));
1020 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F));
1021 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F));
1022 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F));
1023 store_vector4(inst, machine, result);
1024 }
1025 break;
1026 case OPCODE_SUB:
1027 {
1028 GLfloat a[4], b[4], result[4];
1029 fetch_vector4(&inst->SrcReg[0], machine, a);
1030 fetch_vector4(&inst->SrcReg[1], machine, b);
1031 result[0] = a[0] - b[0];
1032 result[1] = a[1] - b[1];
1033 result[2] = a[2] - b[2];
1034 result[3] = a[3] - b[3];
1035 store_vector4(inst, machine, result);
1036 if (DEBUG_PROG) {
1037 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1038 result[0], result[1], result[2], result[3],
1039 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1040 }
1041 }
1042 break;
1043 case OPCODE_SWZ: /* extended swizzle */
1044 {
1045 const struct prog_src_register *source = &inst->SrcReg[0];
1046 const GLfloat *src = get_src_register_pointer(source, machine);
1047 GLfloat result[4];
1048 GLuint i;
1049 for (i = 0; i < 4; i++) {
1050 const GLuint swz = GET_SWZ(source->Swizzle, i);
1051 if (swz == SWIZZLE_ZERO)
1052 result[i] = 0.0;
1053 else if (swz == SWIZZLE_ONE)
1054 result[i] = 1.0;
1055 else {
1056 assert(swz <= 3);
1057 result[i] = src[swz];
1058 }
1059 if (source->Negate & (1 << i))
1060 result[i] = -result[i];
1061 }
1062 store_vector4(inst, machine, result);
1063 }
1064 break;
1065 case OPCODE_TEX: /* Both ARB and NV frag prog */
1066 /* Simple texel lookup */
1067 {
1068 GLfloat texcoord[4], color[4];
1069 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1070
1071 /* For TEX, texcoord.Q should not be used and its value should not
1072 * matter (at most, we pass coord.xyz to texture3D() in GLSL).
1073 * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
1074 * which is effectively what happens when the texcoord swizzle
1075 * is .xyzz
1076 */
1077 texcoord[3] = 1.0f;
1078
1079 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1080
1081 if (DEBUG_PROG) {
1082 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1083 color[0], color[1], color[2], color[3],
1084 inst->TexSrcUnit,
1085 texcoord[0], texcoord[1], texcoord[2], texcoord[3]);
1086 }
1087 store_vector4(inst, machine, color);
1088 }
1089 break;
1090 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1091 /* Texel lookup with LOD bias */
1092 {
1093 GLfloat texcoord[4], color[4], lodBias;
1094
1095 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1096
1097 /* texcoord[3] is the bias to add to lambda */
1098 lodBias = texcoord[3];
1099
1100 fetch_texel(ctx, machine, inst, texcoord, lodBias, color);
1101
1102 if (DEBUG_PROG) {
1103 printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1104 " bias %g\n",
1105 color[0], color[1], color[2], color[3],
1106 inst->TexSrcUnit,
1107 texcoord[0],
1108 texcoord[1],
1109 texcoord[2],
1110 texcoord[3],
1111 lodBias);
1112 }
1113
1114 store_vector4(inst, machine, color);
1115 }
1116 break;
1117 case OPCODE_TXD:
1118 /* Texture lookup w/ partial derivatives for LOD */
1119 {
1120 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1121 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1122 fetch_vector4(&inst->SrcReg[1], machine, dtdx);
1123 fetch_vector4(&inst->SrcReg[2], machine, dtdy);
1124 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1125 0.0, /* lodBias */
1126 inst->TexSrcUnit, color);
1127 store_vector4(inst, machine, color);
1128 }
1129 break;
1130 case OPCODE_TXL:
1131 /* Texel lookup with explicit LOD */
1132 {
1133 GLfloat texcoord[4], color[4], lod;
1134
1135 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1136
1137 /* texcoord[3] is the LOD */
1138 lod = texcoord[3];
1139
1140 machine->FetchTexelLod(ctx, texcoord, lod,
1141 machine->Samplers[inst->TexSrcUnit], color);
1142
1143 store_vector4(inst, machine, color);
1144 }
1145 break;
1146 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1147 /* Texture lookup w/ projective divide */
1148 {
1149 GLfloat texcoord[4], color[4];
1150
1151 fetch_vector4(&inst->SrcReg[0], machine, texcoord);
1152 /* Not so sure about this test - if texcoord[3] is
1153 * zero, we'd probably be fine except for an assert in
1154 * IROUND_POS() which gets triggered by the inf values created.
1155 */
1156 if (texcoord[3] != 0.0F) {
1157 texcoord[0] /= texcoord[3];
1158 texcoord[1] /= texcoord[3];
1159 texcoord[2] /= texcoord[3];
1160 }
1161
1162 fetch_texel(ctx, machine, inst, texcoord, 0.0, color);
1163
1164 store_vector4(inst, machine, color);
1165 }
1166 break;
1167 case OPCODE_TRUNC: /* truncate toward zero */
1168 {
1169 GLfloat a[4], result[4];
1170 fetch_vector4(&inst->SrcReg[0], machine, a);
1171 result[0] = (GLfloat) (GLint) a[0];
1172 result[1] = (GLfloat) (GLint) a[1];
1173 result[2] = (GLfloat) (GLint) a[2];
1174 result[3] = (GLfloat) (GLint) a[3];
1175 store_vector4(inst, machine, result);
1176 }
1177 break;
1178 case OPCODE_XPD: /* cross product */
1179 {
1180 GLfloat a[4], b[4], result[4];
1181 fetch_vector4(&inst->SrcReg[0], machine, a);
1182 fetch_vector4(&inst->SrcReg[1], machine, b);
1183 result[0] = a[1] * b[2] - a[2] * b[1];
1184 result[1] = a[2] * b[0] - a[0] * b[2];
1185 result[2] = a[0] * b[1] - a[1] * b[0];
1186 result[3] = 1.0;
1187 store_vector4(inst, machine, result);
1188 if (DEBUG_PROG) {
1189 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1190 result[0], result[1], result[2], result[3],
1191 a[0], a[1], a[2], b[0], b[1], b[2]);
1192 }
1193 }
1194 break;
1195 case OPCODE_END:
1196 return GL_TRUE;
1197 default:
1198 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program",
1199 inst->Opcode);
1200 return GL_TRUE; /* return value doesn't matter */
1201 }
1202
1203 numExec++;
1204 if (numExec > maxExec) {
1205 static GLboolean reported = GL_FALSE;
1206 if (!reported) {
1207 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1208 reported = GL_TRUE;
1209 }
1210 return GL_TRUE;
1211 }
1212
1213 } /* for pc */
1214
1215 return GL_TRUE;
1216 }