b5a7a692aeb664f832d2c5bd3a3dfae121d43d04
2 * Mesa 3-D graphics library
4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 * OTHER DEALINGS IN THE SOFTWARE.
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
39 #include "main/glheader.h"
40 #include "main/macros.h"
41 #include "prog_execute.h"
42 #include "prog_instruction.h"
43 #include "prog_parameter.h"
44 #include "prog_print.h"
45 #include "prog_noise.h"
53 * Set x to positive or negative infinity.
/*
 * Float bit-pattern helper macros.
 *
 * NOTE(review): SET_POS_INFINITY and SET_NEG_INFINITY each end in a line
 * continuation, but their expansion lines are not visible in this listing
 * (the embedded numbering jumps 55 -> 61 -> 68), so their bodies cannot be
 * described from here.
 *
 * SET_FLOAT_BITS(x, bits) views the storage of x through an fi_type pointer
 * and assigns bits to its `i` member.  The expansion is a bare assignment
 * and `bits` is not parenthesized, so use it only as a full statement.
 */
55 #define SET_POS_INFINITY(x) \
61 #define SET_NEG_INFINITY(x) \
68 #define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits
/*
 * File-local constant vector whose four components are all 0.0F.
 * NOTE(review): presumably the fallback handed out for out-of-range
 * register reads; the statements that would return it are not visible in
 * this listing -- confirm against the full file.
 */
71 static const GLfloat ZeroVec
[4] = { 0.0F
, 0.0F
, 0.0F
, 0.0F
};
75 * Return a pointer to the 4-element float vector specified by the given
/*
 * Resolve a source-register descriptor to the float storage it names.
 *
 * source:  register reference; its File, Index and RelAddr fields are read.
 * machine: interpreter state that owns the register arrays.
 * Returns a read-only pointer into machine or program-parameter storage.
 *
 * The register number starts as source->Index; when source->RelAddr is set,
 * machine->AddressReg[0][0] is added to it.  The switch on source->File
 * then picks the backing array:
 *   - PROGRAM_TEMPORARY -> machine->Temporaries[reg]
 *   - when prog->Target is GL_VERTEX_PROGRAM_ARB -> machine->VertAttribs[reg],
 *     otherwise machine->Attribs[reg][machine->CurElement]
 *   - machine->Outputs[reg]
 *   - PROGRAM_STATE_VAR / PROGRAM_CONSTANT / PROGRAM_UNIFORM
 *       -> prog->Parameters->ParameterValues + ParameterValueOffset[reg]
 *   - PROGRAM_SYSTEM_VALUE -> machine->SystemValues[reg] (bound is asserted)
 *
 * NOTE(review): this listing is missing lines (the embedded numbering has
 * gaps, e.g. 87 -> 93 and 95 -> 97).  The statements guarded by the
 * `reg >= ...` range checks, the case labels of the attribute and output
 * branches, and the branch that emits the "Invalid src register file"
 * message are not visible here.  Presumably the range checks return a
 * fallback vector -- confirm against the full file.
 */
78 static inline const GLfloat
*
79 get_src_register_pointer(const struct prog_src_register
*source
,
80 const struct gl_program_machine
*machine
)
82 const struct gl_program
*prog
= machine
->CurProgram
;
83 GLint reg
= source
->Index
;
85 if (source
->RelAddr
) {
86 /* add address register value to src index/offset */
87 reg
+= machine
->AddressReg
[0][0];
93 switch (source
->File
) {
94 case PROGRAM_TEMPORARY
:
95 if (reg
>= MAX_PROGRAM_TEMPS
)
97 return machine
->Temporaries
[reg
];
/* vertex-program target reads machine->VertAttribs; any other target
 * reads machine->Attribs at machine->CurElement */
100 if (prog
->Target
== GL_VERTEX_PROGRAM_ARB
) {
101 if (reg
>= VERT_ATTRIB_MAX
)
103 return machine
->VertAttribs
[reg
];
106 if (reg
>= VARYING_SLOT_MAX
)
108 return machine
->Attribs
[reg
][machine
->CurElement
];
112 if (reg
>= MAX_PROGRAM_OUTPUTS
)
114 return machine
->Outputs
[reg
];
/* the three parameter-backed register files share one lookup */
116 case PROGRAM_STATE_VAR
:
118 case PROGRAM_CONSTANT
:
120 case PROGRAM_UNIFORM
: {
121 if (reg
>= (GLint
) prog
->Parameters
->NumParameters
)
/* ParameterValueOffset[reg] is this parameter's offset, counted in
 * GLfloat elements, inside the ParameterValues storage */
124 unsigned pvo
= prog
->Parameters
->ParameterValueOffset
[reg
];
125 return (GLfloat
*) prog
->Parameters
->ParameterValues
+ pvo
;
127 case PROGRAM_SYSTEM_VALUE
:
128 assert(reg
< (GLint
) ARRAY_SIZE(machine
->SystemValues
));
129 return machine
->SystemValues
[reg
];
133 "Invalid src register file %d in get_src_register_pointer()",
141 * Return a pointer to the 4-element float vector specified by the given
142 * destination register.
/*
 * Resolve a destination-register descriptor to writable float storage.
 *
 * dest:    register reference; its File and Index fields are read.
 * machine: interpreter state that owns the register arrays.
 * Returns a writable pointer into machine->Temporaries or machine->Outputs.
 *
 * The register number starts as dest->Index and machine->AddressReg[0][0]
 * is added to it.  dummyReg is a function-local static scratch vector.
 *
 * NOTE(review): this listing is missing lines (numbering gaps such as
 * 149 -> 152 and 153 -> 159).  The condition that guards the address
 * register addition, the statements guarded by the `reg >= ...` checks, the
 * case label of the outputs branch, and the branch that emits the
 * "Invalid dest register file" message are not visible here.  Presumably
 * dummyReg is what invalid destinations resolve to -- confirm against the
 * full file.
 */
144 static inline GLfloat
*
145 get_dst_register_pointer(const struct prog_dst_register
*dest
,
146 struct gl_program_machine
*machine
)
148 static GLfloat dummyReg
[4];
149 GLint reg
= dest
->Index
;
152 /* add address register value to dst index/offset */
153 reg
+= machine
->AddressReg
[0][0];
159 switch (dest
->File
) {
160 case PROGRAM_TEMPORARY
:
161 if (reg
>= MAX_PROGRAM_TEMPS
)
163 return machine
->Temporaries
[reg
];
166 if (reg
>= MAX_PROGRAM_OUTPUTS
)
168 return machine
->Outputs
[reg
];
172 "Invalid dest register file %d in get_dst_register_pointer()",
181 * Fetch a 4-element float vector from the given source register.
182 * Apply swizzling and negating as needed.
/*
 * Read a source register into result[0..3], applying swizzle and negation.
 *
 * source:  register reference; Swizzle and Negate are read here, the rest
 *          is consumed by get_src_register_pointer().
 * machine: interpreter state passed through to get_src_register_pointer().
 * result:  receives the four fetched components.
 *
 * When source->Swizzle equals SWIZZLE_NOOP the vector is copied with
 * COPY_4V.  The per-component path asserts each selector is <= 3 and then
 * indexes src with it.  A non-zero source->Negate must equal NEGATE_XYZW
 * and flips the sign of all four components.
 *
 * The trailing sanity asserts now cover result[0] through result[3]; they
 * previously tested result[0] four times, so an Inf/NaN in components 1..3
 * went unnoticed.
 *
 * NOTE(review): this listing is missing lines (numbering gaps such as
 * 192 -> 195 and 210 -> 214); the return type, braces, the branch keyword
 * between the two copy paths and any preprocessor guard around the asserts
 * are not visible here.
 */
185 fetch_vector4(const struct prog_src_register
*source
,
186 const struct gl_program_machine
*machine
, GLfloat result
[4])
188 const GLfloat
*src
= get_src_register_pointer(source
, machine
);
190 if (source
->Swizzle
== SWIZZLE_NOOP
) {
192 COPY_4V(result
, src
);
/* selectors above 3 would index past the 4-float source vector */
195 assert(GET_SWZ(source
->Swizzle
, 0) <= 3);
196 assert(GET_SWZ(source
->Swizzle
, 1) <= 3);
197 assert(GET_SWZ(source
->Swizzle
, 2) <= 3);
198 assert(GET_SWZ(source
->Swizzle
, 3) <= 3);
199 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
200 result
[1] = src
[GET_SWZ(source
->Swizzle
, 1)];
201 result
[2] = src
[GET_SWZ(source
->Swizzle
, 2)];
202 result
[3] = src
[GET_SWZ(source
->Swizzle
, 3)];
205 if (source
->Negate
) {
206 assert(source
->Negate
== NEGATE_XYZW
);
207 result
[0] = -result
[0];
208 result
[1] = -result
[1];
209 result
[2] = -result
[2];
210 result
[3] = -result
[3];
/* sanity-check every fetched component, one assert per index */
214 assert(!IS_INF_OR_NAN(result
[0]));
215 assert(!IS_INF_OR_NAN(result
[1]));
216 assert(!IS_INF_OR_NAN(result
[2]));
217 assert(!IS_INF_OR_NAN(result
[3]));
223 * Fetch the derivative with respect to X or Y for the given register.
224 * XXX this currently only works for fragment program input attribs.
/*
 * Fetch the X or Y derivative of a source register into result[0..3].
 *
 * source:  register reference; File, Index, Swizzle and Negate are read.
 * machine: supplies NumDeriv, CurElement, Attribs, DerivX and DerivY.
 * xOrY:    selector character for the derivative direction.
 * result:  receives the four derivative components.
 *
 * Derivatives are only available when source->File is PROGRAM_INPUT and
 * source->Index is below machine->NumDeriv.  In that case each component of
 * machine->DerivX[Index] or machine->DerivY[Index] is multiplied by invQ,
 * the result is swizzled with source->Swizzle, and a non-zero
 * source->Negate (which must equal NEGATE_XYZW) flips all four signs.
 * The final ASSIGN_4V writes four zeros into result.
 *
 * NOTE(review): this listing is missing lines (numbering gaps such as
 * 235 -> 239, 242 -> 245 and 261 -> 265).  The declaration of `deriv`, the
 * test on xOrY that chooses between the DerivX and DerivY groups, and the
 * branch keyword in front of ASSIGN_4V are not visible here; presumably the
 * zero fill is the fallback for sources without derivatives -- confirm.
 * NOTE(review): no guard against w == 0 is visible before the division.
 */
227 fetch_vector4_deriv(const struct prog_src_register
*source
,
228 const struct gl_program_machine
*machine
,
229 char xOrY
, GLfloat result
[4])
231 if (source
->File
== PROGRAM_INPUT
&&
232 source
->Index
< (GLint
) machine
->NumDeriv
) {
/* invQ = 1 / w, with w taken from component 3 of the current element's
 * VARYING_SLOT_POS attribute */
233 const GLint col
= machine
->CurElement
;
234 const GLfloat w
= machine
->Attribs
[VARYING_SLOT_POS
][col
][3];
235 const GLfloat invQ
= 1.0f
/ w
;
239 deriv
[0] = machine
->DerivX
[source
->Index
][0] * invQ
;
240 deriv
[1] = machine
->DerivX
[source
->Index
][1] * invQ
;
241 deriv
[2] = machine
->DerivX
[source
->Index
][2] * invQ
;
242 deriv
[3] = machine
->DerivX
[source
->Index
][3] * invQ
;
245 deriv
[0] = machine
->DerivY
[source
->Index
][0] * invQ
;
246 deriv
[1] = machine
->DerivY
[source
->Index
][1] * invQ
;
247 deriv
[2] = machine
->DerivY
[source
->Index
][2] * invQ
;
248 deriv
[3] = machine
->DerivY
[source
->Index
][3] * invQ
;
/* apply the source swizzle to the scaled derivative */
251 result
[0] = deriv
[GET_SWZ(source
->Swizzle
, 0)];
252 result
[1] = deriv
[GET_SWZ(source
->Swizzle
, 1)];
253 result
[2] = deriv
[GET_SWZ(source
->Swizzle
, 2)];
254 result
[3] = deriv
[GET_SWZ(source
->Swizzle
, 3)];
256 if (source
->Negate
) {
257 assert(source
->Negate
== NEGATE_XYZW
);
258 result
[0] = -result
[0];
259 result
[1] = -result
[1];
260 result
[2] = -result
[2];
261 result
[3] = -result
[3];
265 ASSIGN_4V(result
, 0.0, 0.0, 0.0, 0.0);
271 * As above, but only return result[0] element.
/*
 * Scalar variant of the vector fetch: reads one component of a source
 * register into result[0].
 *
 * source:  register reference; Swizzle and Negate are read here.
 * machine: interpreter state passed to get_src_register_pointer().
 * result:  4-float buffer; in the visible lines only result[0] is written,
 *          so callers should not rely on result[1..3].
 *
 * The component is selected by swizzle slot 0 and its sign is flipped when
 * source->Negate is non-zero.
 */
274 fetch_vector1(const struct prog_src_register
*source
,
275 const struct gl_program_machine
*machine
, GLfloat result
[4])
277 const GLfloat
*src
= get_src_register_pointer(source
, machine
);
279 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
281 if (source
->Negate
) {
282 result
[0] = -result
[0];
288 * Fetch texel from texture. Use partial derivatives when possible.
/*
 * Sample a texture for a texture instruction through the machine's fetch
 * callbacks.
 *
 * ctx:      rendering context, forwarded to the callbacks.
 * machine:  supplies Samplers, NumDeriv, DerivX, DerivY and the
 *           FetchTexelDeriv / FetchTexelLod callbacks.
 * inst:     the instruction; TexSrcUnit and SrcReg[0] are read.
 * texcoord: coordinate forwarded to the callbacks.
 * lodBias:  bias value forwarded to the callbacks.
 *
 * The texture unit is machine->Samplers[inst->TexSrcUnit].
 * FetchTexelDeriv is called with machine->DerivX/DerivY of the source
 * attribute when machine->NumDeriv > 0 and SrcReg[0] is the PROGRAM_INPUT
 * register VARYING_SLOT_TEX0 + inst->TexSrcUnit; FetchTexelLod is the other
 * call in the body.
 *
 * NOTE(review): this listing is missing lines (numbering gaps such as
 * 294 -> 297 and 309 -> 312).  The declaration of the `color` output
 * argument and the branch keyword in front of the FetchTexelLod call are
 * not visible here.  The "Note: we only have the right derivatives" comment
 * is also left without a visible terminator line, so nothing is inserted
 * inside the body below.
 */
291 fetch_texel(struct gl_context
*ctx
,
292 const struct gl_program_machine
*machine
,
293 const struct prog_instruction
*inst
,
294 const GLfloat texcoord
[4], GLfloat lodBias
,
297 const GLuint unit
= machine
->Samplers
[inst
->TexSrcUnit
];
299 /* Note: we only have the right derivatives for fragment input attribs.
301 if (machine
->NumDeriv
> 0 &&
302 inst
->SrcReg
[0].File
== PROGRAM_INPUT
&&
303 inst
->SrcReg
[0].Index
== VARYING_SLOT_TEX0
+ inst
->TexSrcUnit
) {
304 /* simple texture fetch for which we should have derivatives */
305 GLuint attr
= inst
->SrcReg
[0].Index
;
306 machine
->FetchTexelDeriv(ctx
, texcoord
,
307 machine
->DerivX
[attr
],
308 machine
->DerivY
[attr
],
309 lodBias
, unit
, color
);
312 machine
->FetchTexelLod(ctx
, texcoord
, lodBias
, unit
, color
);
318 * Store 4 floats into a register. Observe the instructions saturate and
319 * set-condition-code flags.
/*
 * Write a 4-float value to the instruction's destination register.
 *
 * inst:    the instruction; DstReg (for the target and WriteMask) and
 *          Saturate are read.
 * machine: interpreter state passed to get_dst_register_pointer().
 * value:   the four components to store.
 *
 * When saturation applies, each component is clamped to [0, 1] into
 * clampedValue and `value` is redirected to that copy.  The WRITEMASK_X/Y/Z/W
 * bits of dstReg->WriteMask gate the per-component stores.
 *
 * The sanity asserts now cover value[0] through value[3]; they previously
 * tested value[0] four times, so an Inf/NaN in components 1..3 went
 * unnoticed.
 *
 * NOTE(review): this listing is missing lines (numbering gaps such as
 * 329 -> 332, 336 -> 340 and 354 -> 356).  The return type, the test on
 * `clamp`, any preprocessor guard around the debug printf and the asserts,
 * and the stores controlled by the write-mask tests are not visible here.
 */
322 store_vector4(const struct prog_instruction
*inst
,
323 struct gl_program_machine
*machine
, const GLfloat value
[4])
325 const struct prog_dst_register
*dstReg
= &(inst
->DstReg
);
326 const GLboolean clamp
= inst
->Saturate
;
327 GLuint writeMask
= dstReg
->WriteMask
;
328 GLfloat clampedValue
[4];
329 GLfloat
*dst
= get_dst_register_pointer(dstReg
, machine
);
/* debug trace of suspicious values (very large x, or any Inf/NaN) */
332 if (value
[0] > 1.0e10
||
333 IS_INF_OR_NAN(value
[0]) ||
334 IS_INF_OR_NAN(value
[1]) ||
335 IS_INF_OR_NAN(value
[2]) || IS_INF_OR_NAN(value
[3]))
336 printf("store %g %g %g %g\n", value
[0], value
[1], value
[2], value
[3]);
/* saturate: clamp into a local copy and store from that copy */
340 clampedValue
[0] = CLAMP(value
[0], 0.0F
, 1.0F
);
341 clampedValue
[1] = CLAMP(value
[1], 0.0F
, 1.0F
);
342 clampedValue
[2] = CLAMP(value
[2], 0.0F
, 1.0F
);
343 clampedValue
[3] = CLAMP(value
[3], 0.0F
, 1.0F
);
344 value
= clampedValue
;
/* sanity-check every component about to be stored, one assert per index */
348 assert(!IS_INF_OR_NAN(value
[0]));
349 assert(!IS_INF_OR_NAN(value
[1]));
350 assert(!IS_INF_OR_NAN(value
[2]));
351 assert(!IS_INF_OR_NAN(value
[3]));
354 if (writeMask
& WRITEMASK_X
)
356 if (writeMask
& WRITEMASK_Y
)
358 if (writeMask
& WRITEMASK_Z
)
360 if (writeMask
& WRITEMASK_W
)
366 * Execute the given vertex/fragment program.
368 * \param ctx rendering context
369 * \param program the program to execute
370 * \param machine machine state (must be initialized)
371 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
374 _mesa_execute_program(struct gl_context
* ctx
,
375 const struct gl_program
*program
,
376 struct gl_program_machine
*machine
)
378 const GLuint numInst
= program
->arb
.NumInstructions
;
379 const GLuint maxExec
= 65536;
380 GLuint pc
, numExec
= 0;
382 machine
->CurProgram
= program
;
385 printf("execute program %u --------------------\n", program
->Id
);
388 if (program
->Target
== GL_VERTEX_PROGRAM_ARB
) {
389 machine
->EnvParams
= ctx
->VertexProgram
.Parameters
;
392 machine
->EnvParams
= ctx
->FragmentProgram
.Parameters
;
395 for (pc
= 0; pc
< numInst
; pc
++) {
396 const struct prog_instruction
*inst
= program
->arb
.Instructions
+ pc
;
399 _mesa_print_instruction(inst
);
402 switch (inst
->Opcode
) {
405 GLfloat a
[4], result
[4];
406 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
407 result
[0] = fabsf(a
[0]);
408 result
[1] = fabsf(a
[1]);
409 result
[2] = fabsf(a
[2]);
410 result
[3] = fabsf(a
[3]);
411 store_vector4(inst
, machine
, result
);
416 GLfloat a
[4], b
[4], result
[4];
417 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
418 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
419 result
[0] = a
[0] + b
[0];
420 result
[1] = a
[1] + b
[1];
421 result
[2] = a
[2] + b
[2];
422 result
[3] = a
[3] + b
[3];
423 store_vector4(inst
, machine
, result
);
425 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
426 result
[0], result
[1], result
[2], result
[3],
427 a
[0], a
[1], a
[2], a
[3], b
[0], b
[1], b
[2], b
[3]);
434 fetch_vector4(&inst
->SrcReg
[0], machine
, t
);
435 machine
->AddressReg
[0][0] = IFLOOR(t
[0]);
437 printf("ARL %d\n", machine
->AddressReg
[0][0]);
443 assert(program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
447 /* subtract 1 here since pc is incremented by for(pc) loop */
448 assert(program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
450 pc
= inst
->BranchTarget
- 1; /* go to matching BGNLOOP */
452 case OPCODE_BGNSUB
: /* begin subroutine */
454 case OPCODE_ENDSUB
: /* end subroutine */
456 case OPCODE_BRK
: /* break out of loop (conditional) */
457 assert(program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
459 /* break out of loop */
460 /* pc++ at end of for-loop will put us after the ENDLOOP inst */
461 pc
= inst
->BranchTarget
;
463 case OPCODE_CONT
: /* continue loop (conditional) */
464 assert(program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
466 /* continue at ENDLOOP */
467 /* Subtract 1 here since we'll do pc++ at end of for-loop */
468 pc
= inst
->BranchTarget
- 1;
470 case OPCODE_CAL
: /* Call subroutine (conditional) */
471 /* call the subroutine */
472 if (machine
->StackDepth
>= MAX_PROGRAM_CALL_DEPTH
) {
473 return GL_TRUE
; /* Per GL_NV_vertex_program2 spec */
475 machine
->CallStack
[machine
->StackDepth
++] = pc
+ 1; /* next inst */
476 /* Subtract 1 here since we'll do pc++ at end of for-loop */
477 pc
= inst
->BranchTarget
- 1;
481 GLfloat a
[4], b
[4], c
[4], result
[4];
482 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
483 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
484 fetch_vector4(&inst
->SrcReg
[2], machine
, c
);
485 result
[0] = a
[0] < 0.0F
? b
[0] : c
[0];
486 result
[1] = a
[1] < 0.0F
? b
[1] : c
[1];
487 result
[2] = a
[2] < 0.0F
? b
[2] : c
[2];
488 result
[3] = a
[3] < 0.0F
? b
[3] : c
[3];
489 store_vector4(inst
, machine
, result
);
491 printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n",
492 result
[0], result
[1], result
[2], result
[3],
493 a
[0], a
[1], a
[2], a
[3],
494 b
[0], b
[1], b
[2], b
[3],
495 c
[0], c
[1], c
[2], c
[3]);
501 GLfloat a
[4], result
[4];
502 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
503 result
[0] = result
[1] = result
[2] = result
[3]
505 store_vector4(inst
, machine
, result
);
508 case OPCODE_DDX
: /* Partial derivative with respect to X */
511 fetch_vector4_deriv(&inst
->SrcReg
[0], machine
, 'X', result
);
512 store_vector4(inst
, machine
, result
);
515 case OPCODE_DDY
: /* Partial derivative with respect to Y */
518 fetch_vector4_deriv(&inst
->SrcReg
[0], machine
, 'Y', result
);
519 store_vector4(inst
, machine
, result
);
524 GLfloat a
[4], b
[4], result
[4];
525 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
526 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
527 result
[0] = result
[1] = result
[2] = result
[3] = DOT2(a
, b
);
528 store_vector4(inst
, machine
, result
);
530 printf("DP2 %g = (%g %g) . (%g %g)\n",
531 result
[0], a
[0], a
[1], b
[0], b
[1]);
537 GLfloat a
[4], b
[4], result
[4];
538 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
539 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
540 result
[0] = result
[1] = result
[2] = result
[3] = DOT3(a
, b
);
541 store_vector4(inst
, machine
, result
);
543 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
544 result
[0], a
[0], a
[1], a
[2], b
[0], b
[1], b
[2]);
550 GLfloat a
[4], b
[4], result
[4];
551 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
552 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
553 result
[0] = result
[1] = result
[2] = result
[3] = DOT4(a
, b
);
554 store_vector4(inst
, machine
, result
);
556 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
557 result
[0], a
[0], a
[1], a
[2], a
[3],
558 b
[0], b
[1], b
[2], b
[3]);
564 GLfloat a
[4], b
[4], result
[4];
565 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
566 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
567 result
[0] = result
[1] = result
[2] = result
[3] = DOT3(a
, b
) + b
[3];
568 store_vector4(inst
, machine
, result
);
571 case OPCODE_DST
: /* Distance vector */
573 GLfloat a
[4], b
[4], result
[4];
574 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
575 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
577 result
[1] = a
[1] * b
[1];
580 store_vector4(inst
, machine
, result
);
585 GLfloat t
[4], q
[4], floor_t0
;
586 fetch_vector1(&inst
->SrcReg
[0], machine
, t
);
587 floor_t0
= floorf(t
[0]);
588 if (floor_t0
> FLT_MAX_EXP
) {
589 SET_POS_INFINITY(q
[0]);
590 SET_POS_INFINITY(q
[2]);
592 else if (floor_t0
< FLT_MIN_EXP
) {
597 q
[0] = ldexpf(1.0, (int) floor_t0
);
598 /* Note: GL_NV_vertex_program expects
599 * result.z = result.x * APPX(result.y)
600 * We do what the ARB extension says.
604 q
[1] = t
[0] - floor_t0
;
606 store_vector4( inst
, machine
, q
);
609 case OPCODE_EX2
: /* Exponential base 2 */
611 GLfloat a
[4], result
[4], val
;
612 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
615 if (IS_INF_OR_NAN(val))
618 result
[0] = result
[1] = result
[2] = result
[3] = val
;
619 store_vector4(inst
, machine
, result
);
624 GLfloat a
[4], result
[4];
625 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
626 result
[0] = floorf(a
[0]);
627 result
[1] = floorf(a
[1]);
628 result
[2] = floorf(a
[2]);
629 result
[3] = floorf(a
[3]);
630 store_vector4(inst
, machine
, result
);
635 GLfloat a
[4], result
[4];
636 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
637 result
[0] = a
[0] - floorf(a
[0]);
638 result
[1] = a
[1] - floorf(a
[1]);
639 result
[2] = a
[2] - floorf(a
[2]);
640 result
[3] = a
[3] - floorf(a
[3]);
641 store_vector4(inst
, machine
, result
);
647 assert(program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
649 program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
653 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
654 cond
= (a
[0] != 0.0F
);
656 printf("IF: %d\n", cond
);
660 /* do if-clause (just continue execution) */
663 /* go to the instruction after ELSE or ENDIF */
664 assert(inst
->BranchTarget
>= 0);
665 pc
= inst
->BranchTarget
;
671 assert(program
->arb
.Instructions
[inst
->BranchTarget
].Opcode
673 assert(inst
->BranchTarget
>= 0);
674 pc
= inst
->BranchTarget
;
679 case OPCODE_KIL
: /* ARB_f_p only */
682 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
684 printf("KIL if (%g %g %g %g) <= 0.0\n",
685 a
[0], a
[1], a
[2], a
[3]);
688 if (a
[0] < 0.0F
|| a
[1] < 0.0F
|| a
[2] < 0.0F
|| a
[3] < 0.0F
) {
693 case OPCODE_LG2
: /* log base 2 */
695 GLfloat a
[4], result
[4], val
;
696 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
697 /* The fast LOG2 macro doesn't meet the precision requirements.
703 val
= logf(a
[0]) * 1.442695F
;
705 result
[0] = result
[1] = result
[2] = result
[3] = val
;
706 store_vector4(inst
, machine
, result
);
711 const GLfloat epsilon
= 1.0F
/ 256.0F
; /* from NV VP spec */
712 GLfloat a
[4], result
[4];
713 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
714 a
[0] = MAX2(a
[0], 0.0F
);
715 a
[1] = MAX2(a
[1], 0.0F
);
716 /* XXX ARB version clamps a[3], NV version doesn't */
717 a
[3] = CLAMP(a
[3], -(128.0F
- epsilon
), (128.0F
- epsilon
));
720 /* XXX we could probably just use pow() here */
722 if (a
[1] == 0.0F
&& a
[3] == 0.0F
)
725 result
[2] = powf(a
[1], a
[3]);
731 store_vector4(inst
, machine
, result
);
733 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
734 result
[0], result
[1], result
[2], result
[3],
735 a
[0], a
[1], a
[2], a
[3]);
741 GLfloat t
[4], q
[4], abs_t0
;
742 fetch_vector1(&inst
->SrcReg
[0], machine
, t
);
743 abs_t0
= fabsf(t
[0]);
744 if (abs_t0
!= 0.0F
) {
745 if (IS_INF_OR_NAN(abs_t0
))
747 SET_POS_INFINITY(q
[0]);
749 SET_POS_INFINITY(q
[2]);
753 GLfloat mantissa
= frexpf(t
[0], &exponent
);
754 q
[0] = (GLfloat
) (exponent
- 1);
755 q
[1] = 2.0F
* mantissa
; /* map [.5, 1) -> [1, 2) */
757 /* The fast LOG2 macro doesn't meet the precision
760 q
[2] = logf(t
[0]) * 1.442695F
;
764 SET_NEG_INFINITY(q
[0]);
766 SET_NEG_INFINITY(q
[2]);
769 store_vector4(inst
, machine
, q
);
774 GLfloat a
[4], b
[4], c
[4], result
[4];
775 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
776 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
777 fetch_vector4(&inst
->SrcReg
[2], machine
, c
);
778 result
[0] = a
[0] * b
[0] + (1.0F
- a
[0]) * c
[0];
779 result
[1] = a
[1] * b
[1] + (1.0F
- a
[1]) * c
[1];
780 result
[2] = a
[2] * b
[2] + (1.0F
- a
[2]) * c
[2];
781 result
[3] = a
[3] * b
[3] + (1.0F
- a
[3]) * c
[3];
782 store_vector4(inst
, machine
, result
);
784 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
785 "(%g %g %g %g), (%g %g %g %g)\n",
786 result
[0], result
[1], result
[2], result
[3],
787 a
[0], a
[1], a
[2], a
[3],
788 b
[0], b
[1], b
[2], b
[3], c
[0], c
[1], c
[2], c
[3]);
794 GLfloat a
[4], b
[4], c
[4], result
[4];
795 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
796 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
797 fetch_vector4(&inst
->SrcReg
[2], machine
, c
);
798 result
[0] = a
[0] * b
[0] + c
[0];
799 result
[1] = a
[1] * b
[1] + c
[1];
800 result
[2] = a
[2] * b
[2] + c
[2];
801 result
[3] = a
[3] * b
[3] + c
[3];
802 store_vector4(inst
, machine
, result
);
804 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
805 "(%g %g %g %g) + (%g %g %g %g)\n",
806 result
[0], result
[1], result
[2], result
[3],
807 a
[0], a
[1], a
[2], a
[3],
808 b
[0], b
[1], b
[2], b
[3], c
[0], c
[1], c
[2], c
[3]);
814 GLfloat a
[4], b
[4], result
[4];
815 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
816 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
817 result
[0] = MAX2(a
[0], b
[0]);
818 result
[1] = MAX2(a
[1], b
[1]);
819 result
[2] = MAX2(a
[2], b
[2]);
820 result
[3] = MAX2(a
[3], b
[3]);
821 store_vector4(inst
, machine
, result
);
823 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
824 result
[0], result
[1], result
[2], result
[3],
825 a
[0], a
[1], a
[2], a
[3], b
[0], b
[1], b
[2], b
[3]);
831 GLfloat a
[4], b
[4], result
[4];
832 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
833 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
834 result
[0] = MIN2(a
[0], b
[0]);
835 result
[1] = MIN2(a
[1], b
[1]);
836 result
[2] = MIN2(a
[2], b
[2]);
837 result
[3] = MIN2(a
[3], b
[3]);
838 store_vector4(inst
, machine
, result
);
844 fetch_vector4(&inst
->SrcReg
[0], machine
, result
);
845 store_vector4(inst
, machine
, result
);
847 printf("MOV (%g %g %g %g)\n",
848 result
[0], result
[1], result
[2], result
[3]);
854 GLfloat a
[4], b
[4], result
[4];
855 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
856 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
857 result
[0] = a
[0] * b
[0];
858 result
[1] = a
[1] * b
[1];
859 result
[2] = a
[2] * b
[2];
860 result
[3] = a
[3] * b
[3];
861 store_vector4(inst
, machine
, result
);
863 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
864 result
[0], result
[1], result
[2], result
[3],
865 a
[0], a
[1], a
[2], a
[3], b
[0], b
[1], b
[2], b
[3]);
871 GLfloat a
[4], result
[4];
872 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
876 result
[3] = _mesa_noise1(a
[0]);
877 store_vector4(inst
, machine
, result
);
882 GLfloat a
[4], result
[4];
883 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
886 result
[2] = result
[3] = _mesa_noise2(a
[0], a
[1]);
887 store_vector4(inst
, machine
, result
);
892 GLfloat a
[4], result
[4];
893 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
897 result
[3] = _mesa_noise3(a
[0], a
[1], a
[2]);
898 store_vector4(inst
, machine
, result
);
903 GLfloat a
[4], result
[4];
904 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
908 result
[3] = _mesa_noise4(a
[0], a
[1], a
[2], a
[3]);
909 store_vector4(inst
, machine
, result
);
916 GLfloat a
[4], b
[4], result
[4];
917 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
918 fetch_vector1(&inst
->SrcReg
[1], machine
, b
);
919 result
[0] = result
[1] = result
[2] = result
[3]
921 store_vector4(inst
, machine
, result
);
927 GLfloat a
[4], result
[4];
928 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
932 else if (IS_INF_OR_NAN(a
[0]))
933 printf("RCP(inf)\n");
935 result
[0] = result
[1] = result
[2] = result
[3] = 1.0F
/ a
[0];
936 store_vector4(inst
, machine
, result
);
939 case OPCODE_RET
: /* return from subroutine (conditional) */
940 if (machine
->StackDepth
== 0) {
941 return GL_TRUE
; /* Per GL_NV_vertex_program2 spec */
943 /* subtract one because of pc++ in the for loop */
944 pc
= machine
->CallStack
[--machine
->StackDepth
] - 1;
946 case OPCODE_RSQ
: /* 1 / sqrt() */
948 GLfloat a
[4], result
[4];
949 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
951 result
[0] = result
[1] = result
[2] = result
[3] = 1.0f
/ sqrtf(a
[0]);
952 store_vector4(inst
, machine
, result
);
954 printf("RSQ %g = 1/sqrt(|%g|)\n", result
[0], a
[0]);
958 case OPCODE_SCS
: /* sine and cos */
960 GLfloat a
[4], result
[4];
961 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
962 result
[0] = cosf(a
[0]);
963 result
[1] = sinf(a
[0]);
964 result
[2] = 0.0F
; /* undefined! */
965 result
[3] = 0.0F
; /* undefined! */
966 store_vector4(inst
, machine
, result
);
969 case OPCODE_SGE
: /* set on greater or equal */
971 GLfloat a
[4], b
[4], result
[4];
972 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
973 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
974 result
[0] = (a
[0] >= b
[0]) ? 1.0F
: 0.0F
;
975 result
[1] = (a
[1] >= b
[1]) ? 1.0F
: 0.0F
;
976 result
[2] = (a
[2] >= b
[2]) ? 1.0F
: 0.0F
;
977 result
[3] = (a
[3] >= b
[3]) ? 1.0F
: 0.0F
;
978 store_vector4(inst
, machine
, result
);
980 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n",
981 result
[0], result
[1], result
[2], result
[3],
982 a
[0], a
[1], a
[2], a
[3],
983 b
[0], b
[1], b
[2], b
[3]);
989 GLfloat a
[4], result
[4];
990 fetch_vector1(&inst
->SrcReg
[0], machine
, a
);
991 result
[0] = result
[1] = result
[2] = result
[3]
993 store_vector4(inst
, machine
, result
);
996 case OPCODE_SLT
: /* set on less */
998 GLfloat a
[4], b
[4], result
[4];
999 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
1000 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
1001 result
[0] = (a
[0] < b
[0]) ? 1.0F
: 0.0F
;
1002 result
[1] = (a
[1] < b
[1]) ? 1.0F
: 0.0F
;
1003 result
[2] = (a
[2] < b
[2]) ? 1.0F
: 0.0F
;
1004 result
[3] = (a
[3] < b
[3]) ? 1.0F
: 0.0F
;
1005 store_vector4(inst
, machine
, result
);
1007 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n",
1008 result
[0], result
[1], result
[2], result
[3],
1009 a
[0], a
[1], a
[2], a
[3],
1010 b
[0], b
[1], b
[2], b
[3]);
1014 case OPCODE_SSG
: /* set sign (-1, 0 or +1) */
1016 GLfloat a
[4], result
[4];
1017 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
1018 result
[0] = (GLfloat
) ((a
[0] > 0.0F
) - (a
[0] < 0.0F
));
1019 result
[1] = (GLfloat
) ((a
[1] > 0.0F
) - (a
[1] < 0.0F
));
1020 result
[2] = (GLfloat
) ((a
[2] > 0.0F
) - (a
[2] < 0.0F
));
1021 result
[3] = (GLfloat
) ((a
[3] > 0.0F
) - (a
[3] < 0.0F
));
1022 store_vector4(inst
, machine
, result
);
1027 GLfloat a
[4], b
[4], result
[4];
1028 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
1029 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
1030 result
[0] = a
[0] - b
[0];
1031 result
[1] = a
[1] - b
[1];
1032 result
[2] = a
[2] - b
[2];
1033 result
[3] = a
[3] - b
[3];
1034 store_vector4(inst
, machine
, result
);
1036 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1037 result
[0], result
[1], result
[2], result
[3],
1038 a
[0], a
[1], a
[2], a
[3], b
[0], b
[1], b
[2], b
[3]);
1042 case OPCODE_SWZ
: /* extended swizzle */
1044 const struct prog_src_register
*source
= &inst
->SrcReg
[0];
1045 const GLfloat
*src
= get_src_register_pointer(source
, machine
);
1048 for (i
= 0; i
< 4; i
++) {
1049 const GLuint swz
= GET_SWZ(source
->Swizzle
, i
);
1050 if (swz
== SWIZZLE_ZERO
)
1052 else if (swz
== SWIZZLE_ONE
)
1056 result
[i
] = src
[swz
];
1058 if (source
->Negate
& (1 << i
))
1059 result
[i
] = -result
[i
];
1061 store_vector4(inst
, machine
, result
);
1064 case OPCODE_TEX
: /* Both ARB and NV frag prog */
1065 /* Simple texel lookup */
1067 GLfloat texcoord
[4], color
[4];
1068 fetch_vector4(&inst
->SrcReg
[0], machine
, texcoord
);
1070 /* For TEX, texcoord.Q should not be used and its value should not
1071 * matter (at most, we pass coord.xyz to texture3D() in GLSL).
1072 * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value
1073 * which is effectively what happens when the texcoord swizzle
1078 fetch_texel(ctx
, machine
, inst
, texcoord
, 0.0, color
);
1081 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n",
1082 color
[0], color
[1], color
[2], color
[3],
1084 texcoord
[0], texcoord
[1], texcoord
[2], texcoord
[3]);
1086 store_vector4(inst
, machine
, color
);
1089 case OPCODE_TXB
: /* GL_ARB_fragment_program only */
1090 /* Texel lookup with LOD bias */
1092 GLfloat texcoord
[4], color
[4], lodBias
;
1094 fetch_vector4(&inst
->SrcReg
[0], machine
, texcoord
);
1096 /* texcoord[3] is the bias to add to lambda */
1097 lodBias
= texcoord
[3];
1099 fetch_texel(ctx
, machine
, inst
, texcoord
, lodBias
, color
);
1102 printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]"
1104 color
[0], color
[1], color
[2], color
[3],
1113 store_vector4(inst
, machine
, color
);
1117 /* Texture lookup w/ partial derivatives for LOD */
1119 GLfloat texcoord
[4], dtdx
[4], dtdy
[4], color
[4];
1120 fetch_vector4(&inst
->SrcReg
[0], machine
, texcoord
);
1121 fetch_vector4(&inst
->SrcReg
[1], machine
, dtdx
);
1122 fetch_vector4(&inst
->SrcReg
[2], machine
, dtdy
);
1123 machine
->FetchTexelDeriv(ctx
, texcoord
, dtdx
, dtdy
,
1125 inst
->TexSrcUnit
, color
);
1126 store_vector4(inst
, machine
, color
);
1130 /* Texel lookup with explicit LOD */
1132 GLfloat texcoord
[4], color
[4], lod
;
1134 fetch_vector4(&inst
->SrcReg
[0], machine
, texcoord
);
1136 /* texcoord[3] is the LOD */
1139 machine
->FetchTexelLod(ctx
, texcoord
, lod
,
1140 machine
->Samplers
[inst
->TexSrcUnit
], color
);
1142 store_vector4(inst
, machine
, color
);
1145 case OPCODE_TXP
: /* GL_ARB_fragment_program only */
1146 /* Texture lookup w/ projective divide */
1148 GLfloat texcoord
[4], color
[4];
1150 fetch_vector4(&inst
->SrcReg
[0], machine
, texcoord
);
1151 /* Not so sure about this test - if texcoord[3] is
1152 * zero, we'd probably be fine except for an assert in
1153 * IROUND_POS() which gets triggered by the inf values created.
1155 if (texcoord
[3] != 0.0F
) {
1156 texcoord
[0] /= texcoord
[3];
1157 texcoord
[1] /= texcoord
[3];
1158 texcoord
[2] /= texcoord
[3];
1161 fetch_texel(ctx
, machine
, inst
, texcoord
, 0.0, color
);
1163 store_vector4(inst
, machine
, color
);
1166 case OPCODE_TRUNC
: /* truncate toward zero */
1168 GLfloat a
[4], result
[4];
1169 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
1170 result
[0] = (GLfloat
) (GLint
) a
[0];
1171 result
[1] = (GLfloat
) (GLint
) a
[1];
1172 result
[2] = (GLfloat
) (GLint
) a
[2];
1173 result
[3] = (GLfloat
) (GLint
) a
[3];
1174 store_vector4(inst
, machine
, result
);
1177 case OPCODE_XPD
: /* cross product */
1179 GLfloat a
[4], b
[4], result
[4];
1180 fetch_vector4(&inst
->SrcReg
[0], machine
, a
);
1181 fetch_vector4(&inst
->SrcReg
[1], machine
, b
);
1182 result
[0] = a
[1] * b
[2] - a
[2] * b
[1];
1183 result
[1] = a
[2] * b
[0] - a
[0] * b
[2];
1184 result
[2] = a
[0] * b
[1] - a
[1] * b
[0];
1186 store_vector4(inst
, machine
, result
);
1188 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n",
1189 result
[0], result
[1], result
[2], result
[3],
1190 a
[0], a
[1], a
[2], b
[0], b
[1], b
[2]);
1197 _mesa_problem(ctx
, "Bad opcode %d in _mesa_execute_program",
1199 return GL_TRUE
; /* return value doesn't matter */
1203 if (numExec
> maxExec
) {
1204 static GLboolean reported
= GL_FALSE
;
1206 _mesa_problem(ctx
, "Infinite loop detected in fragment program");