2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
42 #include "prog_execute.h"
43 #include "prog_instruction.h"
44 #include "prog_parameter.h"
45 #include "prog_print.h"
46 #include "slang_library_noise.h"
49 /* See comments below for info about this */
#if FEATURE_MESA_program_debug
/** Machine being interpreted; assigned by _mesa_execute_program(). */
static struct gl_program_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Return current value (4*GLfloat) of a program register.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx  rendering context (not consulted by this function)
 * \param file  register file to read: input, output or temporary
 * \param index  register number within that file (not range-checked here)
 * \param val  receives the four components of the selected register;
 *             left untouched if no machine has been recorded yet or the
 *             register file is not one of the three handled below
 */
void
_mesa_get_program_register(GLcontext *ctx, enum register_file file,
                           GLuint index, GLfloat val[4])
{
   if (!CurrentMachine)
      return;

   switch (file) {
   case PROGRAM_INPUT:
      /* vertex programs and fragment programs keep inputs in different
       * arrays; fragment inputs are additionally indexed by element.
       */
      if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
         COPY_4V(val, CurrentMachine->VertAttribs[index]);
      }
      else {
         COPY_4V(val,
                 CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
      }
      break;
   case PROGRAM_OUTPUT:
      COPY_4V(val, CurrentMachine->Outputs[index]);
      break;
   case PROGRAM_TEMPORARY:
      COPY_4V(val, CurrentMachine->Temporaries[index]);
      break;
   default:
      /* the message names this function so the report can be traced */
      _mesa_problem(NULL,
                    "bad register file in _mesa_get_program_register");
   }
}
#endif /* FEATURE_MESA_program_debug */
96 * Return a pointer to the 4-element float vector specified by the given
99 static INLINE
const GLfloat
*
100 get_register_pointer( GLcontext
*ctx
,
101 const struct prog_src_register
*source
,
102 const struct gl_program_machine
*machine
)
104 /* XXX relative addressing... */
105 switch (source
->File
) {
106 case PROGRAM_TEMPORARY
:
107 ASSERT(source
->Index
< MAX_PROGRAM_TEMPS
);
108 return machine
->Temporaries
[source
->Index
];
111 if (machine
->CurProgram
->Target
== GL_VERTEX_PROGRAM_ARB
) {
112 ASSERT(source
->Index
< VERT_ATTRIB_MAX
);
113 return machine
->VertAttribs
[source
->Index
];
116 ASSERT(source
->Index
< FRAG_ATTRIB_MAX
);
117 return machine
->Attribs
[source
->Index
][machine
->CurElement
];
121 /* This is only for PRINT */
122 ASSERT(source
->Index
< FRAG_RESULT_MAX
);
123 return machine
->Outputs
[source
->Index
];
125 case PROGRAM_LOCAL_PARAM
:
126 ASSERT(source
->Index
< MAX_PROGRAM_LOCAL_PARAMS
);
127 return machine
->CurProgram
->LocalParams
[source
->Index
];
129 case PROGRAM_ENV_PARAM
:
130 ASSERT(source
->Index
< MAX_PROGRAM_ENV_PARAMS
);
131 if (machine
->CurProgram
->Target
== GL_VERTEX_PROGRAM_ARB
)
132 return ctx
->VertexProgram
.Parameters
[source
->Index
];
134 return ctx
->FragmentProgram
.Parameters
[source
->Index
];
136 case PROGRAM_STATE_VAR
:
138 case PROGRAM_CONSTANT
:
140 case PROGRAM_UNIFORM
:
142 case PROGRAM_NAMED_PARAM
:
143 ASSERT(source
->Index
<
144 (GLint
) machine
->CurProgram
->Parameters
->NumParameters
);
145 return machine
->CurProgram
->Parameters
->ParameterValues
[source
->Index
];
149 "Invalid input register file %d in get_register_pointer()",
157 * Fetch a 4-element float vector from the given source register.
158 * Apply swizzling and negating as needed.
161 fetch_vector4( GLcontext
*ctx
,
162 const struct prog_src_register
*source
,
163 const struct gl_program_machine
*machine
,
166 const GLfloat
*src
= get_register_pointer(ctx
, source
, machine
);
169 if (source
->Swizzle
== SWIZZLE_NOOP
) {
171 COPY_4V(result
, src
);
174 ASSERT(GET_SWZ(source
->Swizzle
, 0) <= 3);
175 ASSERT(GET_SWZ(source
->Swizzle
, 1) <= 3);
176 ASSERT(GET_SWZ(source
->Swizzle
, 2) <= 3);
177 ASSERT(GET_SWZ(source
->Swizzle
, 3) <= 3);
178 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
179 result
[1] = src
[GET_SWZ(source
->Swizzle
, 1)];
180 result
[2] = src
[GET_SWZ(source
->Swizzle
, 2)];
181 result
[3] = src
[GET_SWZ(source
->Swizzle
, 3)];
184 if (source
->NegateBase
) {
185 result
[0] = -result
[0];
186 result
[1] = -result
[1];
187 result
[2] = -result
[2];
188 result
[3] = -result
[3];
191 result
[0] = FABSF(result
[0]);
192 result
[1] = FABSF(result
[1]);
193 result
[2] = FABSF(result
[2]);
194 result
[3] = FABSF(result
[3]);
196 if (source
->NegateAbs
) {
197 result
[0] = -result
[0];
198 result
[1] = -result
[1];
199 result
[2] = -result
[2];
200 result
[3] = -result
[3];
206 * Fetch the derivative with respect to X for the given register.
207 * \return GL_TRUE if it was easily computed or GL_FALSE if we
208 * need to execute another instance of the program (ugh)!
211 fetch_vector4_deriv( GLcontext
*ctx
,
212 const struct prog_src_register
*source
,
214 char xOrY
, GLint column
, GLfloat result
[4] )
218 ASSERT(xOrY
== 'X' || xOrY
== 'Y');
220 switch (source
->Index
) {
221 case FRAG_ATTRIB_WPOS
:
225 src
[2] = span
->attrStepX
[FRAG_ATTRIB_WPOS
][2]
226 / ctx
->DrawBuffer
->_DepthMaxF
;
227 src
[3] = span
->attrStepX
[FRAG_ATTRIB_WPOS
][3];
232 src
[2] = span
->attrStepY
[FRAG_ATTRIB_WPOS
][2]
233 / ctx
->DrawBuffer
->_DepthMaxF
;
234 src
[3] = span
->attrStepY
[FRAG_ATTRIB_WPOS
][3];
237 case FRAG_ATTRIB_COL0
:
238 case FRAG_ATTRIB_COL1
:
240 src
[0] = span
->attrStepX
[source
->Index
][0] * (1.0F
/ CHAN_MAXF
);
241 src
[1] = span
->attrStepX
[source
->Index
][1] * (1.0F
/ CHAN_MAXF
);
242 src
[2] = span
->attrStepX
[source
->Index
][2] * (1.0F
/ CHAN_MAXF
);
243 src
[3] = span
->attrStepX
[source
->Index
][3] * (1.0F
/ CHAN_MAXF
);
246 src
[0] = span
->attrStepY
[source
->Index
][0] * (1.0F
/ CHAN_MAXF
);
247 src
[1] = span
->attrStepY
[source
->Index
][1] * (1.0F
/ CHAN_MAXF
);
248 src
[2] = span
->attrStepY
[source
->Index
][2] * (1.0F
/ CHAN_MAXF
);
249 src
[3] = span
->attrStepY
[source
->Index
][3] * (1.0F
/ CHAN_MAXF
);
252 case FRAG_ATTRIB_FOGC
:
254 src
[0] = span
->attrStepX
[FRAG_ATTRIB_FOGC
][0] * (1.0F
/ CHAN_MAXF
);
260 src
[0] = span
->attrStepY
[FRAG_ATTRIB_FOGC
][0] * (1.0F
/ CHAN_MAXF
);
267 assert(source
->Index
< FRAG_ATTRIB_MAX
);
268 /* texcoord or varying */
270 /* this is a little tricky - I think I've got it right */
271 const GLfloat invQ
= 1.0f
/ (span
->attrStart
[source
->Index
][3]
272 + span
->attrStepX
[source
->Index
][3] * column
);
273 src
[0] = span
->attrStepX
[source
->Index
][0] * invQ
;
274 src
[1] = span
->attrStepX
[source
->Index
][1] * invQ
;
275 src
[2] = span
->attrStepX
[source
->Index
][2] * invQ
;
276 src
[3] = span
->attrStepX
[source
->Index
][3] * invQ
;
279 /* Tricky, as above, but in Y direction */
280 const GLfloat invQ
= 1.0f
/ (span
->attrStart
[source
->Index
][3]
281 + span
->attrStepY
[source
->Index
][3]);
282 src
[0] = span
->attrStepY
[source
->Index
][0] * invQ
;
283 src
[1] = span
->attrStepY
[source
->Index
][1] * invQ
;
284 src
[2] = span
->attrStepY
[source
->Index
][2] * invQ
;
285 src
[3] = span
->attrStepY
[source
->Index
][3] * invQ
;
290 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
291 result
[1] = src
[GET_SWZ(source
->Swizzle
, 1)];
292 result
[2] = src
[GET_SWZ(source
->Swizzle
, 2)];
293 result
[3] = src
[GET_SWZ(source
->Swizzle
, 3)];
295 if (source
->NegateBase
) {
296 result
[0] = -result
[0];
297 result
[1] = -result
[1];
298 result
[2] = -result
[2];
299 result
[3] = -result
[3];
302 result
[0] = FABSF(result
[0]);
303 result
[1] = FABSF(result
[1]);
304 result
[2] = FABSF(result
[2]);
305 result
[3] = FABSF(result
[3]);
307 if (source
->NegateAbs
) {
308 result
[0] = -result
[0];
309 result
[1] = -result
[1];
310 result
[2] = -result
[2];
311 result
[3] = -result
[3];
319 * As above, but only return result[0] element.
322 fetch_vector1( GLcontext
*ctx
,
323 const struct prog_src_register
*source
,
324 const struct gl_program_machine
*machine
,
327 const GLfloat
*src
= get_register_pointer(ctx
, source
, machine
);
330 result
[0] = src
[GET_SWZ(source
->Swizzle
, 0)];
332 if (source
->NegateBase
) {
333 result
[0] = -result
[0];
336 result
[0] = FABSF(result
[0]);
338 if (source
->NegateAbs
) {
339 result
[0] = -result
[0];
345 * Test value against zero and return GT, LT, EQ or UN if NaN.
348 generate_cc( float value
)
351 return COND_UN
; /* NaN */
361 * Test if the ccMaskRule is satisfied by the given condition code.
362 * Used to mask destination writes according to the current condition code.
364 static INLINE GLboolean
365 test_cc(GLuint condCode
, GLuint ccMaskRule
)
367 switch (ccMaskRule
) {
368 case COND_EQ
: return (condCode
== COND_EQ
);
369 case COND_NE
: return (condCode
!= COND_EQ
);
370 case COND_LT
: return (condCode
== COND_LT
);
371 case COND_GE
: return (condCode
== COND_GT
|| condCode
== COND_EQ
);
372 case COND_LE
: return (condCode
== COND_LT
|| condCode
== COND_EQ
);
373 case COND_GT
: return (condCode
== COND_GT
);
374 case COND_TR
: return GL_TRUE
;
375 case COND_FL
: return GL_FALSE
;
376 default: return GL_TRUE
;
382 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
383 * or GL_FALSE to indicate result.
385 static INLINE GLboolean
386 eval_condition(const struct gl_program_machine
*machine
,
387 const struct prog_instruction
*inst
)
389 const GLuint swizzle
= inst
->DstReg
.CondSwizzle
;
390 const GLuint condMask
= inst
->DstReg
.CondMask
;
391 if (test_cc(machine
->CondCodes
[GET_SWZ(swizzle
, 0)], condMask
) ||
392 test_cc(machine
->CondCodes
[GET_SWZ(swizzle
, 1)], condMask
) ||
393 test_cc(machine
->CondCodes
[GET_SWZ(swizzle
, 2)], condMask
) ||
394 test_cc(machine
->CondCodes
[GET_SWZ(swizzle
, 3)], condMask
)) {
405 * Store 4 floats into a register. Observe the instructions saturate and
406 * set-condition-code flags.
409 store_vector4( const struct prog_instruction
*inst
,
410 struct gl_program_machine
*machine
,
411 const GLfloat value
[4] )
413 const struct prog_dst_register
*dest
= &(inst
->DstReg
);
414 const GLboolean clamp
= inst
->SaturateMode
== SATURATE_ZERO_ONE
;
417 GLfloat clampedValue
[4];
418 GLuint writeMask
= dest
->WriteMask
;
420 switch (dest
->File
) {
422 dstReg
= machine
->Outputs
[dest
->Index
];
424 case PROGRAM_TEMPORARY
:
425 dstReg
= machine
->Temporaries
[dest
->Index
];
427 case PROGRAM_WRITE_ONLY
:
431 _mesa_problem(NULL
, "bad register file in store_vector4(fp)");
436 if (value
[0] > 1.0e10
||
437 IS_INF_OR_NAN(value
[0]) ||
438 IS_INF_OR_NAN(value
[1]) ||
439 IS_INF_OR_NAN(value
[2]) ||
440 IS_INF_OR_NAN(value
[3]) )
441 printf("store %g %g %g %g\n", value
[0], value
[1], value
[2], value
[3]);
445 clampedValue
[0] = CLAMP(value
[0], 0.0F
, 1.0F
);
446 clampedValue
[1] = CLAMP(value
[1], 0.0F
, 1.0F
);
447 clampedValue
[2] = CLAMP(value
[2], 0.0F
, 1.0F
);
448 clampedValue
[3] = CLAMP(value
[3], 0.0F
, 1.0F
);
449 value
= clampedValue
;
452 if (dest
->CondMask
!= COND_TR
) {
453 /* condition codes may turn off some writes */
454 if (writeMask
& WRITEMASK_X
) {
455 if (!test_cc(machine
->CondCodes
[GET_SWZ(dest
->CondSwizzle
, 0)],
457 writeMask
&= ~WRITEMASK_X
;
459 if (writeMask
& WRITEMASK_Y
) {
460 if (!test_cc(machine
->CondCodes
[GET_SWZ(dest
->CondSwizzle
, 1)],
462 writeMask
&= ~WRITEMASK_Y
;
464 if (writeMask
& WRITEMASK_Z
) {
465 if (!test_cc(machine
->CondCodes
[GET_SWZ(dest
->CondSwizzle
, 2)],
467 writeMask
&= ~WRITEMASK_Z
;
469 if (writeMask
& WRITEMASK_W
) {
470 if (!test_cc(machine
->CondCodes
[GET_SWZ(dest
->CondSwizzle
, 3)],
472 writeMask
&= ~WRITEMASK_W
;
476 if (writeMask
& WRITEMASK_X
)
477 dstReg
[0] = value
[0];
478 if (writeMask
& WRITEMASK_Y
)
479 dstReg
[1] = value
[1];
480 if (writeMask
& WRITEMASK_Z
)
481 dstReg
[2] = value
[2];
482 if (writeMask
& WRITEMASK_W
)
483 dstReg
[3] = value
[3];
485 if (inst
->CondUpdate
) {
486 if (writeMask
& WRITEMASK_X
)
487 machine
->CondCodes
[0] = generate_cc(value
[0]);
488 if (writeMask
& WRITEMASK_Y
)
489 machine
->CondCodes
[1] = generate_cc(value
[1]);
490 if (writeMask
& WRITEMASK_Z
)
491 machine
->CondCodes
[2] = generate_cc(value
[2]);
492 if (writeMask
& WRITEMASK_W
)
493 machine
->CondCodes
[3] = generate_cc(value
[3]);
500 * Initialize a new machine state instance from an existing one, adding
501 * the partial derivatives onto the input registers.
502 * Used to implement DDX and DDY instructions in non-trivial cases.
505 init_machine_deriv( GLcontext
*ctx
,
506 const struct gl_program_machine
*machine
,
507 const struct gl_fragment_program
*program
,
508 const SWspan
*span
, char xOrY
,
509 struct gl_program_machine
*dMachine
)
513 ASSERT(xOrY
== 'X' || xOrY
== 'Y');
515 /* copy existing machine */
516 _mesa_memcpy(dMachine
, machine
, sizeof(struct gl_program_machine
));
518 if (program
->Base
.Target
== GL_FRAGMENT_PROGRAM_NV
) {
519 /* XXX also need to do this when using valgrind */
520 /* Clear temporary registers (undefined for ARB_f_p) */
521 _mesa_bzero( (void*) machine
->Temporaries
,
522 MAX_PROGRAM_TEMPS
* 4 * sizeof(GLfloat
));
525 /* Add derivatives */
526 if (program
->Base
.InputsRead
& FRAG_BIT_WPOS
) {
527 GLfloat
*wpos
= machine
->Attribs
[FRAG_ATTRIB_WPOS
][machine
->CurElement
];
531 wpos
[2] += span
->attrStepX
[FRAG_ATTRIB_WPOS
][2];
532 wpos
[3] += span
->attrStepX
[FRAG_ATTRIB_WPOS
][3];
537 wpos
[2] += span
->attrStepY
[FRAG_ATTRIB_WPOS
][2];
538 wpos
[3] += span
->attrStepY
[FRAG_ATTRIB_WPOS
][3];
542 /* primary, secondary colors */
543 for (attr
= FRAG_ATTRIB_COL0
; attr
<= FRAG_ATTRIB_COL1
; attr
++) {
544 if (program
->Base
.InputsRead
& (1 << attr
)) {
545 GLfloat
*col
= machine
->Attribs
[attr
][machine
->CurElement
];
547 col
[0] += span
->attrStepX
[attr
][0] * (1.0F
/ CHAN_MAXF
);
548 col
[1] += span
->attrStepX
[attr
][1] * (1.0F
/ CHAN_MAXF
);
549 col
[2] += span
->attrStepX
[attr
][2] * (1.0F
/ CHAN_MAXF
);
550 col
[3] += span
->attrStepX
[attr
][3] * (1.0F
/ CHAN_MAXF
);
553 col
[0] += span
->attrStepY
[attr
][0] * (1.0F
/ CHAN_MAXF
);
554 col
[1] += span
->attrStepY
[attr
][1] * (1.0F
/ CHAN_MAXF
);
555 col
[2] += span
->attrStepY
[attr
][2] * (1.0F
/ CHAN_MAXF
);
556 col
[3] += span
->attrStepY
[attr
][3] * (1.0F
/ CHAN_MAXF
);
560 if (program
->Base
.InputsRead
& FRAG_BIT_FOGC
) {
561 GLfloat
*fogc
= machine
->Attribs
[FRAG_ATTRIB_FOGC
][machine
->CurElement
];
563 fogc
[0] += span
->attrStepX
[FRAG_ATTRIB_FOGC
][0];
566 fogc
[0] += span
->attrStepY
[FRAG_ATTRIB_FOGC
][0];
569 /* texcoord and varying vars */
570 for (attr
= FRAG_ATTRIB_TEX0
; attr
< FRAG_ATTRIB_MAX
; attr
++) {
571 if (program
->Base
.InputsRead
& (1 << attr
)) {
572 GLfloat
*val
= machine
->Attribs
[attr
][machine
->CurElement
];
573 /* XXX perspective-correct interpolation */
575 val
[0] += span
->attrStepX
[attr
][0];
576 val
[1] += span
->attrStepX
[attr
][1];
577 val
[2] += span
->attrStepX
[attr
][2];
578 val
[3] += span
->attrStepX
[attr
][3];
581 val
[0] += span
->attrStepY
[attr
][0];
582 val
[1] += span
->attrStepY
[attr
][1];
583 val
[2] += span
->attrStepY
[attr
][2];
584 val
[3] += span
->attrStepY
[attr
][3];
589 /* init condition codes */
590 dMachine
->CondCodes
[0] = COND_EQ
;
591 dMachine
->CondCodes
[1] = COND_EQ
;
592 dMachine
->CondCodes
[2] = COND_EQ
;
593 dMachine
->CondCodes
[3] = COND_EQ
;
599 * Execute the given vertex/fragment program.
601 * \param ctx - rendering context
602 * \param program - the fragment program to execute
603 * \param machine - machine state (register file)
604 * \param maxInst - max number of instructions to execute
605 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
608 _mesa_execute_program(GLcontext
*ctx
,
609 const struct gl_program
*program
, GLuint maxInst
,
610 struct gl_program_machine
*machine
, GLuint element
)
612 const GLuint MAX_EXEC
= 10000;
615 machine
->CurProgram
= program
;
618 printf("execute program %u --------------------\n", program
->Id
);
621 #if FEATURE_MESA_program_debug
622 CurrentMachine
= machine
;
625 for (pc
= 0; pc
< maxInst
; pc
++) {
626 const struct prog_instruction
*inst
= program
->Instructions
+ pc
;
628 #if FEATURE_MESA_program_debug
629 if (ctx
->FragmentProgram
.CallbackEnabled
&&
630 ctx
->FragmentProgram
.Callback
) {
631 ctx
->FragmentProgram
.CurrentPosition
= inst
->StringPos
;
632 ctx
->FragmentProgram
.Callback(program
->Target
,
633 ctx
->FragmentProgram
.CallbackData
);
638 _mesa_print_instruction(inst
);
641 switch (inst
->Opcode
) {
644 GLfloat a
[4], result
[4];
645 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
646 result
[0] = FABSF(a
[0]);
647 result
[1] = FABSF(a
[1]);
648 result
[2] = FABSF(a
[2]);
649 result
[3] = FABSF(a
[3]);
650 store_vector4( inst
, machine
, result
);
655 GLfloat a
[4], b
[4], result
[4];
656 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
657 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
658 result
[0] = a
[0] + b
[0];
659 result
[1] = a
[1] + b
[1];
660 result
[2] = a
[2] + b
[2];
661 result
[3] = a
[3] + b
[3];
662 store_vector4( inst
, machine
, result
);
664 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
665 result
[0], result
[1], result
[2], result
[3],
666 a
[0], a
[1], a
[2], a
[3],
667 b
[0], b
[1], b
[2], b
[3]);
675 /* subtract 1 here since pc is incremented by for(pc) loop */
676 pc
= inst
->BranchTarget
- 1; /* go to matching BNGLOOP */
678 case OPCODE_BGNSUB
: /* begin subroutine */
680 case OPCODE_ENDSUB
: /* end subroutine */
682 case OPCODE_BRA
: /* branch (conditional) */
684 case OPCODE_BRK
: /* break out of loop (conditional) */
686 case OPCODE_CONT
: /* continue loop (conditional) */
687 if (eval_condition(machine
, inst
)) {
689 /* Subtract 1 here since we'll do pc++ at end of for-loop */
690 pc
= inst
->BranchTarget
- 1;
693 case OPCODE_CAL
: /* Call subroutine (conditional) */
694 if (eval_condition(machine
, inst
)) {
695 /* call the subroutine */
696 if (machine
->StackDepth
>= MAX_PROGRAM_CALL_DEPTH
) {
697 return GL_TRUE
; /* Per GL_NV_vertex_program2 spec */
699 machine
->CallStack
[machine
->StackDepth
++] = pc
+ 1;
700 pc
= inst
->BranchTarget
; /* XXX - 1 ??? */
705 GLfloat a
[4], b
[4], c
[4], result
[4];
706 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
707 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
708 fetch_vector4( ctx
, &inst
->SrcReg
[2], machine
, c
);
709 result
[0] = a
[0] < 0.0F
? b
[0] : c
[0];
710 result
[1] = a
[1] < 0.0F
? b
[1] : c
[1];
711 result
[2] = a
[2] < 0.0F
? b
[2] : c
[2];
712 result
[3] = a
[3] < 0.0F
? b
[3] : c
[3];
713 store_vector4( inst
, machine
, result
);
718 GLfloat a
[4], result
[4];
719 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
720 result
[0] = result
[1] = result
[2] = result
[3]
721 = (GLfloat
) _mesa_cos(a
[0]);
722 store_vector4( inst
, machine
, result
);
725 case OPCODE_DDX
: /* Partial derivative with respect to X */
728 GLfloat a
[4], aNext
[4], result
[4];
729 struct gl_program_machine dMachine
;
730 if (!fetch_vector4_deriv(ctx
, &inst
->SrcReg
[0], span
, 'X',
732 /* This is tricky. Make a copy of the current machine state,
733 * increment the input registers by the dx or dy partial
734 * derivatives, then re-execute the program up to the
735 * preceeding instruction, then fetch the source register.
736 * Finally, find the difference in the register values for
737 * the original and derivative runs.
739 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, program
, a
);
740 init_machine_deriv(ctx
, machine
, program
, span
,
742 execute_program(ctx
, program
, pc
, &dMachine
, span
, column
);
743 fetch_vector4( ctx
, &inst
->SrcReg
[0], &dMachine
, program
, aNext
);
744 result
[0] = aNext
[0] - a
[0];
745 result
[1] = aNext
[1] - a
[1];
746 result
[2] = aNext
[2] - a
[2];
747 result
[3] = aNext
[3] - a
[3];
749 store_vector4( inst
, machine
, result
);
751 static const GLfloat result
[4] = { 0, 0, 0, 0 };
752 store_vector4( inst
, machine
, result
);
756 case OPCODE_DDY
: /* Partial derivative with respect to Y */
759 GLfloat a
[4], aNext
[4], result
[4];
760 struct gl_program_machine dMachine
;
761 if (!fetch_vector4_deriv(ctx
, &inst
->SrcReg
[0], span
, 'Y',
763 init_machine_deriv(ctx
, machine
, program
, span
,
765 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, program
, a
);
766 execute_program(ctx
, program
, pc
, &dMachine
, span
, column
);
767 fetch_vector4( ctx
, &inst
->SrcReg
[0], &dMachine
, program
, aNext
);
768 result
[0] = aNext
[0] - a
[0];
769 result
[1] = aNext
[1] - a
[1];
770 result
[2] = aNext
[2] - a
[2];
771 result
[3] = aNext
[3] - a
[3];
773 store_vector4( inst
, machine
, result
);
775 static const GLfloat result
[4] = { 0, 0, 0, 0 };
776 store_vector4( inst
, machine
, result
);
782 GLfloat a
[4], b
[4], result
[4];
783 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
784 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
785 result
[0] = result
[1] = result
[2] = result
[3] = DOT3(a
, b
);
786 store_vector4( inst
, machine
, result
);
788 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
789 result
[0], a
[0], a
[1], a
[2], b
[0], b
[1], b
[2]);
795 GLfloat a
[4], b
[4], result
[4];
796 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
797 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
798 result
[0] = result
[1] = result
[2] = result
[3] = DOT4(a
,b
);
799 store_vector4( inst
, machine
, result
);
801 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
802 result
[0], a
[0], a
[1], a
[2], a
[3],
803 b
[0], b
[1], b
[2], b
[3]);
809 GLfloat a
[4], b
[4], result
[4];
810 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
811 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
812 result
[0] = result
[1] = result
[2] = result
[3] =
813 a
[0] * b
[0] + a
[1] * b
[1] + a
[2] * b
[2] + b
[3];
814 store_vector4( inst
, machine
, result
);
817 case OPCODE_DST
: /* Distance vector */
819 GLfloat a
[4], b
[4], result
[4];
820 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
821 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
823 result
[1] = a
[1] * b
[1];
826 store_vector4( inst
, machine
, result
);
829 case OPCODE_EX2
: /* Exponential base 2 */
831 GLfloat a
[4], result
[4];
832 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
833 result
[0] = result
[1] = result
[2] = result
[3] =
834 (GLfloat
) _mesa_pow(2.0, a
[0]);
835 store_vector4( inst
, machine
, result
);
840 GLfloat a
[4], result
[4];
841 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
842 result
[0] = FLOORF(a
[0]);
843 result
[1] = FLOORF(a
[1]);
844 result
[2] = FLOORF(a
[2]);
845 result
[3] = FLOORF(a
[3]);
846 store_vector4( inst
, machine
, result
);
851 GLfloat a
[4], result
[4];
852 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
853 result
[0] = a
[0] - FLOORF(a
[0]);
854 result
[1] = a
[1] - FLOORF(a
[1]);
855 result
[2] = a
[2] - FLOORF(a
[2]);
856 result
[3] = a
[3] - FLOORF(a
[3]);
857 store_vector4( inst
, machine
, result
);
861 if (eval_condition(machine
, inst
)) {
862 /* do if-clause (just continue execution) */
865 /* go to the instruction after ELSE or ENDIF */
866 assert(inst
->BranchTarget
>= 0);
867 pc
= inst
->BranchTarget
- 1;
872 assert(inst
->BranchTarget
>= 0);
873 pc
= inst
->BranchTarget
- 1;
878 case OPCODE_INT
: /* float to int */
880 GLfloat a
[4], result
[4];
881 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
882 result
[0] = (GLfloat
) (GLint
) a
[0];
883 result
[1] = (GLfloat
) (GLint
) a
[1];
884 result
[2] = (GLfloat
) (GLint
) a
[2];
885 result
[3] = (GLfloat
) (GLint
) a
[3];
886 store_vector4( inst
, machine
, result
);
889 case OPCODE_KIL_NV
: /* NV_f_p only (conditional) */
890 if (eval_condition(machine
, inst
)) {
894 case OPCODE_KIL
: /* ARB_f_p only */
897 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
898 if (a
[0] < 0.0F
|| a
[1] < 0.0F
|| a
[2] < 0.0F
|| a
[3] < 0.0F
) {
903 case OPCODE_LG2
: /* log base 2 */
905 GLfloat a
[4], result
[4];
906 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
907 result
[0] = result
[1] = result
[2] = result
[3] = LOG2(a
[0]);
908 store_vector4( inst
, machine
, result
);
913 const GLfloat epsilon
= 1.0F
/ 256.0F
; /* from NV VP spec */
914 GLfloat a
[4], result
[4];
915 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
916 a
[0] = MAX2(a
[0], 0.0F
);
917 a
[1] = MAX2(a
[1], 0.0F
);
918 /* XXX ARB version clamps a[3], NV version doesn't */
919 a
[3] = CLAMP(a
[3], -(128.0F
- epsilon
), (128.0F
- epsilon
));
922 /* XXX we could probably just use pow() here */
924 if (a
[1] == 0.0 && a
[3] == 0.0)
927 result
[2] = EXPF(a
[3] * LOGF(a
[1]));
933 store_vector4( inst
, machine
, result
);
935 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
936 result
[0], result
[1], result
[2], result
[3],
937 a
[0], a
[1], a
[2], a
[3]);
943 GLfloat a
[4], b
[4], c
[4], result
[4];
944 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
945 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
946 fetch_vector4( ctx
, &inst
->SrcReg
[2], machine
, c
);
947 result
[0] = a
[0] * b
[0] + (1.0F
- a
[0]) * c
[0];
948 result
[1] = a
[1] * b
[1] + (1.0F
- a
[1]) * c
[1];
949 result
[2] = a
[2] * b
[2] + (1.0F
- a
[2]) * c
[2];
950 result
[3] = a
[3] * b
[3] + (1.0F
- a
[3]) * c
[3];
951 store_vector4( inst
, machine
, result
);
953 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
954 "(%g %g %g %g), (%g %g %g %g)\n",
955 result
[0], result
[1], result
[2], result
[3],
956 a
[0], a
[1], a
[2], a
[3],
957 b
[0], b
[1], b
[2], b
[3],
958 c
[0], c
[1], c
[2], c
[3]);
964 GLfloat a
[4], b
[4], c
[4], result
[4];
965 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
966 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
967 fetch_vector4( ctx
, &inst
->SrcReg
[2], machine
, c
);
968 result
[0] = a
[0] * b
[0] + c
[0];
969 result
[1] = a
[1] * b
[1] + c
[1];
970 result
[2] = a
[2] * b
[2] + c
[2];
971 result
[3] = a
[3] * b
[3] + c
[3];
972 store_vector4( inst
, machine
, result
);
974 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
975 "(%g %g %g %g) + (%g %g %g %g)\n",
976 result
[0], result
[1], result
[2], result
[3],
977 a
[0], a
[1], a
[2], a
[3],
978 b
[0], b
[1], b
[2], b
[3],
979 c
[0], c
[1], c
[2], c
[3]);
985 GLfloat a
[4], b
[4], result
[4];
986 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
987 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
988 result
[0] = MAX2(a
[0], b
[0]);
989 result
[1] = MAX2(a
[1], b
[1]);
990 result
[2] = MAX2(a
[2], b
[2]);
991 result
[3] = MAX2(a
[3], b
[3]);
992 store_vector4( inst
, machine
, result
);
994 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
995 result
[0], result
[1], result
[2], result
[3],
996 a
[0], a
[1], a
[2], a
[3],
997 b
[0], b
[1], b
[2], b
[3]);
1003 GLfloat a
[4], b
[4], result
[4];
1004 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1005 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1006 result
[0] = MIN2(a
[0], b
[0]);
1007 result
[1] = MIN2(a
[1], b
[1]);
1008 result
[2] = MIN2(a
[2], b
[2]);
1009 result
[3] = MIN2(a
[3], b
[3]);
1010 store_vector4( inst
, machine
, result
);
1016 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, result
);
1017 store_vector4( inst
, machine
, result
);
1019 printf("MOV (%g %g %g %g)\n",
1020 result
[0], result
[1], result
[2], result
[3]);
1026 GLfloat a
[4], b
[4], result
[4];
1027 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1028 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1029 result
[0] = a
[0] * b
[0];
1030 result
[1] = a
[1] * b
[1];
1031 result
[2] = a
[2] * b
[2];
1032 result
[3] = a
[3] * b
[3];
1033 store_vector4( inst
, machine
, result
);
1035 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1036 result
[0], result
[1], result
[2], result
[3],
1037 a
[0], a
[1], a
[2], a
[3],
1038 b
[0], b
[1], b
[2], b
[3]);
1044 GLfloat a
[4], result
[4];
1045 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1049 result
[3] = _slang_library_noise1(a
[0]);
1050 store_vector4( inst
, machine
, result
);
1055 GLfloat a
[4], result
[4];
1056 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1060 result
[3] = _slang_library_noise2(a
[0], a
[1]);
1061 store_vector4( inst
, machine
, result
);
1066 GLfloat a
[4], result
[4];
1067 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1071 result
[3] = _slang_library_noise3(a
[0], a
[1], a
[2]);
1072 store_vector4( inst
, machine
, result
);
1077 GLfloat a
[4], result
[4];
1078 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1082 result
[3] = _slang_library_noise4(a
[0], a
[1], a
[2], a
[3]);
1083 store_vector4( inst
, machine
, result
);
1088 case OPCODE_PK2H
: /* pack two 16-bit floats in one 32-bit float */
1090 GLfloat a
[4], result
[4];
1092 GLuint
*rawResult
= (GLuint
*) result
;
1094 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1095 hx
= _mesa_float_to_half(a
[0]);
1096 hy
= _mesa_float_to_half(a
[1]);
1097 twoHalves
= hx
| (hy
<< 16);
1098 rawResult
[0] = rawResult
[1] = rawResult
[2] = rawResult
[3]
1100 store_vector4( inst
, machine
, result
);
1103 case OPCODE_PK2US
: /* pack two GLushorts into one 32-bit float */
1105 GLfloat a
[4], result
[4];
1106 GLuint usx
, usy
, *rawResult
= (GLuint
*) result
;
1107 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1108 a
[0] = CLAMP(a
[0], 0.0F
, 1.0F
);
1109 a
[1] = CLAMP(a
[1], 0.0F
, 1.0F
);
1110 usx
= IROUND(a
[0] * 65535.0F
);
1111 usy
= IROUND(a
[1] * 65535.0F
);
1112 rawResult
[0] = rawResult
[1] = rawResult
[2] = rawResult
[3]
1113 = usx
| (usy
<< 16);
1114 store_vector4( inst
, machine
, result
);
1117 case OPCODE_PK4B
: /* pack four GLbytes into one 32-bit float */
1119 GLfloat a
[4], result
[4];
1120 GLuint ubx
, uby
, ubz
, ubw
, *rawResult
= (GLuint
*) result
;
1121 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1122 a
[0] = CLAMP(a
[0], -128.0F
/ 127.0F
, 1.0F
);
1123 a
[1] = CLAMP(a
[1], -128.0F
/ 127.0F
, 1.0F
);
1124 a
[2] = CLAMP(a
[2], -128.0F
/ 127.0F
, 1.0F
);
1125 a
[3] = CLAMP(a
[3], -128.0F
/ 127.0F
, 1.0F
);
1126 ubx
= IROUND(127.0F
* a
[0] + 128.0F
);
1127 uby
= IROUND(127.0F
* a
[1] + 128.0F
);
1128 ubz
= IROUND(127.0F
* a
[2] + 128.0F
);
1129 ubw
= IROUND(127.0F
* a
[3] + 128.0F
);
1130 rawResult
[0] = rawResult
[1] = rawResult
[2] = rawResult
[3]
1131 = ubx
| (uby
<< 8) | (ubz
<< 16) | (ubw
<< 24);
1132 store_vector4( inst
, machine
, result
);
1135 case OPCODE_PK4UB
: /* pack four GLubytes into one 32-bit float */
1137 GLfloat a
[4], result
[4];
1138 GLuint ubx
, uby
, ubz
, ubw
, *rawResult
= (GLuint
*) result
;
1139 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1140 a
[0] = CLAMP(a
[0], 0.0F
, 1.0F
);
1141 a
[1] = CLAMP(a
[1], 0.0F
, 1.0F
);
1142 a
[2] = CLAMP(a
[2], 0.0F
, 1.0F
);
1143 a
[3] = CLAMP(a
[3], 0.0F
, 1.0F
);
1144 ubx
= IROUND(255.0F
* a
[0]);
1145 uby
= IROUND(255.0F
* a
[1]);
1146 ubz
= IROUND(255.0F
* a
[2]);
1147 ubw
= IROUND(255.0F
* a
[3]);
1148 rawResult
[0] = rawResult
[1] = rawResult
[2] = rawResult
[3]
1149 = ubx
| (uby
<< 8) | (ubz
<< 16) | (ubw
<< 24);
1150 store_vector4( inst
, machine
, result
);
1155 GLfloat a
[4], b
[4], result
[4];
1156 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1157 fetch_vector1( ctx
, &inst
->SrcReg
[1], machine
, b
);
1158 result
[0] = result
[1] = result
[2] = result
[3]
1159 = (GLfloat
)_mesa_pow(a
[0], b
[0]);
1160 store_vector4( inst
, machine
, result
);
1165 GLfloat a
[4], result
[4];
1166 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1170 else if (IS_INF_OR_NAN(a
[0]))
1171 printf("RCP(inf)\n");
1173 result
[0] = result
[1] = result
[2] = result
[3] = 1.0F
/ a
[0];
1174 store_vector4( inst
, machine
, result
);
1177 case OPCODE_RET
: /* return from subroutine (conditional) */
1178 if (eval_condition(machine
, inst
)) {
1179 if (machine
->StackDepth
== 0) {
1180 return GL_TRUE
; /* Per GL_NV_vertex_program2 spec */
1182 pc
= machine
->CallStack
[--machine
->StackDepth
];
1185 case OPCODE_RFL
: /* reflection vector */
1187 GLfloat axis
[4], dir
[4], result
[4], tmpX
, tmpW
;
1188 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, axis
);
1189 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, dir
);
1190 tmpW
= DOT3(axis
, axis
);
1191 tmpX
= (2.0F
* DOT3(axis
, dir
)) / tmpW
;
1192 result
[0] = tmpX
* axis
[0] - dir
[0];
1193 result
[1] = tmpX
* axis
[1] - dir
[1];
1194 result
[2] = tmpX
* axis
[2] - dir
[2];
1195 /* result[3] is never written! XXX enforce in parser! */
1196 store_vector4( inst
, machine
, result
);
1199 case OPCODE_RSQ
: /* 1 / sqrt() */
1201 GLfloat a
[4], result
[4];
1202 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1204 result
[0] = result
[1] = result
[2] = result
[3] = INV_SQRTF(a
[0]);
1205 store_vector4( inst
, machine
, result
);
1207 printf("RSQ %g = 1/sqrt(|%g|)\n", result
[0], a
[0]);
1211 case OPCODE_SCS
: /* sine and cos */
1213 GLfloat a
[4], result
[4];
1214 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1215 result
[0] = (GLfloat
) _mesa_cos(a
[0]);
1216 result
[1] = (GLfloat
) _mesa_sin(a
[0]);
1217 result
[2] = 0.0; /* undefined! */
1218 result
[3] = 0.0; /* undefined! */
1219 store_vector4( inst
, machine
, result
);
1222 case OPCODE_SEQ
: /* set on equal */
1224 GLfloat a
[4], b
[4], result
[4];
1225 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1226 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1227 result
[0] = (a
[0] == b
[0]) ? 1.0F
: 0.0F
;
1228 result
[1] = (a
[1] == b
[1]) ? 1.0F
: 0.0F
;
1229 result
[2] = (a
[2] == b
[2]) ? 1.0F
: 0.0F
;
1230 result
[3] = (a
[3] == b
[3]) ? 1.0F
: 0.0F
;
1231 store_vector4( inst
, machine
, result
);
1234 case OPCODE_SFL
: /* set false, operands ignored */
1236 static const GLfloat result
[4] = { 0.0F
, 0.0F
, 0.0F
, 0.0F
};
1237 store_vector4( inst
, machine
, result
);
1240 case OPCODE_SGE
: /* set on greater or equal */
1242 GLfloat a
[4], b
[4], result
[4];
1243 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1244 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1245 result
[0] = (a
[0] >= b
[0]) ? 1.0F
: 0.0F
;
1246 result
[1] = (a
[1] >= b
[1]) ? 1.0F
: 0.0F
;
1247 result
[2] = (a
[2] >= b
[2]) ? 1.0F
: 0.0F
;
1248 result
[3] = (a
[3] >= b
[3]) ? 1.0F
: 0.0F
;
1249 store_vector4( inst
, machine
, result
);
1252 case OPCODE_SGT
: /* set on greater */
1254 GLfloat a
[4], b
[4], result
[4];
1255 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1256 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1257 result
[0] = (a
[0] > b
[0]) ? 1.0F
: 0.0F
;
1258 result
[1] = (a
[1] > b
[1]) ? 1.0F
: 0.0F
;
1259 result
[2] = (a
[2] > b
[2]) ? 1.0F
: 0.0F
;
1260 result
[3] = (a
[3] > b
[3]) ? 1.0F
: 0.0F
;
1261 store_vector4( inst
, machine
, result
);
1263 printf("SGT %g %g %g %g\n",
1264 result
[0], result
[1], result
[2], result
[3]);
1270 GLfloat a
[4], result
[4];
1271 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1272 result
[0] = result
[1] = result
[2] = result
[3]
1273 = (GLfloat
) _mesa_sin(a
[0]);
1274 store_vector4( inst
, machine
, result
);
1277 case OPCODE_SLE
: /* set on less or equal */
1279 GLfloat a
[4], b
[4], result
[4];
1280 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1281 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1282 result
[0] = (a
[0] <= b
[0]) ? 1.0F
: 0.0F
;
1283 result
[1] = (a
[1] <= b
[1]) ? 1.0F
: 0.0F
;
1284 result
[2] = (a
[2] <= b
[2]) ? 1.0F
: 0.0F
;
1285 result
[3] = (a
[3] <= b
[3]) ? 1.0F
: 0.0F
;
1286 store_vector4( inst
, machine
, result
);
1289 case OPCODE_SLT
: /* set on less */
1291 GLfloat a
[4], b
[4], result
[4];
1292 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1293 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1294 result
[0] = (a
[0] < b
[0]) ? 1.0F
: 0.0F
;
1295 result
[1] = (a
[1] < b
[1]) ? 1.0F
: 0.0F
;
1296 result
[2] = (a
[2] < b
[2]) ? 1.0F
: 0.0F
;
1297 result
[3] = (a
[3] < b
[3]) ? 1.0F
: 0.0F
;
1298 store_vector4( inst
, machine
, result
);
1301 case OPCODE_SNE
: /* set on not equal */
1303 GLfloat a
[4], b
[4], result
[4];
1304 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1305 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1306 result
[0] = (a
[0] != b
[0]) ? 1.0F
: 0.0F
;
1307 result
[1] = (a
[1] != b
[1]) ? 1.0F
: 0.0F
;
1308 result
[2] = (a
[2] != b
[2]) ? 1.0F
: 0.0F
;
1309 result
[3] = (a
[3] != b
[3]) ? 1.0F
: 0.0F
;
1310 store_vector4( inst
, machine
, result
);
1313 case OPCODE_STR
: /* set true, operands ignored */
1315 static const GLfloat result
[4] = { 1.0F
, 1.0F
, 1.0F
, 1.0F
};
1316 store_vector4( inst
, machine
, result
);
1321 GLfloat a
[4], b
[4], result
[4];
1322 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1323 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1324 result
[0] = a
[0] - b
[0];
1325 result
[1] = a
[1] - b
[1];
1326 result
[2] = a
[2] - b
[2];
1327 result
[3] = a
[3] - b
[3];
1328 store_vector4( inst
, machine
, result
);
1330 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1331 result
[0], result
[1], result
[2], result
[3],
1332 a
[0], a
[1], a
[2], a
[3], b
[0], b
[1], b
[2], b
[3]);
1336 case OPCODE_SWZ
: /* extended swizzle */
1338 const struct prog_src_register
*source
= &inst
->SrcReg
[0];
1339 const GLfloat
*src
= get_register_pointer(ctx
, source
, machine
);
1342 for (i
= 0; i
< 4; i
++) {
1343 const GLuint swz
= GET_SWZ(source
->Swizzle
, i
);
1344 if (swz
== SWIZZLE_ZERO
)
1346 else if (swz
== SWIZZLE_ONE
)
1351 result
[i
] = src
[swz
];
1353 if (source
->NegateBase
& (1 << i
))
1354 result
[i
] = -result
[i
];
1356 store_vector4( inst
, machine
, result
);
1359 case OPCODE_TEX
: /* Both ARB and NV frag prog */
1362 /* Note: only use the precomputed lambda value when we're
1363 * sampling texture unit [K] with texcoord[K].
1364 * Otherwise, the lambda value may have no relation to the
1365 * instruction's texcoord or texture image. Using the wrong
1366 * lambda is usually bad news.
1367 * The rest of the time, just use zero (until we get a more
1368 * sophisticated way of computing lambda).
1370 GLfloat coord
[4], color
[4], lambda
;
1372 if (inst
->SrcReg
[0].File
== PROGRAM_INPUT
&&
1373 inst
->SrcReg
[0].Index
== FRAG_ATTRIB_TEX0
+inst
->TexSrcUnit
)
1374 lambda
= span
->array
->lambda
[inst
->TexSrcUnit
][column
];
1378 fetch_vector4(ctx
, &inst
->SrcReg
[0], machine
, coord
);
1379 machine
->FetchTexelLod(ctx
, coord
, lambda
, inst
->TexSrcUnit
, color
);
1381 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1383 color
[0], color
[1], color
[2], color
[3],
1385 coord
[0], coord
[1], coord
[2], coord
[3], lambda
);
1387 store_vector4( inst
, machine
, color
);
1390 case OPCODE_TXB
: /* GL_ARB_fragment_program only */
1391 /* Texel lookup with LOD bias */
1393 const struct gl_texture_unit
*texUnit
1394 = &ctx
->Texture
.Unit
[inst
->TexSrcUnit
];
1395 GLfloat coord
[4], color
[4], lambda
, bias
;
1397 if (inst
->SrcReg
[0].File
== PROGRAM_INPUT
&&
1398 inst
->SrcReg
[0].Index
== FRAG_ATTRIB_TEX0
+inst
->TexSrcUnit
)
1399 lambda
= span
->array
->lambda
[inst
->TexSrcUnit
][column
];
1403 fetch_vector4(ctx
, &inst
->SrcReg
[0], machine
, coord
);
1404 /* coord[3] is the bias to add to lambda */
1405 bias
= texUnit
->LodBias
+ coord
[3];
1406 if (texUnit
->_Current
)
1407 bias
+= texUnit
->_Current
->LodBias
;
1408 machine
->FetchTexelLod(ctx
, coord
, lambda
+ bias
,
1409 inst
->TexSrcUnit
, color
);
1410 store_vector4( inst
, machine
, color
);
1413 case OPCODE_TXD
: /* GL_NV_fragment_program only */
1414 /* Texture lookup w/ partial derivatives for LOD */
1416 GLfloat texcoord
[4], dtdx
[4], dtdy
[4], color
[4];
1417 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, texcoord
);
1418 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, dtdx
);
1419 fetch_vector4( ctx
, &inst
->SrcReg
[2], machine
, dtdy
);
1420 machine
->FetchTexelDeriv(ctx
, texcoord
, dtdx
, dtdy
,
1421 inst
->TexSrcUnit
, color
);
1422 store_vector4( inst
, machine
, color
);
1425 case OPCODE_TXP
: /* GL_ARB_fragment_program only */
1426 /* Texture lookup w/ projective divide */
1428 GLfloat texcoord
[4], color
[4], lambda
;
1430 if (inst
->SrcReg
[0].File
== PROGRAM_INPUT
&&
1431 inst
->SrcReg
[0].Index
== FRAG_ATTRIB_TEX0
+inst
->TexSrcUnit
)
1432 lambda
= span
->array
->lambda
[inst
->TexSrcUnit
][column
];
1436 fetch_vector4(ctx
, &inst
->SrcReg
[0], machine
, texcoord
);
1437 /* Not so sure about this test - if texcoord[3] is
1438 * zero, we'd probably be fine except for an ASSERT in
1439 * IROUND_POS() which gets triggered by the inf values created.
1441 if (texcoord
[3] != 0.0) {
1442 texcoord
[0] /= texcoord
[3];
1443 texcoord
[1] /= texcoord
[3];
1444 texcoord
[2] /= texcoord
[3];
1446 machine
->FetchTexelLod(ctx
, texcoord
, lambda
,
1447 inst
->TexSrcUnit
, color
);
1448 store_vector4( inst
, machine
, color
);
1451 case OPCODE_TXP_NV
: /* GL_NV_fragment_program only */
1452 /* Texture lookup w/ projective divide */
1454 GLfloat texcoord
[4], color
[4], lambda
;
1456 if (inst
->SrcReg
[0].File
== PROGRAM_INPUT
&&
1457 inst
->SrcReg
[0].Index
== FRAG_ATTRIB_TEX0
+inst
->TexSrcUnit
)
1458 lambda
= span
->array
->lambda
[inst
->TexSrcUnit
][column
];
1462 fetch_vector4(ctx
, &inst
->SrcReg
[0], machine
, texcoord
);
1463 if (inst
->TexSrcTarget
!= TEXTURE_CUBE_INDEX
&&
1464 texcoord
[3] != 0.0) {
1465 texcoord
[0] /= texcoord
[3];
1466 texcoord
[1] /= texcoord
[3];
1467 texcoord
[2] /= texcoord
[3];
1469 machine
->FetchTexelLod(ctx
, texcoord
, lambda
,
1470 inst
->TexSrcUnit
, color
);
1471 store_vector4( inst
, machine
, color
);
1474 case OPCODE_UP2H
: /* unpack two 16-bit floats */
1476 GLfloat a
[4], result
[4];
1477 const GLuint
*rawBits
= (const GLuint
*) a
;
1479 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1480 hx
= rawBits
[0] & 0xffff;
1481 hy
= rawBits
[0] >> 16;
1482 result
[0] = result
[2] = _mesa_half_to_float(hx
);
1483 result
[1] = result
[3] = _mesa_half_to_float(hy
);
1484 store_vector4( inst
, machine
, result
);
1487 case OPCODE_UP2US
: /* unpack two GLushorts */
1489 GLfloat a
[4], result
[4];
1490 const GLuint
*rawBits
= (const GLuint
*) a
;
1492 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1493 usx
= rawBits
[0] & 0xffff;
1494 usy
= rawBits
[0] >> 16;
1495 result
[0] = result
[2] = usx
* (1.0f
/ 65535.0f
);
1496 result
[1] = result
[3] = usy
* (1.0f
/ 65535.0f
);
1497 store_vector4( inst
, machine
, result
);
1500 case OPCODE_UP4B
: /* unpack four GLbytes */
1502 GLfloat a
[4], result
[4];
1503 const GLuint
*rawBits
= (const GLuint
*) a
;
1504 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1505 result
[0] = (((rawBits
[0] >> 0) & 0xff) - 128) / 127.0F
;
1506 result
[1] = (((rawBits
[0] >> 8) & 0xff) - 128) / 127.0F
;
1507 result
[2] = (((rawBits
[0] >> 16) & 0xff) - 128) / 127.0F
;
1508 result
[3] = (((rawBits
[0] >> 24) & 0xff) - 128) / 127.0F
;
1509 store_vector4( inst
, machine
, result
);
1512 case OPCODE_UP4UB
: /* unpack four GLubytes */
1514 GLfloat a
[4], result
[4];
1515 const GLuint
*rawBits
= (const GLuint
*) a
;
1516 fetch_vector1( ctx
, &inst
->SrcReg
[0], machine
, a
);
1517 result
[0] = ((rawBits
[0] >> 0) & 0xff) / 255.0F
;
1518 result
[1] = ((rawBits
[0] >> 8) & 0xff) / 255.0F
;
1519 result
[2] = ((rawBits
[0] >> 16) & 0xff) / 255.0F
;
1520 result
[3] = ((rawBits
[0] >> 24) & 0xff) / 255.0F
;
1521 store_vector4( inst
, machine
, result
);
1524 case OPCODE_XPD
: /* cross product */
1526 GLfloat a
[4], b
[4], result
[4];
1527 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1528 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1529 result
[0] = a
[1] * b
[2] - a
[2] * b
[1];
1530 result
[1] = a
[2] * b
[0] - a
[0] * b
[2];
1531 result
[2] = a
[0] * b
[1] - a
[1] * b
[0];
1533 store_vector4( inst
, machine
, result
);
1536 case OPCODE_X2D
: /* 2-D matrix transform */
1538 GLfloat a
[4], b
[4], c
[4], result
[4];
1539 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1540 fetch_vector4( ctx
, &inst
->SrcReg
[1], machine
, b
);
1541 fetch_vector4( ctx
, &inst
->SrcReg
[2], machine
, c
);
1542 result
[0] = a
[0] + b
[0] * c
[0] + b
[1] * c
[1];
1543 result
[1] = a
[1] + b
[0] * c
[2] + b
[1] * c
[3];
1544 result
[2] = a
[2] + b
[0] * c
[0] + b
[1] * c
[1];
1545 result
[3] = a
[3] + b
[0] * c
[2] + b
[1] * c
[3];
1546 store_vector4( inst
, machine
, result
);
1551 if (inst
->SrcReg
[0].File
!= -1) {
1553 fetch_vector4( ctx
, &inst
->SrcReg
[0], machine
, a
);
1554 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst
->Data
,
1555 a
[0], a
[1], a
[2], a
[3]);
1558 _mesa_printf("%s\n", (const char *) inst
->Data
);
1565 _mesa_problem(ctx
, "Bad opcode %d in _mesa_exec_fragment_program",
1567 return GL_TRUE
; /* return value doesn't matter */
1571 if (total
> MAX_EXEC
) {
1572 _mesa_problem(ctx
, "Infinite loop detected in fragment program");
1578 #if FEATURE_MESA_program_debug
1579 CurrentMachine
= NULL
;