don't pass program ptr to fetch_vector[14]()
[mesa.git] / src / mesa / shader / prog_execute.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.3
4 *
5 * Copyright (C) 1999-2007 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /**
26 * \file prog_execute.c
27 * Software interpreter for vertex/fragment programs.
28 * \author Brian Paul
29 */
30
31 /*
32 * NOTE: we do everything in single-precision floating point; we don't
33 * currently observe the single/half/fixed-precision qualifiers.
34 *
35 */
36
37
38 #include "glheader.h"
39 #include "colormac.h"
40 #include "context.h"
41 #include "program.h"
42 #include "prog_execute.h"
43 #include "prog_instruction.h"
44 #include "prog_parameter.h"
45 #include "prog_print.h"
46 #include "slang_library_noise.h"
47
48
49 /* See comments below for info about this */
50 #define LAMBDA_ZERO 1
51
52 /* debug predicate */
53 #define DEBUG_PROG 0
54
55
56 #if FEATURE_MESA_program_debug
57 static struct gl_program_machine *CurrentMachine = NULL;
58
59 /**
60 * For GL_MESA_program_debug.
61 * Return current value (4*GLfloat) of a program register.
62 * Called via ctx->Driver.GetFragmentProgramRegister().
63 */
64 void
65 _mesa_get_program_register(GLcontext *ctx, enum register_file file,
66 GLuint index, GLfloat val[4])
67 {
68 if (CurrentMachine) {
69 switch (file) {
70 case PROGRAM_INPUT:
71 if (CurrentMachine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
72 COPY_4V(val, CurrentMachine->VertAttribs[index]);
73 }
74 else {
75 COPY_4V(val,
76 CurrentMachine->Attribs[index][CurrentMachine->CurElement]);
77 }
78 break;
79 case PROGRAM_OUTPUT:
80 COPY_4V(val, CurrentMachine->Outputs[index]);
81 break;
82 case PROGRAM_TEMPORARY:
83 COPY_4V(val, CurrentMachine->Temporaries[index]);
84 break;
85 default:
86 _mesa_problem(NULL,
87 "bad register file in _swrast_get_program_register");
88 }
89 }
90 }
91 #endif /* FEATURE_MESA_program_debug */
92
93
94
95 /**
96 * Return a pointer to the 4-element float vector specified by the given
97 * source register.
98 */
99 static INLINE const GLfloat *
100 get_register_pointer( GLcontext *ctx,
101 const struct prog_src_register *source,
102 const struct gl_program_machine *machine)
103 {
104 /* XXX relative addressing... */
105 switch (source->File) {
106 case PROGRAM_TEMPORARY:
107 ASSERT(source->Index < MAX_PROGRAM_TEMPS);
108 return machine->Temporaries[source->Index];
109
110 case PROGRAM_INPUT:
111 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB) {
112 ASSERT(source->Index < VERT_ATTRIB_MAX);
113 return machine->VertAttribs[source->Index];
114 }
115 else {
116 ASSERT(source->Index < FRAG_ATTRIB_MAX);
117 return machine->Attribs[source->Index][machine->CurElement];
118 }
119
120 case PROGRAM_OUTPUT:
121 /* This is only for PRINT */
122 ASSERT(source->Index < FRAG_RESULT_MAX);
123 return machine->Outputs[source->Index];
124
125 case PROGRAM_LOCAL_PARAM:
126 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
127 return machine->CurProgram->LocalParams[source->Index];
128
129 case PROGRAM_ENV_PARAM:
130 ASSERT(source->Index < MAX_PROGRAM_ENV_PARAMS);
131 if (machine->CurProgram->Target == GL_VERTEX_PROGRAM_ARB)
132 return ctx->VertexProgram.Parameters[source->Index];
133 else
134 return ctx->FragmentProgram.Parameters[source->Index];
135
136 case PROGRAM_STATE_VAR:
137 /* Fallthrough */
138 case PROGRAM_CONSTANT:
139 /* Fallthrough */
140 case PROGRAM_UNIFORM:
141 /* Fallthrough */
142 case PROGRAM_NAMED_PARAM:
143 ASSERT(source->Index <
144 (GLint) machine->CurProgram->Parameters->NumParameters);
145 return machine->CurProgram->Parameters->ParameterValues[source->Index];
146
147 default:
148 _mesa_problem(ctx,
149 "Invalid input register file %d in get_register_pointer()",
150 source->File);
151 return NULL;
152 }
153 }
154
155
156 /**
157 * Fetch a 4-element float vector from the given source register.
158 * Apply swizzling and negating as needed.
159 */
160 static void
161 fetch_vector4( GLcontext *ctx,
162 const struct prog_src_register *source,
163 const struct gl_program_machine *machine,
164 GLfloat result[4] )
165 {
166 const GLfloat *src = get_register_pointer(ctx, source, machine);
167 ASSERT(src);
168
169 if (source->Swizzle == SWIZZLE_NOOP) {
170 /* no swizzling */
171 COPY_4V(result, src);
172 }
173 else {
174 ASSERT(GET_SWZ(source->Swizzle, 0) <= 3);
175 ASSERT(GET_SWZ(source->Swizzle, 1) <= 3);
176 ASSERT(GET_SWZ(source->Swizzle, 2) <= 3);
177 ASSERT(GET_SWZ(source->Swizzle, 3) <= 3);
178 result[0] = src[GET_SWZ(source->Swizzle, 0)];
179 result[1] = src[GET_SWZ(source->Swizzle, 1)];
180 result[2] = src[GET_SWZ(source->Swizzle, 2)];
181 result[3] = src[GET_SWZ(source->Swizzle, 3)];
182 }
183
184 if (source->NegateBase) {
185 result[0] = -result[0];
186 result[1] = -result[1];
187 result[2] = -result[2];
188 result[3] = -result[3];
189 }
190 if (source->Abs) {
191 result[0] = FABSF(result[0]);
192 result[1] = FABSF(result[1]);
193 result[2] = FABSF(result[2]);
194 result[3] = FABSF(result[3]);
195 }
196 if (source->NegateAbs) {
197 result[0] = -result[0];
198 result[1] = -result[1];
199 result[2] = -result[2];
200 result[3] = -result[3];
201 }
202 }
203
204 #if 0
205 /**
206 * Fetch the derivative with respect to X for the given register.
207 * \return GL_TRUE if it was easily computed or GL_FALSE if we
208 * need to execute another instance of the program (ugh)!
209 */
210 static GLboolean
211 fetch_vector4_deriv( GLcontext *ctx,
212 const struct prog_src_register *source,
213 const SWspan *span,
214 char xOrY, GLint column, GLfloat result[4] )
215 {
216 GLfloat src[4];
217
218 ASSERT(xOrY == 'X' || xOrY == 'Y');
219
220 switch (source->Index) {
221 case FRAG_ATTRIB_WPOS:
222 if (xOrY == 'X') {
223 src[0] = 1.0;
224 src[1] = 0.0;
225 src[2] = span->attrStepX[FRAG_ATTRIB_WPOS][2]
226 / ctx->DrawBuffer->_DepthMaxF;
227 src[3] = span->attrStepX[FRAG_ATTRIB_WPOS][3];
228 }
229 else {
230 src[0] = 0.0;
231 src[1] = 1.0;
232 src[2] = span->attrStepY[FRAG_ATTRIB_WPOS][2]
233 / ctx->DrawBuffer->_DepthMaxF;
234 src[3] = span->attrStepY[FRAG_ATTRIB_WPOS][3];
235 }
236 break;
237 case FRAG_ATTRIB_COL0:
238 case FRAG_ATTRIB_COL1:
239 if (xOrY == 'X') {
240 src[0] = span->attrStepX[source->Index][0] * (1.0F / CHAN_MAXF);
241 src[1] = span->attrStepX[source->Index][1] * (1.0F / CHAN_MAXF);
242 src[2] = span->attrStepX[source->Index][2] * (1.0F / CHAN_MAXF);
243 src[3] = span->attrStepX[source->Index][3] * (1.0F / CHAN_MAXF);
244 }
245 else {
246 src[0] = span->attrStepY[source->Index][0] * (1.0F / CHAN_MAXF);
247 src[1] = span->attrStepY[source->Index][1] * (1.0F / CHAN_MAXF);
248 src[2] = span->attrStepY[source->Index][2] * (1.0F / CHAN_MAXF);
249 src[3] = span->attrStepY[source->Index][3] * (1.0F / CHAN_MAXF);
250 }
251 break;
252 case FRAG_ATTRIB_FOGC:
253 if (xOrY == 'X') {
254 src[0] = span->attrStepX[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
255 src[1] = 0.0;
256 src[2] = 0.0;
257 src[3] = 0.0;
258 }
259 else {
260 src[0] = span->attrStepY[FRAG_ATTRIB_FOGC][0] * (1.0F / CHAN_MAXF);
261 src[1] = 0.0;
262 src[2] = 0.0;
263 src[3] = 0.0;
264 }
265 break;
266 default:
267 assert(source->Index < FRAG_ATTRIB_MAX);
268 /* texcoord or varying */
269 if (xOrY == 'X') {
270 /* this is a little tricky - I think I've got it right */
271 const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
272 + span->attrStepX[source->Index][3] * column);
273 src[0] = span->attrStepX[source->Index][0] * invQ;
274 src[1] = span->attrStepX[source->Index][1] * invQ;
275 src[2] = span->attrStepX[source->Index][2] * invQ;
276 src[3] = span->attrStepX[source->Index][3] * invQ;
277 }
278 else {
279 /* Tricky, as above, but in Y direction */
280 const GLfloat invQ = 1.0f / (span->attrStart[source->Index][3]
281 + span->attrStepY[source->Index][3]);
282 src[0] = span->attrStepY[source->Index][0] * invQ;
283 src[1] = span->attrStepY[source->Index][1] * invQ;
284 src[2] = span->attrStepY[source->Index][2] * invQ;
285 src[3] = span->attrStepY[source->Index][3] * invQ;
286 }
287 break;
288 }
289
290 result[0] = src[GET_SWZ(source->Swizzle, 0)];
291 result[1] = src[GET_SWZ(source->Swizzle, 1)];
292 result[2] = src[GET_SWZ(source->Swizzle, 2)];
293 result[3] = src[GET_SWZ(source->Swizzle, 3)];
294
295 if (source->NegateBase) {
296 result[0] = -result[0];
297 result[1] = -result[1];
298 result[2] = -result[2];
299 result[3] = -result[3];
300 }
301 if (source->Abs) {
302 result[0] = FABSF(result[0]);
303 result[1] = FABSF(result[1]);
304 result[2] = FABSF(result[2]);
305 result[3] = FABSF(result[3]);
306 }
307 if (source->NegateAbs) {
308 result[0] = -result[0];
309 result[1] = -result[1];
310 result[2] = -result[2];
311 result[3] = -result[3];
312 }
313 return GL_TRUE;
314 }
315 #endif
316
317
318 /**
319 * As above, but only return result[0] element.
320 */
321 static void
322 fetch_vector1( GLcontext *ctx,
323 const struct prog_src_register *source,
324 const struct gl_program_machine *machine,
325 GLfloat result[4] )
326 {
327 const GLfloat *src = get_register_pointer(ctx, source, machine);
328 ASSERT(src);
329
330 result[0] = src[GET_SWZ(source->Swizzle, 0)];
331
332 if (source->NegateBase) {
333 result[0] = -result[0];
334 }
335 if (source->Abs) {
336 result[0] = FABSF(result[0]);
337 }
338 if (source->NegateAbs) {
339 result[0] = -result[0];
340 }
341 }
342
343
344 /**
345 * Test value against zero and return GT, LT, EQ or UN if NaN.
346 */
347 static INLINE GLuint
348 generate_cc( float value )
349 {
350 if (value != value)
351 return COND_UN; /* NaN */
352 if (value > 0.0F)
353 return COND_GT;
354 if (value < 0.0F)
355 return COND_LT;
356 return COND_EQ;
357 }
358
359
360 /**
361 * Test if the ccMaskRule is satisfied by the given condition code.
362 * Used to mask destination writes according to the current condition code.
363 */
364 static INLINE GLboolean
365 test_cc(GLuint condCode, GLuint ccMaskRule)
366 {
367 switch (ccMaskRule) {
368 case COND_EQ: return (condCode == COND_EQ);
369 case COND_NE: return (condCode != COND_EQ);
370 case COND_LT: return (condCode == COND_LT);
371 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
372 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
373 case COND_GT: return (condCode == COND_GT);
374 case COND_TR: return GL_TRUE;
375 case COND_FL: return GL_FALSE;
376 default: return GL_TRUE;
377 }
378 }
379
380
381 /**
382 * Evaluate the 4 condition codes against a predicate and return GL_TRUE
383 * or GL_FALSE to indicate result.
384 */
385 static INLINE GLboolean
386 eval_condition(const struct gl_program_machine *machine,
387 const struct prog_instruction *inst)
388 {
389 const GLuint swizzle = inst->DstReg.CondSwizzle;
390 const GLuint condMask = inst->DstReg.CondMask;
391 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
392 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
393 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
394 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
395 return GL_TRUE;
396 }
397 else {
398 return GL_FALSE;
399 }
400 }
401
402
403
404 /**
405 * Store 4 floats into a register. Observe the instructions saturate and
406 * set-condition-code flags.
407 */
408 static void
409 store_vector4( const struct prog_instruction *inst,
410 struct gl_program_machine *machine,
411 const GLfloat value[4] )
412 {
413 const struct prog_dst_register *dest = &(inst->DstReg);
414 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
415 GLfloat *dstReg;
416 GLfloat dummyReg[4];
417 GLfloat clampedValue[4];
418 GLuint writeMask = dest->WriteMask;
419
420 switch (dest->File) {
421 case PROGRAM_OUTPUT:
422 dstReg = machine->Outputs[dest->Index];
423 break;
424 case PROGRAM_TEMPORARY:
425 dstReg = machine->Temporaries[dest->Index];
426 break;
427 case PROGRAM_WRITE_ONLY:
428 dstReg = dummyReg;
429 return;
430 default:
431 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
432 return;
433 }
434
435 #if 0
436 if (value[0] > 1.0e10 ||
437 IS_INF_OR_NAN(value[0]) ||
438 IS_INF_OR_NAN(value[1]) ||
439 IS_INF_OR_NAN(value[2]) ||
440 IS_INF_OR_NAN(value[3]) )
441 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
442 #endif
443
444 if (clamp) {
445 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
446 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
447 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
448 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
449 value = clampedValue;
450 }
451
452 if (dest->CondMask != COND_TR) {
453 /* condition codes may turn off some writes */
454 if (writeMask & WRITEMASK_X) {
455 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
456 dest->CondMask))
457 writeMask &= ~WRITEMASK_X;
458 }
459 if (writeMask & WRITEMASK_Y) {
460 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
461 dest->CondMask))
462 writeMask &= ~WRITEMASK_Y;
463 }
464 if (writeMask & WRITEMASK_Z) {
465 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
466 dest->CondMask))
467 writeMask &= ~WRITEMASK_Z;
468 }
469 if (writeMask & WRITEMASK_W) {
470 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
471 dest->CondMask))
472 writeMask &= ~WRITEMASK_W;
473 }
474 }
475
476 if (writeMask & WRITEMASK_X)
477 dstReg[0] = value[0];
478 if (writeMask & WRITEMASK_Y)
479 dstReg[1] = value[1];
480 if (writeMask & WRITEMASK_Z)
481 dstReg[2] = value[2];
482 if (writeMask & WRITEMASK_W)
483 dstReg[3] = value[3];
484
485 if (inst->CondUpdate) {
486 if (writeMask & WRITEMASK_X)
487 machine->CondCodes[0] = generate_cc(value[0]);
488 if (writeMask & WRITEMASK_Y)
489 machine->CondCodes[1] = generate_cc(value[1]);
490 if (writeMask & WRITEMASK_Z)
491 machine->CondCodes[2] = generate_cc(value[2]);
492 if (writeMask & WRITEMASK_W)
493 machine->CondCodes[3] = generate_cc(value[3]);
494 }
495 }
496
497
498 #if 0
499 /**
500 * Initialize a new machine state instance from an existing one, adding
501 * the partial derivatives onto the input registers.
502 * Used to implement DDX and DDY instructions in non-trivial cases.
503 */
504 static void
505 init_machine_deriv( GLcontext *ctx,
506 const struct gl_program_machine *machine,
507 const struct gl_fragment_program *program,
508 const SWspan *span, char xOrY,
509 struct gl_program_machine *dMachine )
510 {
511 GLuint attr;
512
513 ASSERT(xOrY == 'X' || xOrY == 'Y');
514
515 /* copy existing machine */
516 _mesa_memcpy(dMachine, machine, sizeof(struct gl_program_machine));
517
518 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
519 /* XXX also need to do this when using valgrind */
520 /* Clear temporary registers (undefined for ARB_f_p) */
521 _mesa_bzero( (void*) machine->Temporaries,
522 MAX_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
523 }
524
525 /* Add derivatives */
526 if (program->Base.InputsRead & FRAG_BIT_WPOS) {
527 GLfloat *wpos = machine->Attribs[FRAG_ATTRIB_WPOS][machine->CurElement];
528 if (xOrY == 'X') {
529 wpos[0] += 1.0F;
530 wpos[1] += 0.0F;
531 wpos[2] += span->attrStepX[FRAG_ATTRIB_WPOS][2];
532 wpos[3] += span->attrStepX[FRAG_ATTRIB_WPOS][3];
533 }
534 else {
535 wpos[0] += 0.0F;
536 wpos[1] += 1.0F;
537 wpos[2] += span->attrStepY[FRAG_ATTRIB_WPOS][2];
538 wpos[3] += span->attrStepY[FRAG_ATTRIB_WPOS][3];
539 }
540 }
541
542 /* primary, secondary colors */
543 for (attr = FRAG_ATTRIB_COL0; attr <= FRAG_ATTRIB_COL1; attr++) {
544 if (program->Base.InputsRead & (1 << attr)) {
545 GLfloat *col = machine->Attribs[attr][machine->CurElement];
546 if (xOrY == 'X') {
547 col[0] += span->attrStepX[attr][0] * (1.0F / CHAN_MAXF);
548 col[1] += span->attrStepX[attr][1] * (1.0F / CHAN_MAXF);
549 col[2] += span->attrStepX[attr][2] * (1.0F / CHAN_MAXF);
550 col[3] += span->attrStepX[attr][3] * (1.0F / CHAN_MAXF);
551 }
552 else {
553 col[0] += span->attrStepY[attr][0] * (1.0F / CHAN_MAXF);
554 col[1] += span->attrStepY[attr][1] * (1.0F / CHAN_MAXF);
555 col[2] += span->attrStepY[attr][2] * (1.0F / CHAN_MAXF);
556 col[3] += span->attrStepY[attr][3] * (1.0F / CHAN_MAXF);
557 }
558 }
559 }
560 if (program->Base.InputsRead & FRAG_BIT_FOGC) {
561 GLfloat *fogc = machine->Attribs[FRAG_ATTRIB_FOGC][machine->CurElement];
562 if (xOrY == 'X') {
563 fogc[0] += span->attrStepX[FRAG_ATTRIB_FOGC][0];
564 }
565 else {
566 fogc[0] += span->attrStepY[FRAG_ATTRIB_FOGC][0];
567 }
568 }
569 /* texcoord and varying vars */
570 for (attr = FRAG_ATTRIB_TEX0; attr < FRAG_ATTRIB_MAX; attr++) {
571 if (program->Base.InputsRead & (1 << attr)) {
572 GLfloat *val = machine->Attribs[attr][machine->CurElement];
573 /* XXX perspective-correct interpolation */
574 if (xOrY == 'X') {
575 val[0] += span->attrStepX[attr][0];
576 val[1] += span->attrStepX[attr][1];
577 val[2] += span->attrStepX[attr][2];
578 val[3] += span->attrStepX[attr][3];
579 }
580 else {
581 val[0] += span->attrStepY[attr][0];
582 val[1] += span->attrStepY[attr][1];
583 val[2] += span->attrStepY[attr][2];
584 val[3] += span->attrStepY[attr][3];
585 }
586 }
587 }
588
589 /* init condition codes */
590 dMachine->CondCodes[0] = COND_EQ;
591 dMachine->CondCodes[1] = COND_EQ;
592 dMachine->CondCodes[2] = COND_EQ;
593 dMachine->CondCodes[3] = COND_EQ;
594 }
595 #endif
596
597
598 /**
599 * Execute the given vertex/fragment program.
600 *
601 * \param ctx - rendering context
602 * \param program - the fragment program to execute
603 * \param machine - machine state (register file)
604 * \param maxInst - max number of instructions to execute
605 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
606 */
607 GLboolean
608 _mesa_execute_program(GLcontext *ctx,
609 const struct gl_program *program, GLuint maxInst,
610 struct gl_program_machine *machine, GLuint element)
611 {
612 const GLuint MAX_EXEC = 10000;
613 GLint pc, total = 0;
614
615 machine->CurProgram = program;
616
617 if (DEBUG_PROG) {
618 printf("execute program %u --------------------\n", program->Id);
619 }
620
621 #if FEATURE_MESA_program_debug
622 CurrentMachine = machine;
623 #endif
624
625 for (pc = 0; pc < maxInst; pc++) {
626 const struct prog_instruction *inst = program->Instructions + pc;
627
628 #if FEATURE_MESA_program_debug
629 if (ctx->FragmentProgram.CallbackEnabled &&
630 ctx->FragmentProgram.Callback) {
631 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
632 ctx->FragmentProgram.Callback(program->Target,
633 ctx->FragmentProgram.CallbackData);
634 }
635 #endif
636
637 if (DEBUG_PROG) {
638 _mesa_print_instruction(inst);
639 }
640
641 switch (inst->Opcode) {
642 case OPCODE_ABS:
643 {
644 GLfloat a[4], result[4];
645 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
646 result[0] = FABSF(a[0]);
647 result[1] = FABSF(a[1]);
648 result[2] = FABSF(a[2]);
649 result[3] = FABSF(a[3]);
650 store_vector4( inst, machine, result );
651 }
652 break;
653 case OPCODE_ADD:
654 {
655 GLfloat a[4], b[4], result[4];
656 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
657 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
658 result[0] = a[0] + b[0];
659 result[1] = a[1] + b[1];
660 result[2] = a[2] + b[2];
661 result[3] = a[3] + b[3];
662 store_vector4( inst, machine, result );
663 if (DEBUG_PROG) {
664 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
665 result[0], result[1], result[2], result[3],
666 a[0], a[1], a[2], a[3],
667 b[0], b[1], b[2], b[3]);
668 }
669 }
670 break;
671 case OPCODE_BGNLOOP:
672 /* no-op */
673 break;
674 case OPCODE_ENDLOOP:
675 /* subtract 1 here since pc is incremented by for(pc) loop */
676 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */
677 break;
678 case OPCODE_BGNSUB: /* begin subroutine */
679 break;
680 case OPCODE_ENDSUB: /* end subroutine */
681 break;
682 case OPCODE_BRA: /* branch (conditional) */
683 /* fall-through */
684 case OPCODE_BRK: /* break out of loop (conditional) */
685 /* fall-through */
686 case OPCODE_CONT: /* continue loop (conditional) */
687 if (eval_condition(machine, inst)) {
688 /* take branch */
689 /* Subtract 1 here since we'll do pc++ at end of for-loop */
690 pc = inst->BranchTarget - 1;
691 }
692 break;
693 case OPCODE_CAL: /* Call subroutine (conditional) */
694 if (eval_condition(machine, inst)) {
695 /* call the subroutine */
696 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
697 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
698 }
699 machine->CallStack[machine->StackDepth++] = pc + 1;
700 pc = inst->BranchTarget; /* XXX - 1 ??? */
701 }
702 break;
703 case OPCODE_CMP:
704 {
705 GLfloat a[4], b[4], c[4], result[4];
706 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
707 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
708 fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
709 result[0] = a[0] < 0.0F ? b[0] : c[0];
710 result[1] = a[1] < 0.0F ? b[1] : c[1];
711 result[2] = a[2] < 0.0F ? b[2] : c[2];
712 result[3] = a[3] < 0.0F ? b[3] : c[3];
713 store_vector4( inst, machine, result );
714 }
715 break;
716 case OPCODE_COS:
717 {
718 GLfloat a[4], result[4];
719 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
720 result[0] = result[1] = result[2] = result[3]
721 = (GLfloat) _mesa_cos(a[0]);
722 store_vector4( inst, machine, result );
723 }
724 break;
725 case OPCODE_DDX: /* Partial derivative with respect to X */
726 {
727 #if 0
728 GLfloat a[4], aNext[4], result[4];
729 struct gl_program_machine dMachine;
730 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
731 column, result)) {
732 /* This is tricky. Make a copy of the current machine state,
733 * increment the input registers by the dx or dy partial
734 * derivatives, then re-execute the program up to the
735 * preceeding instruction, then fetch the source register.
736 * Finally, find the difference in the register values for
737 * the original and derivative runs.
738 */
739 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
740 init_machine_deriv(ctx, machine, program, span,
741 'X', &dMachine);
742 execute_program(ctx, program, pc, &dMachine, span, column);
743 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
744 result[0] = aNext[0] - a[0];
745 result[1] = aNext[1] - a[1];
746 result[2] = aNext[2] - a[2];
747 result[3] = aNext[3] - a[3];
748 }
749 store_vector4( inst, machine, result );
750 #else
751 static const GLfloat result[4] = { 0, 0, 0, 0 };
752 store_vector4( inst, machine, result );
753 #endif
754 }
755 break;
756 case OPCODE_DDY: /* Partial derivative with respect to Y */
757 {
758 #if 0
759 GLfloat a[4], aNext[4], result[4];
760 struct gl_program_machine dMachine;
761 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
762 column, result)) {
763 init_machine_deriv(ctx, machine, program, span,
764 'Y', &dMachine);
765 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
766 execute_program(ctx, program, pc, &dMachine, span, column);
767 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
768 result[0] = aNext[0] - a[0];
769 result[1] = aNext[1] - a[1];
770 result[2] = aNext[2] - a[2];
771 result[3] = aNext[3] - a[3];
772 }
773 store_vector4( inst, machine, result );
774 #else
775 static const GLfloat result[4] = { 0, 0, 0, 0 };
776 store_vector4( inst, machine, result );
777 #endif
778 }
779 break;
780 case OPCODE_DP3:
781 {
782 GLfloat a[4], b[4], result[4];
783 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
784 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
785 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
786 store_vector4( inst, machine, result );
787 if (DEBUG_PROG) {
788 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
789 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
790 }
791 }
792 break;
793 case OPCODE_DP4:
794 {
795 GLfloat a[4], b[4], result[4];
796 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
797 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
798 result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
799 store_vector4( inst, machine, result );
800 if (DEBUG_PROG) {
801 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
802 result[0], a[0], a[1], a[2], a[3],
803 b[0], b[1], b[2], b[3]);
804 }
805 }
806 break;
807 case OPCODE_DPH:
808 {
809 GLfloat a[4], b[4], result[4];
810 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
811 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
812 result[0] = result[1] = result[2] = result[3] =
813 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
814 store_vector4( inst, machine, result );
815 }
816 break;
817 case OPCODE_DST: /* Distance vector */
818 {
819 GLfloat a[4], b[4], result[4];
820 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
821 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
822 result[0] = 1.0F;
823 result[1] = a[1] * b[1];
824 result[2] = a[2];
825 result[3] = b[3];
826 store_vector4( inst, machine, result );
827 }
828 break;
829 case OPCODE_EX2: /* Exponential base 2 */
830 {
831 GLfloat a[4], result[4];
832 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
833 result[0] = result[1] = result[2] = result[3] =
834 (GLfloat) _mesa_pow(2.0, a[0]);
835 store_vector4( inst, machine, result );
836 }
837 break;
838 case OPCODE_FLR:
839 {
840 GLfloat a[4], result[4];
841 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
842 result[0] = FLOORF(a[0]);
843 result[1] = FLOORF(a[1]);
844 result[2] = FLOORF(a[2]);
845 result[3] = FLOORF(a[3]);
846 store_vector4( inst, machine, result );
847 }
848 break;
849 case OPCODE_FRC:
850 {
851 GLfloat a[4], result[4];
852 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
853 result[0] = a[0] - FLOORF(a[0]);
854 result[1] = a[1] - FLOORF(a[1]);
855 result[2] = a[2] - FLOORF(a[2]);
856 result[3] = a[3] - FLOORF(a[3]);
857 store_vector4( inst, machine, result );
858 }
859 break;
860 case OPCODE_IF:
861 if (eval_condition(machine, inst)) {
862 /* do if-clause (just continue execution) */
863 }
864 else {
865 /* go to the instruction after ELSE or ENDIF */
866 assert(inst->BranchTarget >= 0);
867 pc = inst->BranchTarget - 1;
868 }
869 break;
870 case OPCODE_ELSE:
871 /* goto ENDIF */
872 assert(inst->BranchTarget >= 0);
873 pc = inst->BranchTarget - 1;
874 break;
875 case OPCODE_ENDIF:
876 /* nothing */
877 break;
878 case OPCODE_INT: /* float to int */
879 {
880 GLfloat a[4], result[4];
881 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
882 result[0] = (GLfloat) (GLint) a[0];
883 result[1] = (GLfloat) (GLint) a[1];
884 result[2] = (GLfloat) (GLint) a[2];
885 result[3] = (GLfloat) (GLint) a[3];
886 store_vector4( inst, machine, result );
887 }
888 break;
889 case OPCODE_KIL_NV: /* NV_f_p only (conditional) */
890 if (eval_condition(machine, inst)) {
891 return GL_FALSE;
892 }
893 break;
894 case OPCODE_KIL: /* ARB_f_p only */
895 {
896 GLfloat a[4];
897 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
898 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
899 return GL_FALSE;
900 }
901 }
902 break;
903 case OPCODE_LG2: /* log base 2 */
904 {
905 GLfloat a[4], result[4];
906 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
907 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
908 store_vector4( inst, machine, result );
909 }
910 break;
911 case OPCODE_LIT:
912 {
913 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
914 GLfloat a[4], result[4];
915 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
916 a[0] = MAX2(a[0], 0.0F);
917 a[1] = MAX2(a[1], 0.0F);
918 /* XXX ARB version clamps a[3], NV version doesn't */
919 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
920 result[0] = 1.0F;
921 result[1] = a[0];
922 /* XXX we could probably just use pow() here */
923 if (a[0] > 0.0F) {
924 if (a[1] == 0.0 && a[3] == 0.0)
925 result[2] = 1.0;
926 else
927 result[2] = EXPF(a[3] * LOGF(a[1]));
928 }
929 else {
930 result[2] = 0.0;
931 }
932 result[3] = 1.0F;
933 store_vector4( inst, machine, result );
934 if (DEBUG_PROG) {
935 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
936 result[0], result[1], result[2], result[3],
937 a[0], a[1], a[2], a[3]);
938 }
939 }
940 break;
941 case OPCODE_LRP:
942 {
943 GLfloat a[4], b[4], c[4], result[4];
944 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
945 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
946 fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
947 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
948 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
949 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
950 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
951 store_vector4( inst, machine, result );
952 if (DEBUG_PROG) {
953 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
954 "(%g %g %g %g), (%g %g %g %g)\n",
955 result[0], result[1], result[2], result[3],
956 a[0], a[1], a[2], a[3],
957 b[0], b[1], b[2], b[3],
958 c[0], c[1], c[2], c[3]);
959 }
960 }
961 break;
962 case OPCODE_MAD:
963 {
964 GLfloat a[4], b[4], c[4], result[4];
965 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
966 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
967 fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
968 result[0] = a[0] * b[0] + c[0];
969 result[1] = a[1] * b[1] + c[1];
970 result[2] = a[2] * b[2] + c[2];
971 result[3] = a[3] * b[3] + c[3];
972 store_vector4( inst, machine, result );
973 if (DEBUG_PROG) {
974 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
975 "(%g %g %g %g) + (%g %g %g %g)\n",
976 result[0], result[1], result[2], result[3],
977 a[0], a[1], a[2], a[3],
978 b[0], b[1], b[2], b[3],
979 c[0], c[1], c[2], c[3]);
980 }
981 }
982 break;
983 case OPCODE_MAX:
984 {
985 GLfloat a[4], b[4], result[4];
986 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
987 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
988 result[0] = MAX2(a[0], b[0]);
989 result[1] = MAX2(a[1], b[1]);
990 result[2] = MAX2(a[2], b[2]);
991 result[3] = MAX2(a[3], b[3]);
992 store_vector4( inst, machine, result );
993 if (DEBUG_PROG) {
994 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
995 result[0], result[1], result[2], result[3],
996 a[0], a[1], a[2], a[3],
997 b[0], b[1], b[2], b[3]);
998 }
999 }
1000 break;
1001 case OPCODE_MIN:
1002 {
1003 GLfloat a[4], b[4], result[4];
1004 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1005 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1006 result[0] = MIN2(a[0], b[0]);
1007 result[1] = MIN2(a[1], b[1]);
1008 result[2] = MIN2(a[2], b[2]);
1009 result[3] = MIN2(a[3], b[3]);
1010 store_vector4( inst, machine, result );
1011 }
1012 break;
1013 case OPCODE_MOV:
1014 {
1015 GLfloat result[4];
1016 fetch_vector4( ctx, &inst->SrcReg[0], machine, result );
1017 store_vector4( inst, machine, result );
1018 if (DEBUG_PROG) {
1019 printf("MOV (%g %g %g %g)\n",
1020 result[0], result[1], result[2], result[3]);
1021 }
1022 }
1023 break;
1024 case OPCODE_MUL:
1025 {
1026 GLfloat a[4], b[4], result[4];
1027 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1028 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1029 result[0] = a[0] * b[0];
1030 result[1] = a[1] * b[1];
1031 result[2] = a[2] * b[2];
1032 result[3] = a[3] * b[3];
1033 store_vector4( inst, machine, result );
1034 if (DEBUG_PROG) {
1035 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1036 result[0], result[1], result[2], result[3],
1037 a[0], a[1], a[2], a[3],
1038 b[0], b[1], b[2], b[3]);
1039 }
1040 }
1041 break;
1042 case OPCODE_NOISE1:
1043 {
1044 GLfloat a[4], result[4];
1045 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1046 result[0] =
1047 result[1] =
1048 result[2] =
1049 result[3] = _slang_library_noise1(a[0]);
1050 store_vector4( inst, machine, result );
1051 }
1052 break;
1053 case OPCODE_NOISE2:
1054 {
1055 GLfloat a[4], result[4];
1056 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1057 result[0] =
1058 result[1] =
1059 result[2] =
1060 result[3] = _slang_library_noise2(a[0], a[1]);
1061 store_vector4( inst, machine, result );
1062 }
1063 break;
1064 case OPCODE_NOISE3:
1065 {
1066 GLfloat a[4], result[4];
1067 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1068 result[0] =
1069 result[1] =
1070 result[2] =
1071 result[3] = _slang_library_noise3(a[0], a[1], a[2]);
1072 store_vector4( inst, machine, result );
1073 }
1074 break;
1075 case OPCODE_NOISE4:
1076 {
1077 GLfloat a[4], result[4];
1078 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1079 result[0] =
1080 result[1] =
1081 result[2] =
1082 result[3] = _slang_library_noise4(a[0], a[1], a[2], a[3]);
1083 store_vector4( inst, machine, result );
1084 }
1085 break;
1086 case OPCODE_NOP:
1087 break;
1088 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1089 {
1090 GLfloat a[4], result[4];
1091 GLhalfNV hx, hy;
1092 GLuint *rawResult = (GLuint *) result;
1093 GLuint twoHalves;
1094 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1095 hx = _mesa_float_to_half(a[0]);
1096 hy = _mesa_float_to_half(a[1]);
1097 twoHalves = hx | (hy << 16);
1098 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1099 = twoHalves;
1100 store_vector4( inst, machine, result );
1101 }
1102 break;
1103 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1104 {
1105 GLfloat a[4], result[4];
1106 GLuint usx, usy, *rawResult = (GLuint *) result;
1107 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1108 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1109 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1110 usx = IROUND(a[0] * 65535.0F);
1111 usy = IROUND(a[1] * 65535.0F);
1112 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1113 = usx | (usy << 16);
1114 store_vector4( inst, machine, result );
1115 }
1116 break;
1117 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1118 {
1119 GLfloat a[4], result[4];
1120 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1121 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1122 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1123 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1124 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1125 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1126 ubx = IROUND(127.0F * a[0] + 128.0F);
1127 uby = IROUND(127.0F * a[1] + 128.0F);
1128 ubz = IROUND(127.0F * a[2] + 128.0F);
1129 ubw = IROUND(127.0F * a[3] + 128.0F);
1130 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1131 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1132 store_vector4( inst, machine, result );
1133 }
1134 break;
1135 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1136 {
1137 GLfloat a[4], result[4];
1138 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1139 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1140 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1141 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1142 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1143 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1144 ubx = IROUND(255.0F * a[0]);
1145 uby = IROUND(255.0F * a[1]);
1146 ubz = IROUND(255.0F * a[2]);
1147 ubw = IROUND(255.0F * a[3]);
1148 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1149 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1150 store_vector4( inst, machine, result );
1151 }
1152 break;
1153 case OPCODE_POW:
1154 {
1155 GLfloat a[4], b[4], result[4];
1156 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1157 fetch_vector1( ctx, &inst->SrcReg[1], machine, b );
1158 result[0] = result[1] = result[2] = result[3]
1159 = (GLfloat)_mesa_pow(a[0], b[0]);
1160 store_vector4( inst, machine, result );
1161 }
1162 break;
1163 case OPCODE_RCP:
1164 {
1165 GLfloat a[4], result[4];
1166 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1167 if (DEBUG_PROG) {
1168 if (a[0] == 0)
1169 printf("RCP(0)\n");
1170 else if (IS_INF_OR_NAN(a[0]))
1171 printf("RCP(inf)\n");
1172 }
1173 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1174 store_vector4( inst, machine, result );
1175 }
1176 break;
1177 case OPCODE_RET: /* return from subroutine (conditional) */
1178 if (eval_condition(machine, inst)) {
1179 if (machine->StackDepth == 0) {
1180 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1181 }
1182 pc = machine->CallStack[--machine->StackDepth];
1183 }
1184 break;
1185 case OPCODE_RFL: /* reflection vector */
1186 {
1187 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1188 fetch_vector4( ctx, &inst->SrcReg[0], machine, axis );
1189 fetch_vector4( ctx, &inst->SrcReg[1], machine, dir );
1190 tmpW = DOT3(axis, axis);
1191 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1192 result[0] = tmpX * axis[0] - dir[0];
1193 result[1] = tmpX * axis[1] - dir[1];
1194 result[2] = tmpX * axis[2] - dir[2];
1195 /* result[3] is never written! XXX enforce in parser! */
1196 store_vector4( inst, machine, result );
1197 }
1198 break;
1199 case OPCODE_RSQ: /* 1 / sqrt() */
1200 {
1201 GLfloat a[4], result[4];
1202 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1203 a[0] = FABSF(a[0]);
1204 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1205 store_vector4( inst, machine, result );
1206 if (DEBUG_PROG) {
1207 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1208 }
1209 }
1210 break;
1211 case OPCODE_SCS: /* sine and cos */
1212 {
1213 GLfloat a[4], result[4];
1214 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1215 result[0] = (GLfloat) _mesa_cos(a[0]);
1216 result[1] = (GLfloat) _mesa_sin(a[0]);
1217 result[2] = 0.0; /* undefined! */
1218 result[3] = 0.0; /* undefined! */
1219 store_vector4( inst, machine, result );
1220 }
1221 break;
1222 case OPCODE_SEQ: /* set on equal */
1223 {
1224 GLfloat a[4], b[4], result[4];
1225 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1226 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1227 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1228 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1229 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1230 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1231 store_vector4( inst, machine, result );
1232 }
1233 break;
1234 case OPCODE_SFL: /* set false, operands ignored */
1235 {
1236 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1237 store_vector4( inst, machine, result );
1238 }
1239 break;
1240 case OPCODE_SGE: /* set on greater or equal */
1241 {
1242 GLfloat a[4], b[4], result[4];
1243 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1244 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1245 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1246 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1247 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1248 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1249 store_vector4( inst, machine, result );
1250 }
1251 break;
1252 case OPCODE_SGT: /* set on greater */
1253 {
1254 GLfloat a[4], b[4], result[4];
1255 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1256 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1257 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1258 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1259 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1260 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1261 store_vector4( inst, machine, result );
1262 if (DEBUG_PROG) {
1263 printf("SGT %g %g %g %g\n",
1264 result[0], result[1], result[2], result[3]);
1265 }
1266 }
1267 break;
1268 case OPCODE_SIN:
1269 {
1270 GLfloat a[4], result[4];
1271 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1272 result[0] = result[1] = result[2] = result[3]
1273 = (GLfloat) _mesa_sin(a[0]);
1274 store_vector4( inst, machine, result );
1275 }
1276 break;
1277 case OPCODE_SLE: /* set on less or equal */
1278 {
1279 GLfloat a[4], b[4], result[4];
1280 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1281 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1282 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1283 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1284 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1285 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1286 store_vector4( inst, machine, result );
1287 }
1288 break;
1289 case OPCODE_SLT: /* set on less */
1290 {
1291 GLfloat a[4], b[4], result[4];
1292 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1293 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1294 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1295 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1296 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1297 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1298 store_vector4( inst, machine, result );
1299 }
1300 break;
1301 case OPCODE_SNE: /* set on not equal */
1302 {
1303 GLfloat a[4], b[4], result[4];
1304 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1305 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1306 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1307 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1308 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1309 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1310 store_vector4( inst, machine, result );
1311 }
1312 break;
1313 case OPCODE_STR: /* set true, operands ignored */
1314 {
1315 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1316 store_vector4( inst, machine, result );
1317 }
1318 break;
1319 case OPCODE_SUB:
1320 {
1321 GLfloat a[4], b[4], result[4];
1322 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1323 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1324 result[0] = a[0] - b[0];
1325 result[1] = a[1] - b[1];
1326 result[2] = a[2] - b[2];
1327 result[3] = a[3] - b[3];
1328 store_vector4( inst, machine, result );
1329 if (DEBUG_PROG) {
1330 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1331 result[0], result[1], result[2], result[3],
1332 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1333 }
1334 }
1335 break;
1336 case OPCODE_SWZ: /* extended swizzle */
1337 {
1338 const struct prog_src_register *source = &inst->SrcReg[0];
1339 const GLfloat *src = get_register_pointer(ctx, source, machine);
1340 GLfloat result[4];
1341 GLuint i;
1342 for (i = 0; i < 4; i++) {
1343 const GLuint swz = GET_SWZ(source->Swizzle, i);
1344 if (swz == SWIZZLE_ZERO)
1345 result[i] = 0.0;
1346 else if (swz == SWIZZLE_ONE)
1347 result[i] = 1.0;
1348 else {
1349 ASSERT(swz >= 0);
1350 ASSERT(swz <= 3);
1351 result[i] = src[swz];
1352 }
1353 if (source->NegateBase & (1 << i))
1354 result[i] = -result[i];
1355 }
1356 store_vector4( inst, machine, result );
1357 }
1358 break;
1359 case OPCODE_TEX: /* Both ARB and NV frag prog */
1360 /* Texel lookup */
1361 {
1362 /* Note: only use the precomputed lambda value when we're
1363 * sampling texture unit [K] with texcoord[K].
1364 * Otherwise, the lambda value may have no relation to the
1365 * instruction's texcoord or texture image. Using the wrong
1366 * lambda is usually bad news.
1367 * The rest of the time, just use zero (until we get a more
1368 * sophisticated way of computing lambda).
1369 */
1370 GLfloat coord[4], color[4], lambda;
1371 #if 0
1372 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1373 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1374 lambda = span->array->lambda[inst->TexSrcUnit][column];
1375 else
1376 #endif
1377 lambda = 0.0;
1378 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1379 machine->FetchTexelLod(ctx, coord, lambda, inst->TexSrcUnit, color);
1380 if (DEBUG_PROG) {
1381 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1382 "lod %f\n",
1383 color[0], color[1], color[2], color[3],
1384 inst->TexSrcUnit,
1385 coord[0], coord[1], coord[2], coord[3], lambda);
1386 }
1387 store_vector4( inst, machine, color );
1388 }
1389 break;
1390 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1391 /* Texel lookup with LOD bias */
1392 {
1393 const struct gl_texture_unit *texUnit
1394 = &ctx->Texture.Unit[inst->TexSrcUnit];
1395 GLfloat coord[4], color[4], lambda, bias;
1396 #if 0
1397 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1398 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1399 lambda = span->array->lambda[inst->TexSrcUnit][column];
1400 else
1401 #endif
1402 lambda = 0.0;
1403 fetch_vector4(ctx, &inst->SrcReg[0], machine, coord);
1404 /* coord[3] is the bias to add to lambda */
1405 bias = texUnit->LodBias + coord[3];
1406 if (texUnit->_Current)
1407 bias += texUnit->_Current->LodBias;
1408 machine->FetchTexelLod(ctx, coord, lambda + bias,
1409 inst->TexSrcUnit, color);
1410 store_vector4( inst, machine, color );
1411 }
1412 break;
1413 case OPCODE_TXD: /* GL_NV_fragment_program only */
1414 /* Texture lookup w/ partial derivatives for LOD */
1415 {
1416 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1417 fetch_vector4( ctx, &inst->SrcReg[0], machine, texcoord );
1418 fetch_vector4( ctx, &inst->SrcReg[1], machine, dtdx );
1419 fetch_vector4( ctx, &inst->SrcReg[2], machine, dtdy );
1420 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy,
1421 inst->TexSrcUnit, color );
1422 store_vector4( inst, machine, color );
1423 }
1424 break;
1425 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1426 /* Texture lookup w/ projective divide */
1427 {
1428 GLfloat texcoord[4], color[4], lambda;
1429 #if 0
1430 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1431 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1432 lambda = span->array->lambda[inst->TexSrcUnit][column];
1433 else
1434 #endif
1435 lambda = 0.0;
1436 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1437 /* Not so sure about this test - if texcoord[3] is
1438 * zero, we'd probably be fine except for an ASSERT in
1439 * IROUND_POS() which gets triggered by the inf values created.
1440 */
1441 if (texcoord[3] != 0.0) {
1442 texcoord[0] /= texcoord[3];
1443 texcoord[1] /= texcoord[3];
1444 texcoord[2] /= texcoord[3];
1445 }
1446 machine->FetchTexelLod(ctx, texcoord, lambda,
1447 inst->TexSrcUnit, color);
1448 store_vector4( inst, machine, color );
1449 }
1450 break;
1451 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1452 /* Texture lookup w/ projective divide */
1453 {
1454 GLfloat texcoord[4], color[4], lambda;
1455 #if 0
1456 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1457 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1458 lambda = span->array->lambda[inst->TexSrcUnit][column];
1459 else
1460 #endif
1461 lambda = 0.0;
1462 fetch_vector4(ctx, &inst->SrcReg[0], machine, texcoord);
1463 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1464 texcoord[3] != 0.0) {
1465 texcoord[0] /= texcoord[3];
1466 texcoord[1] /= texcoord[3];
1467 texcoord[2] /= texcoord[3];
1468 }
1469 machine->FetchTexelLod(ctx, texcoord, lambda,
1470 inst->TexSrcUnit, color);
1471 store_vector4( inst, machine, color );
1472 }
1473 break;
1474 case OPCODE_UP2H: /* unpack two 16-bit floats */
1475 {
1476 GLfloat a[4], result[4];
1477 const GLuint *rawBits = (const GLuint *) a;
1478 GLhalfNV hx, hy;
1479 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1480 hx = rawBits[0] & 0xffff;
1481 hy = rawBits[0] >> 16;
1482 result[0] = result[2] = _mesa_half_to_float(hx);
1483 result[1] = result[3] = _mesa_half_to_float(hy);
1484 store_vector4( inst, machine, result );
1485 }
1486 break;
1487 case OPCODE_UP2US: /* unpack two GLushorts */
1488 {
1489 GLfloat a[4], result[4];
1490 const GLuint *rawBits = (const GLuint *) a;
1491 GLushort usx, usy;
1492 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1493 usx = rawBits[0] & 0xffff;
1494 usy = rawBits[0] >> 16;
1495 result[0] = result[2] = usx * (1.0f / 65535.0f);
1496 result[1] = result[3] = usy * (1.0f / 65535.0f);
1497 store_vector4( inst, machine, result );
1498 }
1499 break;
1500 case OPCODE_UP4B: /* unpack four GLbytes */
1501 {
1502 GLfloat a[4], result[4];
1503 const GLuint *rawBits = (const GLuint *) a;
1504 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1505 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1506 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1507 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1508 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1509 store_vector4( inst, machine, result );
1510 }
1511 break;
1512 case OPCODE_UP4UB: /* unpack four GLubytes */
1513 {
1514 GLfloat a[4], result[4];
1515 const GLuint *rawBits = (const GLuint *) a;
1516 fetch_vector1( ctx, &inst->SrcReg[0], machine, a );
1517 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1518 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1519 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1520 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1521 store_vector4( inst, machine, result );
1522 }
1523 break;
1524 case OPCODE_XPD: /* cross product */
1525 {
1526 GLfloat a[4], b[4], result[4];
1527 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1528 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1529 result[0] = a[1] * b[2] - a[2] * b[1];
1530 result[1] = a[2] * b[0] - a[0] * b[2];
1531 result[2] = a[0] * b[1] - a[1] * b[0];
1532 result[3] = 1.0;
1533 store_vector4( inst, machine, result );
1534 }
1535 break;
1536 case OPCODE_X2D: /* 2-D matrix transform */
1537 {
1538 GLfloat a[4], b[4], c[4], result[4];
1539 fetch_vector4( ctx, &inst->SrcReg[0], machine, a );
1540 fetch_vector4( ctx, &inst->SrcReg[1], machine, b );
1541 fetch_vector4( ctx, &inst->SrcReg[2], machine, c );
1542 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1543 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1544 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1545 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1546 store_vector4( inst, machine, result );
1547 }
1548 break;
1549 case OPCODE_PRINT:
1550 {
1551 if (inst->SrcReg[0].File != -1) {
1552 GLfloat a[4];
1553 fetch_vector4( ctx, &inst->SrcReg[0], machine, a);
1554 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1555 a[0], a[1], a[2], a[3]);
1556 }
1557 else {
1558 _mesa_printf("%s\n", (const char *) inst->Data);
1559 }
1560 }
1561 break;
1562 case OPCODE_END:
1563 return GL_TRUE;
1564 default:
1565 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1566 inst->Opcode);
1567 return GL_TRUE; /* return value doesn't matter */
1568
1569 }
1570 total++;
1571 if (total > MAX_EXEC) {
1572 _mesa_problem(ctx, "Infinite loop detected in fragment program");
1573 return GL_TRUE;
1574 abort();
1575 }
1576 }
1577
1578 #if FEATURE_MESA_program_debug
1579 CurrentMachine = NULL;
1580 #endif
1581
1582 return GL_TRUE;
1583 }