We need to call _mesa_load_state_parameters() whenever a fragment program
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.2
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Regarding GL_NV_fragment_program:
27 *
28 * Portions of this software may use or implement intellectual
29 * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30 * any and all warranties with respect to such intellectual property,
31 * including any use thereof or modifications thereto.
32 */
33
34 #include "glheader.h"
35 #include "colormac.h"
36 #include "context.h"
37 #include "program_instruction.h"
38 #include "program.h"
39
40 #include "s_nvfragprog.h"
41 #include "s_span.h"
42
43
44 /* See comments below for info about this */
45 #define LAMBDA_ZERO 1
46
47 /* if 1, print some debugging info */
48 #define DEBUG_FRAG 0
49
50
51 /**
52 * Virtual machine state used during execution of a fragment programs.
53 */
54 struct fp_machine
55 {
56 GLfloat Temporaries[MAX_NV_FRAGMENT_PROGRAM_TEMPS][4];
57 GLfloat Inputs[MAX_NV_FRAGMENT_PROGRAM_INPUTS][4];
58 GLfloat Outputs[MAX_NV_FRAGMENT_PROGRAM_OUTPUTS][4];
59 GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */
60 };
61
62
#if FEATURE_MESA_program_debug
/* Machine whose registers are exposed to the debug query below.
 * While this is NULL the query leaves its output untouched.
 */
static struct fp_machine *CurrentMachine = NULL;

/**
 * GL_MESA_program_debug support: copy the current contents (4 GLfloats)
 * of one fragment program register into \p val.
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (not used here)
 * \param file   register file to read: PROGRAM_INPUT, PROGRAM_OUTPUT
 *               or PROGRAM_TEMPORARY
 * \param index  register number within that file (not range-checked)
 * \param val    receives the register value; left unchanged when there
 *               is no current machine or \p file is not supported
 */
void
_swrast_get_program_register(GLcontext *ctx, enum register_file file,
                             GLuint index, GLfloat val[4])
{
   const GLfloat *reg;

   if (!CurrentMachine)
      return;

   switch (file) {
   case PROGRAM_INPUT:
      reg = CurrentMachine->Inputs[index];
      break;
   case PROGRAM_OUTPUT:
      reg = CurrentMachine->Outputs[index];
      break;
   case PROGRAM_TEMPORARY:
      reg = CurrentMachine->Temporaries[index];
      break;
   default:
      _mesa_problem(NULL,
                    "bad register file in _swrast_get_program_register");
      return;
   }

   COPY_4V(val, reg);
}
#endif /* FEATURE_MESA_program_debug */
93
94
95 /**
96 * Fetch a texel.
97 */
98 static void
99 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
100 GLuint unit, GLfloat color[4] )
101 {
102 GLchan rgba[4];
103 SWcontext *swrast = SWRAST_CONTEXT(ctx);
104
105 /* XXX use a float-valued TextureSample routine here!!! */
106 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
107 1, (const GLfloat (*)[4]) texcoord,
108 &lambda, &rgba);
109 color[0] = CHAN_TO_FLOAT(rgba[0]);
110 color[1] = CHAN_TO_FLOAT(rgba[1]);
111 color[2] = CHAN_TO_FLOAT(rgba[2]);
112 color[3] = CHAN_TO_FLOAT(rgba[3]);
113 }
114
115
116 /**
117 * Fetch a texel with the given partial derivatives to compute a level
118 * of detail in the mipmap.
119 */
120 static void
121 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
122 const GLfloat texdx[4], const GLfloat texdy[4],
123 GLuint unit, GLfloat color[4] )
124 {
125 SWcontext *swrast = SWRAST_CONTEXT(ctx);
126 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
127 const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
128 const GLfloat texW = (GLfloat) texImg->WidthScale;
129 const GLfloat texH = (GLfloat) texImg->HeightScale;
130 GLchan rgba[4];
131
132 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
133 texdx[1], texdy[1], /* dt/dx, dt/dy */
134 texdx[3], texdy[2], /* dq/dx, dq/dy */
135 texW, texH,
136 texcoord[0], texcoord[1], texcoord[3],
137 1.0F / texcoord[3]);
138
139 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
140 1, (const GLfloat (*)[4]) texcoord,
141 &lambda, &rgba);
142 color[0] = CHAN_TO_FLOAT(rgba[0]);
143 color[1] = CHAN_TO_FLOAT(rgba[1]);
144 color[2] = CHAN_TO_FLOAT(rgba[2]);
145 color[3] = CHAN_TO_FLOAT(rgba[3]);
146 }
147
148
149 /**
150 * Return a pointer to the 4-element float vector specified by the given
151 * source register.
152 */
153 static INLINE const GLfloat *
154 get_register_pointer( GLcontext *ctx,
155 const struct prog_src_register *source,
156 const struct fp_machine *machine,
157 const struct gl_fragment_program *program )
158 {
159 switch (source->File) {
160 case PROGRAM_TEMPORARY:
161 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
162 return machine->Temporaries[source->Index];
163 case PROGRAM_INPUT:
164 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
165 return machine->Inputs[source->Index];
166 case PROGRAM_OUTPUT:
167 /* This is only for PRINT */
168 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
169 return machine->Outputs[source->Index];
170 case PROGRAM_LOCAL_PARAM:
171 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
172 return program->Base.LocalParams[source->Index];
173 case PROGRAM_ENV_PARAM:
174 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
175 return ctx->FragmentProgram.Parameters[source->Index];
176 case PROGRAM_STATE_VAR:
177 /* Fallthrough */
178 case PROGRAM_CONSTANT:
179 /* Fallthrough */
180 case PROGRAM_NAMED_PARAM:
181 ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
182 return program->Base.Parameters->ParameterValues[source->Index];
183 default:
184 _mesa_problem(ctx, "Invalid input register file %d in fp "
185 "get_register_pointer", source->File);
186 return NULL;
187 }
188 }
189
190
191 /**
192 * Fetch a 4-element float vector from the given source register.
193 * Apply swizzling and negating as needed.
194 */
195 static void
196 fetch_vector4( GLcontext *ctx,
197 const struct prog_src_register *source,
198 const struct fp_machine *machine,
199 const struct gl_fragment_program *program,
200 GLfloat result[4] )
201 {
202 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
203 ASSERT(src);
204
205 if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
206 SWIZZLE_Z, SWIZZLE_W)) {
207 /* no swizzling */
208 COPY_4V(result, src);
209 }
210 else {
211 result[0] = src[GET_SWZ(source->Swizzle, 0)];
212 result[1] = src[GET_SWZ(source->Swizzle, 1)];
213 result[2] = src[GET_SWZ(source->Swizzle, 2)];
214 result[3] = src[GET_SWZ(source->Swizzle, 3)];
215 }
216
217 if (source->NegateBase) {
218 result[0] = -result[0];
219 result[1] = -result[1];
220 result[2] = -result[2];
221 result[3] = -result[3];
222 }
223 if (source->Abs) {
224 result[0] = FABSF(result[0]);
225 result[1] = FABSF(result[1]);
226 result[2] = FABSF(result[2]);
227 result[3] = FABSF(result[3]);
228 }
229 if (source->NegateAbs) {
230 result[0] = -result[0];
231 result[1] = -result[1];
232 result[2] = -result[2];
233 result[3] = -result[3];
234 }
235 }
236
237
238 /**
239 * Fetch the derivative with respect to X for the given register.
240 * \return GL_TRUE if it was easily computed or GL_FALSE if we
241 * need to execute another instance of the program (ugh)!
242 */
243 static GLboolean
244 fetch_vector4_deriv( GLcontext *ctx,
245 const struct prog_src_register *source,
246 const SWspan *span,
247 char xOrY, GLint column, GLfloat result[4] )
248 {
249 GLfloat src[4];
250
251 ASSERT(xOrY == 'X' || xOrY == 'Y');
252
253 switch (source->Index) {
254 case FRAG_ATTRIB_WPOS:
255 if (xOrY == 'X') {
256 src[0] = 1.0;
257 src[1] = 0.0;
258 src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
259 src[3] = span->dwdx;
260 }
261 else {
262 src[0] = 0.0;
263 src[1] = 1.0;
264 src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
265 src[3] = span->dwdy;
266 }
267 break;
268 case FRAG_ATTRIB_COL0:
269 if (xOrY == 'X') {
270 src[0] = span->drdx * (1.0F / CHAN_MAXF);
271 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
272 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
273 src[3] = span->dadx * (1.0F / CHAN_MAXF);
274 }
275 else {
276 src[0] = span->drdy * (1.0F / CHAN_MAXF);
277 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
278 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
279 src[3] = span->dady * (1.0F / CHAN_MAXF);
280 }
281 break;
282 case FRAG_ATTRIB_COL1:
283 if (xOrY == 'X') {
284 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
285 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
286 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
287 src[3] = 0.0; /* XXX need this */
288 }
289 else {
290 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
291 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
292 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
293 src[3] = 0.0; /* XXX need this */
294 }
295 break;
296 case FRAG_ATTRIB_FOGC:
297 if (xOrY == 'X') {
298 src[0] = span->dfogdx;
299 src[1] = 0.0;
300 src[2] = 0.0;
301 src[3] = 0.0;
302 }
303 else {
304 src[0] = span->dfogdy;
305 src[1] = 0.0;
306 src[2] = 0.0;
307 src[3] = 0.0;
308 }
309 break;
310 case FRAG_ATTRIB_TEX0:
311 case FRAG_ATTRIB_TEX1:
312 case FRAG_ATTRIB_TEX2:
313 case FRAG_ATTRIB_TEX3:
314 case FRAG_ATTRIB_TEX4:
315 case FRAG_ATTRIB_TEX5:
316 case FRAG_ATTRIB_TEX6:
317 case FRAG_ATTRIB_TEX7:
318 if (xOrY == 'X') {
319 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
320 /* this is a little tricky - I think I've got it right */
321 const GLfloat invQ = 1.0f / (span->tex[u][3]
322 + span->texStepX[u][3] * column);
323 src[0] = span->texStepX[u][0] * invQ;
324 src[1] = span->texStepX[u][1] * invQ;
325 src[2] = span->texStepX[u][2] * invQ;
326 src[3] = span->texStepX[u][3] * invQ;
327 }
328 else {
329 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
330 /* Tricky, as above, but in Y direction */
331 const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
332 src[0] = span->texStepY[u][0] * invQ;
333 src[1] = span->texStepY[u][1] * invQ;
334 src[2] = span->texStepY[u][2] * invQ;
335 src[3] = span->texStepY[u][3] * invQ;
336 }
337 break;
338 default:
339 return GL_FALSE;
340 }
341
342 result[0] = src[GET_SWZ(source->Swizzle, 0)];
343 result[1] = src[GET_SWZ(source->Swizzle, 1)];
344 result[2] = src[GET_SWZ(source->Swizzle, 2)];
345 result[3] = src[GET_SWZ(source->Swizzle, 3)];
346
347 if (source->NegateBase) {
348 result[0] = -result[0];
349 result[1] = -result[1];
350 result[2] = -result[2];
351 result[3] = -result[3];
352 }
353 if (source->Abs) {
354 result[0] = FABSF(result[0]);
355 result[1] = FABSF(result[1]);
356 result[2] = FABSF(result[2]);
357 result[3] = FABSF(result[3]);
358 }
359 if (source->NegateAbs) {
360 result[0] = -result[0];
361 result[1] = -result[1];
362 result[2] = -result[2];
363 result[3] = -result[3];
364 }
365 return GL_TRUE;
366 }
367
368
369 /**
370 * As above, but only return result[0] element.
371 */
372 static void
373 fetch_vector1( GLcontext *ctx,
374 const struct prog_src_register *source,
375 const struct fp_machine *machine,
376 const struct gl_fragment_program *program,
377 GLfloat result[4] )
378 {
379 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
380 ASSERT(src);
381
382 result[0] = src[GET_SWZ(source->Swizzle, 0)];
383
384 if (source->NegateBase) {
385 result[0] = -result[0];
386 }
387 if (source->Abs) {
388 result[0] = FABSF(result[0]);
389 }
390 if (source->NegateAbs) {
391 result[0] = -result[0];
392 }
393 }
394
395
396 /**
397 * Test value against zero and return GT, LT, EQ or UN if NaN.
398 */
399 static INLINE GLuint
400 generate_cc( float value )
401 {
402 if (value != value)
403 return COND_UN; /* NaN */
404 if (value > 0.0F)
405 return COND_GT;
406 if (value < 0.0F)
407 return COND_LT;
408 return COND_EQ;
409 }
410
411
412 /**
413 * Test if the ccMaskRule is satisfied by the given condition code.
414 * Used to mask destination writes according to the current condition code.
415 */
416 static INLINE GLboolean
417 test_cc(GLuint condCode, GLuint ccMaskRule)
418 {
419 switch (ccMaskRule) {
420 case COND_EQ: return (condCode == COND_EQ);
421 case COND_NE: return (condCode != COND_EQ);
422 case COND_LT: return (condCode == COND_LT);
423 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
424 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
425 case COND_GT: return (condCode == COND_GT);
426 case COND_TR: return GL_TRUE;
427 case COND_FL: return GL_FALSE;
428 default: return GL_TRUE;
429 }
430 }
431
432
433 /**
434 * Store 4 floats into a register. Observe the instructions saturate and
435 * set-condition-code flags.
436 */
437 static void
438 store_vector4( const struct prog_instruction *inst,
439 struct fp_machine *machine,
440 const GLfloat value[4] )
441 {
442 const struct prog_dst_register *dest = &(inst->DstReg);
443 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
444 GLfloat *dstReg;
445 GLfloat dummyReg[4];
446 GLfloat clampedValue[4];
447 GLuint writeMask = dest->WriteMask;
448
449 switch (dest->File) {
450 case PROGRAM_OUTPUT:
451 dstReg = machine->Outputs[dest->Index];
452 break;
453 case PROGRAM_TEMPORARY:
454 dstReg = machine->Temporaries[dest->Index];
455 break;
456 case PROGRAM_WRITE_ONLY:
457 dstReg = dummyReg;
458 return;
459 default:
460 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
461 return;
462 }
463
464 #if DEBUG_FRAG
465 if (value[0] > 1.0e10 ||
466 IS_INF_OR_NAN(value[0]) ||
467 IS_INF_OR_NAN(value[1]) ||
468 IS_INF_OR_NAN(value[2]) ||
469 IS_INF_OR_NAN(value[3]) )
470 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
471 #endif
472
473 if (clamp) {
474 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
475 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
476 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
477 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
478 value = clampedValue;
479 }
480
481 if (dest->CondMask != COND_TR) {
482 /* condition codes may turn off some writes */
483 if (writeMask & WRITEMASK_X) {
484 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
485 dest->CondMask))
486 writeMask &= ~WRITEMASK_X;
487 }
488 if (writeMask & WRITEMASK_Y) {
489 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
490 dest->CondMask))
491 writeMask &= ~WRITEMASK_Y;
492 }
493 if (writeMask & WRITEMASK_Z) {
494 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
495 dest->CondMask))
496 writeMask &= ~WRITEMASK_Z;
497 }
498 if (writeMask & WRITEMASK_W) {
499 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
500 dest->CondMask))
501 writeMask &= ~WRITEMASK_W;
502 }
503 }
504
505 if (writeMask & WRITEMASK_X)
506 dstReg[0] = value[0];
507 if (writeMask & WRITEMASK_Y)
508 dstReg[1] = value[1];
509 if (writeMask & WRITEMASK_Z)
510 dstReg[2] = value[2];
511 if (writeMask & WRITEMASK_W)
512 dstReg[3] = value[3];
513
514 if (inst->CondUpdate) {
515 if (writeMask & WRITEMASK_X)
516 machine->CondCodes[0] = generate_cc(value[0]);
517 if (writeMask & WRITEMASK_Y)
518 machine->CondCodes[1] = generate_cc(value[1]);
519 if (writeMask & WRITEMASK_Z)
520 machine->CondCodes[2] = generate_cc(value[2]);
521 if (writeMask & WRITEMASK_W)
522 machine->CondCodes[3] = generate_cc(value[3]);
523 }
524 }
525
526
527 /**
528 * Initialize a new machine state instance from an existing one, adding
529 * the partial derivatives onto the input registers.
530 * Used to implement DDX and DDY instructions in non-trivial cases.
531 */
532 static void
533 init_machine_deriv( GLcontext *ctx,
534 const struct fp_machine *machine,
535 const struct gl_fragment_program *program,
536 const SWspan *span, char xOrY,
537 struct fp_machine *dMachine )
538 {
539 GLuint u;
540
541 ASSERT(xOrY == 'X' || xOrY == 'Y');
542
543 /* copy existing machine */
544 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
545
546 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
547 /* Clear temporary registers (undefined for ARB_f_p) */
548 _mesa_bzero( (void*) machine->Temporaries,
549 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
550 }
551
552 /* Add derivatives */
553 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
554 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
555 if (xOrY == 'X') {
556 wpos[0] += 1.0F;
557 wpos[1] += 0.0F;
558 wpos[2] += span->dzdx;
559 wpos[3] += span->dwdx;
560 }
561 else {
562 wpos[0] += 0.0F;
563 wpos[1] += 1.0F;
564 wpos[2] += span->dzdy;
565 wpos[3] += span->dwdy;
566 }
567 }
568 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
569 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
570 if (xOrY == 'X') {
571 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
572 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
573 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
574 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
575 }
576 else {
577 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
578 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
579 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
580 col0[3] += span->dady * (1.0F / CHAN_MAXF);
581 }
582 }
583 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
584 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
585 if (xOrY == 'X') {
586 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
587 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
588 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
589 col1[3] += 0.0; /*XXX fix */
590 }
591 else {
592 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
593 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
594 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
595 col1[3] += 0.0; /*XXX fix */
596 }
597 }
598 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
599 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
600 if (xOrY == 'X') {
601 fogc[0] += span->dfogdx;
602 }
603 else {
604 fogc[0] += span->dfogdy;
605 }
606 }
607 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
608 if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
609 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
610 /* XXX perspective-correct interpolation */
611 if (xOrY == 'X') {
612 tex[0] += span->texStepX[u][0];
613 tex[1] += span->texStepX[u][1];
614 tex[2] += span->texStepX[u][2];
615 tex[3] += span->texStepX[u][3];
616 }
617 else {
618 tex[0] += span->texStepY[u][0];
619 tex[1] += span->texStepY[u][1];
620 tex[2] += span->texStepY[u][2];
621 tex[3] += span->texStepY[u][3];
622 }
623 }
624 }
625
626 /* init condition codes */
627 dMachine->CondCodes[0] = COND_EQ;
628 dMachine->CondCodes[1] = COND_EQ;
629 dMachine->CondCodes[2] = COND_EQ;
630 dMachine->CondCodes[3] = COND_EQ;
631 }
632
633
634 /**
635 * Execute the given fragment program.
636 * NOTE: we do everything in single-precision floating point; we don't
637 * currently observe the single/half/fixed-precision qualifiers.
638 * \param ctx - rendering context
639 * \param program - the fragment program to execute
640 * \param machine - machine state (register file)
641 * \param maxInst - max number of instructions to execute
642 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
643 */
644 static GLboolean
645 execute_program( GLcontext *ctx,
646 const struct gl_fragment_program *program, GLuint maxInst,
647 struct fp_machine *machine, const SWspan *span,
648 GLuint column )
649 {
650 GLuint pc;
651
652 #if DEBUG_FRAG
653 printf("execute fragment program --------------------\n");
654 #endif
655
656 for (pc = 0; pc < maxInst; pc++) {
657 const struct prog_instruction *inst = program->Base.Instructions + pc;
658
659 if (ctx->FragmentProgram.CallbackEnabled &&
660 ctx->FragmentProgram.Callback) {
661 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
662 ctx->FragmentProgram.Callback(program->Base.Target,
663 ctx->FragmentProgram.CallbackData);
664 }
665
666 #if DEBUG_FRAG
667 _mesa_print_instruction(inst);
668 #endif
669 switch (inst->Opcode) {
670 case OPCODE_ABS:
671 {
672 GLfloat a[4], result[4];
673 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
674 result[0] = FABSF(a[0]);
675 result[1] = FABSF(a[1]);
676 result[2] = FABSF(a[2]);
677 result[3] = FABSF(a[3]);
678 store_vector4( inst, machine, result );
679 }
680 break;
681 case OPCODE_ADD:
682 {
683 GLfloat a[4], b[4], result[4];
684 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
685 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
686 result[0] = a[0] + b[0];
687 result[1] = a[1] + b[1];
688 result[2] = a[2] + b[2];
689 result[3] = a[3] + b[3];
690 store_vector4( inst, machine, result );
691 #if DEBUG_FRAG
692 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
693 result[0], result[1], result[2], result[3],
694 a[0], a[1], a[2], a[3],
695 b[0], b[1], b[2], b[3]);
696 #endif
697 }
698 break;
699 case OPCODE_CMP:
700 {
701 GLfloat a[4], b[4], c[4], result[4];
702 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
703 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
704 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
705 result[0] = a[0] < 0.0F ? b[0] : c[0];
706 result[1] = a[1] < 0.0F ? b[1] : c[1];
707 result[2] = a[2] < 0.0F ? b[2] : c[2];
708 result[3] = a[3] < 0.0F ? b[3] : c[3];
709 store_vector4( inst, machine, result );
710 }
711 break;
712 case OPCODE_COS:
713 {
714 GLfloat a[4], result[4];
715 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
716 result[0] = result[1] = result[2] = result[3]
717 = (GLfloat) _mesa_cos(a[0]);
718 store_vector4( inst, machine, result );
719 }
720 break;
721 case OPCODE_DDX: /* Partial derivative with respect to X */
722 {
723 GLfloat a[4], aNext[4], result[4];
724 struct fp_machine dMachine;
725 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
726 column, result)) {
727 /* This is tricky. Make a copy of the current machine state,
728 * increment the input registers by the dx or dy partial
729 * derivatives, then re-execute the program up to the
730 * preceeding instruction, then fetch the source register.
731 * Finally, find the difference in the register values for
732 * the original and derivative runs.
733 */
734 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
735 init_machine_deriv(ctx, machine, program, span,
736 'X', &dMachine);
737 execute_program(ctx, program, pc, &dMachine, span, column);
738 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
739 result[0] = aNext[0] - a[0];
740 result[1] = aNext[1] - a[1];
741 result[2] = aNext[2] - a[2];
742 result[3] = aNext[3] - a[3];
743 }
744 store_vector4( inst, machine, result );
745 }
746 break;
747 case OPCODE_DDY: /* Partial derivative with respect to Y */
748 {
749 GLfloat a[4], aNext[4], result[4];
750 struct fp_machine dMachine;
751 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
752 column, result)) {
753 init_machine_deriv(ctx, machine, program, span,
754 'Y', &dMachine);
755 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
756 execute_program(ctx, program, pc, &dMachine, span, column);
757 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
758 result[0] = aNext[0] - a[0];
759 result[1] = aNext[1] - a[1];
760 result[2] = aNext[2] - a[2];
761 result[3] = aNext[3] - a[3];
762 }
763 store_vector4( inst, machine, result );
764 }
765 break;
766 case OPCODE_DP3:
767 {
768 GLfloat a[4], b[4], result[4];
769 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
770 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
771 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
772 store_vector4( inst, machine, result );
773 #if DEBUG_FRAG
774 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
775 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
776 #endif
777 }
778 break;
779 case OPCODE_DP4:
780 {
781 GLfloat a[4], b[4], result[4];
782 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
783 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
784 result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
785 store_vector4( inst, machine, result );
786 #if DEBUG_FRAG
787 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
788 result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
789 #endif
790 }
791 break;
792 case OPCODE_DPH:
793 {
794 GLfloat a[4], b[4], result[4];
795 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
796 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
797 result[0] = result[1] = result[2] = result[3] =
798 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
799 store_vector4( inst, machine, result );
800 }
801 break;
802 case OPCODE_DST: /* Distance vector */
803 {
804 GLfloat a[4], b[4], result[4];
805 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
806 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
807 result[0] = 1.0F;
808 result[1] = a[1] * b[1];
809 result[2] = a[2];
810 result[3] = b[3];
811 store_vector4( inst, machine, result );
812 }
813 break;
814 case OPCODE_EX2: /* Exponential base 2 */
815 {
816 GLfloat a[4], result[4];
817 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
818 result[0] = result[1] = result[2] = result[3] =
819 (GLfloat) _mesa_pow(2.0, a[0]);
820 store_vector4( inst, machine, result );
821 }
822 break;
823 case OPCODE_FLR:
824 {
825 GLfloat a[4], result[4];
826 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
827 result[0] = FLOORF(a[0]);
828 result[1] = FLOORF(a[1]);
829 result[2] = FLOORF(a[2]);
830 result[3] = FLOORF(a[3]);
831 store_vector4( inst, machine, result );
832 }
833 break;
834 case OPCODE_FRC:
835 {
836 GLfloat a[4], result[4];
837 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
838 result[0] = a[0] - FLOORF(a[0]);
839 result[1] = a[1] - FLOORF(a[1]);
840 result[2] = a[2] - FLOORF(a[2]);
841 result[3] = a[3] - FLOORF(a[3]);
842 store_vector4( inst, machine, result );
843 }
844 break;
845 case OPCODE_KIL_NV: /* NV_f_p only */
846 {
847 const GLuint swizzle = inst->DstReg.CondSwizzle;
848 const GLuint condMask = inst->DstReg.CondMask;
849 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
850 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
851 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
852 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
853 return GL_FALSE;
854 }
855 }
856 break;
857 case OPCODE_KIL: /* ARB_f_p only */
858 {
859 GLfloat a[4];
860 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
861 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
862 return GL_FALSE;
863 }
864 }
865 break;
866 case OPCODE_LG2: /* log base 2 */
867 {
868 GLfloat a[4], result[4];
869 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
870 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
871 store_vector4( inst, machine, result );
872 }
873 break;
874 case OPCODE_LIT:
875 {
876 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
877 GLfloat a[4], result[4];
878 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
879 a[0] = MAX2(a[0], 0.0F);
880 a[1] = MAX2(a[1], 0.0F);
881 /* XXX ARB version clamps a[3], NV version doesn't */
882 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
883 result[0] = 1.0F;
884 result[1] = a[0];
885 /* XXX we could probably just use pow() here */
886 if (a[0] > 0.0F) {
887 if (a[1] == 0.0 && a[3] == 0.0)
888 result[2] = 1.0;
889 else
890 result[2] = EXPF(a[3] * LOGF(a[1]));
891 }
892 else {
893 result[2] = 0.0;
894 }
895 result[3] = 1.0F;
896 store_vector4( inst, machine, result );
897 #if DEBUG_FRAG
898 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
899 result[0], result[1], result[2], result[3],
900 a[0], a[1], a[2], a[3]);
901 #endif
902 }
903 break;
904 case OPCODE_LRP:
905 {
906 GLfloat a[4], b[4], c[4], result[4];
907 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
908 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
909 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
910 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
911 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
912 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
913 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
914 store_vector4( inst, machine, result );
915 #if DEBUG_FRAG
916 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
917 "(%g %g %g %g), (%g %g %g %g)\n",
918 result[0], result[1], result[2], result[3],
919 a[0], a[1], a[2], a[3],
920 b[0], b[1], b[2], b[3],
921 c[0], c[1], c[2], c[3]);
922 #endif
923 }
924 break;
925 case OPCODE_MAD:
926 {
927 GLfloat a[4], b[4], c[4], result[4];
928 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
929 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
930 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
931 result[0] = a[0] * b[0] + c[0];
932 result[1] = a[1] * b[1] + c[1];
933 result[2] = a[2] * b[2] + c[2];
934 result[3] = a[3] * b[3] + c[3];
935 store_vector4( inst, machine, result );
936 #if DEBUG_FRAG
937 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
938 "(%g %g %g %g) + (%g %g %g %g)\n",
939 result[0], result[1], result[2], result[3],
940 a[0], a[1], a[2], a[3],
941 b[0], b[1], b[2], b[3],
942 c[0], c[1], c[2], c[3]);
943 #endif
944 }
945 break;
946 case OPCODE_MAX:
947 {
948 GLfloat a[4], b[4], result[4];
949 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
950 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
951 result[0] = MAX2(a[0], b[0]);
952 result[1] = MAX2(a[1], b[1]);
953 result[2] = MAX2(a[2], b[2]);
954 result[3] = MAX2(a[3], b[3]);
955 store_vector4( inst, machine, result );
956 #if DEBUG_FRAG
957 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
958 result[0], result[1], result[2], result[3],
959 a[0], a[1], a[2], a[3],
960 b[0], b[1], b[2], b[3]);
961 #endif
962 }
963 break;
964 case OPCODE_MIN:
965 {
966 GLfloat a[4], b[4], result[4];
967 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
968 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
969 result[0] = MIN2(a[0], b[0]);
970 result[1] = MIN2(a[1], b[1]);
971 result[2] = MIN2(a[2], b[2]);
972 result[3] = MIN2(a[3], b[3]);
973 store_vector4( inst, machine, result );
974 }
975 break;
976 case OPCODE_MOV:
977 {
978 GLfloat result[4];
979 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
980 store_vector4( inst, machine, result );
981 #if DEBUG_FRAG
982 printf("MOV (%g %g %g %g)\n",
983 result[0], result[1], result[2], result[3]);
984 #endif
985 }
986 break;
987 case OPCODE_MUL:
988 {
989 GLfloat a[4], b[4], result[4];
990 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
991 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
992 result[0] = a[0] * b[0];
993 result[1] = a[1] * b[1];
994 result[2] = a[2] * b[2];
995 result[3] = a[3] * b[3];
996 store_vector4( inst, machine, result );
997 #if DEBUG_FRAG
998 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
999 result[0], result[1], result[2], result[3],
1000 a[0], a[1], a[2], a[3],
1001 b[0], b[1], b[2], b[3]);
1002 #endif
1003 }
1004 break;
1005 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1006 {
1007 GLfloat a[4], result[4];
1008 GLhalfNV hx, hy;
1009 GLuint *rawResult = (GLuint *) result;
1010 GLuint twoHalves;
1011 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1012 hx = _mesa_float_to_half(a[0]);
1013 hy = _mesa_float_to_half(a[1]);
1014 twoHalves = hx | (hy << 16);
1015 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1016 = twoHalves;
1017 store_vector4( inst, machine, result );
1018 }
1019 break;
1020 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1021 {
1022 GLfloat a[4], result[4];
1023 GLuint usx, usy, *rawResult = (GLuint *) result;
1024 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1025 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1026 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1027 usx = IROUND(a[0] * 65535.0F);
1028 usy = IROUND(a[1] * 65535.0F);
1029 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1030 = usx | (usy << 16);
1031 store_vector4( inst, machine, result );
1032 }
1033 break;
1034 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1035 {
1036 GLfloat a[4], result[4];
1037 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1038 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1039 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1040 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1041 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1042 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1043 ubx = IROUND(127.0F * a[0] + 128.0F);
1044 uby = IROUND(127.0F * a[1] + 128.0F);
1045 ubz = IROUND(127.0F * a[2] + 128.0F);
1046 ubw = IROUND(127.0F * a[3] + 128.0F);
1047 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1048 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1049 store_vector4( inst, machine, result );
1050 }
1051 break;
1052 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1053 {
1054 GLfloat a[4], result[4];
1055 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1056 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1057 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1058 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1059 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1060 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1061 ubx = IROUND(255.0F * a[0]);
1062 uby = IROUND(255.0F * a[1]);
1063 ubz = IROUND(255.0F * a[2]);
1064 ubw = IROUND(255.0F * a[3]);
1065 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1066 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1067 store_vector4( inst, machine, result );
1068 }
1069 break;
1070 case OPCODE_POW:
1071 {
1072 GLfloat a[4], b[4], result[4];
1073 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1074 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1075 result[0] = result[1] = result[2] = result[3]
1076 = (GLfloat)_mesa_pow(a[0], b[0]);
1077 store_vector4( inst, machine, result );
1078 }
1079 break;
1080 case OPCODE_RCP:
1081 {
1082 GLfloat a[4], result[4];
1083 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1084 #if DEBUG_FRAG
1085 if (a[0] == 0)
1086 printf("RCP(0)\n");
1087 else if (IS_INF_OR_NAN(a[0]))
1088 printf("RCP(inf)\n");
1089 #endif
1090 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1091 store_vector4( inst, machine, result );
1092 }
1093 break;
1094 case OPCODE_RFL: /* reflection vector */
1095 {
1096 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1097 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1098 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1099 tmpW = DOT3(axis, axis);
1100 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1101 result[0] = tmpX * axis[0] - dir[0];
1102 result[1] = tmpX * axis[1] - dir[1];
1103 result[2] = tmpX * axis[2] - dir[2];
1104 /* result[3] is never written! XXX enforce in parser! */
1105 store_vector4( inst, machine, result );
1106 }
1107 break;
1108 case OPCODE_RSQ: /* 1 / sqrt() */
1109 {
1110 GLfloat a[4], result[4];
1111 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1112 a[0] = FABSF(a[0]);
1113 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1114 store_vector4( inst, machine, result );
1115 #if DEBUG_FRAG
1116 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1117 #endif
1118 }
1119 break;
1120 case OPCODE_SCS: /* sine and cos */
1121 {
1122 GLfloat a[4], result[4];
1123 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1124 result[0] = (GLfloat)_mesa_cos(a[0]);
1125 result[1] = (GLfloat)_mesa_sin(a[0]);
1126 result[2] = 0.0; /* undefined! */
1127 result[3] = 0.0; /* undefined! */
1128 store_vector4( inst, machine, result );
1129 }
1130 break;
1131 case OPCODE_SEQ: /* set on equal */
1132 {
1133 GLfloat a[4], b[4], result[4];
1134 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1135 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1136 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1137 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1138 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1139 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1140 store_vector4( inst, machine, result );
1141 }
1142 break;
1143 case OPCODE_SFL: /* set false, operands ignored */
1144 {
1145 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1146 store_vector4( inst, machine, result );
1147 }
1148 break;
1149 case OPCODE_SGE: /* set on greater or equal */
1150 {
1151 GLfloat a[4], b[4], result[4];
1152 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1153 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1154 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1155 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1156 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1157 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1158 store_vector4( inst, machine, result );
1159 }
1160 break;
1161 case OPCODE_SGT: /* set on greater */
1162 {
1163 GLfloat a[4], b[4], result[4];
1164 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1165 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1166 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1167 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1168 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1169 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1170 store_vector4( inst, machine, result );
1171 }
1172 break;
1173 case OPCODE_SIN:
1174 {
1175 GLfloat a[4], result[4];
1176 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1177 result[0] = result[1] = result[2] = result[3]
1178 = (GLfloat) _mesa_sin(a[0]);
1179 store_vector4( inst, machine, result );
1180 }
1181 break;
1182 case OPCODE_SLE: /* set on less or equal */
1183 {
1184 GLfloat a[4], b[4], result[4];
1185 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1186 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1187 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1188 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1189 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1190 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1191 store_vector4( inst, machine, result );
1192 }
1193 break;
1194 case OPCODE_SLT: /* set on less */
1195 {
1196 GLfloat a[4], b[4], result[4];
1197 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1198 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1199 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1200 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1201 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1202 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1203 store_vector4( inst, machine, result );
1204 }
1205 break;
1206 case OPCODE_SNE: /* set on not equal */
1207 {
1208 GLfloat a[4], b[4], result[4];
1209 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1210 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1211 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1212 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1213 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1214 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1215 store_vector4( inst, machine, result );
1216 }
1217 break;
1218 case OPCODE_STR: /* set true, operands ignored */
1219 {
1220 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1221 store_vector4( inst, machine, result );
1222 }
1223 break;
1224 case OPCODE_SUB:
1225 {
1226 GLfloat a[4], b[4], result[4];
1227 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1228 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1229 result[0] = a[0] - b[0];
1230 result[1] = a[1] - b[1];
1231 result[2] = a[2] - b[2];
1232 result[3] = a[3] - b[3];
1233 store_vector4( inst, machine, result );
1234 #if DEBUG_FRAG
1235 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1236 result[0], result[1], result[2], result[3],
1237 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1238 #endif
1239 }
1240 break;
1241 case OPCODE_SWZ: /* extended swizzle */
1242 {
1243 const struct prog_src_register *source = &inst->SrcReg[0];
1244 const GLfloat *src = get_register_pointer(ctx, source,
1245 machine, program);
1246 GLfloat result[4];
1247 GLuint i;
1248 for (i = 0; i < 4; i++) {
1249 const GLuint swz = GET_SWZ(source->Swizzle, i);
1250 if (swz == SWIZZLE_ZERO)
1251 result[i] = 0.0;
1252 else if (swz == SWIZZLE_ONE)
1253 result[i] = 1.0;
1254 else {
1255 ASSERT(swz >= 0);
1256 ASSERT(swz <= 3);
1257 result[i] = src[swz];
1258 }
1259 if (source->NegateBase & (1 << i))
1260 result[i] = -result[i];
1261 }
1262 store_vector4( inst, machine, result );
1263 }
1264 break;
1265 case OPCODE_TEX: /* Both ARB and NV frag prog */
1266 /* Texel lookup */
1267 {
1268 /* Note: only use the precomputed lambda value when we're
1269 * sampling texture unit [K] with texcoord[K].
1270 * Otherwise, the lambda value may have no relation to the
1271 * instruction's texcoord or texture image. Using the wrong
1272 * lambda is usually bad news.
1273 * The rest of the time, just use zero (until we get a more
1274 * sophisticated way of computing lambda).
1275 */
1276 GLfloat coord[4], color[4], lambda;
1277 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1278 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1279 lambda = span->array->lambda[inst->TexSrcUnit][column];
1280 else
1281 lambda = 0.0;
1282 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1283 fetch_texel( ctx, coord, lambda, inst->TexSrcUnit, color );
1284 #if DEBUG_FRAG
1285 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1286 "lod %f\n",
1287 color[0], color[1], color[2], color[3], inst->TexSrcUnit,
1288 coord[0], coord[1], coord[2], coord[3], lambda);
1289 #endif
1290 store_vector4( inst, machine, color );
1291 }
1292 break;
1293 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1294 /* Texel lookup with LOD bias */
1295 {
1296 GLfloat coord[4], color[4], lambda, bias;
1297 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1298 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1299 lambda = span->array->lambda[inst->TexSrcUnit][column];
1300 else
1301 lambda = 0.0;
1302 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1303 /* coord[3] is the bias to add to lambda */
1304 bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1305 + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1306 + coord[3];
1307 fetch_texel(ctx, coord, lambda + bias, inst->TexSrcUnit, color);
1308 store_vector4( inst, machine, color );
1309 }
1310 break;
1311 case OPCODE_TXD: /* GL_NV_fragment_program only */
1312 /* Texture lookup w/ partial derivatives for LOD */
1313 {
1314 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1315 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1316 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1317 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1318 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1319 color );
1320 store_vector4( inst, machine, color );
1321 }
1322 break;
1323 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1324 /* Texture lookup w/ projective divide */
1325 {
1326 GLfloat texcoord[4], color[4], lambda;
1327 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1328 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1329 lambda = span->array->lambda[inst->TexSrcUnit][column];
1330 else
1331 lambda = 0.0;
1332 fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1333 /* Not so sure about this test - if texcoord[3] is
1334 * zero, we'd probably be fine except for an ASSERT in
1335 * IROUND_POS() which gets triggered by the inf values created.
1336 */
1337 if (texcoord[3] != 0.0) {
1338 texcoord[0] /= texcoord[3];
1339 texcoord[1] /= texcoord[3];
1340 texcoord[2] /= texcoord[3];
1341 }
1342 fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1343 store_vector4( inst, machine, color );
1344 }
1345 break;
1346 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1347 /* Texture lookup w/ projective divide */
1348 {
1349 GLfloat texcoord[4], color[4], lambda;
1350 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1351 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1352 lambda = span->array->lambda[inst->TexSrcUnit][column];
1353 else
1354 lambda = 0.0;
1355 fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1356 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1357 texcoord[3] != 0.0) {
1358 texcoord[0] /= texcoord[3];
1359 texcoord[1] /= texcoord[3];
1360 texcoord[2] /= texcoord[3];
1361 }
1362 fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1363 store_vector4( inst, machine, color );
1364 }
1365 break;
1366 case OPCODE_UP2H: /* unpack two 16-bit floats */
1367 {
1368 GLfloat a[4], result[4];
1369 const GLuint *rawBits = (const GLuint *) a;
1370 GLhalfNV hx, hy;
1371 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1372 hx = rawBits[0] & 0xffff;
1373 hy = rawBits[0] >> 16;
1374 result[0] = result[2] = _mesa_half_to_float(hx);
1375 result[1] = result[3] = _mesa_half_to_float(hy);
1376 store_vector4( inst, machine, result );
1377 }
1378 break;
1379 case OPCODE_UP2US: /* unpack two GLushorts */
1380 {
1381 GLfloat a[4], result[4];
1382 const GLuint *rawBits = (const GLuint *) a;
1383 GLushort usx, usy;
1384 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1385 usx = rawBits[0] & 0xffff;
1386 usy = rawBits[0] >> 16;
1387 result[0] = result[2] = usx * (1.0f / 65535.0f);
1388 result[1] = result[3] = usy * (1.0f / 65535.0f);
1389 store_vector4( inst, machine, result );
1390 }
1391 break;
1392 case OPCODE_UP4B: /* unpack four GLbytes */
1393 {
1394 GLfloat a[4], result[4];
1395 const GLuint *rawBits = (const GLuint *) a;
1396 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1397 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1398 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1399 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1400 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1401 store_vector4( inst, machine, result );
1402 }
1403 break;
1404 case OPCODE_UP4UB: /* unpack four GLubytes */
1405 {
1406 GLfloat a[4], result[4];
1407 const GLuint *rawBits = (const GLuint *) a;
1408 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1409 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1410 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1411 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1412 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1413 store_vector4( inst, machine, result );
1414 }
1415 break;
1416 case OPCODE_XPD: /* cross product */
1417 {
1418 GLfloat a[4], b[4], result[4];
1419 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1420 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1421 result[0] = a[1] * b[2] - a[2] * b[1];
1422 result[1] = a[2] * b[0] - a[0] * b[2];
1423 result[2] = a[0] * b[1] - a[1] * b[0];
1424 result[3] = 1.0;
1425 store_vector4( inst, machine, result );
1426 }
1427 break;
1428 case OPCODE_X2D: /* 2-D matrix transform */
1429 {
1430 GLfloat a[4], b[4], c[4], result[4];
1431 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1432 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1433 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1434 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1435 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1436 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1437 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1438 store_vector4( inst, machine, result );
1439 }
1440 break;
1441 case OPCODE_PRINT:
1442 {
1443 if (inst->SrcReg[0].File != -1) {
1444 GLfloat a[4];
1445 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1446 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1447 a[0], a[1], a[2], a[3]);
1448 }
1449 else {
1450 _mesa_printf("%s\n", (const char *) inst->Data);
1451 }
1452 }
1453 break;
1454 case OPCODE_END:
1455 return GL_TRUE;
1456 default:
1457 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1458 inst->Opcode);
1459 return GL_TRUE; /* return value doesn't matter */
1460 }
1461 }
1462 return GL_TRUE;
1463 }
1464
1465
1466 /**
1467 * Initialize the virtual fragment program machine state prior to running
1468 * fragment program on a fragment. This involves initializing the input
1469 * registers, condition codes, etc.
1470 * \param machine the virtual machine state to init
1471 * \param program the fragment program we're about to run
1472 * \param span the span of pixels we'll operate on
1473 * \param col which element (column) of the span we'll operate on
1474 */
1475 static void
1476 init_machine( GLcontext *ctx, struct fp_machine *machine,
1477 const struct gl_fragment_program *program,
1478 const SWspan *span, GLuint col )
1479 {
1480 GLuint inputsRead = program->Base.InputsRead;
1481 GLuint u;
1482
1483 if (ctx->FragmentProgram.CallbackEnabled)
1484 inputsRead = ~0;
1485
1486 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1487 /* Clear temporary registers (undefined for ARB_f_p) */
1488 _mesa_bzero(machine->Temporaries,
1489 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1490 }
1491
1492 /* Load input registers */
1493 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1494 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1495 ASSERT(span->arrayMask & SPAN_Z);
1496 if (span->arrayMask & SPAN_XY) {
1497 wpos[0] = (GLfloat) span->array->x[col];
1498 wpos[1] = (GLfloat) span->array->y[col];
1499 }
1500 else {
1501 wpos[0] = (GLfloat) span->x + col;
1502 wpos[1] = (GLfloat) span->y;
1503 }
1504 wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1505 wpos[3] = span->w + col * span->dwdx;
1506 }
1507 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1508 ASSERT(span->arrayMask & SPAN_RGBA);
1509 COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1510 span->array->color.sz4.rgba[col]);
1511 }
1512 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1513 ASSERT(span->arrayMask & SPAN_SPEC);
1514 COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1515 span->array->color.sz4.spec[col]);
1516 }
1517 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1518 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1519 ASSERT(span->arrayMask & SPAN_FOG);
1520 fogc[0] = span->array->fog[col];
1521 fogc[1] = 0.0F;
1522 fogc[2] = 0.0F;
1523 fogc[3] = 0.0F;
1524 }
1525 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1526 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1527 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1528 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1529 COPY_4V(tex, span->array->texcoords[u][col]);
1530 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1531 }
1532 }
1533
1534 /* init condition codes */
1535 machine->CondCodes[0] = COND_EQ;
1536 machine->CondCodes[1] = COND_EQ;
1537 machine->CondCodes[2] = COND_EQ;
1538 machine->CondCodes[3] = COND_EQ;
1539 }
1540
1541
1542 /**
1543 * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1544 */
1545 static void
1546 run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1547 {
1548 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1549 struct fp_machine machine;
1550 GLuint i;
1551
1552 CurrentMachine = &machine;
1553
1554 for (i = start; i < end; i++) {
1555 if (span->array->mask[i]) {
1556 init_machine(ctx, &machine, program, span, i);
1557
1558 if (execute_program(ctx, program, ~0, &machine, span, i)) {
1559 /* Store result color */
1560 COPY_4V(span->array->color.sz4.rgba[i],
1561 machine.Outputs[FRAG_RESULT_COLR]);
1562
1563 /* Store result depth/z */
1564 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1565 const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1566 if (depth <= 0.0)
1567 span->array->z[i] = 0;
1568 else if (depth >= 1.0)
1569 span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1570 else
1571 span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1572 }
1573 }
1574 else {
1575 /* killed fragment */
1576 span->array->mask[i] = GL_FALSE;
1577 span->writeAll = GL_FALSE;
1578 }
1579 }
1580 }
1581
1582 CurrentMachine = NULL;
1583 }
1584
1585
1586 /**
1587 * Execute the current fragment program for all the fragments
1588 * in the given span.
1589 */
1590 void
1591 _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1592 {
1593 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1594
1595 /* incoming colors should be floats */
1596 ASSERT(span->array->ChanType == GL_FLOAT);
1597
1598 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1599
1600 run_program(ctx, span, 0, span->end);
1601
1602 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1603 span->interpMask &= ~SPAN_Z;
1604 span->arrayMask |= SPAN_Z;
1605 }
1606
1607 ctx->_CurrentProgram = 0;
1608 }
1609