New SWspan and SWspanarrays typedefs.
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.1
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Regarding GL_NV_fragment_program:
27 *
28 * Portions of this software may use or implement intellectual
29 * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30 * any and all warranties with respect to such intellectual property,
31 * including any use thereof or modifications thereto.
32 */
33
34 #include "glheader.h"
35 #include "colormac.h"
36 #include "context.h"
37 #include "program_instruction.h"
38 #include "program.h"
39
40 #include "s_nvfragprog.h"
41 #include "s_span.h"
42
43
44 /* if 1, print some debugging info */
45 #define DEBUG_FRAG 0
46
47 /**
48 * Fetch a texel.
49 */
50 static void
51 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
52 GLuint unit, GLfloat color[4] )
53 {
54 GLchan rgba[4];
55 SWcontext *swrast = SWRAST_CONTEXT(ctx);
56
57 /* XXX use a float-valued TextureSample routine here!!! */
58 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
59 1, (const GLfloat (*)[4]) texcoord,
60 &lambda, &rgba);
61 color[0] = CHAN_TO_FLOAT(rgba[0]);
62 color[1] = CHAN_TO_FLOAT(rgba[1]);
63 color[2] = CHAN_TO_FLOAT(rgba[2]);
64 color[3] = CHAN_TO_FLOAT(rgba[3]);
65 }
66
67
68 /**
69 * Fetch a texel with the given partial derivatives to compute a level
70 * of detail in the mipmap.
71 */
72 static void
73 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
74 const GLfloat texdx[4], const GLfloat texdy[4],
75 GLuint unit, GLfloat color[4] )
76 {
77 SWcontext *swrast = SWRAST_CONTEXT(ctx);
78 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
79 const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
80 const GLfloat texW = (GLfloat) texImg->WidthScale;
81 const GLfloat texH = (GLfloat) texImg->HeightScale;
82 GLchan rgba[4];
83
84 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
85 texdx[1], texdy[1], /* dt/dx, dt/dy */
86 texdx[3], texdy[2], /* dq/dx, dq/dy */
87 texW, texH,
88 texcoord[0], texcoord[1], texcoord[3],
89 1.0F / texcoord[3]);
90
91 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
92 1, (const GLfloat (*)[4]) texcoord,
93 &lambda, &rgba);
94 color[0] = CHAN_TO_FLOAT(rgba[0]);
95 color[1] = CHAN_TO_FLOAT(rgba[1]);
96 color[2] = CHAN_TO_FLOAT(rgba[2]);
97 color[3] = CHAN_TO_FLOAT(rgba[3]);
98 }
99
100
101 /**
102 * Return a pointer to the 4-element float vector specified by the given
103 * source register.
104 */
105 static INLINE const GLfloat *
106 get_register_pointer( GLcontext *ctx,
107 const struct prog_src_register *source,
108 const struct fp_machine *machine,
109 const struct gl_fragment_program *program )
110 {
111 const GLfloat *src;
112 switch (source->File) {
113 case PROGRAM_TEMPORARY:
114 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
115 src = machine->Temporaries[source->Index];
116 break;
117 case PROGRAM_INPUT:
118 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
119 src = machine->Inputs[source->Index];
120 break;
121 case PROGRAM_OUTPUT:
122 /* This is only for PRINT */
123 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
124 src = machine->Outputs[source->Index];
125 break;
126 case PROGRAM_LOCAL_PARAM:
127 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
128 src = program->Base.LocalParams[source->Index];
129 break;
130 case PROGRAM_ENV_PARAM:
131 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
132 src = ctx->FragmentProgram.Parameters[source->Index];
133 break;
134 case PROGRAM_STATE_VAR:
135 /* Fallthrough */
136 case PROGRAM_NAMED_PARAM:
137 ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
138 src = program->Base.Parameters->ParameterValues[source->Index];
139 break;
140 default:
141 _mesa_problem(ctx, "Invalid input register file %d in fetch_vector4", source->File);
142 src = NULL;
143 }
144 return src;
145 }
146
147
148 /**
149 * Fetch a 4-element float vector from the given source register.
150 * Apply swizzling and negating as needed.
151 */
152 static void
153 fetch_vector4( GLcontext *ctx,
154 const struct prog_src_register *source,
155 const struct fp_machine *machine,
156 const struct gl_fragment_program *program,
157 GLfloat result[4] )
158 {
159 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
160 ASSERT(src);
161
162 result[0] = src[GET_SWZ(source->Swizzle, 0)];
163 result[1] = src[GET_SWZ(source->Swizzle, 1)];
164 result[2] = src[GET_SWZ(source->Swizzle, 2)];
165 result[3] = src[GET_SWZ(source->Swizzle, 3)];
166
167 if (source->NegateBase) {
168 result[0] = -result[0];
169 result[1] = -result[1];
170 result[2] = -result[2];
171 result[3] = -result[3];
172 }
173 if (source->Abs) {
174 result[0] = FABSF(result[0]);
175 result[1] = FABSF(result[1]);
176 result[2] = FABSF(result[2]);
177 result[3] = FABSF(result[3]);
178 }
179 if (source->NegateAbs) {
180 result[0] = -result[0];
181 result[1] = -result[1];
182 result[2] = -result[2];
183 result[3] = -result[3];
184 }
185 }
186
187
188 /**
189 * Fetch the derivative with respect to X for the given register.
190 * \return GL_TRUE if it was easily computed or GL_FALSE if we
191 * need to execute another instance of the program (ugh)!
192 */
193 static GLboolean
194 fetch_vector4_deriv( GLcontext *ctx,
195 const struct prog_src_register *source,
196 const SWspan *span,
197 char xOrY, GLint column, GLfloat result[4] )
198 {
199 GLfloat src[4];
200
201 ASSERT(xOrY == 'X' || xOrY == 'Y');
202
203 switch (source->Index) {
204 case FRAG_ATTRIB_WPOS:
205 if (xOrY == 'X') {
206 src[0] = 1.0;
207 src[1] = 0.0;
208 src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
209 src[3] = span->dwdx;
210 }
211 else {
212 src[0] = 0.0;
213 src[1] = 1.0;
214 src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
215 src[3] = span->dwdy;
216 }
217 break;
218 case FRAG_ATTRIB_COL0:
219 if (xOrY == 'X') {
220 src[0] = span->drdx * (1.0F / CHAN_MAXF);
221 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
222 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
223 src[3] = span->dadx * (1.0F / CHAN_MAXF);
224 }
225 else {
226 src[0] = span->drdy * (1.0F / CHAN_MAXF);
227 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
228 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
229 src[3] = span->dady * (1.0F / CHAN_MAXF);
230 }
231 break;
232 case FRAG_ATTRIB_COL1:
233 if (xOrY == 'X') {
234 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
235 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
236 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
237 src[3] = 0.0; /* XXX need this */
238 }
239 else {
240 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
241 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
242 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
243 src[3] = 0.0; /* XXX need this */
244 }
245 break;
246 case FRAG_ATTRIB_FOGC:
247 if (xOrY == 'X') {
248 src[0] = span->dfogdx;
249 src[1] = 0.0;
250 src[2] = 0.0;
251 src[3] = 0.0;
252 }
253 else {
254 src[0] = span->dfogdy;
255 src[1] = 0.0;
256 src[2] = 0.0;
257 src[3] = 0.0;
258 }
259 break;
260 case FRAG_ATTRIB_TEX0:
261 case FRAG_ATTRIB_TEX1:
262 case FRAG_ATTRIB_TEX2:
263 case FRAG_ATTRIB_TEX3:
264 case FRAG_ATTRIB_TEX4:
265 case FRAG_ATTRIB_TEX5:
266 case FRAG_ATTRIB_TEX6:
267 case FRAG_ATTRIB_TEX7:
268 if (xOrY == 'X') {
269 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
270 /* this is a little tricky - I think I've got it right */
271 const GLfloat invQ = 1.0f / (span->tex[u][3]
272 + span->texStepX[u][3] * column);
273 src[0] = span->texStepX[u][0] * invQ;
274 src[1] = span->texStepX[u][1] * invQ;
275 src[2] = span->texStepX[u][2] * invQ;
276 src[3] = span->texStepX[u][3] * invQ;
277 }
278 else {
279 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
280 /* Tricky, as above, but in Y direction */
281 const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
282 src[0] = span->texStepY[u][0] * invQ;
283 src[1] = span->texStepY[u][1] * invQ;
284 src[2] = span->texStepY[u][2] * invQ;
285 src[3] = span->texStepY[u][3] * invQ;
286 }
287 break;
288 default:
289 return GL_FALSE;
290 }
291
292 result[0] = src[GET_SWZ(source->Swizzle, 0)];
293 result[1] = src[GET_SWZ(source->Swizzle, 1)];
294 result[2] = src[GET_SWZ(source->Swizzle, 2)];
295 result[3] = src[GET_SWZ(source->Swizzle, 3)];
296
297 if (source->NegateBase) {
298 result[0] = -result[0];
299 result[1] = -result[1];
300 result[2] = -result[2];
301 result[3] = -result[3];
302 }
303 if (source->Abs) {
304 result[0] = FABSF(result[0]);
305 result[1] = FABSF(result[1]);
306 result[2] = FABSF(result[2]);
307 result[3] = FABSF(result[3]);
308 }
309 if (source->NegateAbs) {
310 result[0] = -result[0];
311 result[1] = -result[1];
312 result[2] = -result[2];
313 result[3] = -result[3];
314 }
315 return GL_TRUE;
316 }
317
318
319 /**
320 * As above, but only return result[0] element.
321 */
322 static void
323 fetch_vector1( GLcontext *ctx,
324 const struct prog_src_register *source,
325 const struct fp_machine *machine,
326 const struct gl_fragment_program *program,
327 GLfloat result[4] )
328 {
329 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
330 ASSERT(src);
331
332 result[0] = src[GET_SWZ(source->Swizzle, 0)];
333
334 if (source->NegateBase) {
335 result[0] = -result[0];
336 }
337 if (source->Abs) {
338 result[0] = FABSF(result[0]);
339 }
340 if (source->NegateAbs) {
341 result[0] = -result[0];
342 }
343 }
344
345
346 /**
347 * Test value against zero and return GT, LT, EQ or UN if NaN.
348 */
349 static INLINE GLuint
350 generate_cc( float value )
351 {
352 if (value != value)
353 return COND_UN; /* NaN */
354 if (value > 0.0F)
355 return COND_GT;
356 if (value < 0.0F)
357 return COND_LT;
358 return COND_EQ;
359 }
360
361
362 /**
363 * Test if the ccMaskRule is satisfied by the given condition code.
364 * Used to mask destination writes according to the current condition codee.
365 */
366 static INLINE GLboolean
367 test_cc(GLuint condCode, GLuint ccMaskRule)
368 {
369 switch (ccMaskRule) {
370 case COND_EQ: return (condCode == COND_EQ);
371 case COND_NE: return (condCode != COND_EQ);
372 case COND_LT: return (condCode == COND_LT);
373 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
374 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
375 case COND_GT: return (condCode == COND_GT);
376 case COND_TR: return GL_TRUE;
377 case COND_FL: return GL_FALSE;
378 default: return GL_TRUE;
379 }
380 }
381
382
383 /**
384 * Store 4 floats into a register. Observe the instructions saturate and
385 * set-condition-code flags.
386 */
387 static void
388 store_vector4( const struct prog_instruction *inst,
389 struct fp_machine *machine,
390 const GLfloat value[4] )
391 {
392 const struct prog_dst_register *dest = &(inst->DstReg);
393 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
394 const GLboolean updateCC = inst->CondUpdate;
395 GLfloat *dstReg;
396 GLfloat dummyReg[4];
397 GLfloat clampedValue[4];
398 GLboolean condWriteMask[4];
399 GLuint writeMask = dest->WriteMask;
400
401 switch (dest->File) {
402 case PROGRAM_OUTPUT:
403 dstReg = machine->Outputs[dest->Index];
404 break;
405 case PROGRAM_TEMPORARY:
406 dstReg = machine->Temporaries[dest->Index];
407 break;
408 case PROGRAM_WRITE_ONLY:
409 dstReg = dummyReg;
410 return;
411 default:
412 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
413 return;
414 }
415
416 #if DEBUG_FRAG
417 if (value[0] > 1.0e10 ||
418 IS_INF_OR_NAN(value[0]) ||
419 IS_INF_OR_NAN(value[1]) ||
420 IS_INF_OR_NAN(value[2]) ||
421 IS_INF_OR_NAN(value[3]) )
422 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
423 #endif
424
425 if (clamp) {
426 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
427 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
428 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
429 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
430 value = clampedValue;
431 }
432
433 if (dest->CondMask != COND_TR) {
434 condWriteMask[0] = GET_BIT(writeMask, 0)
435 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)], dest->CondMask);
436 condWriteMask[1] = GET_BIT(writeMask, 1)
437 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)], dest->CondMask);
438 condWriteMask[2] = GET_BIT(writeMask, 2)
439 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)], dest->CondMask);
440 condWriteMask[3] = GET_BIT(writeMask, 3)
441 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)], dest->CondMask);
442
443 writeMask = ((condWriteMask[0] << 0) |
444 (condWriteMask[1] << 1) |
445 (condWriteMask[2] << 2) |
446 (condWriteMask[3] << 3));
447 }
448
449 if (GET_BIT(writeMask, 0)) {
450 dstReg[0] = value[0];
451 if (updateCC)
452 machine->CondCodes[0] = generate_cc(value[0]);
453 }
454 if (GET_BIT(writeMask, 1)) {
455 dstReg[1] = value[1];
456 if (updateCC)
457 machine->CondCodes[1] = generate_cc(value[1]);
458 }
459 if (GET_BIT(writeMask, 2)) {
460 dstReg[2] = value[2];
461 if (updateCC)
462 machine->CondCodes[2] = generate_cc(value[2]);
463 }
464 if (GET_BIT(writeMask, 3)) {
465 dstReg[3] = value[3];
466 if (updateCC)
467 machine->CondCodes[3] = generate_cc(value[3]);
468 }
469 }
470
471
472 /**
473 * Initialize a new machine state instance from an existing one, adding
474 * the partial derivatives onto the input registers.
475 * Used to implement DDX and DDY instructions in non-trivial cases.
476 */
477 static void
478 init_machine_deriv( GLcontext *ctx,
479 const struct fp_machine *machine,
480 const struct gl_fragment_program *program,
481 const SWspan *span, char xOrY,
482 struct fp_machine *dMachine )
483 {
484 GLuint u;
485
486 ASSERT(xOrY == 'X' || xOrY == 'Y');
487
488 /* copy existing machine */
489 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
490
491 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
492 /* Clear temporary registers (undefined for ARB_f_p) */
493 _mesa_bzero( (void*) machine->Temporaries,
494 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
495 }
496
497 /* Add derivatives */
498 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
499 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
500 if (xOrY == 'X') {
501 wpos[0] += 1.0F;
502 wpos[1] += 0.0F;
503 wpos[2] += span->dzdx;
504 wpos[3] += span->dwdx;
505 }
506 else {
507 wpos[0] += 0.0F;
508 wpos[1] += 1.0F;
509 wpos[2] += span->dzdy;
510 wpos[3] += span->dwdy;
511 }
512 }
513 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
514 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
515 if (xOrY == 'X') {
516 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
517 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
518 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
519 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
520 }
521 else {
522 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
523 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
524 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
525 col0[3] += span->dady * (1.0F / CHAN_MAXF);
526 }
527 }
528 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
529 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
530 if (xOrY == 'X') {
531 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
532 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
533 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
534 col1[3] += 0.0; /*XXX fix */
535 }
536 else {
537 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
538 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
539 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
540 col1[3] += 0.0; /*XXX fix */
541 }
542 }
543 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
544 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
545 if (xOrY == 'X') {
546 fogc[0] += span->dfogdx;
547 }
548 else {
549 fogc[0] += span->dfogdy;
550 }
551 }
552 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
553 if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
554 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
555 /* XXX perspective-correct interpolation */
556 if (xOrY == 'X') {
557 tex[0] += span->texStepX[u][0];
558 tex[1] += span->texStepX[u][1];
559 tex[2] += span->texStepX[u][2];
560 tex[3] += span->texStepX[u][3];
561 }
562 else {
563 tex[0] += span->texStepY[u][0];
564 tex[1] += span->texStepY[u][1];
565 tex[2] += span->texStepY[u][2];
566 tex[3] += span->texStepY[u][3];
567 }
568 }
569 }
570
571 /* init condition codes */
572 dMachine->CondCodes[0] = COND_EQ;
573 dMachine->CondCodes[1] = COND_EQ;
574 dMachine->CondCodes[2] = COND_EQ;
575 dMachine->CondCodes[3] = COND_EQ;
576 }
577
578
579 /**
580 * Execute the given fragment program.
581 * NOTE: we do everything in single-precision floating point; we don't
582 * currently observe the single/half/fixed-precision qualifiers.
583 * \param ctx - rendering context
584 * \param program - the fragment program to execute
585 * \param machine - machine state (register file)
586 * \param maxInst - max number of instructions to execute
587 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
588 */
589 static GLboolean
590 execute_program( GLcontext *ctx,
591 const struct gl_fragment_program *program, GLuint maxInst,
592 struct fp_machine *machine, const SWspan *span,
593 GLuint column )
594 {
595 GLuint pc;
596
597 #if DEBUG_FRAG
598 printf("execute fragment program --------------------\n");
599 #endif
600
601 for (pc = 0; pc < maxInst; pc++) {
602 const struct prog_instruction *inst = program->Base.Instructions + pc;
603
604 if (ctx->FragmentProgram.CallbackEnabled &&
605 ctx->FragmentProgram.Callback) {
606 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
607 ctx->FragmentProgram.Callback(program->Base.Target,
608 ctx->FragmentProgram.CallbackData);
609 }
610
611 switch (inst->Opcode) {
612 case OPCODE_ABS:
613 {
614 GLfloat a[4], result[4];
615 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
616 result[0] = FABSF(a[0]);
617 result[1] = FABSF(a[1]);
618 result[2] = FABSF(a[2]);
619 result[3] = FABSF(a[3]);
620 store_vector4( inst, machine, result );
621 }
622 break;
623 case OPCODE_ADD:
624 {
625 GLfloat a[4], b[4], result[4];
626 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
627 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
628 result[0] = a[0] + b[0];
629 result[1] = a[1] + b[1];
630 result[2] = a[2] + b[2];
631 result[3] = a[3] + b[3];
632 store_vector4( inst, machine, result );
633 }
634 break;
635 case OPCODE_CMP:
636 {
637 GLfloat a[4], b[4], c[4], result[4];
638 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
639 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
640 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
641 result[0] = a[0] < 0.0F ? b[0] : c[0];
642 result[1] = a[1] < 0.0F ? b[1] : c[1];
643 result[2] = a[2] < 0.0F ? b[2] : c[2];
644 result[3] = a[3] < 0.0F ? b[3] : c[3];
645 store_vector4( inst, machine, result );
646 }
647 break;
648 case OPCODE_COS:
649 {
650 GLfloat a[4], result[4];
651 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
652 result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
653 store_vector4( inst, machine, result );
654 }
655 break;
656 case OPCODE_DDX: /* Partial derivative with respect to X */
657 {
658 GLfloat a[4], aNext[4], result[4];
659 struct fp_machine dMachine;
660 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
661 column, result)) {
662 /* This is tricky. Make a copy of the current machine state,
663 * increment the input registers by the dx or dy partial
664 * derivatives, then re-execute the program up to the
665 * preceeding instruction, then fetch the source register.
666 * Finally, find the difference in the register values for
667 * the original and derivative runs.
668 */
669 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
670 init_machine_deriv(ctx, machine, program, span,
671 'X', &dMachine);
672 execute_program(ctx, program, pc, &dMachine, span, column);
673 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
674 result[0] = aNext[0] - a[0];
675 result[1] = aNext[1] - a[1];
676 result[2] = aNext[2] - a[2];
677 result[3] = aNext[3] - a[3];
678 }
679 store_vector4( inst, machine, result );
680 }
681 break;
682 case OPCODE_DDY: /* Partial derivative with respect to Y */
683 {
684 GLfloat a[4], aNext[4], result[4];
685 struct fp_machine dMachine;
686 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
687 column, result)) {
688 init_machine_deriv(ctx, machine, program, span,
689 'Y', &dMachine);
690 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
691 execute_program(ctx, program, pc, &dMachine, span, column);
692 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
693 result[0] = aNext[0] - a[0];
694 result[1] = aNext[1] - a[1];
695 result[2] = aNext[2] - a[2];
696 result[3] = aNext[3] - a[3];
697 }
698 store_vector4( inst, machine, result );
699 }
700 break;
701 case OPCODE_DP3:
702 {
703 GLfloat a[4], b[4], result[4];
704 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
705 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
706 result[0] = result[1] = result[2] = result[3] =
707 a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
708 store_vector4( inst, machine, result );
709 #if DEBUG_FRAG
710 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
711 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
712 #endif
713 }
714 break;
715 case OPCODE_DP4:
716 {
717 GLfloat a[4], b[4], result[4];
718 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
719 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
720 result[0] = result[1] = result[2] = result[3] =
721 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
722 store_vector4( inst, machine, result );
723 #if DEBUG_FRAG
724 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
725 result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
726 #endif
727 }
728 break;
729 case OPCODE_DPH:
730 {
731 GLfloat a[4], b[4], result[4];
732 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
733 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
734 result[0] = result[1] = result[2] = result[3] =
735 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
736 store_vector4( inst, machine, result );
737 }
738 break;
739 case OPCODE_DST: /* Distance vector */
740 {
741 GLfloat a[4], b[4], result[4];
742 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
743 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
744 result[0] = 1.0F;
745 result[1] = a[1] * b[1];
746 result[2] = a[2];
747 result[3] = b[3];
748 store_vector4( inst, machine, result );
749 }
750 break;
751 case OPCODE_EX2: /* Exponential base 2 */
752 {
753 GLfloat a[4], result[4];
754 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
755 result[0] = result[1] = result[2] = result[3] =
756 (GLfloat) _mesa_pow(2.0, a[0]);
757 store_vector4( inst, machine, result );
758 }
759 break;
760 case OPCODE_FLR:
761 {
762 GLfloat a[4], result[4];
763 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
764 result[0] = FLOORF(a[0]);
765 result[1] = FLOORF(a[1]);
766 result[2] = FLOORF(a[2]);
767 result[3] = FLOORF(a[3]);
768 store_vector4( inst, machine, result );
769 }
770 break;
771 case OPCODE_FRC:
772 {
773 GLfloat a[4], result[4];
774 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
775 result[0] = a[0] - FLOORF(a[0]);
776 result[1] = a[1] - FLOORF(a[1]);
777 result[2] = a[2] - FLOORF(a[2]);
778 result[3] = a[3] - FLOORF(a[3]);
779 store_vector4( inst, machine, result );
780 }
781 break;
782 case OPCODE_KIL_NV: /* NV_f_p only */
783 {
784 const GLuint swizzle = inst->DstReg.CondSwizzle;
785 const GLuint condMask = inst->DstReg.CondMask;
786 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
787 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
788 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
789 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
790 return GL_FALSE;
791 }
792 }
793 break;
794 case OPCODE_KIL: /* ARB_f_p only */
795 {
796 GLfloat a[4];
797 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
798 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
799 return GL_FALSE;
800 }
801 }
802 break;
803 case OPCODE_LG2: /* log base 2 */
804 {
805 GLfloat a[4], result[4];
806 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
807 result[0] = result[1] = result[2] = result[3]
808 = LOG2(a[0]);
809 store_vector4( inst, machine, result );
810 }
811 break;
812 case OPCODE_LIT:
813 {
814 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
815 GLfloat a[4], result[4];
816 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
817 a[0] = MAX2(a[0], 0.0F);
818 a[1] = MAX2(a[1], 0.0F);
819 /* XXX ARB version clamps a[3], NV version doesn't */
820 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
821 result[0] = 1.0F;
822 result[1] = a[0];
823 /* XXX we could probably just use pow() here */
824 if (a[0] > 0.0F) {
825 if (a[1] == 0.0 && a[3] == 0.0)
826 result[2] = 1.0;
827 else
828 result[2] = EXPF(a[3] * LOGF(a[1]));
829 }
830 else {
831 result[2] = 0.0;
832 }
833 result[3] = 1.0F;
834 store_vector4( inst, machine, result );
835 }
836 break;
837 case OPCODE_LRP:
838 {
839 GLfloat a[4], b[4], c[4], result[4];
840 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
841 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
842 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
843 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
844 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
845 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
846 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
847 store_vector4( inst, machine, result );
848 }
849 break;
850 case OPCODE_MAD:
851 {
852 GLfloat a[4], b[4], c[4], result[4];
853 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
854 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
855 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
856 result[0] = a[0] * b[0] + c[0];
857 result[1] = a[1] * b[1] + c[1];
858 result[2] = a[2] * b[2] + c[2];
859 result[3] = a[3] * b[3] + c[3];
860 store_vector4( inst, machine, result );
861 }
862 break;
863 case OPCODE_MAX:
864 {
865 GLfloat a[4], b[4], result[4];
866 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
867 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
868 result[0] = MAX2(a[0], b[0]);
869 result[1] = MAX2(a[1], b[1]);
870 result[2] = MAX2(a[2], b[2]);
871 result[3] = MAX2(a[3], b[3]);
872 store_vector4( inst, machine, result );
873 #if DEBUG_FRAG
874 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
875 result[0], result[1], result[2], result[3],
876 a[0], a[1], a[2], a[3],
877 b[0], b[1], b[2], b[3]);
878 #endif
879 }
880 break;
881 case OPCODE_MIN:
882 {
883 GLfloat a[4], b[4], result[4];
884 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
885 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
886 result[0] = MIN2(a[0], b[0]);
887 result[1] = MIN2(a[1], b[1]);
888 result[2] = MIN2(a[2], b[2]);
889 result[3] = MIN2(a[3], b[3]);
890 store_vector4( inst, machine, result );
891 }
892 break;
893 case OPCODE_MOV:
894 {
895 GLfloat result[4];
896 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
897 store_vector4( inst, machine, result );
898 #if DEBUG_FRAG
899 printf("MOV (%g %g %g %g)\n",
900 result[0], result[1], result[2], result[3]);
901 #endif
902 }
903 break;
904 case OPCODE_MUL:
905 {
906 GLfloat a[4], b[4], result[4];
907 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
908 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
909 result[0] = a[0] * b[0];
910 result[1] = a[1] * b[1];
911 result[2] = a[2] * b[2];
912 result[3] = a[3] * b[3];
913 store_vector4( inst, machine, result );
914 #if DEBUG_FRAG
915 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
916 result[0], result[1], result[2], result[3],
917 a[0], a[1], a[2], a[3],
918 b[0], b[1], b[2], b[3]);
919 #endif
920 }
921 break;
922 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
923 {
924 GLfloat a[4], result[4];
925 GLhalfNV hx, hy;
926 GLuint *rawResult = (GLuint *) result;
927 GLuint twoHalves;
928 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
929 hx = _mesa_float_to_half(a[0]);
930 hy = _mesa_float_to_half(a[1]);
931 twoHalves = hx | (hy << 16);
932 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
933 = twoHalves;
934 store_vector4( inst, machine, result );
935 }
936 break;
937 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
938 {
939 GLfloat a[4], result[4];
940 GLuint usx, usy, *rawResult = (GLuint *) result;
941 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
942 a[0] = CLAMP(a[0], 0.0F, 1.0F);
943 a[1] = CLAMP(a[1], 0.0F, 1.0F);
944 usx = IROUND(a[0] * 65535.0F);
945 usy = IROUND(a[1] * 65535.0F);
946 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
947 = usx | (usy << 16);
948 store_vector4( inst, machine, result );
949 }
950 break;
951 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
952 {
953 GLfloat a[4], result[4];
954 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
955 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
956 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
957 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
958 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
959 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
960 ubx = IROUND(127.0F * a[0] + 128.0F);
961 uby = IROUND(127.0F * a[1] + 128.0F);
962 ubz = IROUND(127.0F * a[2] + 128.0F);
963 ubw = IROUND(127.0F * a[3] + 128.0F);
964 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
965 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
966 store_vector4( inst, machine, result );
967 }
968 break;
969 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
970 {
971 GLfloat a[4], result[4];
972 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
973 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
974 a[0] = CLAMP(a[0], 0.0F, 1.0F);
975 a[1] = CLAMP(a[1], 0.0F, 1.0F);
976 a[2] = CLAMP(a[2], 0.0F, 1.0F);
977 a[3] = CLAMP(a[3], 0.0F, 1.0F);
978 ubx = IROUND(255.0F * a[0]);
979 uby = IROUND(255.0F * a[1]);
980 ubz = IROUND(255.0F * a[2]);
981 ubw = IROUND(255.0F * a[3]);
982 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
983 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
984 store_vector4( inst, machine, result );
985 }
986 break;
987 case OPCODE_POW:
988 {
989 GLfloat a[4], b[4], result[4];
990 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
991 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
992 result[0] = result[1] = result[2] = result[3]
993 = (GLfloat)_mesa_pow(a[0], b[0]);
994 store_vector4( inst, machine, result );
995 }
996 break;
997 case OPCODE_RCP:
998 {
999 GLfloat a[4], result[4];
1000 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1001 #if DEBUG_FRAG
1002 if (a[0] == 0)
1003 printf("RCP(0)\n");
1004 else if (IS_INF_OR_NAN(a[0]))
1005 printf("RCP(inf)\n");
1006 #endif
1007 result[0] = result[1] = result[2] = result[3]
1008 = 1.0F / a[0];
1009 store_vector4( inst, machine, result );
1010 }
1011 break;
1012 case OPCODE_RFL:
1013 {
1014 GLfloat axis[4], dir[4], result[4], tmp[4];
1015 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1016 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1017 tmp[3] = axis[0] * axis[0]
1018 + axis[1] * axis[1]
1019 + axis[2] * axis[2];
1020 tmp[0] = (2.0F * (axis[0] * dir[0] +
1021 axis[1] * dir[1] +
1022 axis[2] * dir[2])) / tmp[3];
1023 result[0] = tmp[0] * axis[0] - dir[0];
1024 result[1] = tmp[0] * axis[1] - dir[1];
1025 result[2] = tmp[0] * axis[2] - dir[2];
1026 /* result[3] is never written! XXX enforce in parser! */
1027 store_vector4( inst, machine, result );
1028 }
1029 break;
1030 case OPCODE_RSQ: /* 1 / sqrt() */
1031 {
1032 GLfloat a[4], result[4];
1033 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1034 a[0] = FABSF(a[0]);
1035 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1036 store_vector4( inst, machine, result );
1037 #if DEBUG_FRAG
1038 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1039 #endif
1040 }
1041 break;
1042 case OPCODE_SCS: /* sine and cos */
1043 {
1044 GLfloat a[4], result[4];
1045 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1046 result[0] = (GLfloat)_mesa_cos(a[0]);
1047 result[1] = (GLfloat)_mesa_sin(a[0]);
1048 result[2] = 0.0; /* undefined! */
1049 result[3] = 0.0; /* undefined! */
1050 store_vector4( inst, machine, result );
1051 }
1052 break;
1053 case OPCODE_SEQ: /* set on equal */
1054 {
1055 GLfloat a[4], b[4], result[4];
1056 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1057 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1058 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1059 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1060 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1061 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1062 store_vector4( inst, machine, result );
1063 }
1064 break;
1065 case OPCODE_SFL: /* set false, operands ignored */
1066 {
1067 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1068 store_vector4( inst, machine, result );
1069 }
1070 break;
1071 case OPCODE_SGE: /* set on greater or equal */
1072 {
1073 GLfloat a[4], b[4], result[4];
1074 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1075 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1076 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1077 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1078 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1079 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1080 store_vector4( inst, machine, result );
1081 }
1082 break;
1083 case OPCODE_SGT: /* set on greater */
1084 {
1085 GLfloat a[4], b[4], result[4];
1086 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1087 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1088 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1089 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1090 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1091 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1092 store_vector4( inst, machine, result );
1093 }
1094 break;
1095 case OPCODE_SIN:
1096 {
1097 GLfloat a[4], result[4];
1098 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1099 result[0] = result[1] = result[2] =
1100 result[3] = (GLfloat)_mesa_sin(a[0]);
1101 store_vector4( inst, machine, result );
1102 }
1103 break;
1104 case OPCODE_SLE: /* set on less or equal */
1105 {
1106 GLfloat a[4], b[4], result[4];
1107 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1108 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1109 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1110 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1111 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1112 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1113 store_vector4( inst, machine, result );
1114 }
1115 break;
1116 case OPCODE_SLT: /* set on less */
1117 {
1118 GLfloat a[4], b[4], result[4];
1119 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1120 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1121 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1122 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1123 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1124 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1125 store_vector4( inst, machine, result );
1126 }
1127 break;
1128 case OPCODE_SNE: /* set on not equal */
1129 {
1130 GLfloat a[4], b[4], result[4];
1131 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1132 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1133 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1134 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1135 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1136 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1137 store_vector4( inst, machine, result );
1138 }
1139 break;
1140 case OPCODE_STR: /* set true, operands ignored */
1141 {
1142 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1143 store_vector4( inst, machine, result );
1144 }
1145 break;
1146 case OPCODE_SUB:
1147 {
1148 GLfloat a[4], b[4], result[4];
1149 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1150 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1151 result[0] = a[0] - b[0];
1152 result[1] = a[1] - b[1];
1153 result[2] = a[2] - b[2];
1154 result[3] = a[3] - b[3];
1155 store_vector4( inst, machine, result );
1156 }
1157 break;
1158 case OPCODE_SWZ:
1159 {
1160 const struct prog_src_register *source = &inst->SrcReg[0];
1161 const GLfloat *src = get_register_pointer(ctx, source,
1162 machine, program);
1163 GLfloat result[4];
1164 GLuint i;
1165
1166 /* do extended swizzling here */
1167 for (i = 0; i < 4; i++) {
1168 if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ZERO)
1169 result[i] = 0.0;
1170 else if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ONE)
1171 result[i] = 1.0;
1172 else
1173 result[i] = src[GET_SWZ(source->Swizzle, i)];
1174
1175 if (source->NegateBase & (1 << i))
1176 result[i] = -result[i];
1177 }
1178 store_vector4( inst, machine, result );
1179 }
1180 break;
1181 case OPCODE_TEX: /* Both ARB and NV frag prog */
1182 /* Texel lookup */
1183 {
1184 GLfloat texcoord[4], color[4];
1185 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1186 /* Note: we pass 0 for LOD. The ARB extension requires it
1187 * while the NV extension says it's implementation dependent.
1188 */
1189 /* KW: Previously lambda was passed as zero, but I
1190 * believe this is incorrect, the spec seems to
1191 * indicate rather that lambda should not be
1192 * changed/biased, unlike TXB where texcoord[3] is
1193 * added to the lambda calculations. The lambda should
1194 * still be calculated normally for TEX & TXP though,
1195 * not set to zero. Otherwise it's very difficult to
1196 * implement normal GL semantics through the fragment
1197 * shader.
1198 */
1199 fetch_texel( ctx, texcoord,
1200 span->array->lambda[inst->TexSrcUnit][column],
1201 inst->TexSrcUnit, color );
1202 #if DEBUG_FRAG
1203 if (color[3])
1204 printf("color[3] = %f\n", color[3]);
1205 #endif
1206 store_vector4( inst, machine, color );
1207 }
1208 break;
1209 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1210 /* Texel lookup with LOD bias */
1211 {
1212 GLfloat texcoord[4], color[4], bias, lambda;
1213
1214 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1215 /* texcoord[3] is the bias to add to lambda */
1216 bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1217 + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1218 + texcoord[3];
1219 lambda = span->array->lambda[inst->TexSrcUnit][column] + bias;
1220 fetch_texel( ctx, texcoord, lambda,
1221 inst->TexSrcUnit, color );
1222 store_vector4( inst, machine, color );
1223 }
1224 break;
1225 case OPCODE_TXD: /* GL_NV_fragment_program only */
1226 /* Texture lookup w/ partial derivatives for LOD */
1227 {
1228 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1229 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1230 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1231 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1232 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1233 color );
1234 store_vector4( inst, machine, color );
1235 }
1236 break;
1237 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1238 /* Texture lookup w/ projective divide */
1239 {
1240 GLfloat texcoord[4], color[4];
1241 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1242 /* Not so sure about this test - if texcoord[3] is
1243 * zero, we'd probably be fine except for an ASSERT in
1244 * IROUND_POS() which gets triggered by the inf values created.
1245 */
1246 if (texcoord[3] != 0.0) {
1247 texcoord[0] /= texcoord[3];
1248 texcoord[1] /= texcoord[3];
1249 texcoord[2] /= texcoord[3];
1250 }
1251 /* KW: Previously lambda was passed as zero, but I
1252 * believe this is incorrect, the spec seems to
1253 * indicate rather that lambda should not be
1254 * changed/biased, unlike TXB where texcoord[3] is
1255 * added to the lambda calculations. The lambda should
1256 * still be calculated normally for TEX & TXP though,
1257 * not set to zero.
1258 */
1259 fetch_texel( ctx, texcoord,
1260 span->array->lambda[inst->TexSrcUnit][column],
1261 inst->TexSrcUnit, color );
1262 store_vector4( inst, machine, color );
1263 }
1264 break;
1265 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1266 /* Texture lookup w/ projective divide */
1267 {
1268 GLfloat texcoord[4], color[4];
1269 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1270 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1271 texcoord[3] != 0.0) {
1272 texcoord[0] /= texcoord[3];
1273 texcoord[1] /= texcoord[3];
1274 texcoord[2] /= texcoord[3];
1275 }
1276 fetch_texel( ctx, texcoord,
1277 span->array->lambda[inst->TexSrcUnit][column],
1278 inst->TexSrcUnit, color );
1279 store_vector4( inst, machine, color );
1280 }
1281 break;
1282 case OPCODE_UP2H: /* unpack two 16-bit floats */
1283 {
1284 GLfloat a[4], result[4];
1285 const GLuint *rawBits = (const GLuint *) a;
1286 GLhalfNV hx, hy;
1287 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1288 hx = rawBits[0] & 0xffff;
1289 hy = rawBits[0] >> 16;
1290 result[0] = result[2] = _mesa_half_to_float(hx);
1291 result[1] = result[3] = _mesa_half_to_float(hy);
1292 store_vector4( inst, machine, result );
1293 }
1294 break;
1295 case OPCODE_UP2US: /* unpack two GLushorts */
1296 {
1297 GLfloat a[4], result[4];
1298 const GLuint *rawBits = (const GLuint *) a;
1299 GLushort usx, usy;
1300 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1301 usx = rawBits[0] & 0xffff;
1302 usy = rawBits[0] >> 16;
1303 result[0] = result[2] = usx * (1.0f / 65535.0f);
1304 result[1] = result[3] = usy * (1.0f / 65535.0f);
1305 store_vector4( inst, machine, result );
1306 }
1307 break;
1308 case OPCODE_UP4B: /* unpack four GLbytes */
1309 {
1310 GLfloat a[4], result[4];
1311 const GLuint *rawBits = (const GLuint *) a;
1312 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1313 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1314 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1315 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1316 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1317 store_vector4( inst, machine, result );
1318 }
1319 break;
1320 case OPCODE_UP4UB: /* unpack four GLubytes */
1321 {
1322 GLfloat a[4], result[4];
1323 const GLuint *rawBits = (const GLuint *) a;
1324 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1325 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1326 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1327 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1328 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1329 store_vector4( inst, machine, result );
1330 }
1331 break;
1332 case OPCODE_XPD: /* cross product */
1333 {
1334 GLfloat a[4], b[4], result[4];
1335 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1336 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1337 result[0] = a[1] * b[2] - a[2] * b[1];
1338 result[1] = a[2] * b[0] - a[0] * b[2];
1339 result[2] = a[0] * b[1] - a[1] * b[0];
1340 result[3] = 1.0;
1341 store_vector4( inst, machine, result );
1342 }
1343 break;
1344 case OPCODE_X2D: /* 2-D matrix transform */
1345 {
1346 GLfloat a[4], b[4], c[4], result[4];
1347 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1348 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1349 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1350 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1351 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1352 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1353 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1354 store_vector4( inst, machine, result );
1355 }
1356 break;
1357 case OPCODE_PRINT:
1358 {
1359 if (inst->SrcReg[0].File != -1) {
1360 GLfloat a[4];
1361 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1362 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1363 a[0], a[1], a[2], a[3]);
1364 }
1365 else {
1366 _mesa_printf("%s\n", (const char *) inst->Data);
1367 }
1368 }
1369 break;
1370 case OPCODE_END:
1371 return GL_TRUE;
1372 default:
1373 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1374 inst->Opcode);
1375 return GL_TRUE; /* return value doesn't matter */
1376 }
1377 }
1378 return GL_TRUE;
1379 }
1380
1381
1382 static void
1383 init_machine( GLcontext *ctx, struct fp_machine *machine,
1384 const struct gl_fragment_program *program,
1385 const SWspan *span, GLuint col )
1386 {
1387 GLuint inputsRead = program->Base.InputsRead;
1388 GLuint u;
1389
1390 if (ctx->FragmentProgram.CallbackEnabled)
1391 inputsRead = ~0;
1392
1393 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1394 /* Clear temporary registers (undefined for ARB_f_p) */
1395 _mesa_bzero(machine->Temporaries,
1396 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1397 }
1398
1399 /* Load input registers */
1400 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1401 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1402 ASSERT(span->arrayMask & SPAN_Z);
1403 if (span->arrayMask & SPAN_XY) {
1404 wpos[0] = (GLfloat) span->array->x[col];
1405 wpos[1] = (GLfloat) span->array->y[col];
1406 }
1407 else {
1408 wpos[0] = (GLfloat) span->x + col;
1409 wpos[1] = (GLfloat) span->y;
1410 }
1411 wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1412 wpos[3] = span->w + col * span->dwdx;
1413 }
1414 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1415 GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1416 ASSERT(span->arrayMask & SPAN_RGBA);
1417 col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1418 col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1419 col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1420 col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1421 }
1422 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1423 GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1424 col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1425 col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1426 col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1427 col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1428 }
1429 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1430 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1431 ASSERT(span->arrayMask & SPAN_FOG);
1432 fogc[0] = span->array->fog[col];
1433 fogc[1] = 0.0F;
1434 fogc[2] = 0.0F;
1435 fogc[3] = 0.0F;
1436 }
1437 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1438 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1439 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1440 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1441 COPY_4V(tex, span->array->texcoords[u][col]);
1442 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1443 }
1444 }
1445
1446 /* init condition codes */
1447 machine->CondCodes[0] = COND_EQ;
1448 machine->CondCodes[1] = COND_EQ;
1449 machine->CondCodes[2] = COND_EQ;
1450 machine->CondCodes[3] = COND_EQ;
1451 }
1452
1453
1454
1455 /**
1456 * Execute the current fragment program, operating on the given span.
1457 */
1458 void
1459 _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1460 {
1461 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1462 GLuint i;
1463
1464 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1465
1466 if (program->Base.Parameters) {
1467 _mesa_load_state_parameters(ctx, program->Base.Parameters);
1468 }
1469
1470 for (i = 0; i < span->end; i++) {
1471 if (span->array->mask[i]) {
1472 init_machine(ctx, &ctx->FragmentProgram.Machine,
1473 ctx->FragmentProgram._Current, span, i);
1474
1475 if (!execute_program(ctx, program, ~0,
1476 &ctx->FragmentProgram.Machine, span, i)) {
1477 span->array->mask[i] = GL_FALSE; /* killed fragment */
1478 span->writeAll = GL_FALSE;
1479 }
1480
1481 /* Store output registers */
1482 {
1483 const GLfloat *colOut
1484 = ctx->FragmentProgram.Machine.Outputs[FRAG_RESULT_COLR];
1485 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1486 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1487 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1488 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1489 }
1490 /* depth value */
1491 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1492 const GLfloat depth
1493 = ctx->FragmentProgram.Machine.Outputs[FRAG_RESULT_DEPR][2];
1494 if (depth <= 0.0)
1495 span->array->z[i] = 0;
1496 else if (depth >= 1.0)
1497 span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1498 else
1499 span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1500 }
1501 }
1502 }
1503
1504 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1505 span->interpMask &= ~SPAN_Z;
1506 span->arrayMask |= SPAN_Z;
1507 }
1508
1509 ctx->_CurrentProgram = 0;
1510 }
1511