Correct test for attenuation.
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.3
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Regarding GL_NV_fragment_program:
27 *
28 * Portions of this software may use or implement intellectual
29 * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30 * any and all warranties with respect to such intellectual property,
31 * including any use thereof or modifications thereto.
32 */
33
34 #include "glheader.h"
35 #include "colormac.h"
36 #include "context.h"
37 #include "nvfragprog.h"
38 #include "macros.h"
39 #include "program.h"
40
41 #include "s_nvfragprog.h"
42 #include "s_span.h"
43 #include "s_texture.h"
44
45
46 /* if 1, print some debugging info */
47 #define DEBUG_FRAG 0
48
49 /**
50 * Fetch a texel.
51 */
52 static void
53 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
54 GLuint unit, GLfloat color[4] )
55 {
56 GLchan rgba[4];
57 SWcontext *swrast = SWRAST_CONTEXT(ctx);
58
59 /* XXX use a float-valued TextureSample routine here!!! */
60 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
61 1, (const GLfloat (*)[4]) texcoord,
62 &lambda, &rgba);
63 color[0] = CHAN_TO_FLOAT(rgba[0]);
64 color[1] = CHAN_TO_FLOAT(rgba[1]);
65 color[2] = CHAN_TO_FLOAT(rgba[2]);
66 color[3] = CHAN_TO_FLOAT(rgba[3]);
67 }
68
69
70 /**
71 * Fetch a texel with the given partial derivatives to compute a level
72 * of detail in the mipmap.
73 */
74 static void
75 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
76 const GLfloat texdx[4], const GLfloat texdy[4],
77 GLuint unit, GLfloat color[4] )
78 {
79 SWcontext *swrast = SWRAST_CONTEXT(ctx);
80 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
81 const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
82 const GLfloat texW = (GLfloat) texImg->WidthScale;
83 const GLfloat texH = (GLfloat) texImg->HeightScale;
84 GLchan rgba[4];
85
86 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
87 texdx[1], texdy[1], /* dt/dx, dt/dy */
88 texdx[3], texdy[2], /* dq/dx, dq/dy */
89 texW, texH,
90 texcoord[0], texcoord[1], texcoord[3],
91 1.0F / texcoord[3]);
92
93 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
94 1, (const GLfloat (*)[4]) texcoord,
95 &lambda, &rgba);
96 color[0] = CHAN_TO_FLOAT(rgba[0]);
97 color[1] = CHAN_TO_FLOAT(rgba[1]);
98 color[2] = CHAN_TO_FLOAT(rgba[2]);
99 color[3] = CHAN_TO_FLOAT(rgba[3]);
100 }
101
102
103 /**
104 * Return a pointer to the 4-element float vector specified by the given
105 * source register.
106 */
107 static INLINE const GLfloat *
108 get_register_pointer( GLcontext *ctx,
109 const struct fp_src_register *source,
110 const struct fp_machine *machine,
111 const struct fragment_program *program )
112 {
113 const GLfloat *src;
114 switch (source->File) {
115 case PROGRAM_TEMPORARY:
116 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
117 src = machine->Temporaries[source->Index];
118 break;
119 case PROGRAM_INPUT:
120 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
121 src = machine->Inputs[source->Index];
122 break;
123 case PROGRAM_OUTPUT:
124 /* This is only for PRINT */
125 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
126 src = machine->Outputs[source->Index];
127 break;
128 case PROGRAM_LOCAL_PARAM:
129 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
130 src = program->Base.LocalParams[source->Index];
131 break;
132 case PROGRAM_ENV_PARAM:
133 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
134 src = ctx->FragmentProgram.Parameters[source->Index];
135 break;
136 case PROGRAM_STATE_VAR:
137 /* Fallthrough */
138 case PROGRAM_NAMED_PARAM:
139 ASSERT(source->Index < (GLint) program->Parameters->NumParameters);
140 src = program->Parameters->ParameterValues[source->Index];
141 break;
142 default:
143 _mesa_problem(ctx, "Invalid input register file %d in fetch_vector4", source->File);
144 src = NULL;
145 }
146 return src;
147 }
148
149
150 /**
151 * Fetch a 4-element float vector from the given source register.
152 * Apply swizzling and negating as needed.
153 */
154 static void
155 fetch_vector4( GLcontext *ctx,
156 const struct fp_src_register *source,
157 const struct fp_machine *machine,
158 const struct fragment_program *program,
159 GLfloat result[4] )
160 {
161 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
162 ASSERT(src);
163
164 result[0] = src[GET_SWZ(source->Swizzle, 0)];
165 result[1] = src[GET_SWZ(source->Swizzle, 1)];
166 result[2] = src[GET_SWZ(source->Swizzle, 2)];
167 result[3] = src[GET_SWZ(source->Swizzle, 3)];
168
169 if (source->NegateBase) {
170 result[0] = -result[0];
171 result[1] = -result[1];
172 result[2] = -result[2];
173 result[3] = -result[3];
174 }
175 if (source->Abs) {
176 result[0] = FABSF(result[0]);
177 result[1] = FABSF(result[1]);
178 result[2] = FABSF(result[2]);
179 result[3] = FABSF(result[3]);
180 }
181 if (source->NegateAbs) {
182 result[0] = -result[0];
183 result[1] = -result[1];
184 result[2] = -result[2];
185 result[3] = -result[3];
186 }
187 }
188
189
190 /**
191 * Fetch the derivative with respect to X for the given register.
192 * \return GL_TRUE if it was easily computed or GL_FALSE if we
193 * need to execute another instance of the program (ugh)!
194 */
195 static GLboolean
196 fetch_vector4_deriv( GLcontext *ctx,
197 const struct fp_src_register *source,
198 const struct sw_span *span,
199 char xOrY, GLint column, GLfloat result[4] )
200 {
201 GLfloat src[4];
202
203 ASSERT(xOrY == 'X' || xOrY == 'Y');
204
205 switch (source->Index) {
206 case FRAG_ATTRIB_WPOS:
207 if (xOrY == 'X') {
208 src[0] = 1.0;
209 src[1] = 0.0;
210 src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
211 src[3] = span->dwdx;
212 }
213 else {
214 src[0] = 0.0;
215 src[1] = 1.0;
216 src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
217 src[3] = span->dwdy;
218 }
219 break;
220 case FRAG_ATTRIB_COL0:
221 if (xOrY == 'X') {
222 src[0] = span->drdx * (1.0F / CHAN_MAXF);
223 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
224 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
225 src[3] = span->dadx * (1.0F / CHAN_MAXF);
226 }
227 else {
228 src[0] = span->drdy * (1.0F / CHAN_MAXF);
229 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
230 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
231 src[3] = span->dady * (1.0F / CHAN_MAXF);
232 }
233 break;
234 case FRAG_ATTRIB_COL1:
235 if (xOrY == 'X') {
236 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
237 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
238 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
239 src[3] = 0.0; /* XXX need this */
240 }
241 else {
242 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
243 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
244 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
245 src[3] = 0.0; /* XXX need this */
246 }
247 break;
248 case FRAG_ATTRIB_FOGC:
249 if (xOrY == 'X') {
250 src[0] = span->dfogdx;
251 src[1] = 0.0;
252 src[2] = 0.0;
253 src[3] = 0.0;
254 }
255 else {
256 src[0] = span->dfogdy;
257 src[1] = 0.0;
258 src[2] = 0.0;
259 src[3] = 0.0;
260 }
261 break;
262 case FRAG_ATTRIB_TEX0:
263 case FRAG_ATTRIB_TEX1:
264 case FRAG_ATTRIB_TEX2:
265 case FRAG_ATTRIB_TEX3:
266 case FRAG_ATTRIB_TEX4:
267 case FRAG_ATTRIB_TEX5:
268 case FRAG_ATTRIB_TEX6:
269 case FRAG_ATTRIB_TEX7:
270 if (xOrY == 'X') {
271 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
272 /* this is a little tricky - I think I've got it right */
273 const GLfloat invQ = 1.0f / (span->tex[u][3]
274 + span->texStepX[u][3] * column);
275 src[0] = span->texStepX[u][0] * invQ;
276 src[1] = span->texStepX[u][1] * invQ;
277 src[2] = span->texStepX[u][2] * invQ;
278 src[3] = span->texStepX[u][3] * invQ;
279 }
280 else {
281 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
282 /* Tricky, as above, but in Y direction */
283 const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
284 src[0] = span->texStepY[u][0] * invQ;
285 src[1] = span->texStepY[u][1] * invQ;
286 src[2] = span->texStepY[u][2] * invQ;
287 src[3] = span->texStepY[u][3] * invQ;
288 }
289 break;
290 default:
291 return GL_FALSE;
292 }
293
294 result[0] = src[GET_SWZ(source->Swizzle, 0)];
295 result[1] = src[GET_SWZ(source->Swizzle, 1)];
296 result[2] = src[GET_SWZ(source->Swizzle, 2)];
297 result[3] = src[GET_SWZ(source->Swizzle, 3)];
298
299 if (source->NegateBase) {
300 result[0] = -result[0];
301 result[1] = -result[1];
302 result[2] = -result[2];
303 result[3] = -result[3];
304 }
305 if (source->Abs) {
306 result[0] = FABSF(result[0]);
307 result[1] = FABSF(result[1]);
308 result[2] = FABSF(result[2]);
309 result[3] = FABSF(result[3]);
310 }
311 if (source->NegateAbs) {
312 result[0] = -result[0];
313 result[1] = -result[1];
314 result[2] = -result[2];
315 result[3] = -result[3];
316 }
317 return GL_TRUE;
318 }
319
320
321 /**
322 * As above, but only return result[0] element.
323 */
324 static void
325 fetch_vector1( GLcontext *ctx,
326 const struct fp_src_register *source,
327 const struct fp_machine *machine,
328 const struct fragment_program *program,
329 GLfloat result[4] )
330 {
331 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
332 ASSERT(src);
333
334 result[0] = src[GET_SWZ(source->Swizzle, 0)];
335
336 if (source->NegateBase) {
337 result[0] = -result[0];
338 }
339 if (source->Abs) {
340 result[0] = FABSF(result[0]);
341 }
342 if (source->NegateAbs) {
343 result[0] = -result[0];
344 }
345 }
346
347
348 /**
349 * Test value against zero and return GT, LT, EQ or UN if NaN.
350 */
351 static INLINE GLuint
352 generate_cc( float value )
353 {
354 if (value != value)
355 return COND_UN; /* NaN */
356 if (value > 0.0F)
357 return COND_GT;
358 if (value < 0.0F)
359 return COND_LT;
360 return COND_EQ;
361 }
362
363
364 /**
365 * Test if the ccMaskRule is satisfied by the given condition code.
366 * Used to mask destination writes according to the current condition codee.
367 */
368 static INLINE GLboolean
369 test_cc(GLuint condCode, GLuint ccMaskRule)
370 {
371 switch (ccMaskRule) {
372 case COND_EQ: return (condCode == COND_EQ);
373 case COND_NE: return (condCode != COND_EQ);
374 case COND_LT: return (condCode == COND_LT);
375 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
376 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
377 case COND_GT: return (condCode == COND_GT);
378 case COND_TR: return GL_TRUE;
379 case COND_FL: return GL_FALSE;
380 default: return GL_TRUE;
381 }
382 }
383
384
385 /**
386 * Store 4 floats into a register. Observe the instructions saturate and
387 * set-condition-code flags.
388 */
389 static void
390 store_vector4( const struct fp_instruction *inst,
391 struct fp_machine *machine,
392 const GLfloat value[4] )
393 {
394 const struct fp_dst_register *dest = &(inst->DstReg);
395 const GLboolean clamp = inst->Saturate;
396 const GLboolean updateCC = inst->UpdateCondRegister;
397 GLfloat *dstReg;
398 GLfloat dummyReg[4];
399 GLfloat clampedValue[4];
400 GLboolean condWriteMask[4];
401 GLuint writeMask = dest->WriteMask;
402
403 switch (dest->File) {
404 case PROGRAM_OUTPUT:
405 dstReg = machine->Outputs[dest->Index];
406 break;
407 case PROGRAM_TEMPORARY:
408 dstReg = machine->Temporaries[dest->Index];
409 break;
410 case PROGRAM_WRITE_ONLY:
411 dstReg = dummyReg;
412 return;
413 default:
414 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
415 return;
416 }
417
418 #if DEBUG_FRAG
419 if (value[0] > 1.0e10 ||
420 IS_INF_OR_NAN(value[0]) ||
421 IS_INF_OR_NAN(value[1]) ||
422 IS_INF_OR_NAN(value[2]) ||
423 IS_INF_OR_NAN(value[3]) )
424 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
425 #endif
426
427 if (clamp) {
428 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
429 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
430 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
431 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
432 value = clampedValue;
433 }
434
435 if (dest->CondMask != COND_TR) {
436 condWriteMask[0] = GET_BIT(writeMask, 0)
437 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)], dest->CondMask);
438 condWriteMask[1] = GET_BIT(writeMask, 1)
439 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)], dest->CondMask);
440 condWriteMask[2] = GET_BIT(writeMask, 2)
441 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)], dest->CondMask);
442 condWriteMask[3] = GET_BIT(writeMask, 3)
443 && test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)], dest->CondMask);
444
445 writeMask = ((condWriteMask[0] << 0) |
446 (condWriteMask[1] << 1) |
447 (condWriteMask[2] << 2) |
448 (condWriteMask[3] << 3));
449 }
450
451 if (GET_BIT(writeMask, 0)) {
452 dstReg[0] = value[0];
453 if (updateCC)
454 machine->CondCodes[0] = generate_cc(value[0]);
455 }
456 if (GET_BIT(writeMask, 1)) {
457 dstReg[1] = value[1];
458 if (updateCC)
459 machine->CondCodes[1] = generate_cc(value[1]);
460 }
461 if (GET_BIT(writeMask, 2)) {
462 dstReg[2] = value[2];
463 if (updateCC)
464 machine->CondCodes[2] = generate_cc(value[2]);
465 }
466 if (GET_BIT(writeMask, 3)) {
467 dstReg[3] = value[3];
468 if (updateCC)
469 machine->CondCodes[3] = generate_cc(value[3]);
470 }
471 }
472
473
474 /**
475 * Initialize a new machine state instance from an existing one, adding
476 * the partial derivatives onto the input registers.
477 * Used to implement DDX and DDY instructions in non-trivial cases.
478 */
479 static void
480 init_machine_deriv( GLcontext *ctx,
481 const struct fp_machine *machine,
482 const struct fragment_program *program,
483 const struct sw_span *span, char xOrY,
484 struct fp_machine *dMachine )
485 {
486 GLuint u;
487
488 ASSERT(xOrY == 'X' || xOrY == 'Y');
489
490 /* copy existing machine */
491 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
492
493 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
494 /* Clear temporary registers (undefined for ARB_f_p) */
495 _mesa_bzero( (void*) machine->Temporaries,
496 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
497 }
498
499 /* Add derivatives */
500 if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
501 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
502 if (xOrY == 'X') {
503 wpos[0] += 1.0F;
504 wpos[1] += 0.0F;
505 wpos[2] += span->dzdx;
506 wpos[3] += span->dwdx;
507 }
508 else {
509 wpos[0] += 0.0F;
510 wpos[1] += 1.0F;
511 wpos[2] += span->dzdy;
512 wpos[3] += span->dwdy;
513 }
514 }
515 if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
516 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
517 if (xOrY == 'X') {
518 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
519 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
520 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
521 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
522 }
523 else {
524 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
525 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
526 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
527 col0[3] += span->dady * (1.0F / CHAN_MAXF);
528 }
529 }
530 if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
531 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
532 if (xOrY == 'X') {
533 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
534 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
535 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
536 col1[3] += 0.0; /*XXX fix */
537 }
538 else {
539 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
540 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
541 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
542 col1[3] += 0.0; /*XXX fix */
543 }
544 }
545 if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
546 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
547 if (xOrY == 'X') {
548 fogc[0] += span->dfogdx;
549 }
550 else {
551 fogc[0] += span->dfogdy;
552 }
553 }
554 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
555 if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
556 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
557 /* XXX perspective-correct interpolation */
558 if (xOrY == 'X') {
559 tex[0] += span->texStepX[u][0];
560 tex[1] += span->texStepX[u][1];
561 tex[2] += span->texStepX[u][2];
562 tex[3] += span->texStepX[u][3];
563 }
564 else {
565 tex[0] += span->texStepY[u][0];
566 tex[1] += span->texStepY[u][1];
567 tex[2] += span->texStepY[u][2];
568 tex[3] += span->texStepY[u][3];
569 }
570 }
571 }
572
573 /* init condition codes */
574 dMachine->CondCodes[0] = COND_EQ;
575 dMachine->CondCodes[1] = COND_EQ;
576 dMachine->CondCodes[2] = COND_EQ;
577 dMachine->CondCodes[3] = COND_EQ;
578 }
579
580
581 /**
582 * Execute the given fragment program.
583 * NOTE: we do everything in single-precision floating point; we don't
584 * currently observe the single/half/fixed-precision qualifiers.
585 * \param ctx - rendering context
586 * \param program - the fragment program to execute
587 * \param machine - machine state (register file)
588 * \param maxInst - max number of instructions to execute
589 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
590 */
591 static GLboolean
592 execute_program( GLcontext *ctx,
593 const struct fragment_program *program, GLuint maxInst,
594 struct fp_machine *machine, const struct sw_span *span,
595 GLuint column )
596 {
597 GLuint pc;
598
599 #if DEBUG_FRAG
600 printf("execute fragment program --------------------\n");
601 #endif
602
603 for (pc = 0; pc < maxInst; pc++) {
604 const struct fp_instruction *inst = program->Instructions + pc;
605
606 if (ctx->FragmentProgram.CallbackEnabled &&
607 ctx->FragmentProgram.Callback) {
608 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
609 ctx->FragmentProgram.Callback(program->Base.Target,
610 ctx->FragmentProgram.CallbackData);
611 }
612
613 switch (inst->Opcode) {
614 case FP_OPCODE_ABS:
615 {
616 GLfloat a[4], result[4];
617 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
618 result[0] = FABSF(a[0]);
619 result[1] = FABSF(a[1]);
620 result[2] = FABSF(a[2]);
621 result[3] = FABSF(a[3]);
622 store_vector4( inst, machine, result );
623 }
624 break;
625 case FP_OPCODE_ADD:
626 {
627 GLfloat a[4], b[4], result[4];
628 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
629 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
630 result[0] = a[0] + b[0];
631 result[1] = a[1] + b[1];
632 result[2] = a[2] + b[2];
633 result[3] = a[3] + b[3];
634 store_vector4( inst, machine, result );
635 }
636 break;
637 case FP_OPCODE_CMP:
638 {
639 GLfloat a[4], b[4], c[4], result[4];
640 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
641 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
642 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
643 result[0] = a[0] < 0.0F ? b[0] : c[0];
644 result[1] = a[1] < 0.0F ? b[1] : c[1];
645 result[2] = a[2] < 0.0F ? b[2] : c[2];
646 result[3] = a[3] < 0.0F ? b[3] : c[3];
647 store_vector4( inst, machine, result );
648 }
649 break;
650 case FP_OPCODE_COS:
651 {
652 GLfloat a[4], result[4];
653 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
654 result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
655 store_vector4( inst, machine, result );
656 }
657 break;
658 case FP_OPCODE_DDX: /* Partial derivative with respect to X */
659 {
660 GLfloat a[4], aNext[4], result[4];
661 struct fp_machine dMachine;
662 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
663 column, result)) {
664 /* This is tricky. Make a copy of the current machine state,
665 * increment the input registers by the dx or dy partial
666 * derivatives, then re-execute the program up to the
667 * preceeding instruction, then fetch the source register.
668 * Finally, find the difference in the register values for
669 * the original and derivative runs.
670 */
671 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
672 init_machine_deriv(ctx, machine, program, span,
673 'X', &dMachine);
674 execute_program(ctx, program, pc, &dMachine, span, column);
675 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
676 result[0] = aNext[0] - a[0];
677 result[1] = aNext[1] - a[1];
678 result[2] = aNext[2] - a[2];
679 result[3] = aNext[3] - a[3];
680 }
681 store_vector4( inst, machine, result );
682 }
683 break;
684 case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
685 {
686 GLfloat a[4], aNext[4], result[4];
687 struct fp_machine dMachine;
688 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
689 column, result)) {
690 init_machine_deriv(ctx, machine, program, span,
691 'Y', &dMachine);
692 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
693 execute_program(ctx, program, pc, &dMachine, span, column);
694 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
695 result[0] = aNext[0] - a[0];
696 result[1] = aNext[1] - a[1];
697 result[2] = aNext[2] - a[2];
698 result[3] = aNext[3] - a[3];
699 }
700 store_vector4( inst, machine, result );
701 }
702 break;
703 case FP_OPCODE_DP3:
704 {
705 GLfloat a[4], b[4], result[4];
706 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
707 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
708 result[0] = result[1] = result[2] = result[3] =
709 a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
710 store_vector4( inst, machine, result );
711 #if DEBUG_FRAG
712 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
713 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
714 #endif
715 }
716 break;
717 case FP_OPCODE_DP4:
718 {
719 GLfloat a[4], b[4], result[4];
720 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
721 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
722 result[0] = result[1] = result[2] = result[3] =
723 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
724 store_vector4( inst, machine, result );
725 #if DEBUG_FRAG
726 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
727 result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
728 #endif
729 }
730 break;
731 case FP_OPCODE_DPH:
732 {
733 GLfloat a[4], b[4], result[4];
734 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
735 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
736 result[0] = result[1] = result[2] = result[3] =
737 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
738 store_vector4( inst, machine, result );
739 }
740 break;
741 case FP_OPCODE_DST: /* Distance vector */
742 {
743 GLfloat a[4], b[4], result[4];
744 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
745 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
746 result[0] = 1.0F;
747 result[1] = a[1] * b[1];
748 result[2] = a[2];
749 result[3] = b[3];
750 store_vector4( inst, machine, result );
751 }
752 break;
753 case FP_OPCODE_EX2: /* Exponential base 2 */
754 {
755 GLfloat a[4], result[4];
756 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
757 result[0] = result[1] = result[2] = result[3] =
758 (GLfloat) _mesa_pow(2.0, a[0]);
759 store_vector4( inst, machine, result );
760 }
761 break;
762 case FP_OPCODE_FLR:
763 {
764 GLfloat a[4], result[4];
765 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
766 result[0] = FLOORF(a[0]);
767 result[1] = FLOORF(a[1]);
768 result[2] = FLOORF(a[2]);
769 result[3] = FLOORF(a[3]);
770 store_vector4( inst, machine, result );
771 }
772 break;
773 case FP_OPCODE_FRC:
774 {
775 GLfloat a[4], result[4];
776 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
777 result[0] = a[0] - FLOORF(a[0]);
778 result[1] = a[1] - FLOORF(a[1]);
779 result[2] = a[2] - FLOORF(a[2]);
780 result[3] = a[3] - FLOORF(a[3]);
781 store_vector4( inst, machine, result );
782 }
783 break;
784 case FP_OPCODE_KIL_NV: /* NV_f_p only */
785 {
786 const GLuint swizzle = inst->DstReg.CondSwizzle;
787 const GLuint condMask = inst->DstReg.CondMask;
788 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
789 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
790 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
791 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
792 return GL_FALSE;
793 }
794 }
795 break;
796 case FP_OPCODE_KIL: /* ARB_f_p only */
797 {
798 GLfloat a[4];
799 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
800 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
801 return GL_FALSE;
802 }
803 }
804 break;
805 case FP_OPCODE_LG2: /* log base 2 */
806 {
807 GLfloat a[4], result[4];
808 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
809 result[0] = result[1] = result[2] = result[3]
810 = LOG2(a[0]);
811 store_vector4( inst, machine, result );
812 }
813 break;
814 case FP_OPCODE_LIT:
815 {
816 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
817 GLfloat a[4], result[4];
818 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
819 a[0] = MAX2(a[0], 0.0F);
820 a[1] = MAX2(a[1], 0.0F);
821 /* XXX ARB version clamps a[3], NV version doesn't */
822 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
823 result[0] = 1.0F;
824 result[1] = a[0];
825 /* XXX we could probably just use pow() here */
826 result[2] = (a[0] > 0.0F) ? (GLfloat) exp(a[3] * log(a[1])) : 0.0F;
827 result[3] = 1.0F;
828 store_vector4( inst, machine, result );
829 }
830 break;
831 case FP_OPCODE_LRP:
832 {
833 GLfloat a[4], b[4], c[4], result[4];
834 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
835 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
836 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
837 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
838 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
839 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
840 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
841 store_vector4( inst, machine, result );
842 }
843 break;
844 case FP_OPCODE_MAD:
845 {
846 GLfloat a[4], b[4], c[4], result[4];
847 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
848 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
849 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
850 result[0] = a[0] * b[0] + c[0];
851 result[1] = a[1] * b[1] + c[1];
852 result[2] = a[2] * b[2] + c[2];
853 result[3] = a[3] * b[3] + c[3];
854 store_vector4( inst, machine, result );
855 }
856 break;
857 case FP_OPCODE_MAX:
858 {
859 GLfloat a[4], b[4], result[4];
860 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
861 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
862 result[0] = MAX2(a[0], b[0]);
863 result[1] = MAX2(a[1], b[1]);
864 result[2] = MAX2(a[2], b[2]);
865 result[3] = MAX2(a[3], b[3]);
866 store_vector4( inst, machine, result );
867 #if DEBUG_FRAG
868 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
869 result[0], result[1], result[2], result[3],
870 a[0], a[1], a[2], a[3],
871 b[0], b[1], b[2], b[3]);
872 #endif
873 }
874 break;
875 case FP_OPCODE_MIN:
876 {
877 GLfloat a[4], b[4], result[4];
878 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
879 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
880 result[0] = MIN2(a[0], b[0]);
881 result[1] = MIN2(a[1], b[1]);
882 result[2] = MIN2(a[2], b[2]);
883 result[3] = MIN2(a[3], b[3]);
884 store_vector4( inst, machine, result );
885 }
886 break;
887 case FP_OPCODE_MOV:
888 {
889 GLfloat result[4];
890 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
891 store_vector4( inst, machine, result );
892 #if DEBUG_FRAG
893 printf("MOV (%g %g %g %g)\n",
894 result[0], result[1], result[2], result[3]);
895 #endif
896 }
897 break;
898 case FP_OPCODE_MUL:
899 {
900 GLfloat a[4], b[4], result[4];
901 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
902 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
903 result[0] = a[0] * b[0];
904 result[1] = a[1] * b[1];
905 result[2] = a[2] * b[2];
906 result[3] = a[3] * b[3];
907 store_vector4( inst, machine, result );
908 #if DEBUG_FRAG
909 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
910 result[0], result[1], result[2], result[3],
911 a[0], a[1], a[2], a[3],
912 b[0], b[1], b[2], b[3]);
913 #endif
914 }
915 break;
916 case FP_OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
917 {
918 GLfloat a[4], result[4];
919 GLhalfNV hx, hy;
920 GLuint *rawResult = (GLuint *) result;
921 GLuint twoHalves;
922 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
923 hx = _mesa_float_to_half(a[0]);
924 hy = _mesa_float_to_half(a[1]);
925 twoHalves = hx | (hy << 16);
926 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
927 = twoHalves;
928 store_vector4( inst, machine, result );
929 }
930 break;
931 case FP_OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
932 {
933 GLfloat a[4], result[4];
934 GLuint usx, usy, *rawResult = (GLuint *) result;
935 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
936 a[0] = CLAMP(a[0], 0.0F, 1.0F);
937 a[1] = CLAMP(a[1], 0.0F, 1.0F);
938 usx = IROUND(a[0] * 65535.0F);
939 usy = IROUND(a[1] * 65535.0F);
940 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
941 = usx | (usy << 16);
942 store_vector4( inst, machine, result );
943 }
944 break;
945 case FP_OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
946 {
947 GLfloat a[4], result[4];
948 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
949 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
950 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
951 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
952 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
953 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
954 ubx = IROUND(127.0F * a[0] + 128.0F);
955 uby = IROUND(127.0F * a[1] + 128.0F);
956 ubz = IROUND(127.0F * a[2] + 128.0F);
957 ubw = IROUND(127.0F * a[3] + 128.0F);
958 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
959 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
960 store_vector4( inst, machine, result );
961 }
962 break;
963 case FP_OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
964 {
965 GLfloat a[4], result[4];
966 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
967 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
968 a[0] = CLAMP(a[0], 0.0F, 1.0F);
969 a[1] = CLAMP(a[1], 0.0F, 1.0F);
970 a[2] = CLAMP(a[2], 0.0F, 1.0F);
971 a[3] = CLAMP(a[3], 0.0F, 1.0F);
972 ubx = IROUND(255.0F * a[0]);
973 uby = IROUND(255.0F * a[1]);
974 ubz = IROUND(255.0F * a[2]);
975 ubw = IROUND(255.0F * a[3]);
976 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
977 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
978 store_vector4( inst, machine, result );
979 }
980 break;
981 case FP_OPCODE_POW:
982 {
983 GLfloat a[4], b[4], result[4];
984 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
985 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
986 result[0] = result[1] = result[2] = result[3]
987 = (GLfloat)_mesa_pow(a[0], b[0]);
988 store_vector4( inst, machine, result );
989 }
990 break;
991 case FP_OPCODE_RCP:
992 {
993 GLfloat a[4], result[4];
994 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
995 #if DEBUG_FRAG
996 if (a[0] == 0)
997 printf("RCP(0)\n");
998 else if (IS_INF_OR_NAN(a[0]))
999 printf("RCP(inf)\n");
1000 #endif
1001 result[0] = result[1] = result[2] = result[3]
1002 = 1.0F / a[0];
1003 store_vector4( inst, machine, result );
1004 }
1005 break;
1006 case FP_OPCODE_RFL:
1007 {
1008 GLfloat axis[4], dir[4], result[4], tmp[4];
1009 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1010 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1011 tmp[3] = axis[0] * axis[0]
1012 + axis[1] * axis[1]
1013 + axis[2] * axis[2];
1014 tmp[0] = (2.0F * (axis[0] * dir[0] +
1015 axis[1] * dir[1] +
1016 axis[2] * dir[2])) / tmp[3];
1017 result[0] = tmp[0] * axis[0] - dir[0];
1018 result[1] = tmp[0] * axis[1] - dir[1];
1019 result[2] = tmp[0] * axis[2] - dir[2];
1020 /* result[3] is never written! XXX enforce in parser! */
1021 store_vector4( inst, machine, result );
1022 }
1023 break;
1024 case FP_OPCODE_RSQ: /* 1 / sqrt() */
1025 {
1026 GLfloat a[4], result[4];
1027 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1028 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1029 store_vector4( inst, machine, result );
1030 #if DEBUG_FRAG
1031 printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
1032 #endif
1033 }
1034 break;
1035 case FP_OPCODE_SCS: /* sine and cos */
1036 {
1037 GLfloat a[4], result[4];
1038 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1039 result[0] = (GLfloat)cos(a[0]);
1040 result[1] = (GLfloat)sin(a[0]);
1041 result[2] = 0.0; /* undefined! */
1042 result[3] = 0.0; /* undefined! */
1043 store_vector4( inst, machine, result );
1044 }
1045 break;
1046 case FP_OPCODE_SEQ: /* set on equal */
1047 {
1048 GLfloat a[4], b[4], result[4];
1049 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1050 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1051 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1052 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1053 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1054 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1055 store_vector4( inst, machine, result );
1056 }
1057 break;
1058 case FP_OPCODE_SFL: /* set false, operands ignored */
1059 {
1060 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1061 store_vector4( inst, machine, result );
1062 }
1063 break;
1064 case FP_OPCODE_SGE: /* set on greater or equal */
1065 {
1066 GLfloat a[4], b[4], result[4];
1067 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1068 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1069 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1070 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1071 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1072 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1073 store_vector4( inst, machine, result );
1074 }
1075 break;
1076 case FP_OPCODE_SGT: /* set on greater */
1077 {
1078 GLfloat a[4], b[4], result[4];
1079 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1080 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1081 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1082 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1083 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1084 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1085 store_vector4( inst, machine, result );
1086 }
1087 break;
1088 case FP_OPCODE_SIN:
1089 {
1090 GLfloat a[4], result[4];
1091 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1092 result[0] = result[1] = result[2] =
1093 result[3] = (GLfloat)_mesa_sin(a[0]);
1094 store_vector4( inst, machine, result );
1095 }
1096 break;
1097 case FP_OPCODE_SLE: /* set on less or equal */
1098 {
1099 GLfloat a[4], b[4], result[4];
1100 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1101 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1102 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1103 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1104 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1105 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1106 store_vector4( inst, machine, result );
1107 }
1108 break;
1109 case FP_OPCODE_SLT: /* set on less */
1110 {
1111 GLfloat a[4], b[4], result[4];
1112 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1113 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1114 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1115 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1116 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1117 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1118 store_vector4( inst, machine, result );
1119 }
1120 break;
1121 case FP_OPCODE_SNE: /* set on not equal */
1122 {
1123 GLfloat a[4], b[4], result[4];
1124 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1125 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1126 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1127 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1128 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1129 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1130 store_vector4( inst, machine, result );
1131 }
1132 break;
1133 case FP_OPCODE_STR: /* set true, operands ignored */
1134 {
1135 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1136 store_vector4( inst, machine, result );
1137 }
1138 break;
1139 case FP_OPCODE_SUB:
1140 {
1141 GLfloat a[4], b[4], result[4];
1142 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1143 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1144 result[0] = a[0] - b[0];
1145 result[1] = a[1] - b[1];
1146 result[2] = a[2] - b[2];
1147 result[3] = a[3] - b[3];
1148 store_vector4( inst, machine, result );
1149 }
1150 break;
1151 case FP_OPCODE_SWZ:
1152 {
1153 const struct fp_src_register *source = &inst->SrcReg[0];
1154 const GLfloat *src = get_register_pointer(ctx, source,
1155 machine, program);
1156 GLfloat result[4];
1157 GLuint i;
1158
1159 /* do extended swizzling here */
1160 for (i = 0; i < 3; i++) {
1161 if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ZERO)
1162 result[i] = 0.0;
1163 else if (GET_SWZ(source->Swizzle, i) == SWIZZLE_ONE)
1164 result[i] = -1.0;
1165 else
1166 result[i] = -src[GET_SWZ(source->Swizzle, i)];
1167
1168 if (source->NegateBase)
1169 result[i] = -result[i];
1170 }
1171 store_vector4( inst, machine, result );
1172 }
1173 break;
1174 case FP_OPCODE_TEX: /* Both ARB and NV frag prog */
1175 /* Texel lookup */
1176 {
1177 GLfloat texcoord[4], color[4];
1178 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1179 /* Note: we pass 0 for LOD. The ARB extension requires it
1180 * while the NV extension says it's implementation dependent.
1181 */
1182 /* KW: Previously lambda was passed as zero, but I
1183 * believe this is incorrect, the spec seems to
1184 * indicate rather that lambda should not be
1185 * changed/biased, unlike TXB where texcoord[3] is
1186 * added to the lambda calculations. The lambda should
1187 * still be calculated normally for TEX & TXP though,
1188 * not set to zero. Otherwise it's very difficult to
1189 * implement normal GL semantics through the fragment
1190 * shader.
1191 */
1192 fetch_texel( ctx, texcoord,
1193 span->array->lambda[inst->TexSrcUnit][column],
1194 inst->TexSrcUnit, color );
1195 #if DEBUG_FRAG
1196 if (color[3])
1197 printf("color[3] = %f\n", color[3]);
1198 #endif
1199 store_vector4( inst, machine, color );
1200 }
1201 break;
1202 case FP_OPCODE_TXB: /* GL_ARB_fragment_program only */
1203 /* Texel lookup with LOD bias */
1204 {
1205 GLfloat texcoord[4], color[4], bias, lambda;
1206
1207 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1208 /* texcoord[3] is the bias to add to lambda */
1209 bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1210 + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1211 + texcoord[3];
1212 lambda = span->array->lambda[inst->TexSrcUnit][column] + bias;
1213 fetch_texel( ctx, texcoord, lambda,
1214 inst->TexSrcUnit, color );
1215 store_vector4( inst, machine, color );
1216 }
1217 break;
1218 case FP_OPCODE_TXD: /* GL_NV_fragment_program only */
1219 /* Texture lookup w/ partial derivatives for LOD */
1220 {
1221 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1222 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1223 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1224 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1225 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1226 color );
1227 store_vector4( inst, machine, color );
1228 }
1229 break;
1230 case FP_OPCODE_TXP: /* GL_ARB_fragment_program only */
1231 /* Texture lookup w/ projective divide */
1232 {
1233 GLfloat texcoord[4], color[4];
1234 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1235 /* Not so sure about this test - if texcoord[3] is
1236 * zero, we'd probably be fine except for an ASSERT in
1237 * IROUND_POS() which gets triggered by the inf values created.
1238 */
1239 if (texcoord[3] != 0.0) {
1240 texcoord[0] /= texcoord[3];
1241 texcoord[1] /= texcoord[3];
1242 texcoord[2] /= texcoord[3];
1243 }
1244 /* KW: Previously lambda was passed as zero, but I
1245 * believe this is incorrect, the spec seems to
1246 * indicate rather that lambda should not be
1247 * changed/biased, unlike TXB where texcoord[3] is
1248 * added to the lambda calculations. The lambda should
1249 * still be calculated normally for TEX & TXP though,
1250 * not set to zero.
1251 */
1252 fetch_texel( ctx, texcoord,
1253 span->array->lambda[inst->TexSrcUnit][column],
1254 inst->TexSrcUnit, color );
1255 store_vector4( inst, machine, color );
1256 }
1257 break;
1258 case FP_OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1259 /* Texture lookup w/ projective divide */
1260 {
1261 GLfloat texcoord[4], color[4];
1262 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1263 if (inst->TexSrcIdx != TEXTURE_CUBE_INDEX &&
1264 texcoord[3] != 0.0) {
1265 texcoord[0] /= texcoord[3];
1266 texcoord[1] /= texcoord[3];
1267 texcoord[2] /= texcoord[3];
1268 }
1269 fetch_texel( ctx, texcoord,
1270 span->array->lambda[inst->TexSrcUnit][column],
1271 inst->TexSrcUnit, color );
1272 store_vector4( inst, machine, color );
1273 }
1274 break;
1275 case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1276 {
1277 GLfloat a[4], result[4];
1278 const GLuint *rawBits = (const GLuint *) a;
1279 GLhalfNV hx, hy;
1280 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1281 hx = rawBits[0] & 0xffff;
1282 hy = rawBits[0] >> 16;
1283 result[0] = result[2] = _mesa_half_to_float(hx);
1284 result[1] = result[3] = _mesa_half_to_float(hy);
1285 store_vector4( inst, machine, result );
1286 }
1287 break;
1288 case FP_OPCODE_UP2US: /* unpack two GLushorts */
1289 {
1290 GLfloat a[4], result[4];
1291 const GLuint *rawBits = (const GLuint *) a;
1292 GLushort usx, usy;
1293 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1294 usx = rawBits[0] & 0xffff;
1295 usy = rawBits[0] >> 16;
1296 result[0] = result[2] = usx * (1.0f / 65535.0f);
1297 result[1] = result[3] = usy * (1.0f / 65535.0f);
1298 store_vector4( inst, machine, result );
1299 }
1300 break;
1301 case FP_OPCODE_UP4B: /* unpack four GLbytes */
1302 {
1303 GLfloat a[4], result[4];
1304 const GLuint *rawBits = (const GLuint *) a;
1305 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1306 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1307 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1308 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1309 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1310 store_vector4( inst, machine, result );
1311 }
1312 break;
1313 case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1314 {
1315 GLfloat a[4], result[4];
1316 const GLuint *rawBits = (const GLuint *) a;
1317 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1318 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1319 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1320 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1321 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1322 store_vector4( inst, machine, result );
1323 }
1324 break;
1325 case FP_OPCODE_XPD: /* cross product */
1326 {
1327 GLfloat a[4], b[4], result[4];
1328 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1329 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1330 result[0] = a[1] * b[2] - a[2] * b[1];
1331 result[1] = a[2] * b[0] - a[0] * b[2];
1332 result[2] = a[0] * b[1] - a[1] * b[0];
1333 result[3] = 1.0;
1334 store_vector4( inst, machine, result );
1335 }
1336 break;
1337 case FP_OPCODE_X2D: /* 2-D matrix transform */
1338 {
1339 GLfloat a[4], b[4], c[4], result[4];
1340 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1341 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1342 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1343 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1344 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1345 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1346 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1347 store_vector4( inst, machine, result );
1348 }
1349 break;
1350 case FP_OPCODE_PRINT:
1351 {
1352 if (inst->SrcReg[0].File != -1) {
1353 GLfloat a[4];
1354 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1355 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1356 a[0], a[1], a[2], a[3]);
1357 }
1358 else {
1359 _mesa_printf("%s\n", (const char *) inst->Data);
1360 }
1361 }
1362 break;
1363 case FP_OPCODE_END:
1364 return GL_TRUE;
1365 default:
1366 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1367 inst->Opcode);
1368 return GL_TRUE; /* return value doesn't matter */
1369 }
1370 }
1371 return GL_TRUE;
1372 }
1373
1374
1375 static void
1376 init_machine( GLcontext *ctx, struct fp_machine *machine,
1377 const struct fragment_program *program,
1378 const struct sw_span *span, GLuint col )
1379 {
1380 GLuint inputsRead = program->InputsRead;
1381 GLuint u;
1382
1383 if (ctx->FragmentProgram.CallbackEnabled)
1384 inputsRead = ~0;
1385
1386 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1387 /* Clear temporary registers (undefined for ARB_f_p) */
1388 _mesa_bzero(machine->Temporaries,
1389 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1390 }
1391
1392 /* Load input registers */
1393 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1394 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1395 wpos[0] = (GLfloat) span->x + col;
1396 wpos[1] = (GLfloat) span->y;
1397 wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1398 wpos[3] = span->w + col * span->dwdx;
1399 }
1400 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1401 GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1402 col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1403 col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1404 col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1405 col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1406 }
1407 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1408 GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1409 col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1410 col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1411 col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1412 col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1413 }
1414 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1415 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1416 fogc[0] = span->array->fog[col];
1417 fogc[1] = 0.0F;
1418 fogc[2] = 0.0F;
1419 fogc[3] = 0.0F;
1420 }
1421 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1422 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1423 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1424 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1425 COPY_4V(tex, span->array->texcoords[u][col]);
1426 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1427 }
1428 }
1429
1430 /* init condition codes */
1431 machine->CondCodes[0] = COND_EQ;
1432 machine->CondCodes[1] = COND_EQ;
1433 machine->CondCodes[2] = COND_EQ;
1434 machine->CondCodes[3] = COND_EQ;
1435 }
1436
1437
1438
1439 /**
1440 * Execute the current fragment program, operating on the given span.
1441 */
1442 void
1443 _swrast_exec_fragment_program( GLcontext *ctx, struct sw_span *span )
1444 {
1445 const struct fragment_program *program = ctx->FragmentProgram._Current;
1446 GLuint i;
1447
1448 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1449
1450 if (program->Parameters) {
1451 _mesa_load_state_parameters(ctx, program->Parameters);
1452 }
1453
1454 for (i = 0; i < span->end; i++) {
1455 if (span->array->mask[i]) {
1456 init_machine(ctx, &ctx->FragmentProgram.Machine,
1457 ctx->FragmentProgram._Current, span, i);
1458
1459 #ifdef USE_TCC
1460 if (!_swrast_execute_codegen_program(ctx, program, ~0,
1461 &ctx->FragmentProgram.Machine,
1462 span, i)) {
1463 span->array->mask[i] = GL_FALSE; /* killed fragment */
1464 }
1465 #else
1466 if (!execute_program(ctx, program, ~0,
1467 &ctx->FragmentProgram.Machine, span, i)) {
1468 span->array->mask[i] = GL_FALSE; /* killed fragment */
1469 }
1470 #endif
1471
1472 /* Store output registers */
1473 {
1474 const GLfloat *colOut
1475 = ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_COLR];
1476 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1477 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1478 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1479 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1480 }
1481 /* depth value */
1482 if (program->OutputsWritten & (1 << FRAG_OUTPUT_DEPR))
1483 span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_DEPR][0] * ctx->DrawBuffer->_DepthMaxF);
1484 }
1485 }
1486
1487 ctx->_CurrentProgram = 0;
1488 }
1489