88a6593aff45fd5dd8f5304f3cd8990fc25a991d
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.1
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Regarding GL_NV_fragment_program:
27 *
28 * Portions of this software may use or implement intellectual
29 * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30 * any and all warranties with respect to such intellectual property,
31 * including any use thereof or modifications thereto.
32 */
33
34 #include "glheader.h"
35 #include "colormac.h"
36 #include "context.h"
37 #include "nvfragprog.h"
38 #include "macros.h"
39 #include "program.h"
40
41 #include "s_nvfragprog.h"
42 #include "s_span.h"
43 #include "s_texture.h"
44
45
46 /* if 1, print some debugging info */
47 #define DEBUG_FRAG 0
48
49 /**
50 * Fetch a texel.
51 */
52 static void
53 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
54 GLuint unit, GLfloat color[4] )
55 {
56 GLchan rgba[4];
57 SWcontext *swrast = SWRAST_CONTEXT(ctx);
58
59 /* XXX use a float-valued TextureSample routine here!!! */
60 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
61 1, (const GLfloat (*)[4]) texcoord,
62 &lambda, &rgba);
63 color[0] = CHAN_TO_FLOAT(rgba[0]);
64 color[1] = CHAN_TO_FLOAT(rgba[1]);
65 color[2] = CHAN_TO_FLOAT(rgba[2]);
66 color[3] = CHAN_TO_FLOAT(rgba[3]);
67 }
68
69
70 /**
71 * Fetch a texel with the given partial derivatives to compute a level
72 * of detail in the mipmap.
73 */
74 static void
75 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
76 const GLfloat texdx[4], const GLfloat texdy[4],
77 GLuint unit, GLfloat color[4] )
78 {
79 SWcontext *swrast = SWRAST_CONTEXT(ctx);
80 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
81 const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
82 const GLfloat texW = (GLfloat) texImg->WidthScale;
83 const GLfloat texH = (GLfloat) texImg->HeightScale;
84 GLchan rgba[4];
85
86 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
87 texdx[1], texdy[1], /* dt/dx, dt/dy */
88 texdx[3], texdy[2], /* dq/dx, dq/dy */
89 texW, texH,
90 texcoord[0], texcoord[1], texcoord[3],
91 1.0F / texcoord[3]);
92
93 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
94 1, (const GLfloat (*)[4]) texcoord,
95 &lambda, &rgba);
96 color[0] = CHAN_TO_FLOAT(rgba[0]);
97 color[1] = CHAN_TO_FLOAT(rgba[1]);
98 color[2] = CHAN_TO_FLOAT(rgba[2]);
99 color[3] = CHAN_TO_FLOAT(rgba[3]);
100 }
101
102
103 /**
104 * Return a pointer to the 4-element float vector specified by the given
105 * source register.
106 */
107 static INLINE const GLfloat *
108 get_register_pointer( GLcontext *ctx,
109 const struct fp_src_register *source,
110 const struct fp_machine *machine,
111 const struct fragment_program *program )
112 {
113 const GLfloat *src;
114 switch (source->File) {
115 case PROGRAM_TEMPORARY:
116 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
117 src = machine->Temporaries[source->Index];
118 break;
119 case PROGRAM_INPUT:
120 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
121 src = machine->Inputs[source->Index];
122 break;
123 case PROGRAM_LOCAL_PARAM:
124 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
125 src = program->Base.LocalParams[source->Index];
126 break;
127 case PROGRAM_ENV_PARAM:
128 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
129 src = ctx->FragmentProgram.Parameters[source->Index];
130 break;
131
132 case PROGRAM_STATE_VAR:
133 /* Fallthrough */
134
135 case PROGRAM_NAMED_PARAM:
136 ASSERT(source->Index < (GLint) program->Parameters->NumParameters);
137 src = program->Parameters->Parameters[source->Index].Values;
138 break;
139 default:
140 _mesa_problem(ctx, "Invalid input register file in fetch_vector4");
141 src = NULL;
142 }
143 return src;
144 }
145
146
147 /**
148 * Fetch a 4-element float vector from the given source register.
149 * Apply swizzling and negating as needed.
150 */
151 static void
152 fetch_vector4( GLcontext *ctx,
153 const struct fp_src_register *source,
154 const struct fp_machine *machine,
155 const struct fragment_program *program,
156 GLfloat result[4] )
157 {
158 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
159 ASSERT(src);
160
161 result[0] = src[source->Swizzle[0]];
162 result[1] = src[source->Swizzle[1]];
163 result[2] = src[source->Swizzle[2]];
164 result[3] = src[source->Swizzle[3]];
165
166 if (source->NegateBase) {
167 result[0] = -result[0];
168 result[1] = -result[1];
169 result[2] = -result[2];
170 result[3] = -result[3];
171 }
172 if (source->Abs) {
173 result[0] = FABSF(result[0]);
174 result[1] = FABSF(result[1]);
175 result[2] = FABSF(result[2]);
176 result[3] = FABSF(result[3]);
177 }
178 if (source->NegateAbs) {
179 result[0] = -result[0];
180 result[1] = -result[1];
181 result[2] = -result[2];
182 result[3] = -result[3];
183 }
184 }
185
186
187 /**
188 * Fetch the derivative with respect to X for the given register.
189 * \return GL_TRUE if it was easily computed or GL_FALSE if we
190 * need to execute another instance of the program (ugh)!
191 */
192 static GLboolean
193 fetch_vector4_deriv( GLcontext *ctx,
194 const struct fp_src_register *source,
195 const struct sw_span *span,
196 char xOrY, GLint column, GLfloat result[4] )
197 {
198 GLfloat src[4];
199
200 ASSERT(xOrY == 'X' || xOrY == 'Y');
201
202 switch (source->Index) {
203 case FRAG_ATTRIB_WPOS:
204 if (xOrY == 'X') {
205 src[0] = 1.0;
206 src[1] = 0.0;
207 src[2] = span->dzdx / ctx->DepthMaxF;
208 src[3] = span->dwdx;
209 }
210 else {
211 src[0] = 0.0;
212 src[1] = 1.0;
213 src[2] = span->dzdy / ctx->DepthMaxF;
214 src[3] = span->dwdy;
215 }
216 break;
217 case FRAG_ATTRIB_COL0:
218 if (xOrY == 'X') {
219 src[0] = span->drdx * (1.0F / CHAN_MAXF);
220 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
221 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
222 src[3] = span->dadx * (1.0F / CHAN_MAXF);
223 }
224 else {
225 src[0] = span->drdy * (1.0F / CHAN_MAXF);
226 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
227 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
228 src[3] = span->dady * (1.0F / CHAN_MAXF);
229 }
230 break;
231 case FRAG_ATTRIB_COL1:
232 if (xOrY == 'X') {
233 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
234 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
235 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
236 src[3] = 0.0; /* XXX need this */
237 }
238 else {
239 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
240 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
241 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
242 src[3] = 0.0; /* XXX need this */
243 }
244 break;
245 case FRAG_ATTRIB_FOGC:
246 if (xOrY == 'X') {
247 src[0] = span->dfogdx;
248 src[1] = 0.0;
249 src[2] = 0.0;
250 src[3] = 0.0;
251 }
252 else {
253 src[0] = span->dfogdy;
254 src[1] = 0.0;
255 src[2] = 0.0;
256 src[3] = 0.0;
257 }
258 break;
259 case FRAG_ATTRIB_TEX0:
260 case FRAG_ATTRIB_TEX1:
261 case FRAG_ATTRIB_TEX2:
262 case FRAG_ATTRIB_TEX3:
263 case FRAG_ATTRIB_TEX4:
264 case FRAG_ATTRIB_TEX5:
265 case FRAG_ATTRIB_TEX6:
266 case FRAG_ATTRIB_TEX7:
267 if (xOrY == 'X') {
268 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
269 /* this is a little tricky - I think I've got it right */
270 const GLfloat invQ = 1.0f / (span->tex[u][3]
271 + span->texStepX[u][3] * column);
272 src[0] = span->texStepX[u][0] * invQ;
273 src[1] = span->texStepX[u][1] * invQ;
274 src[2] = span->texStepX[u][2] * invQ;
275 src[3] = span->texStepX[u][3] * invQ;
276 }
277 else {
278 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
279 /* Tricky, as above, but in Y direction */
280 const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
281 src[0] = span->texStepY[u][0] * invQ;
282 src[1] = span->texStepY[u][1] * invQ;
283 src[2] = span->texStepY[u][2] * invQ;
284 src[3] = span->texStepY[u][3] * invQ;
285 }
286 break;
287 default:
288 return GL_FALSE;
289 }
290
291 result[0] = src[source->Swizzle[0]];
292 result[1] = src[source->Swizzle[1]];
293 result[2] = src[source->Swizzle[2]];
294 result[3] = src[source->Swizzle[3]];
295
296 if (source->NegateBase) {
297 result[0] = -result[0];
298 result[1] = -result[1];
299 result[2] = -result[2];
300 result[3] = -result[3];
301 }
302 if (source->Abs) {
303 result[0] = FABSF(result[0]);
304 result[1] = FABSF(result[1]);
305 result[2] = FABSF(result[2]);
306 result[3] = FABSF(result[3]);
307 }
308 if (source->NegateAbs) {
309 result[0] = -result[0];
310 result[1] = -result[1];
311 result[2] = -result[2];
312 result[3] = -result[3];
313 }
314 return GL_TRUE;
315 }
316
317
318 /**
319 * As above, but only return result[0] element.
320 */
321 static void
322 fetch_vector1( GLcontext *ctx,
323 const struct fp_src_register *source,
324 const struct fp_machine *machine,
325 const struct fragment_program *program,
326 GLfloat result[4] )
327 {
328 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
329 ASSERT(src);
330
331 result[0] = src[source->Swizzle[0]];
332
333 if (source->NegateBase) {
334 result[0] = -result[0];
335 }
336 if (source->Abs) {
337 result[0] = FABSF(result[0]);
338 }
339 if (source->NegateAbs) {
340 result[0] = -result[0];
341 }
342 }
343
344
345 /*
346 * Test value against zero and return GT, LT, EQ or UN if NaN.
347 */
348 static INLINE GLuint
349 generate_cc( float value )
350 {
351 if (value != value)
352 return COND_UN; /* NaN */
353 if (value > 0.0F)
354 return COND_GT;
355 if (value < 0.0F)
356 return COND_LT;
357 return COND_EQ;
358 }
359
360 /*
361 * Test if the ccMaskRule is satisfied by the given condition code.
362 * Used to mask destination writes according to the current condition codee.
363 */
364 static INLINE GLboolean
365 test_cc(GLuint condCode, GLuint ccMaskRule)
366 {
367 switch (ccMaskRule) {
368 case COND_EQ: return (condCode == COND_EQ);
369 case COND_NE: return (condCode != COND_EQ);
370 case COND_LT: return (condCode == COND_LT);
371 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
372 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
373 case COND_GT: return (condCode == COND_GT);
374 case COND_TR: return GL_TRUE;
375 case COND_FL: return GL_FALSE;
376 default: return GL_TRUE;
377 }
378 }
379
380
381 /**
382 * Store 4 floats into a register. Observe the instructions saturate and
383 * set-condition-code flags.
384 */
385 static void
386 store_vector4( const struct fp_instruction *inst,
387 struct fp_machine *machine,
388 const GLfloat value[4] )
389 {
390 const struct fp_dst_register *dest = &(inst->DstReg);
391 const GLboolean clamp = inst->Saturate;
392 const GLboolean updateCC = inst->UpdateCondRegister;
393 GLfloat *dstReg;
394 GLfloat dummyReg[4];
395 GLfloat clampedValue[4];
396 const GLboolean *writeMask = dest->WriteMask;
397 GLboolean condWriteMask[4];
398
399 switch (dest->File) {
400 case PROGRAM_OUTPUT:
401 dstReg = machine->Outputs[dest->Index];
402 break;
403 case PROGRAM_TEMPORARY:
404 dstReg = machine->Temporaries[dest->Index];
405 break;
406 case PROGRAM_WRITE_ONLY:
407 dstReg = dummyReg;
408 return;
409 default:
410 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
411 return;
412 }
413
414 #if DEBUG_FRAG
415 if (value[0] > 1.0e10 ||
416 IS_INF_OR_NAN(value[0]) ||
417 IS_INF_OR_NAN(value[1]) ||
418 IS_INF_OR_NAN(value[2]) ||
419 IS_INF_OR_NAN(value[3]) )
420 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
421 #endif
422
423 if (clamp) {
424 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
425 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
426 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
427 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
428 value = clampedValue;
429 }
430
431 if (dest->CondMask != COND_TR) {
432 condWriteMask[0] = writeMask[0]
433 && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
434 condWriteMask[1] = writeMask[1]
435 && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
436 condWriteMask[2] = writeMask[2]
437 && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
438 condWriteMask[3] = writeMask[3]
439 && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
440 writeMask = condWriteMask;
441 }
442
443 if (writeMask[0]) {
444 dstReg[0] = value[0];
445 if (updateCC)
446 machine->CondCodes[0] = generate_cc(value[0]);
447 }
448 if (writeMask[1]) {
449 dstReg[1] = value[1];
450 if (updateCC)
451 machine->CondCodes[1] = generate_cc(value[1]);
452 }
453 if (writeMask[2]) {
454 dstReg[2] = value[2];
455 if (updateCC)
456 machine->CondCodes[2] = generate_cc(value[2]);
457 }
458 if (writeMask[3]) {
459 dstReg[3] = value[3];
460 if (updateCC)
461 machine->CondCodes[3] = generate_cc(value[3]);
462 }
463 }
464
465
466 /**
467 * Initialize a new machine state instance from an existing one, adding
468 * the partial derivatives onto the input registers.
469 * Used to implement DDX and DDY instructions in non-trivial cases.
470 */
471 static void
472 init_machine_deriv( GLcontext *ctx,
473 const struct fp_machine *machine,
474 const struct fragment_program *program,
475 const struct sw_span *span, char xOrY,
476 struct fp_machine *dMachine )
477 {
478 GLuint u;
479
480 ASSERT(xOrY == 'X' || xOrY == 'Y');
481
482 /* copy existing machine */
483 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
484
485 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
486 /* Clear temporary registers (undefined for ARB_f_p) */
487 _mesa_bzero( (void*) machine->Temporaries,
488 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
489 }
490
491 /* Add derivatives */
492 if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
493 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
494 if (xOrY == 'X') {
495 wpos[0] += 1.0F;
496 wpos[1] += 0.0F;
497 wpos[2] += span->dzdx;
498 wpos[3] += span->dwdx;
499 }
500 else {
501 wpos[0] += 0.0F;
502 wpos[1] += 1.0F;
503 wpos[2] += span->dzdy;
504 wpos[3] += span->dwdy;
505 }
506 }
507 if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
508 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
509 if (xOrY == 'X') {
510 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
511 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
512 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
513 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
514 }
515 else {
516 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
517 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
518 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
519 col0[3] += span->dady * (1.0F / CHAN_MAXF);
520 }
521 }
522 if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
523 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
524 if (xOrY == 'X') {
525 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
526 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
527 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
528 col1[3] += 0.0; /*XXX fix */
529 }
530 else {
531 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
532 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
533 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
534 col1[3] += 0.0; /*XXX fix */
535 }
536 }
537 if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
538 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
539 if (xOrY == 'X') {
540 fogc[0] += span->dfogdx;
541 }
542 else {
543 fogc[0] += span->dfogdy;
544 }
545 }
546 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
547 if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
548 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
549 /* XXX perspective-correct interpolation */
550 if (xOrY == 'X') {
551 tex[0] += span->texStepX[u][0];
552 tex[1] += span->texStepX[u][1];
553 tex[2] += span->texStepX[u][2];
554 tex[3] += span->texStepX[u][3];
555 }
556 else {
557 tex[0] += span->texStepY[u][0];
558 tex[1] += span->texStepY[u][1];
559 tex[2] += span->texStepY[u][2];
560 tex[3] += span->texStepY[u][3];
561 }
562 }
563 }
564
565 /* init condition codes */
566 dMachine->CondCodes[0] = COND_EQ;
567 dMachine->CondCodes[1] = COND_EQ;
568 dMachine->CondCodes[2] = COND_EQ;
569 dMachine->CondCodes[3] = COND_EQ;
570 }
571
572
573 /**
574 * Execute the given fragment program.
575 * NOTE: we do everything in single-precision floating point; we don't
576 * currently observe the single/half/fixed-precision qualifiers.
577 * \param ctx - rendering context
578 * \param program - the fragment program to execute
579 * \param machine - machine state (register file)
580 * \param maxInst - max number of instructions to execute
581 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
582 */
583 static GLboolean
584 execute_program( GLcontext *ctx,
585 const struct fragment_program *program, GLuint maxInst,
586 struct fp_machine *machine, const struct sw_span *span,
587 GLuint column )
588 {
589 GLuint pc;
590
591 #if DEBUG_FRAG
592 printf("execute fragment program --------------------\n");
593 #endif
594
595 for (pc = 0; pc < maxInst; pc++) {
596 const struct fp_instruction *inst = program->Instructions + pc;
597
598 if (ctx->FragmentProgram.CallbackEnabled &&
599 ctx->FragmentProgram.Callback) {
600 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
601 ctx->FragmentProgram.Callback(program->Base.Target,
602 ctx->FragmentProgram.CallbackData);
603 }
604
605 switch (inst->Opcode) {
606 case FP_OPCODE_ABS:
607 {
608 GLfloat a[4], result[4];
609 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
610 result[0] = FABSF(a[0]);
611 result[1] = FABSF(a[1]);
612 result[2] = FABSF(a[2]);
613 result[3] = FABSF(a[3]);
614 store_vector4( inst, machine, result );
615 }
616 break;
617 case FP_OPCODE_ADD:
618 {
619 GLfloat a[4], b[4], result[4];
620 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
621 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
622 result[0] = a[0] + b[0];
623 result[1] = a[1] + b[1];
624 result[2] = a[2] + b[2];
625 result[3] = a[3] + b[3];
626 store_vector4( inst, machine, result );
627 }
628 break;
629 case FP_OPCODE_CMP:
630 {
631 GLfloat a[4], b[4], c[4], result[4];
632 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
633 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
634 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
635 result[0] = a[0] < 0.0F ? b[0] : c[0];
636 result[1] = a[1] < 0.0F ? b[1] : c[1];
637 result[2] = a[2] < 0.0F ? b[2] : c[2];
638 result[3] = a[3] < 0.0F ? b[3] : c[3];
639 store_vector4( inst, machine, result );
640 }
641 break;
642 case FP_OPCODE_COS:
643 {
644 GLfloat a[4], result[4];
645 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
646 result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
647 store_vector4( inst, machine, result );
648 }
649 break;
650 case FP_OPCODE_DDX: /* Partial derivative with respect to X */
651 {
652 GLfloat a[4], aNext[4], result[4];
653 struct fp_machine dMachine;
654 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
655 column, result)) {
656 /* This is tricky. Make a copy of the current machine state,
657 * increment the input registers by the dx or dy partial
658 * derivatives, then re-execute the program up to the
659 * preceeding instruction, then fetch the source register.
660 * Finally, find the difference in the register values for
661 * the original and derivative runs.
662 */
663 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
664 init_machine_deriv(ctx, machine, program, span,
665 'X', &dMachine);
666 execute_program(ctx, program, pc, &dMachine, span, column);
667 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
668 result[0] = aNext[0] - a[0];
669 result[1] = aNext[1] - a[1];
670 result[2] = aNext[2] - a[2];
671 result[3] = aNext[3] - a[3];
672 }
673 store_vector4( inst, machine, result );
674 }
675 break;
676 case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
677 {
678 GLfloat a[4], aNext[4], result[4];
679 struct fp_machine dMachine;
680 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
681 column, result)) {
682 init_machine_deriv(ctx, machine, program, span,
683 'Y', &dMachine);
684 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
685 execute_program(ctx, program, pc, &dMachine, span, column);
686 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
687 result[0] = aNext[0] - a[0];
688 result[1] = aNext[1] - a[1];
689 result[2] = aNext[2] - a[2];
690 result[3] = aNext[3] - a[3];
691 }
692 store_vector4( inst, machine, result );
693 }
694 break;
695 case FP_OPCODE_DP3:
696 {
697 GLfloat a[4], b[4], result[4];
698 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
699 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
700 result[0] = result[1] = result[2] = result[3] =
701 a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
702 store_vector4( inst, machine, result );
703 #if DEBUG_FRAG
704 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
705 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
706 #endif
707 }
708 break;
709 case FP_OPCODE_DP4:
710 {
711 GLfloat a[4], b[4], result[4];
712 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
713 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
714 result[0] = result[1] = result[2] = result[3] =
715 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
716 store_vector4( inst, machine, result );
717 #if DEBUG_FRAG
718 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
719 result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
720 #endif
721 }
722 break;
723 case FP_OPCODE_DPH:
724 {
725 GLfloat a[4], b[4], result[4];
726 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
727 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
728 result[0] = result[1] = result[2] = result[3] =
729 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
730 store_vector4( inst, machine, result );
731 }
732 break;
733 case FP_OPCODE_DST: /* Distance vector */
734 {
735 GLfloat a[4], b[4], result[4];
736 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
737 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
738 result[0] = 1.0F;
739 result[1] = a[1] * b[1];
740 result[2] = a[2];
741 result[3] = b[3];
742 store_vector4( inst, machine, result );
743 }
744 break;
745 case FP_OPCODE_EX2: /* Exponential base 2 */
746 {
747 GLfloat a[4], result[4];
748 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
749 result[0] = result[1] = result[2] = result[3] =
750 (GLfloat) _mesa_pow(2.0, a[0]);
751 store_vector4( inst, machine, result );
752 }
753 break;
754 case FP_OPCODE_FLR:
755 {
756 GLfloat a[4], result[4];
757 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
758 result[0] = FLOORF(a[0]);
759 result[1] = FLOORF(a[1]);
760 result[2] = FLOORF(a[2]);
761 result[3] = FLOORF(a[3]);
762 store_vector4( inst, machine, result );
763 }
764 break;
765 case FP_OPCODE_FRC:
766 {
767 GLfloat a[4], result[4];
768 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
769 result[0] = a[0] - FLOORF(a[0]);
770 result[1] = a[1] - FLOORF(a[1]);
771 result[2] = a[2] - FLOORF(a[2]);
772 result[3] = a[3] - FLOORF(a[3]);
773 store_vector4( inst, machine, result );
774 }
775 break;
776 case FP_OPCODE_KIL_NV: /* NV_f_p only */
777 {
778 const GLuint *swizzle = inst->DstReg.CondSwizzle;
779 const GLuint condMask = inst->DstReg.CondMask;
780 if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
781 test_cc(machine->CondCodes[swizzle[1]], condMask) ||
782 test_cc(machine->CondCodes[swizzle[2]], condMask) ||
783 test_cc(machine->CondCodes[swizzle[3]], condMask)) {
784 return GL_FALSE;
785 }
786 }
787 break;
788 case FP_OPCODE_KIL: /* ARB_f_p only */
789 {
790 GLfloat a[4];
791 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
792 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
793 return GL_FALSE;
794 }
795 }
796 break;
797 case FP_OPCODE_LG2: /* log base 2 */
798 {
799 GLfloat a[4], result[4];
800 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
801 result[0] = result[1] = result[2] = result[3]
802 = LOG2(a[0]);
803 store_vector4( inst, machine, result );
804 }
805 break;
806 case FP_OPCODE_LIT:
807 {
808 GLfloat a[4], result[4];
809 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
810 if (a[0] < 0.0F)
811 a[0] = 0.0F;
812 if (a[1] < 0.0F)
813 a[1] = 0.0F;
814 result[0] = 1.0F;
815 result[1] = a[0];
816 result[2] = (a[0] > 0.0F) ? (GLfloat) exp(a[3] * log(a[1])) : 0.0F;
817 result[3] = 1.0F;
818 store_vector4( inst, machine, result );
819 }
820 break;
821 case FP_OPCODE_LRP:
822 {
823 GLfloat a[4], b[4], c[4], result[4];
824 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
825 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
826 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
827 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
828 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
829 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
830 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
831 store_vector4( inst, machine, result );
832 }
833 break;
834 case FP_OPCODE_MAD:
835 {
836 GLfloat a[4], b[4], c[4], result[4];
837 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
838 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
839 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
840 result[0] = a[0] * b[0] + c[0];
841 result[1] = a[1] * b[1] + c[1];
842 result[2] = a[2] * b[2] + c[2];
843 result[3] = a[3] * b[3] + c[3];
844 store_vector4( inst, machine, result );
845 }
846 break;
847 case FP_OPCODE_MAX:
848 {
849 GLfloat a[4], b[4], result[4];
850 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
851 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
852 result[0] = MAX2(a[0], b[0]);
853 result[1] = MAX2(a[1], b[1]);
854 result[2] = MAX2(a[2], b[2]);
855 result[3] = MAX2(a[3], b[3]);
856 store_vector4( inst, machine, result );
857 #if DEBUG_FRAG
858 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
859 result[0], result[1], result[2], result[3],
860 a[0], a[1], a[2], a[3],
861 b[0], b[1], b[2], b[3]);
862 #endif
863 }
864 break;
865 case FP_OPCODE_MIN:
866 {
867 GLfloat a[4], b[4], result[4];
868 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
869 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
870 result[0] = MIN2(a[0], b[0]);
871 result[1] = MIN2(a[1], b[1]);
872 result[2] = MIN2(a[2], b[2]);
873 result[3] = MIN2(a[3], b[3]);
874 store_vector4( inst, machine, result );
875 }
876 break;
877 case FP_OPCODE_MOV:
878 {
879 GLfloat result[4];
880 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
881 store_vector4( inst, machine, result );
882 #if DEBUG_FRAG
883 printf("MOV (%g %g %g %g)\n",
884 result[0], result[1], result[2], result[3]);
885 #endif
886 }
887 break;
888 case FP_OPCODE_MUL:
889 {
890 GLfloat a[4], b[4], result[4];
891 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
892 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
893 result[0] = a[0] * b[0];
894 result[1] = a[1] * b[1];
895 result[2] = a[2] * b[2];
896 result[3] = a[3] * b[3];
897 store_vector4( inst, machine, result );
898 #if DEBUG_FRAG
899 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
900 result[0], result[1], result[2], result[3],
901 a[0], a[1], a[2], a[3],
902 b[0], b[1], b[2], b[3]);
903 #endif
904 }
905 break;
906 case FP_OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
907 {
908 GLfloat a[4], result[4];
909 GLhalfNV hx, hy;
910 GLuint *rawResult = (GLuint *) result;
911 GLuint twoHalves;
912 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
913 hx = _mesa_float_to_half(a[0]);
914 hy = _mesa_float_to_half(a[1]);
915 twoHalves = hx | (hy << 16);
916 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
917 = twoHalves;
918 store_vector4( inst, machine, result );
919 }
920 break;
921 case FP_OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
922 {
923 GLfloat a[4], result[4];
924 GLuint usx, usy, *rawResult = (GLuint *) result;
925 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
926 a[0] = CLAMP(a[0], 0.0F, 1.0F);
927 a[1] = CLAMP(a[1], 0.0F, 1.0F);
928 usx = IROUND(a[0] * 65535.0F);
929 usy = IROUND(a[1] * 65535.0F);
930 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
931 = usx | (usy << 16);
932 store_vector4( inst, machine, result );
933 }
934 break;
935 case FP_OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
936 {
937 GLfloat a[4], result[4];
938 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
939 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
940 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
941 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
942 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
943 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
944 ubx = IROUND(127.0F * a[0] + 128.0F);
945 uby = IROUND(127.0F * a[1] + 128.0F);
946 ubz = IROUND(127.0F * a[2] + 128.0F);
947 ubw = IROUND(127.0F * a[3] + 128.0F);
948 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
949 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
950 store_vector4( inst, machine, result );
951 }
952 break;
953 case FP_OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
954 {
955 GLfloat a[4], result[4];
956 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
957 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
958 a[0] = CLAMP(a[0], 0.0F, 1.0F);
959 a[1] = CLAMP(a[1], 0.0F, 1.0F);
960 a[2] = CLAMP(a[2], 0.0F, 1.0F);
961 a[3] = CLAMP(a[3], 0.0F, 1.0F);
962 ubx = IROUND(255.0F * a[0]);
963 uby = IROUND(255.0F * a[1]);
964 ubz = IROUND(255.0F * a[2]);
965 ubw = IROUND(255.0F * a[3]);
966 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
967 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
968 store_vector4( inst, machine, result );
969 }
970 break;
971 case FP_OPCODE_POW:
972 {
973 GLfloat a[4], b[4], result[4];
974 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
975 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
976 result[0] = result[1] = result[2] = result[3]
977 = (GLfloat)_mesa_pow(a[0], b[0]);
978 store_vector4( inst, machine, result );
979 }
980 break;
981 case FP_OPCODE_RCP:
982 {
983 GLfloat a[4], result[4];
984 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
985 #if DEBUG_FRAG
986 if (a[0] == 0)
987 printf("RCP(0)\n");
988 else if (IS_INF_OR_NAN(a[0]))
989 printf("RCP(inf)\n");
990 #endif
991 result[0] = result[1] = result[2] = result[3]
992 = 1.0F / a[0];
993 store_vector4( inst, machine, result );
994 }
995 break;
996 case FP_OPCODE_RFL:
997 {
998 GLfloat axis[4], dir[4], result[4], tmp[4];
999 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1000 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1001 tmp[3] = axis[0] * axis[0]
1002 + axis[1] * axis[1]
1003 + axis[2] * axis[2];
1004 tmp[0] = (2.0F * (axis[0] * dir[0] +
1005 axis[1] * dir[1] +
1006 axis[2] * dir[2])) / tmp[3];
1007 result[0] = tmp[0] * axis[0] - dir[0];
1008 result[1] = tmp[0] * axis[1] - dir[1];
1009 result[2] = tmp[0] * axis[2] - dir[2];
1010 /* result[3] is never written! XXX enforce in parser! */
1011 store_vector4( inst, machine, result );
1012 }
1013 break;
1014 case FP_OPCODE_RSQ: /* 1 / sqrt() */
1015 {
1016 GLfloat a[4], result[4];
1017 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1018 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1019 store_vector4( inst, machine, result );
1020 #if DEBUG_FRAG
1021 printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
1022 #endif
1023 }
1024 break;
1025 case FP_OPCODE_SCS: /* sine and cos */
1026 {
1027 GLfloat a[4], result[4];
1028 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1029 result[0] = (GLfloat)cos(a[0]);
1030 result[1] = (GLfloat)sin(a[0]);
1031 result[2] = 0.0; /* undefined! */
1032 result[3] = 0.0; /* undefined! */
1033 store_vector4( inst, machine, result );
1034 }
1035 break;
1036 case FP_OPCODE_SEQ: /* set on equal */
1037 {
1038 GLfloat a[4], b[4], result[4];
1039 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1040 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1041 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1042 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1043 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1044 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1045 store_vector4( inst, machine, result );
1046 }
1047 break;
1048 case FP_OPCODE_SFL: /* set false, operands ignored */
1049 {
1050 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1051 store_vector4( inst, machine, result );
1052 }
1053 break;
1054 case FP_OPCODE_SGE: /* set on greater or equal */
1055 {
1056 GLfloat a[4], b[4], result[4];
1057 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1058 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1059 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1060 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1061 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1062 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1063 store_vector4( inst, machine, result );
1064 }
1065 break;
1066 case FP_OPCODE_SGT: /* set on greater */
1067 {
1068 GLfloat a[4], b[4], result[4];
1069 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1070 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1071 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1072 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1073 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1074 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1075 store_vector4( inst, machine, result );
1076 }
1077 break;
1078 case FP_OPCODE_SIN:
1079 {
1080 GLfloat a[4], result[4];
1081 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1082 result[0] = result[1] = result[2] =
1083 result[3] = (GLfloat)_mesa_sin(a[0]);
1084 store_vector4( inst, machine, result );
1085 }
1086 break;
1087 case FP_OPCODE_SLE: /* set on less or equal */
1088 {
1089 GLfloat a[4], b[4], result[4];
1090 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1091 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1092 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1093 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1094 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1095 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1096 store_vector4( inst, machine, result );
1097 }
1098 break;
1099 case FP_OPCODE_SLT: /* set on less */
1100 {
1101 GLfloat a[4], b[4], result[4];
1102 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1103 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1104 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1105 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1106 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1107 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1108 store_vector4( inst, machine, result );
1109 }
1110 break;
1111 case FP_OPCODE_SNE: /* set on not equal */
1112 {
1113 GLfloat a[4], b[4], result[4];
1114 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1115 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1116 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1117 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1118 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1119 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1120 store_vector4( inst, machine, result );
1121 }
1122 break;
1123 case FP_OPCODE_STR: /* set true, operands ignored */
1124 {
1125 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1126 store_vector4( inst, machine, result );
1127 }
1128 break;
1129 case FP_OPCODE_SUB:
1130 {
1131 GLfloat a[4], b[4], result[4];
1132 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1133 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1134 result[0] = a[0] - b[0];
1135 result[1] = a[1] - b[1];
1136 result[2] = a[2] - b[2];
1137 result[3] = a[3] - b[3];
1138 store_vector4( inst, machine, result );
1139 }
1140 break;
1141 case FP_OPCODE_SWZ:
1142 {
1143 const struct fp_src_register *source = &inst->SrcReg[0];
1144 const GLfloat *src = get_register_pointer(ctx, source,
1145 machine, program);
1146 GLfloat result[4];
1147 GLuint i;
1148
1149 /* do extended swizzling here */
1150 for (i = 0; i < 3; i++) {
1151 if (source->Swizzle[i] == SWIZZLE_ZERO)
1152 result[i] = 0.0;
1153 else if (source->Swizzle[i] == SWIZZLE_ONE)
1154 result[i] = -1.0;
1155 else
1156 result[i] = -src[source->Swizzle[i]];
1157 if (source->NegateBase)
1158 result[i] = -result[i];
1159 }
1160 store_vector4( inst, machine, result );
1161 }
1162 break;
1163 case FP_OPCODE_TEX: /* Both ARB and NV frag prog */
1164 /* Texel lookup */
1165 {
1166 GLfloat texcoord[4], color[4];
1167 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1168 /* Note: we pass 0 for LOD. The ARB extension requires it
1169 * while the NV extension says it's implementation dependant.
1170 */
1171 fetch_texel( ctx, texcoord, 0.0F, inst->TexSrcUnit, color );
1172 store_vector4( inst, machine, color );
1173 }
1174 break;
1175 case FP_OPCODE_TXB: /* GL_ARB_fragment_program only */
1176 /* Texel lookup with LOD bias */
1177 {
1178 GLfloat texcoord[4], color[4], bias, lambda;
1179
1180 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1181 /* texcoord[3] is the bias to add to lambda */
1182 bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1183 + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1184 + texcoord[3];
1185 lambda = span->array->lambda[inst->TexSrcUnit][column] + bias;
1186 fetch_texel( ctx, texcoord, lambda,
1187 inst->TexSrcUnit, color );
1188 store_vector4( inst, machine, color );
1189 }
1190 break;
1191 case FP_OPCODE_TXD: /* GL_NV_fragment_program only */
1192 /* Texture lookup w/ partial derivatives for LOD */
1193 {
1194 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1195 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1196 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1197 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1198 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1199 color );
1200 store_vector4( inst, machine, color );
1201 }
1202 break;
1203 case FP_OPCODE_TXP: /* GL_ARB_fragment_program only */
1204 /* Texture lookup w/ projective divide */
1205 {
1206 GLfloat texcoord[4], color[4];
1207 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1208 texcoord[0] /= texcoord[3];
1209 texcoord[1] /= texcoord[3];
1210 texcoord[2] /= texcoord[3];
1211 /* Note: LOD=0 */
1212 fetch_texel( ctx, texcoord, 0.0F, inst->TexSrcUnit, color );
1213 store_vector4( inst, machine, color );
1214 }
1215 break;
1216 case FP_OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1217 /* Texture lookup w/ projective divide */
1218 {
1219 GLfloat texcoord[4], color[4];
1220 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1221 if (inst->TexSrcBit != TEXTURE_CUBE_BIT) {
1222 texcoord[0] /= texcoord[3];
1223 texcoord[1] /= texcoord[3];
1224 texcoord[2] /= texcoord[3];
1225 }
1226 fetch_texel( ctx, texcoord,
1227 span->array->lambda[inst->TexSrcUnit][column],
1228 inst->TexSrcUnit, color );
1229 store_vector4( inst, machine, color );
1230 }
1231 break;
1232 case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1233 {
1234 GLfloat a[4], result[4];
1235 const GLuint *rawBits = (const GLuint *) a;
1236 GLhalfNV hx, hy;
1237 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1238 hx = rawBits[0] & 0xffff;
1239 hy = rawBits[0] >> 16;
1240 result[0] = result[2] = _mesa_half_to_float(hx);
1241 result[1] = result[3] = _mesa_half_to_float(hy);
1242 store_vector4( inst, machine, result );
1243 }
1244 break;
1245 case FP_OPCODE_UP2US: /* unpack two GLushorts */
1246 {
1247 GLfloat a[4], result[4];
1248 const GLuint *rawBits = (const GLuint *) a;
1249 GLushort usx, usy;
1250 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1251 usx = rawBits[0] & 0xffff;
1252 usy = rawBits[0] >> 16;
1253 result[0] = result[2] = usx * (1.0f / 65535.0f);
1254 result[1] = result[3] = usy * (1.0f / 65535.0f);
1255 store_vector4( inst, machine, result );
1256 }
1257 break;
1258 case FP_OPCODE_UP4B: /* unpack four GLbytes */
1259 {
1260 GLfloat a[4], result[4];
1261 const GLuint *rawBits = (const GLuint *) a;
1262 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1263 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1264 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1265 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1266 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1267 store_vector4( inst, machine, result );
1268 }
1269 break;
1270 case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1271 {
1272 GLfloat a[4], result[4];
1273 const GLuint *rawBits = (const GLuint *) a;
1274 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1275 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1276 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1277 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1278 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1279 store_vector4( inst, machine, result );
1280 }
1281 break;
1282 case FP_OPCODE_XPD: /* cross product */
1283 {
1284 GLfloat a[4], b[4], result[4];
1285 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1286 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1287 result[0] = a[1] * b[2] - a[2] * b[1];
1288 result[1] = a[2] * b[0] - a[0] * b[2];
1289 result[2] = a[0] * b[1] - a[1] * b[0];
1290 result[3] = 1.0;
1291 store_vector4( inst, machine, result );
1292 }
1293 break;
1294 case FP_OPCODE_X2D: /* 2-D matrix transform */
1295 {
1296 GLfloat a[4], b[4], c[4], result[4];
1297 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1298 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1299 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1300 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1301 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1302 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1303 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1304 store_vector4( inst, machine, result );
1305 }
1306 break;
1307 case FP_OPCODE_END:
1308 return GL_TRUE;
1309 default:
1310 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1311 inst->Opcode);
1312 return GL_TRUE; /* return value doesn't matter */
1313 }
1314 }
1315 return GL_TRUE;
1316 }
1317
1318
1319 static void
1320 init_machine( GLcontext *ctx, struct fp_machine *machine,
1321 const struct fragment_program *program,
1322 const struct sw_span *span, GLuint col )
1323 {
1324 GLuint inputsRead = program->InputsRead;
1325 GLuint u;
1326
1327 if (ctx->FragmentProgram.CallbackEnabled)
1328 inputsRead = ~0;
1329
1330 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1331 /* Clear temporary registers (undefined for ARB_f_p) */
1332 _mesa_bzero(machine->Temporaries,
1333 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1334 }
1335
1336 /* Load input registers */
1337 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1338 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1339 wpos[0] = (GLfloat) span->x + col;
1340 wpos[1] = (GLfloat) span->y;
1341 wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1342 wpos[3] = span->w + col * span->dwdx;
1343 }
1344 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1345 GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1346 col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1347 col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1348 col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1349 col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1350 }
1351 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1352 GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1353 col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1354 col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1355 col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1356 col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1357 }
1358 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1359 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1360 fogc[0] = span->array->fog[col];
1361 fogc[1] = 0.0F;
1362 fogc[2] = 0.0F;
1363 fogc[3] = 0.0F;
1364 }
1365 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1366 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1367 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1368 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1369 COPY_4V(tex, span->array->texcoords[u][col]);
1370 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1371 }
1372 }
1373
1374 /* init condition codes */
1375 machine->CondCodes[0] = COND_EQ;
1376 machine->CondCodes[1] = COND_EQ;
1377 machine->CondCodes[2] = COND_EQ;
1378 machine->CondCodes[3] = COND_EQ;
1379 }
1380
1381
1382
1383 /**
1384 * Execute the current fragment program, operating on the given span.
1385 */
1386 void
1387 _swrast_exec_fragment_program( GLcontext *ctx, struct sw_span *span )
1388 {
1389 const struct fragment_program *program = ctx->FragmentProgram.Current;
1390 GLuint i;
1391
1392 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1393
1394 for (i = 0; i < span->end; i++) {
1395 if (span->array->mask[i]) {
1396 init_machine(ctx, &ctx->FragmentProgram.Machine,
1397 ctx->FragmentProgram.Current, span, i);
1398
1399 #ifdef USE_TCC
1400 if (!_swrast_execute_codegen_program(ctx, program, ~0,
1401 &ctx->FragmentProgram.Machine,
1402 span, i)) {
1403 span->array->mask[i] = GL_FALSE; /* killed fragment */
1404 }
1405 #else
1406 if (!execute_program(ctx, program, ~0,
1407 &ctx->FragmentProgram.Machine, span, i)) {
1408 span->array->mask[i] = GL_FALSE; /* killed fragment */
1409 }
1410 #endif
1411
1412 /* Store output registers */
1413 {
1414 const GLfloat *colOut
1415 = ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_COLR];
1416 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1417 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1418 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1419 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1420 }
1421 /* depth value */
1422 if (program->OutputsWritten & (1 << FRAG_OUTPUT_DEPR))
1423 span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_DEPR][0] * ctx->DepthMaxF);
1424 }
1425 }
1426
1427 ctx->_CurrentProgram = 0;
1428 }
1429