92fc6ed45281cae7cc08ac9d86ec519459e63b8d
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 #include "glheader.h"
27 #include "colormac.h"
28 #include "context.h"
29 #include "nvfragprog.h"
30 #include "macros.h"
31 #include "program.h"
32
33 #include "s_nvfragprog.h"
34 #include "s_span.h"
35 #include "s_texture.h"
36
37
38 /* if 1, print some debugging info */
39 #define DEBUG_FRAG 0
40
41
42 /**
43 * Fetch a texel.
44 */
45 static void
46 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
47 GLuint unit, GLfloat color[4] )
48 {
49 GLchan rgba[4];
50 SWcontext *swrast = SWRAST_CONTEXT(ctx);
51
52 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
53 1, (const GLfloat (*)[4]) texcoord,
54 &lambda, &rgba);
55 color[0] = CHAN_TO_FLOAT(rgba[0]);
56 color[1] = CHAN_TO_FLOAT(rgba[1]);
57 color[2] = CHAN_TO_FLOAT(rgba[2]);
58 color[3] = CHAN_TO_FLOAT(rgba[3]);
59 }
60
61
62 /**
63 * Fetch a texel with the given partial derivatives to compute a level
64 * of detail in the mipmap.
65 */
66 static void
67 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
68 const GLfloat texdx[4], const GLfloat texdy[4],
69 GLuint unit, GLfloat color[4] )
70 {
71 SWcontext *swrast = SWRAST_CONTEXT(ctx);
72 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
73 const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
74 const GLfloat texW = (GLfloat) texImg->WidthScale;
75 const GLfloat texH = (GLfloat) texImg->HeightScale;
76 GLchan rgba[4];
77
78 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
79 texdx[1], texdy[1], /* dt/dx, dt/dy */
80 texdx[3], texdy[2], /* dq/dx, dq/dy */
81 texW, texH,
82 texcoord[0], texcoord[1], texcoord[3],
83 1.0F / texcoord[3]);
84
85 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
86 1, (const GLfloat (*)[4]) texcoord,
87 &lambda, &rgba);
88 color[0] = CHAN_TO_FLOAT(rgba[0]);
89 color[1] = CHAN_TO_FLOAT(rgba[1]);
90 color[2] = CHAN_TO_FLOAT(rgba[2]);
91 color[3] = CHAN_TO_FLOAT(rgba[3]);
92 }
93
94
95
96 /**
97 * Fetch a 4-element float vector from the given source register.
98 * Apply swizzling and negating as needed.
99 */
100 static void
101 fetch_vector4( GLcontext *ctx,
102 const struct fp_src_register *source,
103 struct fp_machine *machine,
104 const struct fragment_program *program,
105 GLfloat result[4] )
106 {
107 const GLfloat *src;
108
109 switch (source->File) {
110 case PROGRAM_TEMPORARY:
111 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
112 src = machine->Temporaries[source->Index];
113 break;
114 case PROGRAM_INPUT:
115 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
116 src = machine->Inputs[source->Index];
117 break;
118 case PROGRAM_LOCAL_PARAM:
119 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
120 src = program->Base.LocalParams[source->Index];
121 break;
122 case PROGRAM_ENV_PARAM:
123 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
124 src = ctx->FragmentProgram.Parameters[source->Index];
125 break;
126 case PROGRAM_NAMED_PARAM:
127 ASSERT(source->Index < program->Parameters->NumParameters);
128 src = program->Parameters->Parameters[source->Index].Values;
129 break;
130 case PROGRAM_STATE_VAR:
131 abort();
132 default:
133 _mesa_problem(ctx, "Invalid input register file in fetch_vector4");
134 return;
135 }
136
137 result[0] = src[source->Swizzle[0]];
138 result[1] = src[source->Swizzle[1]];
139 result[2] = src[source->Swizzle[2]];
140 result[3] = src[source->Swizzle[3]];
141
142 if (source->NegateBase) {
143 result[0] = -result[0];
144 result[1] = -result[1];
145 result[2] = -result[2];
146 result[3] = -result[3];
147 }
148 if (source->Abs) {
149 result[0] = FABSF(result[0]);
150 result[1] = FABSF(result[1]);
151 result[2] = FABSF(result[2]);
152 result[3] = FABSF(result[3]);
153 }
154 if (source->NegateAbs) {
155 result[0] = -result[0];
156 result[1] = -result[1];
157 result[2] = -result[2];
158 result[3] = -result[3];
159 }
160 }
161
162
163 /**
164 * Fetch the derivative with respect to X for the given register.
165 * \return GL_TRUE if it was easily computed or GL_FALSE if we
166 * need to execute another instance of the program (ugh)!
167 */
168 static GLboolean
169 fetch_vector4_deriv( const struct fp_src_register *source,
170 const struct sw_span *span,
171 char xOrY, GLfloat result[4] )
172 {
173 GLfloat src[4];
174
175 ASSERT(xOrY == 'X' || xOrY == 'Y');
176
177 assert(source->File == PROGRAM_INPUT);
178
179 switch (source->Index) {
180 case FRAG_ATTRIB_WPOS:
181 if (xOrY == 'X') {
182 src[0] = 1.0;
183 src[1] = 0.0;
184 src[2] = span->dzdx;
185 src[3] = span->dwdx;
186 }
187 else {
188 src[0] = 0.0;
189 src[1] = 1.0;
190 src[2] = span->dzdy;
191 src[3] = span->dwdy;
192 }
193 break;
194 case FRAG_ATTRIB_COL0:
195 if (xOrY == 'X') {
196 src[0] = span->drdx * (1.0F / CHAN_MAXF);
197 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
198 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
199 src[3] = span->dadx * (1.0F / CHAN_MAXF);
200 }
201 else {
202 src[0] = span->drdy * (1.0F / CHAN_MAXF);
203 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
204 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
205 src[3] = span->dady * (1.0F / CHAN_MAXF);
206 }
207 break;
208 case FRAG_ATTRIB_COL1:
209 if (xOrY == 'X') {
210 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
211 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
212 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
213 src[3] = 0.0; /* XXX need this */
214 }
215 else {
216 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
217 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
218 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
219 src[3] = 0.0; /* XXX need this */
220 }
221 break;
222 case FRAG_ATTRIB_FOGC:
223 if (xOrY == 'X') {
224 src[0] = span->dfogdx;
225 src[1] = 0.0;
226 src[2] = 0.0;
227 src[3] = 0.0;
228 }
229 else {
230 src[0] = span->dfogdy;
231 src[1] = 0.0;
232 src[2] = 0.0;
233 src[3] = 0.0;
234 }
235 break;
236 case FRAG_ATTRIB_TEX0:
237 case FRAG_ATTRIB_TEX1:
238 case FRAG_ATTRIB_TEX2:
239 case FRAG_ATTRIB_TEX3:
240 case FRAG_ATTRIB_TEX4:
241 case FRAG_ATTRIB_TEX5:
242 case FRAG_ATTRIB_TEX6:
243 case FRAG_ATTRIB_TEX7:
244 if (xOrY == 'X') {
245 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
246 src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
247 src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
248 src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
249 src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
250 }
251 else {
252 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
253 src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
254 src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
255 src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
256 src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
257 }
258 break;
259 default:
260 return GL_FALSE;
261 }
262
263 result[0] = src[source->Swizzle[0]];
264 result[1] = src[source->Swizzle[1]];
265 result[2] = src[source->Swizzle[2]];
266 result[3] = src[source->Swizzle[3]];
267
268 if (source->NegateBase) {
269 result[0] = -result[0];
270 result[1] = -result[1];
271 result[2] = -result[2];
272 result[3] = -result[3];
273 }
274 if (source->Abs) {
275 result[0] = FABSF(result[0]);
276 result[1] = FABSF(result[1]);
277 result[2] = FABSF(result[2]);
278 result[3] = FABSF(result[3]);
279 }
280 if (source->NegateAbs) {
281 result[0] = -result[0];
282 result[1] = -result[1];
283 result[2] = -result[2];
284 result[3] = -result[3];
285 }
286 return GL_TRUE;
287 }
288
289
290 /**
291 * As above, but only return result[0] element.
292 */
293 static void
294 fetch_vector1( GLcontext *ctx,
295 const struct fp_src_register *source,
296 const struct fp_machine *machine,
297 const struct fragment_program *program,
298 GLfloat result[4] )
299 {
300 const GLfloat *src;
301
302 switch (source->File) {
303 case PROGRAM_TEMPORARY:
304 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
305 src = machine->Temporaries[source->Index];
306 break;
307 case PROGRAM_INPUT:
308 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
309 src = machine->Inputs[source->Index];
310 break;
311 case PROGRAM_LOCAL_PARAM:
312 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
313 src = program->Base.LocalParams[source->Index];
314 break;
315 case PROGRAM_ENV_PARAM:
316 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
317 src = ctx->FragmentProgram.Parameters[source->Index];
318 break;
319 case PROGRAM_NAMED_PARAM:
320 ASSERT(source->Index < program->Parameters->NumParameters);
321 src = program->Parameters->Parameters[source->Index].Values;
322 break;
323 case PROGRAM_STATE_VAR:
324 abort();
325 default:
326 _mesa_problem(ctx, "Invalid input register file in fetch_vector1");
327 return;
328 }
329
330 result[0] = src[source->Swizzle[0]];
331
332 if (source->NegateBase) {
333 result[0] = -result[0];
334 }
335 if (source->Abs) {
336 result[0] = FABSF(result[0]);
337 }
338 if (source->NegateAbs) {
339 result[0] = -result[0];
340 }
341 }
342
343
344 /*
345 * Test value against zero and return GT, LT, EQ or UN if NaN.
346 */
347 static INLINE GLuint
348 generate_cc( float value )
349 {
350 if (value != value)
351 return COND_UN; /* NaN */
352 if (value > 0.0F)
353 return COND_GT;
354 if (value < 0.0F)
355 return COND_LT;
356 return COND_EQ;
357 }
358
359 /*
360 * Test if the ccMaskRule is satisfied by the given condition code.
361 * Used to mask destination writes according to the current condition codee.
362 */
363 static INLINE GLboolean
364 test_cc(GLuint condCode, GLuint ccMaskRule)
365 {
366 switch (ccMaskRule) {
367 case COND_EQ: return (condCode == COND_EQ);
368 case COND_NE: return (condCode != COND_EQ);
369 case COND_LT: return (condCode == COND_LT);
370 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
371 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
372 case COND_GT: return (condCode == COND_GT);
373 case COND_TR: return GL_TRUE;
374 case COND_FL: return GL_FALSE;
375 default: return GL_TRUE;
376 }
377 }
378
379
380 /**
381 * Store 4 floats into a register. Observe the instructions saturate and
382 * set-condition-code flags.
383 */
384 static void
385 store_vector4( const struct fp_instruction *inst,
386 struct fp_machine *machine,
387 const GLfloat value[4] )
388 {
389 const struct fp_dst_register *dest = &(inst->DstReg);
390 const GLboolean clamp = inst->Saturate;
391 const GLboolean updateCC = inst->UpdateCondRegister;
392 GLfloat *dstReg;
393 GLfloat clampedValue[4];
394 const GLboolean *writeMask = dest->WriteMask;
395 GLboolean condWriteMask[4];
396
397 switch (dest->File) {
398 case PROGRAM_OUTPUT:
399 dstReg = machine->Outputs[dest->Index];
400 break;
401 case PROGRAM_TEMPORARY:
402 dstReg = machine->Temporaries[dest->Index];
403 break;
404 default:
405 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
406 return;
407 }
408
409 #if DEBUG_FRAG
410 if (value[0] > 1.0e10 ||
411 IS_INF_OR_NAN(value[0]) ||
412 IS_INF_OR_NAN(value[1]) ||
413 IS_INF_OR_NAN(value[2]) ||
414 IS_INF_OR_NAN(value[3]) )
415 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
416 #endif
417
418 if (clamp) {
419 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
420 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
421 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
422 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
423 value = clampedValue;
424 }
425
426 if (dest->CondMask != COND_TR) {
427 condWriteMask[0] = writeMask[0]
428 && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
429 condWriteMask[1] = writeMask[1]
430 && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
431 condWriteMask[2] = writeMask[2]
432 && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
433 condWriteMask[3] = writeMask[3]
434 && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
435 writeMask = condWriteMask;
436 }
437
438 if (writeMask[0]) {
439 dstReg[0] = value[0];
440 if (updateCC)
441 machine->CondCodes[0] = generate_cc(value[0]);
442 }
443 if (writeMask[1]) {
444 dstReg[1] = value[1];
445 if (updateCC)
446 machine->CondCodes[1] = generate_cc(value[1]);
447 }
448 if (writeMask[2]) {
449 dstReg[2] = value[2];
450 if (updateCC)
451 machine->CondCodes[2] = generate_cc(value[2]);
452 }
453 if (writeMask[3]) {
454 dstReg[3] = value[3];
455 if (updateCC)
456 machine->CondCodes[3] = generate_cc(value[3]);
457 }
458 }
459
460
461 /**
462 * Initialize a new machine state instance from an existing one, adding
463 * the partial derivatives onto the input registers.
464 * Used to implement DDX and DDY instructions in non-trivial cases.
465 */
466 static void
467 init_machine_deriv( GLcontext *ctx,
468 const struct fp_machine *machine,
469 const struct fragment_program *program,
470 const struct sw_span *span, char xOrY,
471 struct fp_machine *dMachine )
472 {
473 GLuint u;
474
475 ASSERT(xOrY == 'X' || xOrY == 'Y');
476
477 /* copy existing machine */
478 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
479
480 /* Clear temporary registers */
481 _mesa_bzero( (void*) machine->Temporaries,
482 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
483
484 /* Add derivatives */
485 if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
486 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
487 if (xOrY == 'X') {
488 wpos[0] += 1.0F;
489 wpos[1] += 0.0F;
490 wpos[2] += span->dzdx;
491 wpos[3] += span->dwdx;
492 }
493 else {
494 wpos[0] += 0.0F;
495 wpos[1] += 1.0F;
496 wpos[2] += span->dzdy;
497 wpos[3] += span->dwdy;
498 }
499 }
500 if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
501 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
502 if (xOrY == 'X') {
503 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
504 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
505 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
506 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
507 }
508 else {
509 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
510 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
511 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
512 col0[3] += span->dady * (1.0F / CHAN_MAXF);
513 }
514 }
515 if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
516 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
517 if (xOrY == 'X') {
518 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
519 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
520 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
521 col1[3] += 0.0; /*XXX fix */
522 }
523 else {
524 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
525 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
526 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
527 col1[3] += 0.0; /*XXX fix */
528 }
529 }
530 if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
531 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
532 if (xOrY == 'X') {
533 fogc[0] += span->dfogdx;
534 }
535 else {
536 fogc[0] += span->dfogdy;
537 }
538 }
539 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
540 if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
541 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
542 if (xOrY == 'X') {
543 tex[0] += span->texStepX[u][0];
544 tex[1] += span->texStepX[u][1];
545 tex[2] += span->texStepX[u][2];
546 tex[3] += span->texStepX[u][3];
547 }
548 else {
549 tex[0] += span->texStepY[u][0];
550 tex[1] += span->texStepY[u][1];
551 tex[2] += span->texStepY[u][2];
552 tex[3] += span->texStepY[u][3];
553 }
554 }
555 }
556
557 /* init condition codes */
558 dMachine->CondCodes[0] = COND_EQ;
559 dMachine->CondCodes[1] = COND_EQ;
560 dMachine->CondCodes[2] = COND_EQ;
561 dMachine->CondCodes[3] = COND_EQ;
562 }
563
564
565 /**
566 * Execute the given fragment program.
567 * NOTE: we do everything in single-precision floating point; we don't
568 * currently observe the single/half/fixed-precision qualifiers.
569 * \param ctx - rendering context
570 * \param program - the fragment program to execute
571 * \param machine - machine state (register file)
572 * \param maxInst - max number of instructions to execute
573 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
574 */
575 static GLboolean
576 execute_program( GLcontext *ctx,
577 const struct fragment_program *program, GLuint maxInst,
578 struct fp_machine *machine, const struct sw_span *span,
579 GLuint column )
580 {
581 GLuint pc;
582
583 #if DEBUG_FRAG
584 printf("execute fragment program --------------------\n");
585 #endif
586
587 for (pc = 0; pc < maxInst; pc++) {
588 const struct fp_instruction *inst = program->Instructions + pc;
589
590 if (ctx->FragmentProgram.CallbackEnabled &&
591 ctx->FragmentProgram.Callback) {
592 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
593 ctx->FragmentProgram.Callback(program->Base.Target,
594 ctx->FragmentProgram.CallbackData);
595 }
596
597 switch (inst->Opcode) {
598 case FP_OPCODE_ADD:
599 {
600 GLfloat a[4], b[4], result[4];
601 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
602 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
603 result[0] = a[0] + b[0];
604 result[1] = a[1] + b[1];
605 result[2] = a[2] + b[2];
606 result[3] = a[3] + b[3];
607 store_vector4( inst, machine, result );
608 }
609 break;
610 case FP_OPCODE_COS:
611 {
612 GLfloat a[4], result[4];
613 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
614 result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
615 store_vector4( inst, machine, result );
616 }
617 break;
618 case FP_OPCODE_DDX: /* Partial derivative with respect to X */
619 {
620 GLfloat a[4], aNext[4], result[4];
621 struct fp_machine dMachine;
622 if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
623 /* This is tricky. Make a copy of the current machine state,
624 * increment the input registers by the dx or dy partial
625 * derivatives, then re-execute the program up to the
626 * preceeding instruction, then fetch the source register.
627 * Finally, find the difference in the register values for
628 * the original and derivative runs.
629 */
630 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
631 init_machine_deriv(ctx, machine, program, span,
632 'X', &dMachine);
633 execute_program(ctx, program, pc, &dMachine, span, column);
634 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
635 result[0] = aNext[0] - a[0];
636 result[1] = aNext[1] - a[1];
637 result[2] = aNext[2] - a[2];
638 result[3] = aNext[3] - a[3];
639 }
640 store_vector4( inst, machine, result );
641 }
642 break;
643 case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
644 {
645 GLfloat a[4], aNext[4], result[4];
646 struct fp_machine dMachine;
647 if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
648 init_machine_deriv(ctx, machine, program, span,
649 'Y', &dMachine);
650 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
651 execute_program(ctx, program, pc, &dMachine, span, column);
652 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
653 result[0] = aNext[0] - a[0];
654 result[1] = aNext[1] - a[1];
655 result[2] = aNext[2] - a[2];
656 result[3] = aNext[3] - a[3];
657 }
658 store_vector4( inst, machine, result );
659 }
660 break;
661 case FP_OPCODE_DP3:
662 {
663 GLfloat a[4], b[4], result[4];
664 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
665 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
666 result[0] = result[1] = result[2] = result[3] =
667 a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
668 store_vector4( inst, machine, result );
669 #if DEBUG_FRAG
670 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
671 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
672 #endif
673 }
674 break;
675 case FP_OPCODE_DP4:
676 {
677 GLfloat a[4], b[4], result[4];
678 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
679 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
680 result[0] = result[1] = result[2] = result[3] =
681 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
682 store_vector4( inst, machine, result );
683 }
684 break;
685 case FP_OPCODE_DST: /* Distance vector */
686 {
687 GLfloat a[4], b[4], result[4];
688 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
689 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
690 result[0] = 1.0F;
691 result[1] = a[1] * b[1];
692 result[2] = a[2];
693 result[3] = b[3];
694 store_vector4( inst, machine, result );
695 }
696 break;
697 case FP_OPCODE_EX2: /* Exponential base 2 */
698 {
699 GLfloat a[4], result[4];
700 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
701 result[0] = result[1] = result[2] = result[3] =
702 (GLfloat) _mesa_pow(2.0, a[0]);
703 store_vector4( inst, machine, result );
704 }
705 break;
706 case FP_OPCODE_FLR:
707 {
708 GLfloat a[4], result[4];
709 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
710 result[0] = FLOORF(a[0]);
711 result[1] = FLOORF(a[1]);
712 result[2] = FLOORF(a[2]);
713 result[3] = FLOORF(a[3]);
714 store_vector4( inst, machine, result );
715 }
716 break;
717 case FP_OPCODE_FRC:
718 {
719 GLfloat a[4], result[4];
720 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
721 result[0] = a[0] - FLOORF(a[0]);
722 result[1] = a[1] - FLOORF(a[1]);
723 result[2] = a[2] - FLOORF(a[2]);
724 result[3] = a[3] - FLOORF(a[3]);
725 store_vector4( inst, machine, result );
726 }
727 break;
728 case FP_OPCODE_KIL:
729 {
730 const GLuint *swizzle = inst->DstReg.CondSwizzle;
731 const GLuint condMask = inst->DstReg.CondMask;
732 if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
733 test_cc(machine->CondCodes[swizzle[1]], condMask) ||
734 test_cc(machine->CondCodes[swizzle[2]], condMask) ||
735 test_cc(machine->CondCodes[swizzle[3]], condMask)) {
736 return GL_FALSE;
737 }
738 }
739 break;
740 case FP_OPCODE_LG2: /* log base 2 */
741 {
742 GLfloat a[4], result[4];
743 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
744 result[0] = result[1] = result[2] = result[3]
745 = LOG2(a[0]);
746 store_vector4( inst, machine, result );
747 }
748 break;
749 case FP_OPCODE_LIT:
750 {
751 GLfloat a[4], result[4];
752 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
753 if (a[0] < 0.0F)
754 a[0] = 0.0F;
755 if (a[1] < 0.0F)
756 a[1] = 0.0F;
757 result[0] = 1.0F;
758 result[1] = a[0];
759 result[2] = (a[0] > 0.0F) ? (GLfloat)_mesa_pow(2.0, a[3]) : 0.0F;
760 result[3] = 1.0F;
761 store_vector4( inst, machine, result );
762 }
763 break;
764 case FP_OPCODE_LRP:
765 {
766 GLfloat a[4], b[4], c[4], result[4];
767 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
768 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
769 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
770 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
771 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
772 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
773 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
774 store_vector4( inst, machine, result );
775 }
776 break;
777 case FP_OPCODE_MAD:
778 {
779 GLfloat a[4], b[4], c[4], result[4];
780 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
781 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
782 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
783 result[0] = a[0] * b[0] + c[0];
784 result[1] = a[1] * b[1] + c[1];
785 result[2] = a[2] * b[2] + c[2];
786 result[3] = a[3] * b[3] + c[3];
787 store_vector4( inst, machine, result );
788 }
789 break;
790 case FP_OPCODE_MAX:
791 {
792 GLfloat a[4], b[4], result[4];
793 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
794 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
795 result[0] = MAX2(a[0], b[0]);
796 result[1] = MAX2(a[1], b[1]);
797 result[2] = MAX2(a[2], b[2]);
798 result[3] = MAX2(a[3], b[3]);
799 store_vector4( inst, machine, result );
800 }
801 break;
802 case FP_OPCODE_MIN:
803 {
804 GLfloat a[4], b[4], result[4];
805 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
806 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
807 result[0] = MIN2(a[0], b[0]);
808 result[1] = MIN2(a[1], b[1]);
809 result[2] = MIN2(a[2], b[2]);
810 result[3] = MIN2(a[3], b[3]);
811 store_vector4( inst, machine, result );
812 }
813 break;
814 case FP_OPCODE_MOV:
815 {
816 GLfloat result[4];
817 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
818 store_vector4( inst, machine, result );
819 }
820 break;
821 case FP_OPCODE_MUL:
822 {
823 GLfloat a[4], b[4], result[4];
824 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
825 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
826 result[0] = a[0] * b[0];
827 result[1] = a[1] * b[1];
828 result[2] = a[2] * b[2];
829 result[3] = a[3] * b[3];
830 store_vector4( inst, machine, result );
831 #if DEBUG_FRAG
832 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
833 result[0], result[1], result[2], result[3],
834 a[0], a[1], a[2], a[3],
835 b[0], b[1], b[2], b[3]);
836 #endif
837 }
838 break;
839 case FP_OPCODE_PK2H: /* pack two 16-bit floats */
840 /* XXX this is probably wrong */
841 {
842 GLfloat a[4], result[4];
843 const GLuint *rawBits = (const GLuint *) a;
844 GLuint *rawResult = (GLuint *) result;
845 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
846 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
847 = rawBits[0] | (rawBits[1] << 16);
848 store_vector4( inst, machine, result );
849 }
850 break;
851 case FP_OPCODE_PK2US: /* pack two GLushorts */
852 {
853 GLfloat a[4], result[4];
854 GLuint usx, usy, *rawResult = (GLuint *) result;
855 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
856 a[0] = CLAMP(a[0], 0.0F, 1.0F);
857 a[1] = CLAMP(a[0], 0.0F, 1.0F);
858 usx = IROUND(a[0] * 65535.0F);
859 usy = IROUND(a[1] * 65535.0F);
860 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
861 = usx | (usy << 16);
862 store_vector4( inst, machine, result );
863 }
864 break;
865 case FP_OPCODE_PK4B: /* pack four GLbytes */
866 {
867 GLfloat a[4], result[4];
868 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
869 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
870 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
871 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
872 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
873 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
874 ubx = IROUND(127.0F * a[0] + 128.0F);
875 uby = IROUND(127.0F * a[1] + 128.0F);
876 ubz = IROUND(127.0F * a[2] + 128.0F);
877 ubw = IROUND(127.0F * a[3] + 128.0F);
878 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
879 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
880 store_vector4( inst, machine, result );
881 }
882 break;
883 case FP_OPCODE_PK4UB: /* pack four GLubytes */
884 {
885 GLfloat a[4], result[4];
886 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
887 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
888 a[0] = CLAMP(a[0], 0.0F, 1.0F);
889 a[1] = CLAMP(a[1], 0.0F, 1.0F);
890 a[2] = CLAMP(a[2], 0.0F, 1.0F);
891 a[3] = CLAMP(a[3], 0.0F, 1.0F);
892 ubx = IROUND(255.0F * a[0]);
893 uby = IROUND(255.0F * a[1]);
894 ubz = IROUND(255.0F * a[2]);
895 ubw = IROUND(255.0F * a[3]);
896 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
897 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
898 store_vector4( inst, machine, result );
899 }
900 break;
901 case FP_OPCODE_POW:
902 {
903 GLfloat a[4], b[4], result[4];
904 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
905 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
906 result[0] = result[1] = result[2] = result[3]
907 = (GLfloat)_mesa_pow(a[0], b[0]);
908 store_vector4( inst, machine, result );
909 }
910 break;
911 case FP_OPCODE_RCP:
912 {
913 GLfloat a[4], result[4];
914 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
915 #if DEBUG_FRAG
916 if (a[0] == 0)
917 printf("RCP(0)\n");
918 else if (IS_INF_OR_NAN(a[0]))
919 printf("RCP(inf)\n");
920 #endif
921 result[0] = result[1] = result[2] = result[3]
922 = 1.0F / a[0];
923 store_vector4( inst, machine, result );
924 }
925 break;
926 case FP_OPCODE_RFL:
927 {
928 GLfloat axis[4], dir[4], result[4], tmp[4];
929 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
930 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
931 tmp[3] = axis[0] * axis[0]
932 + axis[1] * axis[1]
933 + axis[2] * axis[2];
934 tmp[0] = (2.0F * (axis[0] * dir[0] +
935 axis[1] * dir[1] +
936 axis[2] * dir[2])) / tmp[3];
937 result[0] = tmp[0] * axis[0] - dir[0];
938 result[1] = tmp[0] * axis[1] - dir[1];
939 result[2] = tmp[0] * axis[2] - dir[2];
940 /* result[3] is never written! XXX enforce in parser! */
941 store_vector4( inst, machine, result );
942 }
943 break;
944 case FP_OPCODE_RSQ: /* 1 / sqrt() */
945 {
946 GLfloat a[4], result[4];
947 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
948 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
949 store_vector4( inst, machine, result );
950 #if DEBUG_FRAG
951 printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
952 #endif
953 }
954 break;
955 case FP_OPCODE_SEQ: /* set on equal */
956 {
957 GLfloat a[4], b[4], result[4];
958 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
959 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
960 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
961 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
962 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
963 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
964 store_vector4( inst, machine, result );
965 }
966 break;
967 case FP_OPCODE_SFL: /* set false, operands ignored */
968 {
969 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
970 store_vector4( inst, machine, result );
971 }
972 break;
973 case FP_OPCODE_SGE: /* set on greater or equal */
974 {
975 GLfloat a[4], b[4], result[4];
976 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
977 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
978 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
979 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
980 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
981 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
982 store_vector4( inst, machine, result );
983 }
984 break;
985 case FP_OPCODE_SGT: /* set on greater */
986 {
987 GLfloat a[4], b[4], result[4];
988 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
989 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
990 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
991 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
992 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
993 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
994 store_vector4( inst, machine, result );
995 }
996 break;
997 case FP_OPCODE_SIN:
998 {
999 GLfloat a[4], result[4];
1000 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1001 result[0] = result[1] = result[2] =
1002 result[3] = (GLfloat)_mesa_sin(a[0]);
1003 store_vector4( inst, machine, result );
1004 }
1005 break;
1006 case FP_OPCODE_SLE: /* set on less or equal */
1007 {
1008 GLfloat a[4], b[4], result[4];
1009 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1010 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1011 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1012 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1013 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1014 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1015 store_vector4( inst, machine, result );
1016 }
1017 break;
1018 case FP_OPCODE_SLT: /* set on less */
1019 {
1020 GLfloat a[4], b[4], result[4];
1021 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1022 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1023 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1024 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1025 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1026 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1027 store_vector4( inst, machine, result );
1028 }
1029 break;
1030 case FP_OPCODE_SNE: /* set on not equal */
1031 {
1032 GLfloat a[4], b[4], result[4];
1033 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1034 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1035 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1036 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1037 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1038 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1039 store_vector4( inst, machine, result );
1040 }
1041 break;
1042 case FP_OPCODE_STR: /* set true, operands ignored */
1043 {
1044 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1045 store_vector4( inst, machine, result );
1046 }
1047 break;
1048 case FP_OPCODE_SUB:
1049 {
1050 GLfloat a[4], b[4], result[4];
1051 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1052 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1053 result[0] = a[0] - b[0];
1054 result[1] = a[1] - b[1];
1055 result[2] = a[2] - b[2];
1056 result[3] = a[3] - b[3];
1057 store_vector4( inst, machine, result );
1058 }
1059 break;
1060 case FP_OPCODE_TEX:
1061 /* Texel lookup */
1062 {
1063 GLfloat texcoord[4], color[4];
1064 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1065 /* XXX: Undo perspective divide from interpolate_texcoords() */
1066 fetch_texel( ctx, texcoord,
1067 span->array->lambda[inst->TexSrcUnit][column],
1068 inst->TexSrcUnit, color );
1069 store_vector4( inst, machine, color );
1070 }
1071 break;
1072 case FP_OPCODE_TXD:
1073 /* Texture lookup w/ partial derivatives for LOD */
1074 {
1075 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1076 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1077 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1078 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1079 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1080 color );
1081 store_vector4( inst, machine, color );
1082 }
1083 break;
1084 case FP_OPCODE_TXP:
1085 /* Texture lookup w/ perspective divide */
1086 {
1087 GLfloat texcoord[4], color[4];
1088 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1089 /* Already did perspective divide in interpolate_texcoords() */
1090 fetch_texel( ctx, texcoord,
1091 span->array->lambda[inst->TexSrcUnit][column],
1092 inst->TexSrcUnit, color );
1093 store_vector4( inst, machine, color );
1094 }
1095 break;
1096 case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1097 /* XXX this is probably wrong */
1098 {
1099 GLfloat a[4], result[4];
1100 const GLuint *rawBits = (const GLuint *) a;
1101 GLuint *rawResult = (GLuint *) result;
1102 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1103 rawResult[0] = rawBits[0] & 0xffff;
1104 rawResult[1] = (rawBits[0] >> 16) & 0xffff;
1105 rawResult[2] = rawBits[0] & 0xffff;
1106 rawResult[3] = (rawBits[0] >> 16) & 0xffff;
1107 store_vector4( inst, machine, result );
1108 }
1109 break;
1110 case FP_OPCODE_UP2US: /* unpack two GLushorts */
1111 {
1112 GLfloat a[4], result[4];
1113 const GLuint *rawBits = (const GLuint *) a;
1114 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1115 result[0] = (GLfloat) ((rawBits[0] >> 0) & 0xffff) / 65535.0F;
1116 result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
1117 result[2] = result[0];
1118 result[3] = result[1];
1119 store_vector4( inst, machine, result );
1120 }
1121 break;
1122 case FP_OPCODE_UP4B: /* unpack four GLbytes */
1123 {
1124 GLfloat a[4], result[4];
1125 const GLuint *rawBits = (const GLuint *) a;
1126 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1127 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1128 result[0] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1129 result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1130 result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1131 store_vector4( inst, machine, result );
1132 }
1133 break;
1134 case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1135 {
1136 GLfloat a[4], result[4];
1137 const GLuint *rawBits = (const GLuint *) a;
1138 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1139 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1140 result[0] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1141 result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1142 result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1143 store_vector4( inst, machine, result );
1144 }
1145 break;
1146 case FP_OPCODE_X2D: /* 2-D matrix transform */
1147 {
1148 GLfloat a[4], b[4], c[4], result[4];
1149 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1150 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1151 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1152 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1153 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1154 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1155 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1156 store_vector4( inst, machine, result );
1157 }
1158 break;
1159 case FP_OPCODE_END:
1160 return GL_TRUE;
1161 default:
1162 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1163 inst->Opcode);
1164 return GL_TRUE; /* return value doesn't matter */
1165 }
1166 }
1167 return GL_TRUE;
1168 }
1169
1170
1171 static void
1172 init_machine( GLcontext *ctx, struct fp_machine *machine,
1173 const struct fragment_program *program,
1174 const struct sw_span *span, GLuint col )
1175 {
1176 GLuint inputsRead = program->InputsRead;
1177 GLuint u;
1178
1179 if (ctx->FragmentProgram.CallbackEnabled)
1180 inputsRead = ~0;
1181
1182 /* Clear temporary registers */
1183 _mesa_bzero(machine->Temporaries,
1184 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1185
1186 /* Load input registers */
1187 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1188 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1189 wpos[0] = (GLfloat) span->x + col;
1190 wpos[1] = (GLfloat) span->y;
1191 wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1192 wpos[3] = span->w + col * span->dwdx;
1193 }
1194 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1195 GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1196 col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1197 col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1198 col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1199 col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1200 }
1201 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1202 GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1203 col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1204 col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1205 col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1206 col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1207 }
1208 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1209 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1210 fogc[0] = span->array->fog[col];
1211 fogc[1] = 0.0F;
1212 fogc[2] = 0.0F;
1213 fogc[3] = 0.0F;
1214 }
1215 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1216 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1217 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1218 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1219 COPY_4V(tex, span->array->texcoords[u][col]);
1220 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1221 }
1222 }
1223
1224 /* init condition codes */
1225 machine->CondCodes[0] = COND_EQ;
1226 machine->CondCodes[1] = COND_EQ;
1227 machine->CondCodes[2] = COND_EQ;
1228 machine->CondCodes[3] = COND_EQ;
1229 }
1230
1231
1232 void
1233 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1234 {
1235 const struct fragment_program *program = ctx->FragmentProgram.Current;
1236 GLuint i;
1237
1238 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1239
1240 for (i = 0; i < span->end; i++) {
1241 if (span->array->mask[i]) {
1242 init_machine(ctx, &ctx->FragmentProgram.Machine,
1243 ctx->FragmentProgram.Current, span, i);
1244
1245 if (!execute_program(ctx, program, ~0,
1246 &ctx->FragmentProgram.Machine, span, i)) {
1247 span->array->mask[i] = GL_FALSE; /* killed fragment */
1248 }
1249
1250 /* Store output registers */
1251 {
1252 const GLfloat *colOut
1253 = ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_COLR];
1254 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1255 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1256 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1257 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1258 }
1259 /* depth value */
1260 if (program->OutputsWritten & (1 << FRAG_OUTPUT_DEPR))
1261 span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_DEPR][0] * ctx->DepthMaxF);
1262 }
1263 }
1264
1265 ctx->_CurrentProgram = 0;
1266 }
1267