Silence compiler warnings about implicit casts or conversions by supplying explicit...
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 5.1
4 *
5 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25
26 #include "glheader.h"
27 #include "colormac.h"
28 #include "context.h"
29 #include "nvfragprog.h"
30 #include "macros.h"
31
32 #include "s_nvfragprog.h"
33 #include "s_span.h"
34 #include "s_texture.h"
35
36
37 /* If nonzero, compile in printf() tracing of register stores and of selected instructions. */
38 #define DEBUG_FRAG 0
39
40
41 /**
42 * Fetch a texel.
43 */
44 static void
45 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
46 GLuint unit, GLfloat color[4] )
47 {
48 GLchan rgba[4];
49 SWcontext *swrast = SWRAST_CONTEXT(ctx);
50
51 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
52 1, (const GLfloat (*)[4]) texcoord,
53 &lambda, &rgba);
54 color[0] = CHAN_TO_FLOAT(rgba[0]);
55 color[1] = CHAN_TO_FLOAT(rgba[1]);
56 color[2] = CHAN_TO_FLOAT(rgba[2]);
57 color[3] = CHAN_TO_FLOAT(rgba[3]);
58 }
59
60
61 /**
62 * Fetch a texel with the given partial derivatives to compute a level
63 * of detail in the mipmap.
64 */
65 static void
66 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
67 const GLfloat texdx[4], const GLfloat texdy[4],
68 GLuint unit, GLfloat color[4] )
69 {
70 SWcontext *swrast = SWRAST_CONTEXT(ctx);
71 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
72 const struct gl_texture_image *texImg = texObj->Image[texObj->BaseLevel];
73 const GLfloat texW = (GLfloat) texImg->WidthScale;
74 const GLfloat texH = (GLfloat) texImg->HeightScale;
75 GLchan rgba[4];
76
77 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
78 texdx[1], texdy[1], /* dt/dx, dt/dy */
79 texdx[3], texdy[2], /* dq/dx, dq/dy */
80 texW, texH,
81 texcoord[0], texcoord[1], texcoord[3],
82 1.0F / texcoord[3]);
83
84 swrast->TextureSample[unit](ctx, unit, ctx->Texture.Unit[unit]._Current,
85 1, (const GLfloat (*)[4]) texcoord,
86 &lambda, &rgba);
87 color[0] = CHAN_TO_FLOAT(rgba[0]);
88 color[1] = CHAN_TO_FLOAT(rgba[1]);
89 color[2] = CHAN_TO_FLOAT(rgba[2]);
90 color[3] = CHAN_TO_FLOAT(rgba[3]);
91 }
92
93
94
95 /**
96 * Fetch a 4-element float vector from the given source register.
97 * Apply swizzling and negating as needed.
98 */
99 static void
100 fetch_vector4( GLcontext *ctx,
101 const struct fp_src_register *source,
102 struct fp_machine *machine,
103 const struct fragment_program *program,
104 GLfloat result[4] )
105 {
106 const GLfloat *src;
107
108 switch (source->File) {
109 case PROGRAM_TEMPORARY:
110 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
111 src = machine->Temporaries[source->Index];
112 break;
113 case PROGRAM_INPUT:
114 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
115 src = machine->Inputs[source->Index];
116 break;
117 case PROGRAM_LOCAL_PARAM:
118 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
119 src = program->Base.LocalParams[source->Index];
120 break;
121 case PROGRAM_ENV_PARAM:
122 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
123 src = ctx->FragmentProgram.Parameters[source->Index];
124 break;
125 case PROGRAM_NAMED_PARAM:
126 ASSERT(source->Index < program->NumParameters);
127 src = program->Parameters[source->Index].Values;
128 break;
129 case PROGRAM_STATE_VAR:
130 abort();
131 default:
132 _mesa_problem(ctx, "Invalid input register file in fetch_vector4");
133 return;
134 }
135
136 result[0] = src[source->Swizzle[0]];
137 result[1] = src[source->Swizzle[1]];
138 result[2] = src[source->Swizzle[2]];
139 result[3] = src[source->Swizzle[3]];
140
141 if (source->NegateBase) {
142 result[0] = -result[0];
143 result[1] = -result[1];
144 result[2] = -result[2];
145 result[3] = -result[3];
146 }
147 if (source->Abs) {
148 result[0] = FABSF(result[0]);
149 result[1] = FABSF(result[1]);
150 result[2] = FABSF(result[2]);
151 result[3] = FABSF(result[3]);
152 }
153 if (source->NegateAbs) {
154 result[0] = -result[0];
155 result[1] = -result[1];
156 result[2] = -result[2];
157 result[3] = -result[3];
158 }
159 }
160
161
162 /**
163 * Fetch the derivative with respect to X for the given register.
164 * \return GL_TRUE if it was easily computed or GL_FALSE if we
165 * need to execute another instance of the program (ugh)!
166 */
167 static GLboolean
168 fetch_vector4_deriv( const struct fp_src_register *source,
169 const struct sw_span *span,
170 char xOrY, GLfloat result[4] )
171 {
172 GLfloat src[4];
173
174 ASSERT(xOrY == 'X' || xOrY == 'Y');
175
176 assert(source->File == PROGRAM_INPUT);
177
178 switch (source->Index) {
179 case FRAG_ATTRIB_WPOS:
180 if (xOrY == 'X') {
181 src[0] = 1.0;
182 src[1] = 0.0;
183 src[2] = span->dzdx;
184 src[3] = span->dwdx;
185 }
186 else {
187 src[0] = 0.0;
188 src[1] = 1.0;
189 src[2] = span->dzdy;
190 src[3] = span->dwdy;
191 }
192 break;
193 case FRAG_ATTRIB_COL0:
194 if (xOrY == 'X') {
195 src[0] = span->drdx * (1.0F / CHAN_MAXF);
196 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
197 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
198 src[3] = span->dadx * (1.0F / CHAN_MAXF);
199 }
200 else {
201 src[0] = span->drdy * (1.0F / CHAN_MAXF);
202 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
203 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
204 src[3] = span->dady * (1.0F / CHAN_MAXF);
205 }
206 break;
207 case FRAG_ATTRIB_COL1:
208 if (xOrY == 'X') {
209 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
210 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
211 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
212 src[3] = 0.0; /* XXX need this */
213 }
214 else {
215 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
216 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
217 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
218 src[3] = 0.0; /* XXX need this */
219 }
220 break;
221 case FRAG_ATTRIB_FOGC:
222 if (xOrY == 'X') {
223 src[0] = span->dfogdx;
224 src[1] = 0.0;
225 src[2] = 0.0;
226 src[3] = 0.0;
227 }
228 else {
229 src[0] = span->dfogdy;
230 src[1] = 0.0;
231 src[2] = 0.0;
232 src[3] = 0.0;
233 }
234 break;
235 case FRAG_ATTRIB_TEX0:
236 case FRAG_ATTRIB_TEX1:
237 case FRAG_ATTRIB_TEX2:
238 case FRAG_ATTRIB_TEX3:
239 case FRAG_ATTRIB_TEX4:
240 case FRAG_ATTRIB_TEX5:
241 case FRAG_ATTRIB_TEX6:
242 case FRAG_ATTRIB_TEX7:
243 if (xOrY == 'X') {
244 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
245 src[0] = span->texStepX[u][0] * (1.0F / CHAN_MAXF);
246 src[1] = span->texStepX[u][1] * (1.0F / CHAN_MAXF);
247 src[2] = span->texStepX[u][2] * (1.0F / CHAN_MAXF);
248 src[3] = span->texStepX[u][3] * (1.0F / CHAN_MAXF);
249 }
250 else {
251 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
252 src[0] = span->texStepY[u][0] * (1.0F / CHAN_MAXF);
253 src[1] = span->texStepY[u][1] * (1.0F / CHAN_MAXF);
254 src[2] = span->texStepY[u][2] * (1.0F / CHAN_MAXF);
255 src[3] = span->texStepY[u][3] * (1.0F / CHAN_MAXF);
256 }
257 break;
258 default:
259 return GL_FALSE;
260 }
261
262 result[0] = src[source->Swizzle[0]];
263 result[1] = src[source->Swizzle[1]];
264 result[2] = src[source->Swizzle[2]];
265 result[3] = src[source->Swizzle[3]];
266
267 if (source->NegateBase) {
268 result[0] = -result[0];
269 result[1] = -result[1];
270 result[2] = -result[2];
271 result[3] = -result[3];
272 }
273 if (source->Abs) {
274 result[0] = FABSF(result[0]);
275 result[1] = FABSF(result[1]);
276 result[2] = FABSF(result[2]);
277 result[3] = FABSF(result[3]);
278 }
279 if (source->NegateAbs) {
280 result[0] = -result[0];
281 result[1] = -result[1];
282 result[2] = -result[2];
283 result[3] = -result[3];
284 }
285 return GL_TRUE;
286 }
287
288
289 /**
290 * As above, but only return result[0] element.
291 */
292 static void
293 fetch_vector1( GLcontext *ctx,
294 const struct fp_src_register *source,
295 const struct fp_machine *machine,
296 const struct fragment_program *program,
297 GLfloat result[4] )
298 {
299 const GLfloat *src;
300
301 switch (source->File) {
302 case PROGRAM_TEMPORARY:
303 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
304 src = machine->Temporaries[source->Index];
305 break;
306 case PROGRAM_INPUT:
307 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
308 src = machine->Inputs[source->Index];
309 break;
310 case PROGRAM_LOCAL_PARAM:
311 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
312 src = program->Base.LocalParams[source->Index];
313 break;
314 case PROGRAM_ENV_PARAM:
315 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
316 src = ctx->FragmentProgram.Parameters[source->Index];
317 break;
318 case PROGRAM_NAMED_PARAM:
319 ASSERT(source->Index < program->NumParameters);
320 src = program->Parameters[source->Index].Values;
321 break;
322 case PROGRAM_STATE_VAR:
323 abort();
324 default:
325 _mesa_problem(ctx, "Invalid input register file in fetch_vector1");
326 return;
327 }
328
329 result[0] = src[source->Swizzle[0]];
330
331 if (source->NegateBase) {
332 result[0] = -result[0];
333 }
334 if (source->Abs) {
335 result[0] = FABSF(result[0]);
336 }
337 if (source->NegateAbs) {
338 result[0] = -result[0];
339 }
340 }
341
342
343 /*
344 * Test value against zero and return GT, LT, EQ or UN if NaN.
345 */
346 static INLINE GLuint
347 generate_cc( float value )
348 {
349 if (value != value)
350 return COND_UN; /* NaN */
351 if (value > 0.0F)
352 return COND_GT;
353 if (value < 0.0F)
354 return COND_LT;
355 return COND_EQ;
356 }
357
358 /*
359 * Test if the ccMaskRule is satisfied by the given condition code.
360 * Used to mask destination writes according to the current condition codee.
361 */
362 static INLINE GLboolean
363 test_cc(GLuint condCode, GLuint ccMaskRule)
364 {
365 switch (ccMaskRule) {
366 case COND_EQ: return (condCode == COND_EQ);
367 case COND_NE: return (condCode != COND_EQ);
368 case COND_LT: return (condCode == COND_LT);
369 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
370 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
371 case COND_GT: return (condCode == COND_GT);
372 case COND_TR: return GL_TRUE;
373 case COND_FL: return GL_FALSE;
374 default: return GL_TRUE;
375 }
376 }
377
378
379 /**
380 * Store 4 floats into a register. Observe the instructions saturate and
381 * set-condition-code flags.
382 */
383 static void
384 store_vector4( const struct fp_instruction *inst,
385 struct fp_machine *machine,
386 const GLfloat value[4] )
387 {
388 const struct fp_dst_register *dest = &(inst->DstReg);
389 const GLboolean clamp = inst->Saturate;
390 const GLboolean updateCC = inst->UpdateCondRegister;
391 GLfloat *dstReg;
392 GLfloat clampedValue[4];
393 const GLboolean *writeMask = dest->WriteMask;
394 GLboolean condWriteMask[4];
395
396 switch (dest->File) {
397 case PROGRAM_OUTPUT:
398 dstReg = machine->Outputs[dest->Index];
399 break;
400 case PROGRAM_TEMPORARY:
401 dstReg = machine->Temporaries[dest->Index];
402 break;
403 default:
404 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
405 return;
406 }
407
408 #if DEBUG_FRAG
409 if (value[0] > 1.0e10 ||
410 IS_INF_OR_NAN(value[0]) ||
411 IS_INF_OR_NAN(value[1]) ||
412 IS_INF_OR_NAN(value[2]) ||
413 IS_INF_OR_NAN(value[3]) )
414 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
415 #endif
416
417 if (clamp) {
418 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
419 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
420 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
421 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
422 value = clampedValue;
423 }
424
425 if (dest->CondMask != COND_TR) {
426 condWriteMask[0] = writeMask[0]
427 && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
428 condWriteMask[1] = writeMask[1]
429 && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
430 condWriteMask[2] = writeMask[2]
431 && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
432 condWriteMask[3] = writeMask[3]
433 && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
434 writeMask = condWriteMask;
435 }
436
437 if (writeMask[0]) {
438 dstReg[0] = value[0];
439 if (updateCC)
440 machine->CondCodes[0] = generate_cc(value[0]);
441 }
442 if (writeMask[1]) {
443 dstReg[1] = value[1];
444 if (updateCC)
445 machine->CondCodes[1] = generate_cc(value[1]);
446 }
447 if (writeMask[2]) {
448 dstReg[2] = value[2];
449 if (updateCC)
450 machine->CondCodes[2] = generate_cc(value[2]);
451 }
452 if (writeMask[3]) {
453 dstReg[3] = value[3];
454 if (updateCC)
455 machine->CondCodes[3] = generate_cc(value[3]);
456 }
457 }
458
459
460 /**
461 * Initialize a new machine state instance from an existing one, adding
462 * the partial derivatives onto the input registers.
463 * Used to implement DDX and DDY instructions in non-trivial cases.
464 */
465 static void
466 init_machine_deriv( GLcontext *ctx,
467 const struct fp_machine *machine,
468 const struct fragment_program *program,
469 const struct sw_span *span, char xOrY,
470 struct fp_machine *dMachine )
471 {
472 GLuint u;
473
474 ASSERT(xOrY == 'X' || xOrY == 'Y');
475
476 /* copy existing machine */
477 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
478
479 /* Clear temporary registers */
480 _mesa_bzero( (void*) machine->Temporaries,
481 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
482
483 /* Add derivatives */
484 if (program->InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
485 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
486 if (xOrY == 'X') {
487 wpos[0] += 1.0F;
488 wpos[1] += 0.0F;
489 wpos[2] += span->dzdx;
490 wpos[3] += span->dwdx;
491 }
492 else {
493 wpos[0] += 0.0F;
494 wpos[1] += 1.0F;
495 wpos[2] += span->dzdy;
496 wpos[3] += span->dwdy;
497 }
498 }
499 if (program->InputsRead & (1 << FRAG_ATTRIB_COL0)) {
500 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
501 if (xOrY == 'X') {
502 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
503 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
504 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
505 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
506 }
507 else {
508 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
509 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
510 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
511 col0[3] += span->dady * (1.0F / CHAN_MAXF);
512 }
513 }
514 if (program->InputsRead & (1 << FRAG_ATTRIB_COL1)) {
515 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
516 if (xOrY == 'X') {
517 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
518 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
519 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
520 col1[3] += 0.0; /*XXX fix */
521 }
522 else {
523 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
524 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
525 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
526 col1[3] += 0.0; /*XXX fix */
527 }
528 }
529 if (program->InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
530 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
531 if (xOrY == 'X') {
532 fogc[0] += span->dfogdx;
533 }
534 else {
535 fogc[0] += span->dfogdy;
536 }
537 }
538 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
539 if (program->InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
540 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
541 if (xOrY == 'X') {
542 tex[0] += span->texStepX[u][0];
543 tex[1] += span->texStepX[u][1];
544 tex[2] += span->texStepX[u][2];
545 tex[3] += span->texStepX[u][3];
546 }
547 else {
548 tex[0] += span->texStepY[u][0];
549 tex[1] += span->texStepY[u][1];
550 tex[2] += span->texStepY[u][2];
551 tex[3] += span->texStepY[u][3];
552 }
553 }
554 }
555
556 /* init condition codes */
557 dMachine->CondCodes[0] = COND_EQ;
558 dMachine->CondCodes[1] = COND_EQ;
559 dMachine->CondCodes[2] = COND_EQ;
560 dMachine->CondCodes[3] = COND_EQ;
561 }
562
563
564 /**
565 * Execute the given fragment program.
566 * NOTE: we do everything in single-precision floating point; we don't
567 * currently observe the single/half/fixed-precision qualifiers.
568 * \param ctx - rendering context
569 * \param program - the fragment program to execute
570 * \param machine - machine state (register file)
571 * \param maxInst - max number of instructions to execute
572 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
573 */
574 static GLboolean
575 execute_program( GLcontext *ctx,
576 const struct fragment_program *program, GLuint maxInst,
577 struct fp_machine *machine, const struct sw_span *span,
578 GLuint column )
579 {
580 GLuint pc;
581
582 #if DEBUG_FRAG
583 printf("execute fragment program --------------------\n");
584 #endif
585
586 for (pc = 0; pc < maxInst; pc++) {
587 const struct fp_instruction *inst = program->Instructions + pc;
588
589 if (ctx->FragmentProgram.CallbackEnabled &&
590 ctx->FragmentProgram.Callback) {
591 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
592 ctx->FragmentProgram.Callback(program->Base.Target,
593 ctx->FragmentProgram.CallbackData);
594 }
595
596 switch (inst->Opcode) {
597 case FP_OPCODE_ADD:
598 {
599 GLfloat a[4], b[4], result[4];
600 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
601 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
602 result[0] = a[0] + b[0];
603 result[1] = a[1] + b[1];
604 result[2] = a[2] + b[2];
605 result[3] = a[3] + b[3];
606 store_vector4( inst, machine, result );
607 }
608 break;
609 case FP_OPCODE_COS:
610 {
611 GLfloat a[4], result[4];
612 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
613 result[0] = result[1] = result[2] = result[3] = (GLfloat)_mesa_cos(a[0]);
614 store_vector4( inst, machine, result );
615 }
616 break;
617 case FP_OPCODE_DDX: /* Partial derivative with respect to X */
618 {
619 GLfloat a[4], aNext[4], result[4];
620 struct fp_machine dMachine;
621 if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'X', result)) {
622 /* This is tricky. Make a copy of the current machine state,
623 * increment the input registers by the dx or dy partial
624 * derivatives, then re-execute the program up to the
625 * preceeding instruction, then fetch the source register.
626 * Finally, find the difference in the register values for
627 * the original and derivative runs.
628 */
629 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
630 init_machine_deriv(ctx, machine, program, span,
631 'X', &dMachine);
632 execute_program(ctx, program, pc, &dMachine, span, column);
633 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
634 result[0] = aNext[0] - a[0];
635 result[1] = aNext[1] - a[1];
636 result[2] = aNext[2] - a[2];
637 result[3] = aNext[3] - a[3];
638 }
639 store_vector4( inst, machine, result );
640 }
641 break;
642 case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
643 {
644 GLfloat a[4], aNext[4], result[4];
645 struct fp_machine dMachine;
646 if (!fetch_vector4_deriv(&inst->SrcReg[0], span, 'Y', result)) {
647 init_machine_deriv(ctx, machine, program, span,
648 'Y', &dMachine);
649 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
650 execute_program(ctx, program, pc, &dMachine, span, column);
651 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
652 result[0] = aNext[0] - a[0];
653 result[1] = aNext[1] - a[1];
654 result[2] = aNext[2] - a[2];
655 result[3] = aNext[3] - a[3];
656 }
657 store_vector4( inst, machine, result );
658 }
659 break;
660 case FP_OPCODE_DP3:
661 {
662 GLfloat a[4], b[4], result[4];
663 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
664 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
665 result[0] = result[1] = result[2] = result[3] =
666 a[0] * b[0] + a[1] * b[1] + a[2] * b[2];
667 store_vector4( inst, machine, result );
668 #if DEBUG_FRAG
669 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
670 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
671 #endif
672 }
673 break;
674 case FP_OPCODE_DP4:
675 {
676 GLfloat a[4], b[4], result[4];
677 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
678 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
679 result[0] = result[1] = result[2] = result[3] =
680 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
681 store_vector4( inst, machine, result );
682 }
683 break;
684 case FP_OPCODE_DST: /* Distance vector */
685 {
686 GLfloat a[4], b[4], result[4];
687 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
688 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
689 result[0] = 1.0F;
690 result[1] = a[1] * b[1];
691 result[2] = a[2];
692 result[3] = b[3];
693 store_vector4( inst, machine, result );
694 }
695 break;
696 case FP_OPCODE_EX2: /* Exponential base 2 */
697 {
698 GLfloat a[4], result[4];
699 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
700 result[0] = result[1] = result[2] = result[3] =
701 (GLfloat) _mesa_pow(2.0, a[0]);
702 store_vector4( inst, machine, result );
703 }
704 break;
705 case FP_OPCODE_FLR:
706 {
707 GLfloat a[4], result[4];
708 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
709 result[0] = FLOORF(a[0]);
710 result[1] = FLOORF(a[1]);
711 result[2] = FLOORF(a[2]);
712 result[3] = FLOORF(a[3]);
713 store_vector4( inst, machine, result );
714 }
715 break;
716 case FP_OPCODE_FRC:
717 {
718 GLfloat a[4], result[4];
719 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
720 result[0] = a[0] - FLOORF(a[0]);
721 result[1] = a[1] - FLOORF(a[1]);
722 result[2] = a[2] - FLOORF(a[2]);
723 result[3] = a[3] - FLOORF(a[3]);
724 store_vector4( inst, machine, result );
725 }
726 break;
727 case FP_OPCODE_KIL:
728 {
729 const GLuint *swizzle = inst->DstReg.CondSwizzle;
730 const GLuint condMask = inst->DstReg.CondMask;
731 if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
732 test_cc(machine->CondCodes[swizzle[1]], condMask) ||
733 test_cc(machine->CondCodes[swizzle[2]], condMask) ||
734 test_cc(machine->CondCodes[swizzle[3]], condMask)) {
735 return GL_FALSE;
736 }
737 }
738 break;
739 case FP_OPCODE_LG2: /* log base 2 */
740 {
741 GLfloat a[4], result[4];
742 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
743 result[0] = result[1] = result[2] = result[3]
744 = LOG2(a[0]);
745 store_vector4( inst, machine, result );
746 }
747 break;
748 case FP_OPCODE_LIT:
749 {
750 GLfloat a[4], result[4];
751 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
752 if (a[0] < 0.0F)
753 a[0] = 0.0F;
754 if (a[1] < 0.0F)
755 a[1] = 0.0F;
756 result[0] = 1.0F;
757 result[1] = a[0];
758 result[2] = (a[0] > 0.0F) ? (GLfloat)_mesa_pow(2.0, a[3]) : 0.0F;
759 result[3] = 1.0F;
760 store_vector4( inst, machine, result );
761 }
762 break;
763 case FP_OPCODE_LRP:
764 {
765 GLfloat a[4], b[4], c[4], result[4];
766 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
767 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
768 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
769 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
770 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
771 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
772 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
773 store_vector4( inst, machine, result );
774 }
775 break;
776 case FP_OPCODE_MAD:
777 {
778 GLfloat a[4], b[4], c[4], result[4];
779 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
780 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
781 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
782 result[0] = a[0] * b[0] + c[0];
783 result[1] = a[1] * b[1] + c[1];
784 result[2] = a[2] * b[2] + c[2];
785 result[3] = a[3] * b[3] + c[3];
786 store_vector4( inst, machine, result );
787 }
788 break;
789 case FP_OPCODE_MAX:
790 {
791 GLfloat a[4], b[4], result[4];
792 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
793 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
794 result[0] = MAX2(a[0], b[0]);
795 result[1] = MAX2(a[1], b[1]);
796 result[2] = MAX2(a[2], b[2]);
797 result[3] = MAX2(a[3], b[3]);
798 store_vector4( inst, machine, result );
799 }
800 break;
801 case FP_OPCODE_MIN:
802 {
803 GLfloat a[4], b[4], result[4];
804 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
805 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
806 result[0] = MIN2(a[0], b[0]);
807 result[1] = MIN2(a[1], b[1]);
808 result[2] = MIN2(a[2], b[2]);
809 result[3] = MIN2(a[3], b[3]);
810 store_vector4( inst, machine, result );
811 }
812 break;
813 case FP_OPCODE_MOV:
814 {
815 GLfloat result[4];
816 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
817 store_vector4( inst, machine, result );
818 }
819 break;
820 case FP_OPCODE_MUL:
821 {
822 GLfloat a[4], b[4], result[4];
823 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
824 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
825 result[0] = a[0] * b[0];
826 result[1] = a[1] * b[1];
827 result[2] = a[2] * b[2];
828 result[3] = a[3] * b[3];
829 store_vector4( inst, machine, result );
830 #if DEBUG_FRAG
831 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
832 result[0], result[1], result[2], result[3],
833 a[0], a[1], a[2], a[3],
834 b[0], b[1], b[2], b[3]);
835 #endif
836 }
837 break;
838 case FP_OPCODE_PK2H: /* pack two 16-bit floats */
839 /* XXX this is probably wrong */
840 {
841 GLfloat a[4], result[4];
842 const GLuint *rawBits = (const GLuint *) a;
843 GLuint *rawResult = (GLuint *) result;
844 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
845 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
846 = rawBits[0] | (rawBits[1] << 16);
847 store_vector4( inst, machine, result );
848 }
849 break;
850 case FP_OPCODE_PK2US: /* pack two GLushorts */
851 {
852 GLfloat a[4], result[4];
853 GLuint usx, usy, *rawResult = (GLuint *) result;
854 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
855 a[0] = CLAMP(a[0], 0.0F, 1.0F);
856 a[1] = CLAMP(a[0], 0.0F, 1.0F);
857 usx = IROUND(a[0] * 65535.0F);
858 usy = IROUND(a[1] * 65535.0F);
859 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
860 = usx | (usy << 16);
861 store_vector4( inst, machine, result );
862 }
863 break;
864 case FP_OPCODE_PK4B: /* pack four GLbytes */
865 {
866 GLfloat a[4], result[4];
867 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
868 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
869 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
870 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
871 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
872 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
873 ubx = IROUND(127.0F * a[0] + 128.0F);
874 uby = IROUND(127.0F * a[1] + 128.0F);
875 ubz = IROUND(127.0F * a[2] + 128.0F);
876 ubw = IROUND(127.0F * a[3] + 128.0F);
877 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
878 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
879 store_vector4( inst, machine, result );
880 }
881 break;
882 case FP_OPCODE_PK4UB: /* pack four GLubytes */
883 {
884 GLfloat a[4], result[4];
885 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
886 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
887 a[0] = CLAMP(a[0], 0.0F, 1.0F);
888 a[1] = CLAMP(a[1], 0.0F, 1.0F);
889 a[2] = CLAMP(a[2], 0.0F, 1.0F);
890 a[3] = CLAMP(a[3], 0.0F, 1.0F);
891 ubx = IROUND(255.0F * a[0]);
892 uby = IROUND(255.0F * a[1]);
893 ubz = IROUND(255.0F * a[2]);
894 ubw = IROUND(255.0F * a[3]);
895 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
896 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
897 store_vector4( inst, machine, result );
898 }
899 break;
900 case FP_OPCODE_POW:
901 {
902 GLfloat a[4], b[4], result[4];
903 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
904 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
905 result[0] = result[1] = result[2] = result[3]
906 = (GLfloat)_mesa_pow(a[0], b[0]);
907 store_vector4( inst, machine, result );
908 }
909 break;
910 case FP_OPCODE_RCP:
911 {
912 GLfloat a[4], result[4];
913 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
914 #if DEBUG_FRAG
915 if (a[0] == 0)
916 printf("RCP(0)\n");
917 else if (IS_INF_OR_NAN(a[0]))
918 printf("RCP(inf)\n");
919 #endif
920 result[0] = result[1] = result[2] = result[3]
921 = 1.0F / a[0];
922 store_vector4( inst, machine, result );
923 }
924 break;
925 case FP_OPCODE_RFL:
926 {
927 GLfloat axis[4], dir[4], result[4], tmp[4];
928 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
929 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
930 tmp[3] = axis[0] * axis[0]
931 + axis[1] * axis[1]
932 + axis[2] * axis[2];
933 tmp[0] = (2.0F * (axis[0] * dir[0] +
934 axis[1] * dir[1] +
935 axis[2] * dir[2])) / tmp[3];
936 result[0] = tmp[0] * axis[0] - dir[0];
937 result[1] = tmp[0] * axis[1] - dir[1];
938 result[2] = tmp[0] * axis[2] - dir[2];
939 /* result[3] is never written! XXX enforce in parser! */
940 store_vector4( inst, machine, result );
941 }
942 break;
943 case FP_OPCODE_RSQ: /* 1 / sqrt() */
944 {
945 GLfloat a[4], result[4];
946 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
947 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
948 store_vector4( inst, machine, result );
949 #if DEBUG_FRAG
950 printf("RSQ %g = 1/sqrt(%g)\n", result[0], a[0]);
951 #endif
952 }
953 break;
954 case FP_OPCODE_SEQ: /* set on equal */
955 {
956 GLfloat a[4], b[4], result[4];
957 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
958 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
959 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
960 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
961 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
962 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
963 store_vector4( inst, machine, result );
964 }
965 break;
966 case FP_OPCODE_SFL: /* set false, operands ignored */
967 {
968 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
969 store_vector4( inst, machine, result );
970 }
971 break;
972 case FP_OPCODE_SGE: /* set on greater or equal */
973 {
974 GLfloat a[4], b[4], result[4];
975 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
976 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
977 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
978 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
979 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
980 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
981 store_vector4( inst, machine, result );
982 }
983 break;
984 case FP_OPCODE_SGT: /* set on greater */
985 {
986 GLfloat a[4], b[4], result[4];
987 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
988 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
989 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
990 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
991 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
992 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
993 store_vector4( inst, machine, result );
994 }
995 break;
996 case FP_OPCODE_SIN:
997 {
998 GLfloat a[4], result[4];
999 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1000 result[0] = result[1] = result[2] =
1001 result[3] = (GLfloat)_mesa_sin(a[0]);
1002 store_vector4( inst, machine, result );
1003 }
1004 break;
1005 case FP_OPCODE_SLE: /* set on less or equal */
1006 {
1007 GLfloat a[4], b[4], result[4];
1008 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1009 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1010 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1011 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1012 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1013 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1014 store_vector4( inst, machine, result );
1015 }
1016 break;
1017 case FP_OPCODE_SLT: /* set on less */
1018 {
1019 GLfloat a[4], b[4], result[4];
1020 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1021 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1022 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1023 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1024 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1025 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1026 store_vector4( inst, machine, result );
1027 }
1028 break;
1029 case FP_OPCODE_SNE: /* set on not equal */
1030 {
1031 GLfloat a[4], b[4], result[4];
1032 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1033 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1034 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1035 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1036 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1037 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1038 store_vector4( inst, machine, result );
1039 }
1040 break;
1041 case FP_OPCODE_STR: /* set true, operands ignored */
1042 {
1043 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1044 store_vector4( inst, machine, result );
1045 }
1046 break;
1047 case FP_OPCODE_SUB:
1048 {
1049 GLfloat a[4], b[4], result[4];
1050 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1051 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1052 result[0] = a[0] - b[0];
1053 result[1] = a[1] - b[1];
1054 result[2] = a[2] - b[2];
1055 result[3] = a[3] - b[3];
1056 store_vector4( inst, machine, result );
1057 }
1058 break;
1059 case FP_OPCODE_TEX:
1060 /* Texel lookup */
1061 {
1062 GLfloat texcoord[4], color[4];
1063 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1064 /* XXX: Undo perspective divide from interpolate_texcoords() */
1065 fetch_texel( ctx, texcoord,
1066 span->array->lambda[inst->TexSrcUnit][column],
1067 inst->TexSrcUnit, color );
1068 store_vector4( inst, machine, color );
1069 }
1070 break;
1071 case FP_OPCODE_TXD:
1072 /* Texture lookup w/ partial derivatives for LOD */
1073 {
1074 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1075 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1076 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1077 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1078 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1079 color );
1080 store_vector4( inst, machine, color );
1081 }
1082 break;
1083 case FP_OPCODE_TXP:
1084 /* Texture lookup w/ perspective divide */
1085 {
1086 GLfloat texcoord[4], color[4];
1087 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1088 /* Already did perspective divide in interpolate_texcoords() */
1089 fetch_texel( ctx, texcoord,
1090 span->array->lambda[inst->TexSrcUnit][column],
1091 inst->TexSrcUnit, color );
1092 store_vector4( inst, machine, color );
1093 }
1094 break;
1095 case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
1096 /* XXX this is probably wrong */
1097 {
1098 GLfloat a[4], result[4];
1099 const GLuint *rawBits = (const GLuint *) a;
1100 GLuint *rawResult = (GLuint *) result;
1101 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1102 rawResult[0] = rawBits[0] & 0xffff;
1103 rawResult[1] = (rawBits[0] >> 16) & 0xffff;
1104 rawResult[2] = rawBits[0] & 0xffff;
1105 rawResult[3] = (rawBits[0] >> 16) & 0xffff;
1106 store_vector4( inst, machine, result );
1107 }
1108 break;
1109 case FP_OPCODE_UP2US: /* unpack two GLushorts */
1110 {
1111 GLfloat a[4], result[4];
1112 const GLuint *rawBits = (const GLuint *) a;
1113 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1114 result[0] = (GLfloat) ((rawBits[0] >> 0) & 0xffff) / 65535.0F;
1115 result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
1116 result[2] = result[0];
1117 result[3] = result[1];
1118 store_vector4( inst, machine, result );
1119 }
1120 break;
1121 case FP_OPCODE_UP4B: /* unpack four GLbytes */
1122 {
1123 GLfloat a[4], result[4];
1124 const GLuint *rawBits = (const GLuint *) a;
1125 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1126 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1127 result[0] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1128 result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1129 result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1130 store_vector4( inst, machine, result );
1131 }
1132 break;
1133 case FP_OPCODE_UP4UB: /* unpack four GLubytes */
1134 {
1135 GLfloat a[4], result[4];
1136 const GLuint *rawBits = (const GLuint *) a;
1137 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1138 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1139 result[0] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1140 result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1141 result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1142 store_vector4( inst, machine, result );
1143 }
1144 break;
1145 case FP_OPCODE_X2D: /* 2-D matrix transform */
1146 {
1147 GLfloat a[4], b[4], c[4], result[4];
1148 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1149 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1150 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1151 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1152 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1153 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1154 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1155 store_vector4( inst, machine, result );
1156 }
1157 break;
1158 case FP_OPCODE_END:
1159 return GL_TRUE;
1160 default:
1161 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1162 inst->Opcode);
1163 return GL_TRUE; /* return value doesn't matter */
1164 }
1165 }
1166 return GL_TRUE;
1167 }
1168
1169
1170 static void
1171 init_machine( GLcontext *ctx, struct fp_machine *machine,
1172 const struct fragment_program *program,
1173 const struct sw_span *span, GLuint col )
1174 {
1175 GLuint inputsRead = program->InputsRead;
1176 GLuint u;
1177
1178 if (ctx->FragmentProgram.CallbackEnabled)
1179 inputsRead = ~0;
1180
1181 /* Clear temporary registers */
1182 _mesa_bzero(machine->Temporaries,
1183 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1184
1185 /* Load input registers */
1186 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1187 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1188 wpos[0] = (GLfloat) span->x + col;
1189 wpos[1] = (GLfloat) span->y;
1190 wpos[2] = (GLfloat) span->array->z[col] / ctx->DepthMaxF;
1191 wpos[3] = span->w + col * span->dwdx;
1192 }
1193 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1194 GLfloat *col0 = machine->Inputs[FRAG_ATTRIB_COL0];
1195 col0[0] = CHAN_TO_FLOAT(span->array->rgba[col][RCOMP]);
1196 col0[1] = CHAN_TO_FLOAT(span->array->rgba[col][GCOMP]);
1197 col0[2] = CHAN_TO_FLOAT(span->array->rgba[col][BCOMP]);
1198 col0[3] = CHAN_TO_FLOAT(span->array->rgba[col][ACOMP]);
1199 }
1200 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1201 GLfloat *col1 = machine->Inputs[FRAG_ATTRIB_COL1];
1202 col1[0] = CHAN_TO_FLOAT(span->array->spec[col][RCOMP]);
1203 col1[1] = CHAN_TO_FLOAT(span->array->spec[col][GCOMP]);
1204 col1[2] = CHAN_TO_FLOAT(span->array->spec[col][BCOMP]);
1205 col1[3] = CHAN_TO_FLOAT(span->array->spec[col][ACOMP]);
1206 }
1207 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1208 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1209 fogc[0] = span->array->fog[col];
1210 fogc[1] = 0.0F;
1211 fogc[2] = 0.0F;
1212 fogc[3] = 0.0F;
1213 }
1214 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1215 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1216 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1217 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1218 COPY_4V(tex, span->array->texcoords[u][col]);
1219 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1220 }
1221 }
1222
1223 /* init condition codes */
1224 machine->CondCodes[0] = COND_EQ;
1225 machine->CondCodes[1] = COND_EQ;
1226 machine->CondCodes[2] = COND_EQ;
1227 machine->CondCodes[3] = COND_EQ;
1228 }
1229
1230
1231 void
1232 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
1233 {
1234 const struct fragment_program *program = ctx->FragmentProgram.Current;
1235 GLuint i;
1236
1237 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1238
1239 for (i = 0; i < span->end; i++) {
1240 if (span->array->mask[i]) {
1241 init_machine(ctx, &ctx->FragmentProgram.Machine,
1242 ctx->FragmentProgram.Current, span, i);
1243
1244 if (!execute_program(ctx, program, ~0,
1245 &ctx->FragmentProgram.Machine, span, i)) {
1246 span->array->mask[i] = GL_FALSE; /* killed fragment */
1247 }
1248
1249 /* Store output registers */
1250 {
1251 const GLfloat *colOut
1252 = ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_COLR];
1253 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
1254 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
1255 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
1256 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
1257 }
1258 /* depth value */
1259 if (program->OutputsWritten & (1 << FRAG_OUTPUT_DEPR))
1260 span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Outputs[FRAG_OUTPUT_DEPR][0] * ctx->DepthMaxF);
1261 }
1262 }
1263
1264 ctx->_CurrentProgram = 0;
1265 }
1266