dfca960005cdd844e81061734c1fd93f9681a2c6
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.2
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Regarding GL_NV_fragment_program:
27 *
28 * Portions of this software may use or implement intellectual
29 * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30 * any and all warranties with respect to such intellectual property,
31 * including any use thereof or modifications thereto.
32 */
33
34 #include "glheader.h"
35 #include "colormac.h"
36 #include "context.h"
37 #include "program_instruction.h"
38 #include "program.h"
39
40 #include "s_nvfragprog.h"
41 #include "s_span.h"
42
43
44 /* if 1, print some debugging info */
45 #define DEBUG_FRAG 0
46
47
48 /**
49 * Virtual machine state used during execution of a fragment programs.
50 */
51 struct fp_machine
52 {
53 GLfloat Temporaries[MAX_NV_FRAGMENT_PROGRAM_TEMPS][4];
54 GLfloat Inputs[MAX_NV_FRAGMENT_PROGRAM_INPUTS][4];
55 GLfloat Outputs[MAX_NV_FRAGMENT_PROGRAM_OUTPUTS][4];
56 GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */
57 };
58
59
#if FEATURE_MESA_program_debug
/*
 * Machine whose registers can be inspected by the debug query below.
 * Starts out NULL; it is assigned elsewhere in this file (not in this
 * section of the code).
 */
static struct fp_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Copy the current value (4 GLfloats) of one fragment program register
 * into \p val.  Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (not used here)
 * \param file   register file; only PROGRAM_INPUT, PROGRAM_OUTPUT and
 *               PROGRAM_TEMPORARY are supported
 * \param index  register index within the file (not range checked)
 * \param val    receives the register contents; left untouched when no
 *               machine is current or the file is not supported
 */
void
_swrast_get_program_register(GLcontext *ctx, enum register_file file,
                             GLuint index, GLfloat val[4])
{
   const GLfloat *reg;

   if (!CurrentMachine)
      return;

   if (file == PROGRAM_INPUT) {
      reg = CurrentMachine->Inputs[index];
   }
   else if (file == PROGRAM_OUTPUT) {
      reg = CurrentMachine->Outputs[index];
   }
   else if (file == PROGRAM_TEMPORARY) {
      reg = CurrentMachine->Temporaries[index];
   }
   else {
      _mesa_problem(NULL,
                    "bad register file in _swrast_get_program_register");
      return;
   }

   COPY_4V(val, reg);
}
#endif /* FEATURE_MESA_program_debug */
90
91
92 /**
93 * Fetch a texel.
94 */
95 static void
96 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
97 GLuint unit, GLfloat color[4] )
98 {
99 GLchan rgba[4];
100 SWcontext *swrast = SWRAST_CONTEXT(ctx);
101
102 /* XXX use a float-valued TextureSample routine here!!! */
103 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
104 1, (const GLfloat (*)[4]) texcoord,
105 &lambda, &rgba);
106 color[0] = CHAN_TO_FLOAT(rgba[0]);
107 color[1] = CHAN_TO_FLOAT(rgba[1]);
108 color[2] = CHAN_TO_FLOAT(rgba[2]);
109 color[3] = CHAN_TO_FLOAT(rgba[3]);
110 }
111
112
113 /**
114 * Fetch a texel with the given partial derivatives to compute a level
115 * of detail in the mipmap.
116 */
117 static void
118 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
119 const GLfloat texdx[4], const GLfloat texdy[4],
120 GLuint unit, GLfloat color[4] )
121 {
122 SWcontext *swrast = SWRAST_CONTEXT(ctx);
123 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
124 const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
125 const GLfloat texW = (GLfloat) texImg->WidthScale;
126 const GLfloat texH = (GLfloat) texImg->HeightScale;
127 GLchan rgba[4];
128
129 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
130 texdx[1], texdy[1], /* dt/dx, dt/dy */
131 texdx[3], texdy[2], /* dq/dx, dq/dy */
132 texW, texH,
133 texcoord[0], texcoord[1], texcoord[3],
134 1.0F / texcoord[3]);
135
136 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
137 1, (const GLfloat (*)[4]) texcoord,
138 &lambda, &rgba);
139 color[0] = CHAN_TO_FLOAT(rgba[0]);
140 color[1] = CHAN_TO_FLOAT(rgba[1]);
141 color[2] = CHAN_TO_FLOAT(rgba[2]);
142 color[3] = CHAN_TO_FLOAT(rgba[3]);
143 }
144
145
146 /**
147 * Return a pointer to the 4-element float vector specified by the given
148 * source register.
149 */
150 static INLINE const GLfloat *
151 get_register_pointer( GLcontext *ctx,
152 const struct prog_src_register *source,
153 const struct fp_machine *machine,
154 const struct gl_fragment_program *program )
155 {
156 switch (source->File) {
157 case PROGRAM_TEMPORARY:
158 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
159 return machine->Temporaries[source->Index];
160 case PROGRAM_INPUT:
161 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
162 return machine->Inputs[source->Index];
163 case PROGRAM_OUTPUT:
164 /* This is only for PRINT */
165 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
166 return machine->Outputs[source->Index];
167 case PROGRAM_LOCAL_PARAM:
168 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
169 return program->Base.LocalParams[source->Index];
170 case PROGRAM_ENV_PARAM:
171 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
172 return ctx->FragmentProgram.Parameters[source->Index];
173 case PROGRAM_STATE_VAR:
174 /* Fallthrough */
175 case PROGRAM_CONSTANT:
176 /* Fallthrough */
177 case PROGRAM_NAMED_PARAM:
178 ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
179 return program->Base.Parameters->ParameterValues[source->Index];
180 default:
181 _mesa_problem(ctx, "Invalid input register file %d in fetch_vector4",
182 source->File);
183 return NULL;
184 }
185 }
186
187
188 /**
189 * Fetch a 4-element float vector from the given source register.
190 * Apply swizzling and negating as needed.
191 */
192 static void
193 fetch_vector4( GLcontext *ctx,
194 const struct prog_src_register *source,
195 const struct fp_machine *machine,
196 const struct gl_fragment_program *program,
197 GLfloat result[4] )
198 {
199 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
200 ASSERT(src);
201
202 if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
203 SWIZZLE_Z, SWIZZLE_W)) {
204 /* no swizzling */
205 COPY_4V(result, src);
206 }
207 else {
208 result[0] = src[GET_SWZ(source->Swizzle, 0)];
209 result[1] = src[GET_SWZ(source->Swizzle, 1)];
210 result[2] = src[GET_SWZ(source->Swizzle, 2)];
211 result[3] = src[GET_SWZ(source->Swizzle, 3)];
212 }
213
214 if (source->NegateBase) {
215 result[0] = -result[0];
216 result[1] = -result[1];
217 result[2] = -result[2];
218 result[3] = -result[3];
219 }
220 if (source->Abs) {
221 result[0] = FABSF(result[0]);
222 result[1] = FABSF(result[1]);
223 result[2] = FABSF(result[2]);
224 result[3] = FABSF(result[3]);
225 }
226 if (source->NegateAbs) {
227 result[0] = -result[0];
228 result[1] = -result[1];
229 result[2] = -result[2];
230 result[3] = -result[3];
231 }
232 }
233
234
235 /**
236 * Fetch the derivative with respect to X for the given register.
237 * \return GL_TRUE if it was easily computed or GL_FALSE if we
238 * need to execute another instance of the program (ugh)!
239 */
240 static GLboolean
241 fetch_vector4_deriv( GLcontext *ctx,
242 const struct prog_src_register *source,
243 const SWspan *span,
244 char xOrY, GLint column, GLfloat result[4] )
245 {
246 GLfloat src[4];
247
248 ASSERT(xOrY == 'X' || xOrY == 'Y');
249
250 switch (source->Index) {
251 case FRAG_ATTRIB_WPOS:
252 if (xOrY == 'X') {
253 src[0] = 1.0;
254 src[1] = 0.0;
255 src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
256 src[3] = span->dwdx;
257 }
258 else {
259 src[0] = 0.0;
260 src[1] = 1.0;
261 src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
262 src[3] = span->dwdy;
263 }
264 break;
265 case FRAG_ATTRIB_COL0:
266 if (xOrY == 'X') {
267 src[0] = span->drdx * (1.0F / CHAN_MAXF);
268 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
269 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
270 src[3] = span->dadx * (1.0F / CHAN_MAXF);
271 }
272 else {
273 src[0] = span->drdy * (1.0F / CHAN_MAXF);
274 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
275 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
276 src[3] = span->dady * (1.0F / CHAN_MAXF);
277 }
278 break;
279 case FRAG_ATTRIB_COL1:
280 if (xOrY == 'X') {
281 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
282 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
283 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
284 src[3] = 0.0; /* XXX need this */
285 }
286 else {
287 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
288 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
289 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
290 src[3] = 0.0; /* XXX need this */
291 }
292 break;
293 case FRAG_ATTRIB_FOGC:
294 if (xOrY == 'X') {
295 src[0] = span->dfogdx;
296 src[1] = 0.0;
297 src[2] = 0.0;
298 src[3] = 0.0;
299 }
300 else {
301 src[0] = span->dfogdy;
302 src[1] = 0.0;
303 src[2] = 0.0;
304 src[3] = 0.0;
305 }
306 break;
307 case FRAG_ATTRIB_TEX0:
308 case FRAG_ATTRIB_TEX1:
309 case FRAG_ATTRIB_TEX2:
310 case FRAG_ATTRIB_TEX3:
311 case FRAG_ATTRIB_TEX4:
312 case FRAG_ATTRIB_TEX5:
313 case FRAG_ATTRIB_TEX6:
314 case FRAG_ATTRIB_TEX7:
315 if (xOrY == 'X') {
316 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
317 /* this is a little tricky - I think I've got it right */
318 const GLfloat invQ = 1.0f / (span->tex[u][3]
319 + span->texStepX[u][3] * column);
320 src[0] = span->texStepX[u][0] * invQ;
321 src[1] = span->texStepX[u][1] * invQ;
322 src[2] = span->texStepX[u][2] * invQ;
323 src[3] = span->texStepX[u][3] * invQ;
324 }
325 else {
326 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
327 /* Tricky, as above, but in Y direction */
328 const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
329 src[0] = span->texStepY[u][0] * invQ;
330 src[1] = span->texStepY[u][1] * invQ;
331 src[2] = span->texStepY[u][2] * invQ;
332 src[3] = span->texStepY[u][3] * invQ;
333 }
334 break;
335 default:
336 return GL_FALSE;
337 }
338
339 result[0] = src[GET_SWZ(source->Swizzle, 0)];
340 result[1] = src[GET_SWZ(source->Swizzle, 1)];
341 result[2] = src[GET_SWZ(source->Swizzle, 2)];
342 result[3] = src[GET_SWZ(source->Swizzle, 3)];
343
344 if (source->NegateBase) {
345 result[0] = -result[0];
346 result[1] = -result[1];
347 result[2] = -result[2];
348 result[3] = -result[3];
349 }
350 if (source->Abs) {
351 result[0] = FABSF(result[0]);
352 result[1] = FABSF(result[1]);
353 result[2] = FABSF(result[2]);
354 result[3] = FABSF(result[3]);
355 }
356 if (source->NegateAbs) {
357 result[0] = -result[0];
358 result[1] = -result[1];
359 result[2] = -result[2];
360 result[3] = -result[3];
361 }
362 return GL_TRUE;
363 }
364
365
366 /**
367 * As above, but only return result[0] element.
368 */
369 static void
370 fetch_vector1( GLcontext *ctx,
371 const struct prog_src_register *source,
372 const struct fp_machine *machine,
373 const struct gl_fragment_program *program,
374 GLfloat result[4] )
375 {
376 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
377 ASSERT(src);
378
379 result[0] = src[GET_SWZ(source->Swizzle, 0)];
380
381 if (source->NegateBase) {
382 result[0] = -result[0];
383 }
384 if (source->Abs) {
385 result[0] = FABSF(result[0]);
386 }
387 if (source->NegateAbs) {
388 result[0] = -result[0];
389 }
390 }
391
392
393 /**
394 * Test value against zero and return GT, LT, EQ or UN if NaN.
395 */
396 static INLINE GLuint
397 generate_cc( float value )
398 {
399 if (value != value)
400 return COND_UN; /* NaN */
401 if (value > 0.0F)
402 return COND_GT;
403 if (value < 0.0F)
404 return COND_LT;
405 return COND_EQ;
406 }
407
408
409 /**
410 * Test if the ccMaskRule is satisfied by the given condition code.
411 * Used to mask destination writes according to the current condition code.
412 */
413 static INLINE GLboolean
414 test_cc(GLuint condCode, GLuint ccMaskRule)
415 {
416 switch (ccMaskRule) {
417 case COND_EQ: return (condCode == COND_EQ);
418 case COND_NE: return (condCode != COND_EQ);
419 case COND_LT: return (condCode == COND_LT);
420 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
421 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
422 case COND_GT: return (condCode == COND_GT);
423 case COND_TR: return GL_TRUE;
424 case COND_FL: return GL_FALSE;
425 default: return GL_TRUE;
426 }
427 }
428
429
430 /**
431 * Store 4 floats into a register. Observe the instructions saturate and
432 * set-condition-code flags.
433 */
434 static void
435 store_vector4( const struct prog_instruction *inst,
436 struct fp_machine *machine,
437 const GLfloat value[4] )
438 {
439 const struct prog_dst_register *dest = &(inst->DstReg);
440 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
441 GLfloat *dstReg;
442 GLfloat dummyReg[4];
443 GLfloat clampedValue[4];
444 GLuint writeMask = dest->WriteMask;
445
446 switch (dest->File) {
447 case PROGRAM_OUTPUT:
448 dstReg = machine->Outputs[dest->Index];
449 break;
450 case PROGRAM_TEMPORARY:
451 dstReg = machine->Temporaries[dest->Index];
452 break;
453 case PROGRAM_WRITE_ONLY:
454 dstReg = dummyReg;
455 return;
456 default:
457 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
458 return;
459 }
460
461 #if DEBUG_FRAG
462 if (value[0] > 1.0e10 ||
463 IS_INF_OR_NAN(value[0]) ||
464 IS_INF_OR_NAN(value[1]) ||
465 IS_INF_OR_NAN(value[2]) ||
466 IS_INF_OR_NAN(value[3]) )
467 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
468 #endif
469
470 if (clamp) {
471 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
472 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
473 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
474 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
475 value = clampedValue;
476 }
477
478 if (dest->CondMask != COND_TR) {
479 /* condition codes may turn off some writes */
480 if (writeMask & WRITEMASK_X) {
481 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
482 dest->CondMask))
483 writeMask &= ~WRITEMASK_X;
484 }
485 if (writeMask & WRITEMASK_Y) {
486 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
487 dest->CondMask))
488 writeMask &= ~WRITEMASK_Y;
489 }
490 if (writeMask & WRITEMASK_Z) {
491 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
492 dest->CondMask))
493 writeMask &= ~WRITEMASK_Z;
494 }
495 if (writeMask & WRITEMASK_W) {
496 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
497 dest->CondMask))
498 writeMask &= ~WRITEMASK_W;
499 }
500 }
501
502 if (writeMask & WRITEMASK_X)
503 dstReg[0] = value[0];
504 if (writeMask & WRITEMASK_Y)
505 dstReg[1] = value[1];
506 if (writeMask & WRITEMASK_Z)
507 dstReg[2] = value[2];
508 if (writeMask & WRITEMASK_W)
509 dstReg[3] = value[3];
510
511 if (inst->CondUpdate) {
512 if (writeMask & WRITEMASK_X)
513 machine->CondCodes[0] = generate_cc(value[0]);
514 if (writeMask & WRITEMASK_Y)
515 machine->CondCodes[1] = generate_cc(value[1]);
516 if (writeMask & WRITEMASK_Z)
517 machine->CondCodes[2] = generate_cc(value[2]);
518 if (writeMask & WRITEMASK_W)
519 machine->CondCodes[3] = generate_cc(value[3]);
520 }
521 }
522
523
524 /**
525 * Initialize a new machine state instance from an existing one, adding
526 * the partial derivatives onto the input registers.
527 * Used to implement DDX and DDY instructions in non-trivial cases.
528 */
529 static void
530 init_machine_deriv( GLcontext *ctx,
531 const struct fp_machine *machine,
532 const struct gl_fragment_program *program,
533 const SWspan *span, char xOrY,
534 struct fp_machine *dMachine )
535 {
536 GLuint u;
537
538 ASSERT(xOrY == 'X' || xOrY == 'Y');
539
540 /* copy existing machine */
541 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
542
543 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
544 /* Clear temporary registers (undefined for ARB_f_p) */
545 _mesa_bzero( (void*) machine->Temporaries,
546 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
547 }
548
549 /* Add derivatives */
550 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
551 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
552 if (xOrY == 'X') {
553 wpos[0] += 1.0F;
554 wpos[1] += 0.0F;
555 wpos[2] += span->dzdx;
556 wpos[3] += span->dwdx;
557 }
558 else {
559 wpos[0] += 0.0F;
560 wpos[1] += 1.0F;
561 wpos[2] += span->dzdy;
562 wpos[3] += span->dwdy;
563 }
564 }
565 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
566 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
567 if (xOrY == 'X') {
568 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
569 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
570 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
571 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
572 }
573 else {
574 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
575 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
576 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
577 col0[3] += span->dady * (1.0F / CHAN_MAXF);
578 }
579 }
580 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
581 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
582 if (xOrY == 'X') {
583 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
584 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
585 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
586 col1[3] += 0.0; /*XXX fix */
587 }
588 else {
589 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
590 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
591 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
592 col1[3] += 0.0; /*XXX fix */
593 }
594 }
595 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
596 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
597 if (xOrY == 'X') {
598 fogc[0] += span->dfogdx;
599 }
600 else {
601 fogc[0] += span->dfogdy;
602 }
603 }
604 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
605 if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
606 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
607 /* XXX perspective-correct interpolation */
608 if (xOrY == 'X') {
609 tex[0] += span->texStepX[u][0];
610 tex[1] += span->texStepX[u][1];
611 tex[2] += span->texStepX[u][2];
612 tex[3] += span->texStepX[u][3];
613 }
614 else {
615 tex[0] += span->texStepY[u][0];
616 tex[1] += span->texStepY[u][1];
617 tex[2] += span->texStepY[u][2];
618 tex[3] += span->texStepY[u][3];
619 }
620 }
621 }
622
623 /* init condition codes */
624 dMachine->CondCodes[0] = COND_EQ;
625 dMachine->CondCodes[1] = COND_EQ;
626 dMachine->CondCodes[2] = COND_EQ;
627 dMachine->CondCodes[3] = COND_EQ;
628 }
629
630
631 /**
632 * Execute the given fragment program.
633 * NOTE: we do everything in single-precision floating point; we don't
634 * currently observe the single/half/fixed-precision qualifiers.
635 * \param ctx - rendering context
636 * \param program - the fragment program to execute
637 * \param machine - machine state (register file)
638 * \param maxInst - max number of instructions to execute
639 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
640 */
641 static GLboolean
642 execute_program( GLcontext *ctx,
643 const struct gl_fragment_program *program, GLuint maxInst,
644 struct fp_machine *machine, const SWspan *span,
645 GLuint column )
646 {
647 GLuint pc;
648
649 #if DEBUG_FRAG
650 printf("execute fragment program --------------------\n");
651 #endif
652
653 for (pc = 0; pc < maxInst; pc++) {
654 const struct prog_instruction *inst = program->Base.Instructions + pc;
655
656 if (ctx->FragmentProgram.CallbackEnabled &&
657 ctx->FragmentProgram.Callback) {
658 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
659 ctx->FragmentProgram.Callback(program->Base.Target,
660 ctx->FragmentProgram.CallbackData);
661 }
662
663 switch (inst->Opcode) {
664 case OPCODE_ABS:
665 {
666 GLfloat a[4], result[4];
667 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
668 result[0] = FABSF(a[0]);
669 result[1] = FABSF(a[1]);
670 result[2] = FABSF(a[2]);
671 result[3] = FABSF(a[3]);
672 store_vector4( inst, machine, result );
673 }
674 break;
675 case OPCODE_ADD:
676 {
677 GLfloat a[4], b[4], result[4];
678 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
679 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
680 result[0] = a[0] + b[0];
681 result[1] = a[1] + b[1];
682 result[2] = a[2] + b[2];
683 result[3] = a[3] + b[3];
684 store_vector4( inst, machine, result );
685 }
686 break;
687 case OPCODE_CMP:
688 {
689 GLfloat a[4], b[4], c[4], result[4];
690 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
691 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
692 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
693 result[0] = a[0] < 0.0F ? b[0] : c[0];
694 result[1] = a[1] < 0.0F ? b[1] : c[1];
695 result[2] = a[2] < 0.0F ? b[2] : c[2];
696 result[3] = a[3] < 0.0F ? b[3] : c[3];
697 store_vector4( inst, machine, result );
698 }
699 break;
700 case OPCODE_COS:
701 {
702 GLfloat a[4], result[4];
703 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
704 result[0] = result[1] = result[2] = result[3]
705 = (GLfloat) _mesa_cos(a[0]);
706 store_vector4( inst, machine, result );
707 }
708 break;
709 case OPCODE_DDX: /* Partial derivative with respect to X */
710 {
711 GLfloat a[4], aNext[4], result[4];
712 struct fp_machine dMachine;
713 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
714 column, result)) {
715 /* This is tricky. Make a copy of the current machine state,
716 * increment the input registers by the dx or dy partial
717 * derivatives, then re-execute the program up to the
718 * preceeding instruction, then fetch the source register.
719 * Finally, find the difference in the register values for
720 * the original and derivative runs.
721 */
722 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
723 init_machine_deriv(ctx, machine, program, span,
724 'X', &dMachine);
725 execute_program(ctx, program, pc, &dMachine, span, column);
726 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
727 result[0] = aNext[0] - a[0];
728 result[1] = aNext[1] - a[1];
729 result[2] = aNext[2] - a[2];
730 result[3] = aNext[3] - a[3];
731 }
732 store_vector4( inst, machine, result );
733 }
734 break;
735 case OPCODE_DDY: /* Partial derivative with respect to Y */
736 {
737 GLfloat a[4], aNext[4], result[4];
738 struct fp_machine dMachine;
739 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
740 column, result)) {
741 init_machine_deriv(ctx, machine, program, span,
742 'Y', &dMachine);
743 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
744 execute_program(ctx, program, pc, &dMachine, span, column);
745 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
746 result[0] = aNext[0] - a[0];
747 result[1] = aNext[1] - a[1];
748 result[2] = aNext[2] - a[2];
749 result[3] = aNext[3] - a[3];
750 }
751 store_vector4( inst, machine, result );
752 }
753 break;
754 case OPCODE_DP3:
755 {
756 GLfloat a[4], b[4], result[4];
757 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
758 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
759 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
760 store_vector4( inst, machine, result );
761 #if DEBUG_FRAG
762 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
763 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
764 #endif
765 }
766 break;
767 case OPCODE_DP4:
768 {
769 GLfloat a[4], b[4], result[4];
770 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
771 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
772 result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
773 store_vector4( inst, machine, result );
774 #if DEBUG_FRAG
775 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
776 result[0], a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
777 #endif
778 }
779 break;
780 case OPCODE_DPH:
781 {
782 GLfloat a[4], b[4], result[4];
783 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
784 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
785 result[0] = result[1] = result[2] = result[3] =
786 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
787 store_vector4( inst, machine, result );
788 }
789 break;
790 case OPCODE_DST: /* Distance vector */
791 {
792 GLfloat a[4], b[4], result[4];
793 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
794 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
795 result[0] = 1.0F;
796 result[1] = a[1] * b[1];
797 result[2] = a[2];
798 result[3] = b[3];
799 store_vector4( inst, machine, result );
800 }
801 break;
802 case OPCODE_EX2: /* Exponential base 2 */
803 {
804 GLfloat a[4], result[4];
805 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
806 result[0] = result[1] = result[2] = result[3] =
807 (GLfloat) _mesa_pow(2.0, a[0]);
808 store_vector4( inst, machine, result );
809 }
810 break;
811 case OPCODE_FLR:
812 {
813 GLfloat a[4], result[4];
814 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
815 result[0] = FLOORF(a[0]);
816 result[1] = FLOORF(a[1]);
817 result[2] = FLOORF(a[2]);
818 result[3] = FLOORF(a[3]);
819 store_vector4( inst, machine, result );
820 }
821 break;
822 case OPCODE_FRC:
823 {
824 GLfloat a[4], result[4];
825 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
826 result[0] = a[0] - FLOORF(a[0]);
827 result[1] = a[1] - FLOORF(a[1]);
828 result[2] = a[2] - FLOORF(a[2]);
829 result[3] = a[3] - FLOORF(a[3]);
830 store_vector4( inst, machine, result );
831 }
832 break;
833 case OPCODE_KIL_NV: /* NV_f_p only */
834 {
835 const GLuint swizzle = inst->DstReg.CondSwizzle;
836 const GLuint condMask = inst->DstReg.CondMask;
837 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
838 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
839 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
840 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
841 return GL_FALSE;
842 }
843 }
844 break;
845 case OPCODE_KIL: /* ARB_f_p only */
846 {
847 GLfloat a[4];
848 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
849 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
850 return GL_FALSE;
851 }
852 }
853 break;
854 case OPCODE_LG2: /* log base 2 */
855 {
856 GLfloat a[4], result[4];
857 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
858 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
859 store_vector4( inst, machine, result );
860 }
861 break;
862 case OPCODE_LIT:
863 {
864 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
865 GLfloat a[4], result[4];
866 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
867 a[0] = MAX2(a[0], 0.0F);
868 a[1] = MAX2(a[1], 0.0F);
869 /* XXX ARB version clamps a[3], NV version doesn't */
870 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
871 result[0] = 1.0F;
872 result[1] = a[0];
873 /* XXX we could probably just use pow() here */
874 if (a[0] > 0.0F) {
875 if (a[1] == 0.0 && a[3] == 0.0)
876 result[2] = 1.0;
877 else
878 result[2] = EXPF(a[3] * LOGF(a[1]));
879 }
880 else {
881 result[2] = 0.0;
882 }
883 result[3] = 1.0F;
884 store_vector4( inst, machine, result );
885 }
886 break;
887 case OPCODE_LRP:
888 {
889 GLfloat a[4], b[4], c[4], result[4];
890 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
891 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
892 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
893 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
894 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
895 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
896 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
897 store_vector4( inst, machine, result );
898 #if DEBUG_FRAG
899 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
900 "(%g %g %g %g), (%g %g %g %g)\n",
901 result[0], result[1], result[2], result[3],
902 a[0], a[1], a[2], a[3],
903 b[0], b[1], b[2], b[3],
904 c[0], c[1], c[2], c[3]);
905 #endif
906 }
907 break;
908 case OPCODE_MAD:
909 {
910 GLfloat a[4], b[4], c[4], result[4];
911 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
912 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
913 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
914 result[0] = a[0] * b[0] + c[0];
915 result[1] = a[1] * b[1] + c[1];
916 result[2] = a[2] * b[2] + c[2];
917 result[3] = a[3] * b[3] + c[3];
918 store_vector4( inst, machine, result );
919 }
920 break;
921 case OPCODE_MAX:
922 {
923 GLfloat a[4], b[4], result[4];
924 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
925 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
926 result[0] = MAX2(a[0], b[0]);
927 result[1] = MAX2(a[1], b[1]);
928 result[2] = MAX2(a[2], b[2]);
929 result[3] = MAX2(a[3], b[3]);
930 store_vector4( inst, machine, result );
931 #if DEBUG_FRAG
932 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
933 result[0], result[1], result[2], result[3],
934 a[0], a[1], a[2], a[3],
935 b[0], b[1], b[2], b[3]);
936 #endif
937 }
938 break;
939 case OPCODE_MIN:
940 {
941 GLfloat a[4], b[4], result[4];
942 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
943 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
944 result[0] = MIN2(a[0], b[0]);
945 result[1] = MIN2(a[1], b[1]);
946 result[2] = MIN2(a[2], b[2]);
947 result[3] = MIN2(a[3], b[3]);
948 store_vector4( inst, machine, result );
949 }
950 break;
951 case OPCODE_MOV:
952 {
953 GLfloat result[4];
954 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
955 store_vector4( inst, machine, result );
956 #if DEBUG_FRAG
957 printf("MOV (%g %g %g %g)\n",
958 result[0], result[1], result[2], result[3]);
959 #endif
960 }
961 break;
962 case OPCODE_MUL:
963 {
964 GLfloat a[4], b[4], result[4];
965 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
966 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
967 result[0] = a[0] * b[0];
968 result[1] = a[1] * b[1];
969 result[2] = a[2] * b[2];
970 result[3] = a[3] * b[3];
971 store_vector4( inst, machine, result );
972 #if DEBUG_FRAG
973 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
974 result[0], result[1], result[2], result[3],
975 a[0], a[1], a[2], a[3],
976 b[0], b[1], b[2], b[3]);
977 #endif
978 }
979 break;
980 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
981 {
982 GLfloat a[4], result[4];
983 GLhalfNV hx, hy;
984 GLuint *rawResult = (GLuint *) result;
985 GLuint twoHalves;
986 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
987 hx = _mesa_float_to_half(a[0]);
988 hy = _mesa_float_to_half(a[1]);
989 twoHalves = hx | (hy << 16);
990 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
991 = twoHalves;
992 store_vector4( inst, machine, result );
993 }
994 break;
995 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
996 {
997 GLfloat a[4], result[4];
998 GLuint usx, usy, *rawResult = (GLuint *) result;
999 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1000 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1001 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1002 usx = IROUND(a[0] * 65535.0F);
1003 usy = IROUND(a[1] * 65535.0F);
1004 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1005 = usx | (usy << 16);
1006 store_vector4( inst, machine, result );
1007 }
1008 break;
1009 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1010 {
1011 GLfloat a[4], result[4];
1012 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1013 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1014 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1015 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1016 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1017 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1018 ubx = IROUND(127.0F * a[0] + 128.0F);
1019 uby = IROUND(127.0F * a[1] + 128.0F);
1020 ubz = IROUND(127.0F * a[2] + 128.0F);
1021 ubw = IROUND(127.0F * a[3] + 128.0F);
1022 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1023 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1024 store_vector4( inst, machine, result );
1025 }
1026 break;
1027 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1028 {
1029 GLfloat a[4], result[4];
1030 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1031 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1032 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1033 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1034 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1035 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1036 ubx = IROUND(255.0F * a[0]);
1037 uby = IROUND(255.0F * a[1]);
1038 ubz = IROUND(255.0F * a[2]);
1039 ubw = IROUND(255.0F * a[3]);
1040 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1041 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1042 store_vector4( inst, machine, result );
1043 }
1044 break;
1045 case OPCODE_POW:
1046 {
1047 GLfloat a[4], b[4], result[4];
1048 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1049 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1050 result[0] = result[1] = result[2] = result[3]
1051 = (GLfloat)_mesa_pow(a[0], b[0]);
1052 store_vector4( inst, machine, result );
1053 }
1054 break;
1055 case OPCODE_RCP:
1056 {
1057 GLfloat a[4], result[4];
1058 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1059 #if DEBUG_FRAG
1060 if (a[0] == 0)
1061 printf("RCP(0)\n");
1062 else if (IS_INF_OR_NAN(a[0]))
1063 printf("RCP(inf)\n");
1064 #endif
1065 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1066 store_vector4( inst, machine, result );
1067 }
1068 break;
1069 case OPCODE_RFL: /* reflection vector */
1070 {
1071 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1072 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1073 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1074 tmpW = DOT3(axis, axis);
1075 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1076 result[0] = tmpX * axis[0] - dir[0];
1077 result[1] = tmpX * axis[1] - dir[1];
1078 result[2] = tmpX * axis[2] - dir[2];
1079 /* result[3] is never written! XXX enforce in parser! */
1080 store_vector4( inst, machine, result );
1081 }
1082 break;
1083 case OPCODE_RSQ: /* 1 / sqrt() */
1084 {
1085 GLfloat a[4], result[4];
1086 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1087 a[0] = FABSF(a[0]);
1088 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1089 store_vector4( inst, machine, result );
1090 #if DEBUG_FRAG
1091 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1092 #endif
1093 }
1094 break;
1095 case OPCODE_SCS: /* sine and cos */
1096 {
1097 GLfloat a[4], result[4];
1098 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1099 result[0] = (GLfloat)_mesa_cos(a[0]);
1100 result[1] = (GLfloat)_mesa_sin(a[0]);
1101 result[2] = 0.0; /* undefined! */
1102 result[3] = 0.0; /* undefined! */
1103 store_vector4( inst, machine, result );
1104 }
1105 break;
1106 case OPCODE_SEQ: /* set on equal */
1107 {
1108 GLfloat a[4], b[4], result[4];
1109 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1110 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1111 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1112 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1113 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1114 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1115 store_vector4( inst, machine, result );
1116 }
1117 break;
1118 case OPCODE_SFL: /* set false, operands ignored */
1119 {
1120 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1121 store_vector4( inst, machine, result );
1122 }
1123 break;
1124 case OPCODE_SGE: /* set on greater or equal */
1125 {
1126 GLfloat a[4], b[4], result[4];
1127 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1128 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1129 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1130 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1131 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1132 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1133 store_vector4( inst, machine, result );
1134 }
1135 break;
1136 case OPCODE_SGT: /* set on greater */
1137 {
1138 GLfloat a[4], b[4], result[4];
1139 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1140 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1141 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1142 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1143 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1144 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1145 store_vector4( inst, machine, result );
1146 }
1147 break;
1148 case OPCODE_SIN:
1149 {
1150 GLfloat a[4], result[4];
1151 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1152 result[0] = result[1] = result[2] = result[3]
1153 = (GLfloat) _mesa_sin(a[0]);
1154 store_vector4( inst, machine, result );
1155 }
1156 break;
1157 case OPCODE_SLE: /* set on less or equal */
1158 {
1159 GLfloat a[4], b[4], result[4];
1160 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1161 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1162 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1163 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1164 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1165 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1166 store_vector4( inst, machine, result );
1167 }
1168 break;
1169 case OPCODE_SLT: /* set on less */
1170 {
1171 GLfloat a[4], b[4], result[4];
1172 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1173 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1174 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1175 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1176 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1177 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1178 store_vector4( inst, machine, result );
1179 }
1180 break;
1181 case OPCODE_SNE: /* set on not equal */
1182 {
1183 GLfloat a[4], b[4], result[4];
1184 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1185 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1186 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1187 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1188 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1189 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1190 store_vector4( inst, machine, result );
1191 }
1192 break;
1193 case OPCODE_STR: /* set true, operands ignored */
1194 {
1195 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1196 store_vector4( inst, machine, result );
1197 }
1198 break;
1199 case OPCODE_SUB:
1200 {
1201 GLfloat a[4], b[4], result[4];
1202 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1203 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1204 result[0] = a[0] - b[0];
1205 result[1] = a[1] - b[1];
1206 result[2] = a[2] - b[2];
1207 result[3] = a[3] - b[3];
1208 store_vector4( inst, machine, result );
1209 #if DEBUG_FRAG
1210 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1211 result[0], result[1], result[2], result[3],
1212 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1213 #endif
1214 }
1215 break;
1216 case OPCODE_SWZ: /* extended swizzle */
1217 {
1218 const struct prog_src_register *source = &inst->SrcReg[0];
1219 const GLfloat *src = get_register_pointer(ctx, source,
1220 machine, program);
1221 GLfloat result[4];
1222 GLuint i;
1223 for (i = 0; i < 4; i++) {
1224 const GLuint swz = GET_SWZ(source->Swizzle, i);
1225 if (swz == SWIZZLE_ZERO)
1226 result[i] = 0.0;
1227 else if (swz == SWIZZLE_ONE)
1228 result[i] = 1.0;
1229 else {
1230 ASSERT(swz >= 0);
1231 ASSERT(swz <= 3);
1232 result[i] = src[swz];
1233 }
1234 if (source->NegateBase & (1 << i))
1235 result[i] = -result[i];
1236 }
1237 store_vector4( inst, machine, result );
1238 }
1239 break;
1240 case OPCODE_TEX: /* Both ARB and NV frag prog */
1241 /* Texel lookup */
1242 {
1243 GLfloat texcoord[4], color[4];
1244 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1245 /* Note: we pass 0 for LOD. The ARB extension requires it
1246 * while the NV extension says it's implementation dependant.
1247 */
1248 /* KW: Previously lambda was passed as zero, but I
1249 * believe this is incorrect, the spec seems to
1250 * indicate rather that lambda should not be
1251 * changed/biased, unlike TXB where texcoord[3] is
1252 * added to the lambda calculations. The lambda should
1253 * still be calculated normally for TEX & TXP though,
1254 * not set to zero. Otherwise it's very difficult to
1255 * implement normal GL semantics through the fragment
1256 * shader.
1257 */
1258 fetch_texel( ctx, texcoord,
1259 span->array->lambda[inst->TexSrcUnit][column],
1260 inst->TexSrcUnit, color );
1261 #if DEBUG_FRAG
1262 if (color[3])
1263 printf("color[3] = %f\n", color[3]);
1264 #endif
1265 store_vector4( inst, machine, color );
1266 }
1267 break;
1268 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1269 /* Texel lookup with LOD bias */
1270 {
1271 GLfloat texcoord[4], color[4], bias, lambda;
1272
1273 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1274 /* texcoord[3] is the bias to add to lambda */
1275 bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1276 + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1277 + texcoord[3];
1278 lambda = span->array->lambda[inst->TexSrcUnit][column] + bias;
1279 fetch_texel( ctx, texcoord, lambda,
1280 inst->TexSrcUnit, color );
1281 store_vector4( inst, machine, color );
1282 }
1283 break;
1284 case OPCODE_TXD: /* GL_NV_fragment_program only */
1285 /* Texture lookup w/ partial derivatives for LOD */
1286 {
1287 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1288 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1289 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1290 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1291 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1292 color );
1293 store_vector4( inst, machine, color );
1294 }
1295 break;
1296 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1297 /* Texture lookup w/ projective divide */
1298 {
1299 GLfloat texcoord[4], color[4];
1300 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1301 /* Not so sure about this test - if texcoord[3] is
1302 * zero, we'd probably be fine except for an ASSERT in
1303 * IROUND_POS() which gets triggered by the inf values created.
1304 */
1305 if (texcoord[3] != 0.0) {
1306 texcoord[0] /= texcoord[3];
1307 texcoord[1] /= texcoord[3];
1308 texcoord[2] /= texcoord[3];
1309 }
1310 /* KW: Previously lambda was passed as zero, but I
1311 * believe this is incorrect, the spec seems to
1312 * indicate rather that lambda should not be
1313 * changed/biased, unlike TXB where texcoord[3] is
1314 * added to the lambda calculations. The lambda should
1315 * still be calculated normally for TEX & TXP though,
1316 * not set to zero.
1317 */
1318 fetch_texel( ctx, texcoord,
1319 span->array->lambda[inst->TexSrcUnit][column],
1320 inst->TexSrcUnit, color );
1321 store_vector4( inst, machine, color );
1322 }
1323 break;
1324 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1325 /* Texture lookup w/ projective divide */
1326 {
1327 GLfloat texcoord[4], color[4];
1328 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1329 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1330 texcoord[3] != 0.0) {
1331 texcoord[0] /= texcoord[3];
1332 texcoord[1] /= texcoord[3];
1333 texcoord[2] /= texcoord[3];
1334 }
1335 fetch_texel( ctx, texcoord,
1336 span->array->lambda[inst->TexSrcUnit][column],
1337 inst->TexSrcUnit, color );
1338 store_vector4( inst, machine, color );
1339 }
1340 break;
1341 case OPCODE_UP2H: /* unpack two 16-bit floats */
1342 {
1343 GLfloat a[4], result[4];
1344 const GLuint *rawBits = (const GLuint *) a;
1345 GLhalfNV hx, hy;
1346 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1347 hx = rawBits[0] & 0xffff;
1348 hy = rawBits[0] >> 16;
1349 result[0] = result[2] = _mesa_half_to_float(hx);
1350 result[1] = result[3] = _mesa_half_to_float(hy);
1351 store_vector4( inst, machine, result );
1352 }
1353 break;
1354 case OPCODE_UP2US: /* unpack two GLushorts */
1355 {
1356 GLfloat a[4], result[4];
1357 const GLuint *rawBits = (const GLuint *) a;
1358 GLushort usx, usy;
1359 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1360 usx = rawBits[0] & 0xffff;
1361 usy = rawBits[0] >> 16;
1362 result[0] = result[2] = usx * (1.0f / 65535.0f);
1363 result[1] = result[3] = usy * (1.0f / 65535.0f);
1364 store_vector4( inst, machine, result );
1365 }
1366 break;
1367 case OPCODE_UP4B: /* unpack four GLbytes */
1368 {
1369 GLfloat a[4], result[4];
1370 const GLuint *rawBits = (const GLuint *) a;
1371 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1372 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1373 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1374 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1375 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1376 store_vector4( inst, machine, result );
1377 }
1378 break;
1379 case OPCODE_UP4UB: /* unpack four GLubytes */
1380 {
1381 GLfloat a[4], result[4];
1382 const GLuint *rawBits = (const GLuint *) a;
1383 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1384 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1385 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1386 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1387 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1388 store_vector4( inst, machine, result );
1389 }
1390 break;
1391 case OPCODE_XPD: /* cross product */
1392 {
1393 GLfloat a[4], b[4], result[4];
1394 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1395 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1396 result[0] = a[1] * b[2] - a[2] * b[1];
1397 result[1] = a[2] * b[0] - a[0] * b[2];
1398 result[2] = a[0] * b[1] - a[1] * b[0];
1399 result[3] = 1.0;
1400 store_vector4( inst, machine, result );
1401 }
1402 break;
1403 case OPCODE_X2D: /* 2-D matrix transform */
1404 {
1405 GLfloat a[4], b[4], c[4], result[4];
1406 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1407 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1408 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1409 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1410 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1411 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1412 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1413 store_vector4( inst, machine, result );
1414 }
1415 break;
1416 case OPCODE_PRINT:
1417 {
1418 if (inst->SrcReg[0].File != -1) {
1419 GLfloat a[4];
1420 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1421 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1422 a[0], a[1], a[2], a[3]);
1423 }
1424 else {
1425 _mesa_printf("%s\n", (const char *) inst->Data);
1426 }
1427 }
1428 break;
1429 case OPCODE_END:
1430 return GL_TRUE;
1431 default:
1432 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1433 inst->Opcode);
1434 return GL_TRUE; /* return value doesn't matter */
1435 }
1436 }
1437 return GL_TRUE;
1438 }
1439
1440
1441 /**
1442 * Initialize the virtual fragment program machine state prior to running
1443 * fragment program on a fragment. This involves initializing the input
1444 * registers, condition codes, etc.
1445 * \param machine the virtual machine state to init
1446 * \param program the fragment program we're about to run
1447 * \param span the span of pixels we'll operate on
1448 * \param col which element (column) of the span we'll operate on
1449 */
1450 static void
1451 init_machine( GLcontext *ctx, struct fp_machine *machine,
1452 const struct gl_fragment_program *program,
1453 const SWspan *span, GLuint col )
1454 {
1455 GLuint inputsRead = program->Base.InputsRead;
1456 GLuint u;
1457
1458 if (ctx->FragmentProgram.CallbackEnabled)
1459 inputsRead = ~0;
1460
1461 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1462 /* Clear temporary registers (undefined for ARB_f_p) */
1463 _mesa_bzero(machine->Temporaries,
1464 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1465 }
1466
1467 /* Load input registers */
1468 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1469 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1470 ASSERT(span->arrayMask & SPAN_Z);
1471 if (span->arrayMask & SPAN_XY) {
1472 wpos[0] = (GLfloat) span->array->x[col];
1473 wpos[1] = (GLfloat) span->array->y[col];
1474 }
1475 else {
1476 wpos[0] = (GLfloat) span->x + col;
1477 wpos[1] = (GLfloat) span->y;
1478 }
1479 wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1480 wpos[3] = span->w + col * span->dwdx;
1481 }
1482 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1483 ASSERT(span->arrayMask & SPAN_RGBA);
1484 COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1485 span->array->color.sz4.rgba[col]);
1486 }
1487 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1488 ASSERT(span->arrayMask & SPAN_SPEC);
1489 COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1490 span->array->color.sz4.spec[col]);
1491 }
1492 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1493 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1494 ASSERT(span->arrayMask & SPAN_FOG);
1495 fogc[0] = span->array->fog[col];
1496 fogc[1] = 0.0F;
1497 fogc[2] = 0.0F;
1498 fogc[3] = 0.0F;
1499 }
1500 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1501 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1502 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1503 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1504 COPY_4V(tex, span->array->texcoords[u][col]);
1505 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1506 }
1507 }
1508
1509 /* init condition codes */
1510 machine->CondCodes[0] = COND_EQ;
1511 machine->CondCodes[1] = COND_EQ;
1512 machine->CondCodes[2] = COND_EQ;
1513 machine->CondCodes[3] = COND_EQ;
1514 }
1515
1516
1517 /**
1518 * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1519 */
1520 static void
1521 run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1522 {
1523 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1524 struct fp_machine machine;
1525 GLuint i;
1526
1527 CurrentMachine = &machine;
1528
1529 for (i = start; i < end; i++) {
1530 if (span->array->mask[i]) {
1531 init_machine(ctx, &machine, program, span, i);
1532
1533 if (execute_program(ctx, program, ~0, &machine, span, i)) {
1534 /* Store result color */
1535 COPY_4V(span->array->color.sz4.rgba[i],
1536 machine.Outputs[FRAG_RESULT_COLR]);
1537
1538 /* Store result depth/z */
1539 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1540 const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1541 if (depth <= 0.0)
1542 span->array->z[i] = 0;
1543 else if (depth >= 1.0)
1544 span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1545 else
1546 span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1547 }
1548 }
1549 else {
1550 /* killed fragment */
1551 span->array->mask[i] = GL_FALSE;
1552 span->writeAll = GL_FALSE;
1553 }
1554 }
1555 }
1556
1557 CurrentMachine = NULL;
1558 }
1559
1560
1561 /**
1562 * Execute the current fragment program for all the fragments
1563 * in the given span.
1564 */
1565 void
1566 _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1567 {
1568 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1569
1570 /* incoming colors should be floats */
1571 ASSERT(span->array->ChanType == GL_FLOAT);
1572
1573 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1574
1575 if (program->Base.Parameters) {
1576 _mesa_load_state_parameters(ctx, program->Base.Parameters);
1577 }
1578
1579 run_program(ctx, span, 0, span->end);
1580
1581 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1582 span->interpMask &= ~SPAN_Z;
1583 span->arrayMask |= SPAN_Z;
1584 }
1585
1586 ctx->_CurrentProgram = 0;
1587 }
1588