ARB_Occlusion_query should support multiple query at same time
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5.2
4 *
5 * Copyright (C) 1999-2006 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /*
26 * Regarding GL_NV_fragment_program:
27 *
28 * Portions of this software may use or implement intellectual
29 * property owned and licensed by NVIDIA Corporation. NVIDIA disclaims
30 * any and all warranties with respect to such intellectual property,
31 * including any use thereof or modifications thereto.
32 */
33
34 #include "glheader.h"
35 #include "colormac.h"
36 #include "context.h"
37 #include "program_instruction.h"
38 #include "program.h"
39
40 #include "s_nvfragprog.h"
41 #include "s_span.h"
42
43
/*
 * NOTE(review): the rationale for LAMBDA_ZERO is documented further down in
 * this file, next to the code that tests it - confirm there before changing.
 */
#define LAMBDA_ZERO 1

/*
 * Debug switch: when non-zero, execute_program() prints each instruction
 * and selected operands/results to stdout while it runs.
 */
#define DEBUG_FRAG 0
49
50
51 /**
52 * Virtual machine state used during execution of a fragment programs.
53 */
54 struct fp_machine
55 {
56 GLfloat Temporaries[MAX_NV_FRAGMENT_PROGRAM_TEMPS][4];
57 GLfloat Inputs[MAX_NV_FRAGMENT_PROGRAM_INPUTS][4];
58 GLfloat Outputs[MAX_NV_FRAGMENT_PROGRAM_OUTPUTS][4];
59 GLuint CondCodes[4]; /**< COND_* value for x/y/z/w */
60
61 GLuint CallStack[MAX_PROGRAM_CALL_DEPTH]; /**< For CAL/RET instructions */
62 GLuint StackDepth; /**< Index/ptr to top of CallStack[] */
63 };
64
65
#if FEATURE_MESA_program_debug
/* Machine state inspected by the register query below; starts out NULL. */
static struct fp_machine *CurrentMachine = NULL;

/**
 * For GL_MESA_program_debug.
 * Copy the current value (4 GLfloats) of a fragment program register
 * into val[].
 * Called via ctx->Driver.GetFragmentProgramRegister().
 *
 * \param ctx    rendering context (not used here)
 * \param file   register file: PROGRAM_INPUT, PROGRAM_OUTPUT or
 *               PROGRAM_TEMPORARY
 * \param index  register number within that file (not range-checked)
 * \param val    receives the register contents; left untouched when there
 *               is no current machine or the file is not supported
 */
void
_swrast_get_program_register(GLcontext *ctx, enum register_file file,
                             GLuint index, GLfloat val[4])
{
   const GLfloat *reg;

   if (!CurrentMachine) {
      /* no program is being executed/debugged right now */
      return;
   }

   switch (file) {
   case PROGRAM_INPUT:
      reg = CurrentMachine->Inputs[index];
      break;
   case PROGRAM_OUTPUT:
      reg = CurrentMachine->Outputs[index];
      break;
   case PROGRAM_TEMPORARY:
      reg = CurrentMachine->Temporaries[index];
      break;
   default:
      _mesa_problem(NULL,
                    "bad register file in _swrast_get_program_register");
      return;
   }

   COPY_4V(val, reg);
}
#endif /* FEATURE_MESA_program_debug */
96
97
98 /**
99 * Fetch a texel.
100 */
101 static void
102 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLfloat lambda,
103 GLuint unit, GLfloat color[4] )
104 {
105 GLchan rgba[4];
106 SWcontext *swrast = SWRAST_CONTEXT(ctx);
107
108 /* XXX use a float-valued TextureSample routine here!!! */
109 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
110 1, (const GLfloat (*)[4]) texcoord,
111 &lambda, &rgba);
112 color[0] = CHAN_TO_FLOAT(rgba[0]);
113 color[1] = CHAN_TO_FLOAT(rgba[1]);
114 color[2] = CHAN_TO_FLOAT(rgba[2]);
115 color[3] = CHAN_TO_FLOAT(rgba[3]);
116 }
117
118
119 /**
120 * Fetch a texel with the given partial derivatives to compute a level
121 * of detail in the mipmap.
122 */
123 static void
124 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
125 const GLfloat texdx[4], const GLfloat texdy[4],
126 GLuint unit, GLfloat color[4] )
127 {
128 SWcontext *swrast = SWRAST_CONTEXT(ctx);
129 const struct gl_texture_object *texObj = ctx->Texture.Unit[unit]._Current;
130 const struct gl_texture_image *texImg = texObj->Image[0][texObj->BaseLevel];
131 const GLfloat texW = (GLfloat) texImg->WidthScale;
132 const GLfloat texH = (GLfloat) texImg->HeightScale;
133 GLchan rgba[4];
134
135 GLfloat lambda = _swrast_compute_lambda(texdx[0], texdy[0], /* ds/dx, ds/dy */
136 texdx[1], texdy[1], /* dt/dx, dt/dy */
137 texdx[3], texdy[2], /* dq/dx, dq/dy */
138 texW, texH,
139 texcoord[0], texcoord[1], texcoord[3],
140 1.0F / texcoord[3]);
141
142 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
143 1, (const GLfloat (*)[4]) texcoord,
144 &lambda, &rgba);
145 color[0] = CHAN_TO_FLOAT(rgba[0]);
146 color[1] = CHAN_TO_FLOAT(rgba[1]);
147 color[2] = CHAN_TO_FLOAT(rgba[2]);
148 color[3] = CHAN_TO_FLOAT(rgba[3]);
149 }
150
151
152 /**
153 * Return a pointer to the 4-element float vector specified by the given
154 * source register.
155 */
156 static INLINE const GLfloat *
157 get_register_pointer( GLcontext *ctx,
158 const struct prog_src_register *source,
159 const struct fp_machine *machine,
160 const struct gl_fragment_program *program )
161 {
162 switch (source->File) {
163 case PROGRAM_TEMPORARY:
164 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_TEMPS);
165 return machine->Temporaries[source->Index];
166 case PROGRAM_INPUT:
167 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_INPUTS);
168 return machine->Inputs[source->Index];
169 case PROGRAM_OUTPUT:
170 /* This is only for PRINT */
171 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_OUTPUTS);
172 return machine->Outputs[source->Index];
173 case PROGRAM_LOCAL_PARAM:
174 ASSERT(source->Index < MAX_PROGRAM_LOCAL_PARAMS);
175 return program->Base.LocalParams[source->Index];
176 case PROGRAM_ENV_PARAM:
177 ASSERT(source->Index < MAX_NV_FRAGMENT_PROGRAM_PARAMS);
178 return ctx->FragmentProgram.Parameters[source->Index];
179 case PROGRAM_STATE_VAR:
180 /* Fallthrough */
181 case PROGRAM_CONSTANT:
182 /* Fallthrough */
183 case PROGRAM_NAMED_PARAM:
184 ASSERT(source->Index < (GLint) program->Base.Parameters->NumParameters);
185 return program->Base.Parameters->ParameterValues[source->Index];
186 default:
187 _mesa_problem(ctx, "Invalid input register file %d in fp "
188 "get_register_pointer", source->File);
189 return NULL;
190 }
191 }
192
193
194 /**
195 * Fetch a 4-element float vector from the given source register.
196 * Apply swizzling and negating as needed.
197 */
198 static void
199 fetch_vector4( GLcontext *ctx,
200 const struct prog_src_register *source,
201 const struct fp_machine *machine,
202 const struct gl_fragment_program *program,
203 GLfloat result[4] )
204 {
205 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
206 ASSERT(src);
207
208 if (source->Swizzle == MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y,
209 SWIZZLE_Z, SWIZZLE_W)) {
210 /* no swizzling */
211 COPY_4V(result, src);
212 }
213 else {
214 result[0] = src[GET_SWZ(source->Swizzle, 0)];
215 result[1] = src[GET_SWZ(source->Swizzle, 1)];
216 result[2] = src[GET_SWZ(source->Swizzle, 2)];
217 result[3] = src[GET_SWZ(source->Swizzle, 3)];
218 }
219
220 if (source->NegateBase) {
221 result[0] = -result[0];
222 result[1] = -result[1];
223 result[2] = -result[2];
224 result[3] = -result[3];
225 }
226 if (source->Abs) {
227 result[0] = FABSF(result[0]);
228 result[1] = FABSF(result[1]);
229 result[2] = FABSF(result[2]);
230 result[3] = FABSF(result[3]);
231 }
232 if (source->NegateAbs) {
233 result[0] = -result[0];
234 result[1] = -result[1];
235 result[2] = -result[2];
236 result[3] = -result[3];
237 }
238 }
239
240
241 /**
242 * Fetch the derivative with respect to X for the given register.
243 * \return GL_TRUE if it was easily computed or GL_FALSE if we
244 * need to execute another instance of the program (ugh)!
245 */
246 static GLboolean
247 fetch_vector4_deriv( GLcontext *ctx,
248 const struct prog_src_register *source,
249 const SWspan *span,
250 char xOrY, GLint column, GLfloat result[4] )
251 {
252 GLfloat src[4];
253
254 ASSERT(xOrY == 'X' || xOrY == 'Y');
255
256 switch (source->Index) {
257 case FRAG_ATTRIB_WPOS:
258 if (xOrY == 'X') {
259 src[0] = 1.0;
260 src[1] = 0.0;
261 src[2] = span->dzdx / ctx->DrawBuffer->_DepthMaxF;
262 src[3] = span->dwdx;
263 }
264 else {
265 src[0] = 0.0;
266 src[1] = 1.0;
267 src[2] = span->dzdy / ctx->DrawBuffer->_DepthMaxF;
268 src[3] = span->dwdy;
269 }
270 break;
271 case FRAG_ATTRIB_COL0:
272 if (xOrY == 'X') {
273 src[0] = span->drdx * (1.0F / CHAN_MAXF);
274 src[1] = span->dgdx * (1.0F / CHAN_MAXF);
275 src[2] = span->dbdx * (1.0F / CHAN_MAXF);
276 src[3] = span->dadx * (1.0F / CHAN_MAXF);
277 }
278 else {
279 src[0] = span->drdy * (1.0F / CHAN_MAXF);
280 src[1] = span->dgdy * (1.0F / CHAN_MAXF);
281 src[2] = span->dbdy * (1.0F / CHAN_MAXF);
282 src[3] = span->dady * (1.0F / CHAN_MAXF);
283 }
284 break;
285 case FRAG_ATTRIB_COL1:
286 if (xOrY == 'X') {
287 src[0] = span->dsrdx * (1.0F / CHAN_MAXF);
288 src[1] = span->dsgdx * (1.0F / CHAN_MAXF);
289 src[2] = span->dsbdx * (1.0F / CHAN_MAXF);
290 src[3] = 0.0; /* XXX need this */
291 }
292 else {
293 src[0] = span->dsrdy * (1.0F / CHAN_MAXF);
294 src[1] = span->dsgdy * (1.0F / CHAN_MAXF);
295 src[2] = span->dsbdy * (1.0F / CHAN_MAXF);
296 src[3] = 0.0; /* XXX need this */
297 }
298 break;
299 case FRAG_ATTRIB_FOGC:
300 if (xOrY == 'X') {
301 src[0] = span->dfogdx;
302 src[1] = 0.0;
303 src[2] = 0.0;
304 src[3] = 0.0;
305 }
306 else {
307 src[0] = span->dfogdy;
308 src[1] = 0.0;
309 src[2] = 0.0;
310 src[3] = 0.0;
311 }
312 break;
313 case FRAG_ATTRIB_TEX0:
314 case FRAG_ATTRIB_TEX1:
315 case FRAG_ATTRIB_TEX2:
316 case FRAG_ATTRIB_TEX3:
317 case FRAG_ATTRIB_TEX4:
318 case FRAG_ATTRIB_TEX5:
319 case FRAG_ATTRIB_TEX6:
320 case FRAG_ATTRIB_TEX7:
321 if (xOrY == 'X') {
322 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
323 /* this is a little tricky - I think I've got it right */
324 const GLfloat invQ = 1.0f / (span->tex[u][3]
325 + span->texStepX[u][3] * column);
326 src[0] = span->texStepX[u][0] * invQ;
327 src[1] = span->texStepX[u][1] * invQ;
328 src[2] = span->texStepX[u][2] * invQ;
329 src[3] = span->texStepX[u][3] * invQ;
330 }
331 else {
332 const GLuint u = source->Index - FRAG_ATTRIB_TEX0;
333 /* Tricky, as above, but in Y direction */
334 const GLfloat invQ = 1.0f / (span->tex[u][3] + span->texStepY[u][3]);
335 src[0] = span->texStepY[u][0] * invQ;
336 src[1] = span->texStepY[u][1] * invQ;
337 src[2] = span->texStepY[u][2] * invQ;
338 src[3] = span->texStepY[u][3] * invQ;
339 }
340 break;
341 default:
342 return GL_FALSE;
343 }
344
345 result[0] = src[GET_SWZ(source->Swizzle, 0)];
346 result[1] = src[GET_SWZ(source->Swizzle, 1)];
347 result[2] = src[GET_SWZ(source->Swizzle, 2)];
348 result[3] = src[GET_SWZ(source->Swizzle, 3)];
349
350 if (source->NegateBase) {
351 result[0] = -result[0];
352 result[1] = -result[1];
353 result[2] = -result[2];
354 result[3] = -result[3];
355 }
356 if (source->Abs) {
357 result[0] = FABSF(result[0]);
358 result[1] = FABSF(result[1]);
359 result[2] = FABSF(result[2]);
360 result[3] = FABSF(result[3]);
361 }
362 if (source->NegateAbs) {
363 result[0] = -result[0];
364 result[1] = -result[1];
365 result[2] = -result[2];
366 result[3] = -result[3];
367 }
368 return GL_TRUE;
369 }
370
371
372 /**
373 * As above, but only return result[0] element.
374 */
375 static void
376 fetch_vector1( GLcontext *ctx,
377 const struct prog_src_register *source,
378 const struct fp_machine *machine,
379 const struct gl_fragment_program *program,
380 GLfloat result[4] )
381 {
382 const GLfloat *src = get_register_pointer(ctx, source, machine, program);
383 ASSERT(src);
384
385 result[0] = src[GET_SWZ(source->Swizzle, 0)];
386
387 if (source->NegateBase) {
388 result[0] = -result[0];
389 }
390 if (source->Abs) {
391 result[0] = FABSF(result[0]);
392 }
393 if (source->NegateAbs) {
394 result[0] = -result[0];
395 }
396 }
397
398
399 /**
400 * Test value against zero and return GT, LT, EQ or UN if NaN.
401 */
402 static INLINE GLuint
403 generate_cc( float value )
404 {
405 if (value != value)
406 return COND_UN; /* NaN */
407 if (value > 0.0F)
408 return COND_GT;
409 if (value < 0.0F)
410 return COND_LT;
411 return COND_EQ;
412 }
413
414
415 /**
416 * Test if the ccMaskRule is satisfied by the given condition code.
417 * Used to mask destination writes according to the current condition code.
418 */
419 static INLINE GLboolean
420 test_cc(GLuint condCode, GLuint ccMaskRule)
421 {
422 switch (ccMaskRule) {
423 case COND_EQ: return (condCode == COND_EQ);
424 case COND_NE: return (condCode != COND_EQ);
425 case COND_LT: return (condCode == COND_LT);
426 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
427 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
428 case COND_GT: return (condCode == COND_GT);
429 case COND_TR: return GL_TRUE;
430 case COND_FL: return GL_FALSE;
431 default: return GL_TRUE;
432 }
433 }
434
435
436 /**
437 * Store 4 floats into a register. Observe the instructions saturate and
438 * set-condition-code flags.
439 */
440 static void
441 store_vector4( const struct prog_instruction *inst,
442 struct fp_machine *machine,
443 const GLfloat value[4] )
444 {
445 const struct prog_dst_register *dest = &(inst->DstReg);
446 const GLboolean clamp = inst->SaturateMode == SATURATE_ZERO_ONE;
447 GLfloat *dstReg;
448 GLfloat dummyReg[4];
449 GLfloat clampedValue[4];
450 GLuint writeMask = dest->WriteMask;
451
452 switch (dest->File) {
453 case PROGRAM_OUTPUT:
454 dstReg = machine->Outputs[dest->Index];
455 break;
456 case PROGRAM_TEMPORARY:
457 dstReg = machine->Temporaries[dest->Index];
458 break;
459 case PROGRAM_WRITE_ONLY:
460 dstReg = dummyReg;
461 return;
462 default:
463 _mesa_problem(NULL, "bad register file in store_vector4(fp)");
464 return;
465 }
466
467 #if 0
468 if (value[0] > 1.0e10 ||
469 IS_INF_OR_NAN(value[0]) ||
470 IS_INF_OR_NAN(value[1]) ||
471 IS_INF_OR_NAN(value[2]) ||
472 IS_INF_OR_NAN(value[3]) )
473 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]);
474 #endif
475
476 if (clamp) {
477 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
478 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
479 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
480 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
481 value = clampedValue;
482 }
483
484 if (dest->CondMask != COND_TR) {
485 /* condition codes may turn off some writes */
486 if (writeMask & WRITEMASK_X) {
487 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 0)],
488 dest->CondMask))
489 writeMask &= ~WRITEMASK_X;
490 }
491 if (writeMask & WRITEMASK_Y) {
492 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 1)],
493 dest->CondMask))
494 writeMask &= ~WRITEMASK_Y;
495 }
496 if (writeMask & WRITEMASK_Z) {
497 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 2)],
498 dest->CondMask))
499 writeMask &= ~WRITEMASK_Z;
500 }
501 if (writeMask & WRITEMASK_W) {
502 if (!test_cc(machine->CondCodes[GET_SWZ(dest->CondSwizzle, 3)],
503 dest->CondMask))
504 writeMask &= ~WRITEMASK_W;
505 }
506 }
507
508 if (writeMask & WRITEMASK_X)
509 dstReg[0] = value[0];
510 if (writeMask & WRITEMASK_Y)
511 dstReg[1] = value[1];
512 if (writeMask & WRITEMASK_Z)
513 dstReg[2] = value[2];
514 if (writeMask & WRITEMASK_W)
515 dstReg[3] = value[3];
516
517 if (inst->CondUpdate) {
518 if (writeMask & WRITEMASK_X)
519 machine->CondCodes[0] = generate_cc(value[0]);
520 if (writeMask & WRITEMASK_Y)
521 machine->CondCodes[1] = generate_cc(value[1]);
522 if (writeMask & WRITEMASK_Z)
523 machine->CondCodes[2] = generate_cc(value[2]);
524 if (writeMask & WRITEMASK_W)
525 machine->CondCodes[3] = generate_cc(value[3]);
526 }
527 }
528
529
530 /**
531 * Initialize a new machine state instance from an existing one, adding
532 * the partial derivatives onto the input registers.
533 * Used to implement DDX and DDY instructions in non-trivial cases.
534 */
535 static void
536 init_machine_deriv( GLcontext *ctx,
537 const struct fp_machine *machine,
538 const struct gl_fragment_program *program,
539 const SWspan *span, char xOrY,
540 struct fp_machine *dMachine )
541 {
542 GLuint u;
543
544 ASSERT(xOrY == 'X' || xOrY == 'Y');
545
546 /* copy existing machine */
547 _mesa_memcpy(dMachine, machine, sizeof(struct fp_machine));
548
549 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
550 /* Clear temporary registers (undefined for ARB_f_p) */
551 _mesa_bzero( (void*) machine->Temporaries,
552 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
553 }
554
555 /* Add derivatives */
556 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_WPOS)) {
557 GLfloat *wpos = (GLfloat*) machine->Inputs[FRAG_ATTRIB_WPOS];
558 if (xOrY == 'X') {
559 wpos[0] += 1.0F;
560 wpos[1] += 0.0F;
561 wpos[2] += span->dzdx;
562 wpos[3] += span->dwdx;
563 }
564 else {
565 wpos[0] += 0.0F;
566 wpos[1] += 1.0F;
567 wpos[2] += span->dzdy;
568 wpos[3] += span->dwdy;
569 }
570 }
571 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL0)) {
572 GLfloat *col0 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL0];
573 if (xOrY == 'X') {
574 col0[0] += span->drdx * (1.0F / CHAN_MAXF);
575 col0[1] += span->dgdx * (1.0F / CHAN_MAXF);
576 col0[2] += span->dbdx * (1.0F / CHAN_MAXF);
577 col0[3] += span->dadx * (1.0F / CHAN_MAXF);
578 }
579 else {
580 col0[0] += span->drdy * (1.0F / CHAN_MAXF);
581 col0[1] += span->dgdy * (1.0F / CHAN_MAXF);
582 col0[2] += span->dbdy * (1.0F / CHAN_MAXF);
583 col0[3] += span->dady * (1.0F / CHAN_MAXF);
584 }
585 }
586 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_COL1)) {
587 GLfloat *col1 = (GLfloat*) machine->Inputs[FRAG_ATTRIB_COL1];
588 if (xOrY == 'X') {
589 col1[0] += span->dsrdx * (1.0F / CHAN_MAXF);
590 col1[1] += span->dsgdx * (1.0F / CHAN_MAXF);
591 col1[2] += span->dsbdx * (1.0F / CHAN_MAXF);
592 col1[3] += 0.0; /*XXX fix */
593 }
594 else {
595 col1[0] += span->dsrdy * (1.0F / CHAN_MAXF);
596 col1[1] += span->dsgdy * (1.0F / CHAN_MAXF);
597 col1[2] += span->dsbdy * (1.0F / CHAN_MAXF);
598 col1[3] += 0.0; /*XXX fix */
599 }
600 }
601 if (program->Base.InputsRead & (1 << FRAG_ATTRIB_FOGC)) {
602 GLfloat *fogc = (GLfloat*) machine->Inputs[FRAG_ATTRIB_FOGC];
603 if (xOrY == 'X') {
604 fogc[0] += span->dfogdx;
605 }
606 else {
607 fogc[0] += span->dfogdy;
608 }
609 }
610 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
611 if (program->Base.InputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
612 GLfloat *tex = (GLfloat*) machine->Inputs[FRAG_ATTRIB_TEX0 + u];
613 /* XXX perspective-correct interpolation */
614 if (xOrY == 'X') {
615 tex[0] += span->texStepX[u][0];
616 tex[1] += span->texStepX[u][1];
617 tex[2] += span->texStepX[u][2];
618 tex[3] += span->texStepX[u][3];
619 }
620 else {
621 tex[0] += span->texStepY[u][0];
622 tex[1] += span->texStepY[u][1];
623 tex[2] += span->texStepY[u][2];
624 tex[3] += span->texStepY[u][3];
625 }
626 }
627 }
628
629 /* init condition codes */
630 dMachine->CondCodes[0] = COND_EQ;
631 dMachine->CondCodes[1] = COND_EQ;
632 dMachine->CondCodes[2] = COND_EQ;
633 dMachine->CondCodes[3] = COND_EQ;
634 }
635
636
637 /**
638 * Execute the given fragment program.
639 * NOTE: we do everything in single-precision floating point; we don't
640 * currently observe the single/half/fixed-precision qualifiers.
641 * \param ctx - rendering context
642 * \param program - the fragment program to execute
643 * \param machine - machine state (register file)
644 * \param maxInst - max number of instructions to execute
645 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
646 */
647 static GLboolean
648 execute_program( GLcontext *ctx,
649 const struct gl_fragment_program *program, GLuint maxInst,
650 struct fp_machine *machine, const SWspan *span,
651 GLuint column )
652 {
653 GLuint pc;
654
655 if (DEBUG_FRAG) {
656 printf("execute fragment program --------------------\n");
657 }
658
659 for (pc = 0; pc < maxInst; pc++) {
660 const struct prog_instruction *inst = program->Base.Instructions + pc;
661
662 if (ctx->FragmentProgram.CallbackEnabled &&
663 ctx->FragmentProgram.Callback) {
664 ctx->FragmentProgram.CurrentPosition = inst->StringPos;
665 ctx->FragmentProgram.Callback(program->Base.Target,
666 ctx->FragmentProgram.CallbackData);
667 }
668
669 if (DEBUG_FRAG) {
670 _mesa_print_instruction(inst);
671 }
672
673 switch (inst->Opcode) {
674 case OPCODE_ABS:
675 {
676 GLfloat a[4], result[4];
677 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
678 result[0] = FABSF(a[0]);
679 result[1] = FABSF(a[1]);
680 result[2] = FABSF(a[2]);
681 result[3] = FABSF(a[3]);
682 store_vector4( inst, machine, result );
683 }
684 break;
685 case OPCODE_ADD:
686 {
687 GLfloat a[4], b[4], result[4];
688 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
689 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
690 result[0] = a[0] + b[0];
691 result[1] = a[1] + b[1];
692 result[2] = a[2] + b[2];
693 result[3] = a[3] + b[3];
694 store_vector4( inst, machine, result );
695 if (DEBUG_FRAG) {
696 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n",
697 result[0], result[1], result[2], result[3],
698 a[0], a[1], a[2], a[3],
699 b[0], b[1], b[2], b[3]);
700 }
701 }
702 break;
703 case OPCODE_BRA: /* conditional branch */
704 {
705 /* NOTE: The return is conditional! */
706 const GLuint swizzle = inst->DstReg.CondSwizzle;
707 const GLuint condMask = inst->DstReg.CondMask;
708 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
709 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
710 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
711 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
712 /* take branch */
713 pc = inst->BranchTarget;
714 }
715 }
716 break;
717 case OPCODE_CAL: /* Call subroutine */
718 {
719 /* NOTE: The call is conditional! */
720 const GLuint swizzle = inst->DstReg.CondSwizzle;
721 const GLuint condMask = inst->DstReg.CondMask;
722 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
723 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
724 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
725 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
726 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) {
727 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
728 }
729 machine->CallStack[machine->StackDepth++] = pc + 1;
730 pc = inst->BranchTarget;
731 }
732 }
733 break;
734 case OPCODE_CMP:
735 {
736 GLfloat a[4], b[4], c[4], result[4];
737 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
738 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
739 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
740 result[0] = a[0] < 0.0F ? b[0] : c[0];
741 result[1] = a[1] < 0.0F ? b[1] : c[1];
742 result[2] = a[2] < 0.0F ? b[2] : c[2];
743 result[3] = a[3] < 0.0F ? b[3] : c[3];
744 store_vector4( inst, machine, result );
745 }
746 break;
747 case OPCODE_COS:
748 {
749 GLfloat a[4], result[4];
750 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
751 result[0] = result[1] = result[2] = result[3]
752 = (GLfloat) _mesa_cos(a[0]);
753 store_vector4( inst, machine, result );
754 }
755 break;
756 case OPCODE_DDX: /* Partial derivative with respect to X */
757 {
758 GLfloat a[4], aNext[4], result[4];
759 struct fp_machine dMachine;
760 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'X',
761 column, result)) {
762 /* This is tricky. Make a copy of the current machine state,
763 * increment the input registers by the dx or dy partial
764 * derivatives, then re-execute the program up to the
765 * preceeding instruction, then fetch the source register.
766 * Finally, find the difference in the register values for
767 * the original and derivative runs.
768 */
769 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
770 init_machine_deriv(ctx, machine, program, span,
771 'X', &dMachine);
772 execute_program(ctx, program, pc, &dMachine, span, column);
773 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
774 result[0] = aNext[0] - a[0];
775 result[1] = aNext[1] - a[1];
776 result[2] = aNext[2] - a[2];
777 result[3] = aNext[3] - a[3];
778 }
779 store_vector4( inst, machine, result );
780 }
781 break;
782 case OPCODE_DDY: /* Partial derivative with respect to Y */
783 {
784 GLfloat a[4], aNext[4], result[4];
785 struct fp_machine dMachine;
786 if (!fetch_vector4_deriv(ctx, &inst->SrcReg[0], span, 'Y',
787 column, result)) {
788 init_machine_deriv(ctx, machine, program, span,
789 'Y', &dMachine);
790 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
791 execute_program(ctx, program, pc, &dMachine, span, column);
792 fetch_vector4( ctx, &inst->SrcReg[0], &dMachine, program, aNext );
793 result[0] = aNext[0] - a[0];
794 result[1] = aNext[1] - a[1];
795 result[2] = aNext[2] - a[2];
796 result[3] = aNext[3] - a[3];
797 }
798 store_vector4( inst, machine, result );
799 }
800 break;
801 case OPCODE_DP3:
802 {
803 GLfloat a[4], b[4], result[4];
804 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
805 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
806 result[0] = result[1] = result[2] = result[3] = DOT3(a, b);
807 store_vector4( inst, machine, result );
808 if (DEBUG_FRAG) {
809 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n",
810 result[0], a[0], a[1], a[2], b[0], b[1], b[2]);
811 }
812 }
813 break;
814 case OPCODE_DP4:
815 {
816 GLfloat a[4], b[4], result[4];
817 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
818 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
819 result[0] = result[1] = result[2] = result[3] = DOT4(a,b);
820 store_vector4( inst, machine, result );
821 if (DEBUG_FRAG) {
822 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n",
823 result[0], a[0], a[1], a[2], a[3],
824 b[0], b[1], b[2], b[3]);
825 }
826 }
827 break;
828 case OPCODE_DPH:
829 {
830 GLfloat a[4], b[4], result[4];
831 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
832 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
833 result[0] = result[1] = result[2] = result[3] =
834 a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + b[3];
835 store_vector4( inst, machine, result );
836 }
837 break;
838 case OPCODE_DST: /* Distance vector */
839 {
840 GLfloat a[4], b[4], result[4];
841 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
842 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
843 result[0] = 1.0F;
844 result[1] = a[1] * b[1];
845 result[2] = a[2];
846 result[3] = b[3];
847 store_vector4( inst, machine, result );
848 }
849 break;
850 case OPCODE_EX2: /* Exponential base 2 */
851 {
852 GLfloat a[4], result[4];
853 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
854 result[0] = result[1] = result[2] = result[3] =
855 (GLfloat) _mesa_pow(2.0, a[0]);
856 store_vector4( inst, machine, result );
857 }
858 break;
859 case OPCODE_FLR:
860 {
861 GLfloat a[4], result[4];
862 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
863 result[0] = FLOORF(a[0]);
864 result[1] = FLOORF(a[1]);
865 result[2] = FLOORF(a[2]);
866 result[3] = FLOORF(a[3]);
867 store_vector4( inst, machine, result );
868 }
869 break;
870 case OPCODE_FRC:
871 {
872 GLfloat a[4], result[4];
873 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
874 result[0] = a[0] - FLOORF(a[0]);
875 result[1] = a[1] - FLOORF(a[1]);
876 result[2] = a[2] - FLOORF(a[2]);
877 result[3] = a[3] - FLOORF(a[3]);
878 store_vector4( inst, machine, result );
879 }
880 break;
881 case OPCODE_KIL_NV: /* NV_f_p only */
882 {
883 const GLuint swizzle = inst->DstReg.CondSwizzle;
884 const GLuint condMask = inst->DstReg.CondMask;
885 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
886 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
887 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
888 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
889 return GL_FALSE;
890 }
891 }
892 break;
893 case OPCODE_KIL: /* ARB_f_p only */
894 {
895 GLfloat a[4];
896 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
897 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) {
898 return GL_FALSE;
899 }
900 }
901 break;
902 case OPCODE_LG2: /* log base 2 */
903 {
904 GLfloat a[4], result[4];
905 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
906 result[0] = result[1] = result[2] = result[3] = LOG2(a[0]);
907 store_vector4( inst, machine, result );
908 }
909 break;
910 case OPCODE_LIT:
911 {
912 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */
913 GLfloat a[4], result[4];
914 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
915 a[0] = MAX2(a[0], 0.0F);
916 a[1] = MAX2(a[1], 0.0F);
917 /* XXX ARB version clamps a[3], NV version doesn't */
918 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon));
919 result[0] = 1.0F;
920 result[1] = a[0];
921 /* XXX we could probably just use pow() here */
922 if (a[0] > 0.0F) {
923 if (a[1] == 0.0 && a[3] == 0.0)
924 result[2] = 1.0;
925 else
926 result[2] = EXPF(a[3] * LOGF(a[1]));
927 }
928 else {
929 result[2] = 0.0;
930 }
931 result[3] = 1.0F;
932 store_vector4( inst, machine, result );
933 if (DEBUG_FRAG) {
934 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n",
935 result[0], result[1], result[2], result[3],
936 a[0], a[1], a[2], a[3]);
937 }
938 }
939 break;
940 case OPCODE_LRP:
941 {
942 GLfloat a[4], b[4], c[4], result[4];
943 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
944 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
945 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
946 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
947 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
948 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
949 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
950 store_vector4( inst, machine, result );
951 if (DEBUG_FRAG) {
952 printf("LRP (%g %g %g %g) = (%g %g %g %g), "
953 "(%g %g %g %g), (%g %g %g %g)\n",
954 result[0], result[1], result[2], result[3],
955 a[0], a[1], a[2], a[3],
956 b[0], b[1], b[2], b[3],
957 c[0], c[1], c[2], c[3]);
958 }
959 }
960 break;
961 case OPCODE_MAD:
962 {
963 GLfloat a[4], b[4], c[4], result[4];
964 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
965 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
966 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
967 result[0] = a[0] * b[0] + c[0];
968 result[1] = a[1] * b[1] + c[1];
969 result[2] = a[2] * b[2] + c[2];
970 result[3] = a[3] * b[3] + c[3];
971 store_vector4( inst, machine, result );
972 if (DEBUG_FRAG) {
973 printf("MAD (%g %g %g %g) = (%g %g %g %g) * "
974 "(%g %g %g %g) + (%g %g %g %g)\n",
975 result[0], result[1], result[2], result[3],
976 a[0], a[1], a[2], a[3],
977 b[0], b[1], b[2], b[3],
978 c[0], c[1], c[2], c[3]);
979 }
980 }
981 break;
982 case OPCODE_MAX:
983 {
984 GLfloat a[4], b[4], result[4];
985 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
986 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
987 result[0] = MAX2(a[0], b[0]);
988 result[1] = MAX2(a[1], b[1]);
989 result[2] = MAX2(a[2], b[2]);
990 result[3] = MAX2(a[3], b[3]);
991 store_vector4( inst, machine, result );
992 if (DEBUG_FRAG) {
993 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n",
994 result[0], result[1], result[2], result[3],
995 a[0], a[1], a[2], a[3],
996 b[0], b[1], b[2], b[3]);
997 }
998 }
999 break;
1000 case OPCODE_MIN:
1001 {
1002 GLfloat a[4], b[4], result[4];
1003 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1004 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1005 result[0] = MIN2(a[0], b[0]);
1006 result[1] = MIN2(a[1], b[1]);
1007 result[2] = MIN2(a[2], b[2]);
1008 result[3] = MIN2(a[3], b[3]);
1009 store_vector4( inst, machine, result );
1010 }
1011 break;
1012 case OPCODE_MOV:
1013 {
1014 GLfloat result[4];
1015 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, result );
1016 store_vector4( inst, machine, result );
1017 if (DEBUG_FRAG) {
1018 printf("MOV (%g %g %g %g)\n",
1019 result[0], result[1], result[2], result[3]);
1020 }
1021 }
1022 break;
1023 case OPCODE_MUL:
1024 {
1025 GLfloat a[4], b[4], result[4];
1026 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1027 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1028 result[0] = a[0] * b[0];
1029 result[1] = a[1] * b[1];
1030 result[2] = a[2] * b[2];
1031 result[3] = a[3] * b[3];
1032 store_vector4( inst, machine, result );
1033 if (DEBUG_FRAG) {
1034 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n",
1035 result[0], result[1], result[2], result[3],
1036 a[0], a[1], a[2], a[3],
1037 b[0], b[1], b[2], b[3]);
1038 }
1039 }
1040 break;
1041 case OPCODE_PK2H: /* pack two 16-bit floats in one 32-bit float */
1042 {
1043 GLfloat a[4], result[4];
1044 GLhalfNV hx, hy;
1045 GLuint *rawResult = (GLuint *) result;
1046 GLuint twoHalves;
1047 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1048 hx = _mesa_float_to_half(a[0]);
1049 hy = _mesa_float_to_half(a[1]);
1050 twoHalves = hx | (hy << 16);
1051 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1052 = twoHalves;
1053 store_vector4( inst, machine, result );
1054 }
1055 break;
1056 case OPCODE_PK2US: /* pack two GLushorts into one 32-bit float */
1057 {
1058 GLfloat a[4], result[4];
1059 GLuint usx, usy, *rawResult = (GLuint *) result;
1060 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1061 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1062 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1063 usx = IROUND(a[0] * 65535.0F);
1064 usy = IROUND(a[1] * 65535.0F);
1065 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1066 = usx | (usy << 16);
1067 store_vector4( inst, machine, result );
1068 }
1069 break;
1070 case OPCODE_PK4B: /* pack four GLbytes into one 32-bit float */
1071 {
1072 GLfloat a[4], result[4];
1073 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1074 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1075 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
1076 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
1077 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
1078 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
1079 ubx = IROUND(127.0F * a[0] + 128.0F);
1080 uby = IROUND(127.0F * a[1] + 128.0F);
1081 ubz = IROUND(127.0F * a[2] + 128.0F);
1082 ubw = IROUND(127.0F * a[3] + 128.0F);
1083 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1084 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1085 store_vector4( inst, machine, result );
1086 }
1087 break;
1088 case OPCODE_PK4UB: /* pack four GLubytes into one 32-bit float */
1089 {
1090 GLfloat a[4], result[4];
1091 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
1092 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1093 a[0] = CLAMP(a[0], 0.0F, 1.0F);
1094 a[1] = CLAMP(a[1], 0.0F, 1.0F);
1095 a[2] = CLAMP(a[2], 0.0F, 1.0F);
1096 a[3] = CLAMP(a[3], 0.0F, 1.0F);
1097 ubx = IROUND(255.0F * a[0]);
1098 uby = IROUND(255.0F * a[1]);
1099 ubz = IROUND(255.0F * a[2]);
1100 ubw = IROUND(255.0F * a[3]);
1101 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
1102 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
1103 store_vector4( inst, machine, result );
1104 }
1105 break;
1106 case OPCODE_POW:
1107 {
1108 GLfloat a[4], b[4], result[4];
1109 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1110 fetch_vector1( ctx, &inst->SrcReg[1], machine, program, b );
1111 result[0] = result[1] = result[2] = result[3]
1112 = (GLfloat)_mesa_pow(a[0], b[0]);
1113 store_vector4( inst, machine, result );
1114 }
1115 break;
1116 case OPCODE_RCP:
1117 {
1118 GLfloat a[4], result[4];
1119 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1120 if (DEBUG_FRAG) {
1121 if (a[0] == 0)
1122 printf("RCP(0)\n");
1123 else if (IS_INF_OR_NAN(a[0]))
1124 printf("RCP(inf)\n");
1125 }
1126 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0];
1127 store_vector4( inst, machine, result );
1128 }
1129 break;
1130 case OPCODE_RET: /* return from subroutine */
1131 {
1132 /* NOTE: The return is conditional! */
1133 const GLuint swizzle = inst->DstReg.CondSwizzle;
1134 const GLuint condMask = inst->DstReg.CondMask;
1135 if (test_cc(machine->CondCodes[GET_SWZ(swizzle, 0)], condMask) ||
1136 test_cc(machine->CondCodes[GET_SWZ(swizzle, 1)], condMask) ||
1137 test_cc(machine->CondCodes[GET_SWZ(swizzle, 2)], condMask) ||
1138 test_cc(machine->CondCodes[GET_SWZ(swizzle, 3)], condMask)) {
1139 if (machine->StackDepth == 0) {
1140 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */
1141 }
1142 pc = machine->CallStack[--machine->StackDepth];
1143 }
1144 }
1145 break;
1146 case OPCODE_RFL: /* reflection vector */
1147 {
1148 GLfloat axis[4], dir[4], result[4], tmpX, tmpW;
1149 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, axis );
1150 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dir );
1151 tmpW = DOT3(axis, axis);
1152 tmpX = (2.0F * DOT3(axis, dir)) / tmpW;
1153 result[0] = tmpX * axis[0] - dir[0];
1154 result[1] = tmpX * axis[1] - dir[1];
1155 result[2] = tmpX * axis[2] - dir[2];
1156 /* result[3] is never written! XXX enforce in parser! */
1157 store_vector4( inst, machine, result );
1158 }
1159 break;
1160 case OPCODE_RSQ: /* 1 / sqrt() */
1161 {
1162 GLfloat a[4], result[4];
1163 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1164 a[0] = FABSF(a[0]);
1165 result[0] = result[1] = result[2] = result[3] = INV_SQRTF(a[0]);
1166 store_vector4( inst, machine, result );
1167 if (DEBUG_FRAG) {
1168 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]);
1169 }
1170 }
1171 break;
1172 case OPCODE_SCS: /* sine and cos */
1173 {
1174 GLfloat a[4], result[4];
1175 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1176 result[0] = (GLfloat)_mesa_cos(a[0]);
1177 result[1] = (GLfloat)_mesa_sin(a[0]);
1178 result[2] = 0.0; /* undefined! */
1179 result[3] = 0.0; /* undefined! */
1180 store_vector4( inst, machine, result );
1181 }
1182 break;
1183 case OPCODE_SEQ: /* set on equal */
1184 {
1185 GLfloat a[4], b[4], result[4];
1186 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1187 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1188 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
1189 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
1190 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
1191 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
1192 store_vector4( inst, machine, result );
1193 }
1194 break;
1195 case OPCODE_SFL: /* set false, operands ignored */
1196 {
1197 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
1198 store_vector4( inst, machine, result );
1199 }
1200 break;
1201 case OPCODE_SGE: /* set on greater or equal */
1202 {
1203 GLfloat a[4], b[4], result[4];
1204 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1205 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1206 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
1207 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
1208 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
1209 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
1210 store_vector4( inst, machine, result );
1211 }
1212 break;
1213 case OPCODE_SGT: /* set on greater */
1214 {
1215 GLfloat a[4], b[4], result[4];
1216 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1217 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1218 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
1219 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
1220 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
1221 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
1222 store_vector4( inst, machine, result );
1223 }
1224 break;
1225 case OPCODE_SIN:
1226 {
1227 GLfloat a[4], result[4];
1228 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1229 result[0] = result[1] = result[2] = result[3]
1230 = (GLfloat) _mesa_sin(a[0]);
1231 store_vector4( inst, machine, result );
1232 }
1233 break;
1234 case OPCODE_SLE: /* set on less or equal */
1235 {
1236 GLfloat a[4], b[4], result[4];
1237 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1238 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1239 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
1240 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
1241 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
1242 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
1243 store_vector4( inst, machine, result );
1244 }
1245 break;
1246 case OPCODE_SLT: /* set on less */
1247 {
1248 GLfloat a[4], b[4], result[4];
1249 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1250 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1251 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
1252 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
1253 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
1254 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
1255 store_vector4( inst, machine, result );
1256 }
1257 break;
1258 case OPCODE_SNE: /* set on not equal */
1259 {
1260 GLfloat a[4], b[4], result[4];
1261 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1262 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1263 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
1264 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
1265 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
1266 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
1267 store_vector4( inst, machine, result );
1268 }
1269 break;
1270 case OPCODE_STR: /* set true, operands ignored */
1271 {
1272 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
1273 store_vector4( inst, machine, result );
1274 }
1275 break;
1276 case OPCODE_SUB:
1277 {
1278 GLfloat a[4], b[4], result[4];
1279 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1280 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1281 result[0] = a[0] - b[0];
1282 result[1] = a[1] - b[1];
1283 result[2] = a[2] - b[2];
1284 result[3] = a[3] - b[3];
1285 store_vector4( inst, machine, result );
1286 if (DEBUG_FRAG) {
1287 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n",
1288 result[0], result[1], result[2], result[3],
1289 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]);
1290 }
1291 }
1292 break;
1293 case OPCODE_SWZ: /* extended swizzle */
1294 {
1295 const struct prog_src_register *source = &inst->SrcReg[0];
1296 const GLfloat *src = get_register_pointer(ctx, source,
1297 machine, program);
1298 GLfloat result[4];
1299 GLuint i;
1300 for (i = 0; i < 4; i++) {
1301 const GLuint swz = GET_SWZ(source->Swizzle, i);
1302 if (swz == SWIZZLE_ZERO)
1303 result[i] = 0.0;
1304 else if (swz == SWIZZLE_ONE)
1305 result[i] = 1.0;
1306 else {
1307 ASSERT(swz >= 0);
1308 ASSERT(swz <= 3);
1309 result[i] = src[swz];
1310 }
1311 if (source->NegateBase & (1 << i))
1312 result[i] = -result[i];
1313 }
1314 store_vector4( inst, machine, result );
1315 }
1316 break;
1317 case OPCODE_TEX: /* Both ARB and NV frag prog */
1318 /* Texel lookup */
1319 {
1320 /* Note: only use the precomputed lambda value when we're
1321 * sampling texture unit [K] with texcoord[K].
1322 * Otherwise, the lambda value may have no relation to the
1323 * instruction's texcoord or texture image. Using the wrong
1324 * lambda is usually bad news.
1325 * The rest of the time, just use zero (until we get a more
1326 * sophisticated way of computing lambda).
1327 */
1328 GLfloat coord[4], color[4], lambda;
1329 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1330 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1331 lambda = span->array->lambda[inst->TexSrcUnit][column];
1332 else
1333 lambda = 0.0;
1334 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1335 fetch_texel( ctx, coord, lambda, inst->TexSrcUnit, color );
1336 if (DEBUG_FRAG) {
1337 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g], "
1338 "lod %f\n",
1339 color[0], color[1], color[2], color[3],
1340 inst->TexSrcUnit,
1341 coord[0], coord[1], coord[2], coord[3], lambda);
1342 }
1343 store_vector4( inst, machine, color );
1344 }
1345 break;
1346 case OPCODE_TXB: /* GL_ARB_fragment_program only */
1347 /* Texel lookup with LOD bias */
1348 {
1349 GLfloat coord[4], color[4], lambda, bias;
1350 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1351 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1352 lambda = span->array->lambda[inst->TexSrcUnit][column];
1353 else
1354 lambda = 0.0;
1355 fetch_vector4(ctx, &inst->SrcReg[0], machine, program, coord);
1356 /* coord[3] is the bias to add to lambda */
1357 bias = ctx->Texture.Unit[inst->TexSrcUnit].LodBias
1358 + ctx->Texture.Unit[inst->TexSrcUnit]._Current->LodBias
1359 + coord[3];
1360 fetch_texel(ctx, coord, lambda + bias, inst->TexSrcUnit, color);
1361 store_vector4( inst, machine, color );
1362 }
1363 break;
1364 case OPCODE_TXD: /* GL_NV_fragment_program only */
1365 /* Texture lookup w/ partial derivatives for LOD */
1366 {
1367 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
1368 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, texcoord );
1369 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, dtdx );
1370 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, dtdy );
1371 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
1372 color );
1373 store_vector4( inst, machine, color );
1374 }
1375 break;
1376 case OPCODE_TXP: /* GL_ARB_fragment_program only */
1377 /* Texture lookup w/ projective divide */
1378 {
1379 GLfloat texcoord[4], color[4], lambda;
1380 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1381 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1382 lambda = span->array->lambda[inst->TexSrcUnit][column];
1383 else
1384 lambda = 0.0;
1385 fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1386 /* Not so sure about this test - if texcoord[3] is
1387 * zero, we'd probably be fine except for an ASSERT in
1388 * IROUND_POS() which gets triggered by the inf values created.
1389 */
1390 if (texcoord[3] != 0.0) {
1391 texcoord[0] /= texcoord[3];
1392 texcoord[1] /= texcoord[3];
1393 texcoord[2] /= texcoord[3];
1394 }
1395 fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1396 store_vector4( inst, machine, color );
1397 }
1398 break;
1399 case OPCODE_TXP_NV: /* GL_NV_fragment_program only */
1400 /* Texture lookup w/ projective divide */
1401 {
1402 GLfloat texcoord[4], color[4], lambda;
1403 if (inst->SrcReg[0].File == PROGRAM_INPUT &&
1404 inst->SrcReg[0].Index == FRAG_ATTRIB_TEX0+inst->TexSrcUnit)
1405 lambda = span->array->lambda[inst->TexSrcUnit][column];
1406 else
1407 lambda = 0.0;
1408 fetch_vector4(ctx, &inst->SrcReg[0], machine, program,texcoord);
1409 if (inst->TexSrcTarget != TEXTURE_CUBE_INDEX &&
1410 texcoord[3] != 0.0) {
1411 texcoord[0] /= texcoord[3];
1412 texcoord[1] /= texcoord[3];
1413 texcoord[2] /= texcoord[3];
1414 }
1415 fetch_texel( ctx, texcoord, lambda, inst->TexSrcUnit, color );
1416 store_vector4( inst, machine, color );
1417 }
1418 break;
1419 case OPCODE_UP2H: /* unpack two 16-bit floats */
1420 {
1421 GLfloat a[4], result[4];
1422 const GLuint *rawBits = (const GLuint *) a;
1423 GLhalfNV hx, hy;
1424 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1425 hx = rawBits[0] & 0xffff;
1426 hy = rawBits[0] >> 16;
1427 result[0] = result[2] = _mesa_half_to_float(hx);
1428 result[1] = result[3] = _mesa_half_to_float(hy);
1429 store_vector4( inst, machine, result );
1430 }
1431 break;
1432 case OPCODE_UP2US: /* unpack two GLushorts */
1433 {
1434 GLfloat a[4], result[4];
1435 const GLuint *rawBits = (const GLuint *) a;
1436 GLushort usx, usy;
1437 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1438 usx = rawBits[0] & 0xffff;
1439 usy = rawBits[0] >> 16;
1440 result[0] = result[2] = usx * (1.0f / 65535.0f);
1441 result[1] = result[3] = usy * (1.0f / 65535.0f);
1442 store_vector4( inst, machine, result );
1443 }
1444 break;
1445 case OPCODE_UP4B: /* unpack four GLbytes */
1446 {
1447 GLfloat a[4], result[4];
1448 const GLuint *rawBits = (const GLuint *) a;
1449 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1450 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
1451 result[1] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
1452 result[2] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
1453 result[3] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
1454 store_vector4( inst, machine, result );
1455 }
1456 break;
1457 case OPCODE_UP4UB: /* unpack four GLubytes */
1458 {
1459 GLfloat a[4], result[4];
1460 const GLuint *rawBits = (const GLuint *) a;
1461 fetch_vector1( ctx, &inst->SrcReg[0], machine, program, a );
1462 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
1463 result[1] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
1464 result[2] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
1465 result[3] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
1466 store_vector4( inst, machine, result );
1467 }
1468 break;
1469 case OPCODE_XPD: /* cross product */
1470 {
1471 GLfloat a[4], b[4], result[4];
1472 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1473 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1474 result[0] = a[1] * b[2] - a[2] * b[1];
1475 result[1] = a[2] * b[0] - a[0] * b[2];
1476 result[2] = a[0] * b[1] - a[1] * b[0];
1477 result[3] = 1.0;
1478 store_vector4( inst, machine, result );
1479 }
1480 break;
1481 case OPCODE_X2D: /* 2-D matrix transform */
1482 {
1483 GLfloat a[4], b[4], c[4], result[4];
1484 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a );
1485 fetch_vector4( ctx, &inst->SrcReg[1], machine, program, b );
1486 fetch_vector4( ctx, &inst->SrcReg[2], machine, program, c );
1487 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
1488 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
1489 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
1490 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
1491 store_vector4( inst, machine, result );
1492 }
1493 break;
1494 case OPCODE_PRINT:
1495 {
1496 if (inst->SrcReg[0].File != -1) {
1497 GLfloat a[4];
1498 fetch_vector4( ctx, &inst->SrcReg[0], machine, program, a);
1499 _mesa_printf("%s%g, %g, %g, %g\n", (const char *) inst->Data,
1500 a[0], a[1], a[2], a[3]);
1501 }
1502 else {
1503 _mesa_printf("%s\n", (const char *) inst->Data);
1504 }
1505 }
1506 break;
1507 case OPCODE_END:
1508 return GL_TRUE;
1509 default:
1510 _mesa_problem(ctx, "Bad opcode %d in _mesa_exec_fragment_program",
1511 inst->Opcode);
1512 return GL_TRUE; /* return value doesn't matter */
1513 }
1514 }
1515 return GL_TRUE;
1516 }
1517
1518
1519 /**
1520 * Initialize the virtual fragment program machine state prior to running
1521 * fragment program on a fragment. This involves initializing the input
1522 * registers, condition codes, etc.
1523 * \param machine the virtual machine state to init
1524 * \param program the fragment program we're about to run
1525 * \param span the span of pixels we'll operate on
1526 * \param col which element (column) of the span we'll operate on
1527 */
1528 static void
1529 init_machine( GLcontext *ctx, struct fp_machine *machine,
1530 const struct gl_fragment_program *program,
1531 const SWspan *span, GLuint col )
1532 {
1533 GLuint inputsRead = program->Base.InputsRead;
1534 GLuint u;
1535
1536 if (ctx->FragmentProgram.CallbackEnabled)
1537 inputsRead = ~0;
1538
1539 if (program->Base.Target == GL_FRAGMENT_PROGRAM_NV) {
1540 /* Clear temporary registers (undefined for ARB_f_p) */
1541 _mesa_bzero(machine->Temporaries,
1542 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
1543 }
1544
1545 /* Load input registers */
1546 if (inputsRead & (1 << FRAG_ATTRIB_WPOS)) {
1547 GLfloat *wpos = machine->Inputs[FRAG_ATTRIB_WPOS];
1548 ASSERT(span->arrayMask & SPAN_Z);
1549 if (span->arrayMask & SPAN_XY) {
1550 wpos[0] = (GLfloat) span->array->x[col];
1551 wpos[1] = (GLfloat) span->array->y[col];
1552 }
1553 else {
1554 wpos[0] = (GLfloat) span->x + col;
1555 wpos[1] = (GLfloat) span->y;
1556 }
1557 wpos[2] = (GLfloat) span->array->z[col] / ctx->DrawBuffer->_DepthMaxF;
1558 wpos[3] = span->w + col * span->dwdx;
1559 }
1560 if (inputsRead & (1 << FRAG_ATTRIB_COL0)) {
1561 ASSERT(span->arrayMask & SPAN_RGBA);
1562 COPY_4V(machine->Inputs[FRAG_ATTRIB_COL0],
1563 span->array->color.sz4.rgba[col]);
1564 }
1565 if (inputsRead & (1 << FRAG_ATTRIB_COL1)) {
1566 ASSERT(span->arrayMask & SPAN_SPEC);
1567 COPY_4V(machine->Inputs[FRAG_ATTRIB_COL1],
1568 span->array->color.sz4.spec[col]);
1569 }
1570 if (inputsRead & (1 << FRAG_ATTRIB_FOGC)) {
1571 GLfloat *fogc = machine->Inputs[FRAG_ATTRIB_FOGC];
1572 ASSERT(span->arrayMask & SPAN_FOG);
1573 fogc[0] = span->array->fog[col];
1574 fogc[1] = 0.0F;
1575 fogc[2] = 0.0F;
1576 fogc[3] = 0.0F;
1577 }
1578 for (u = 0; u < ctx->Const.MaxTextureCoordUnits; u++) {
1579 if (inputsRead & (1 << (FRAG_ATTRIB_TEX0 + u))) {
1580 GLfloat *tex = machine->Inputs[FRAG_ATTRIB_TEX0 + u];
1581 /*ASSERT(ctx->Texture._EnabledCoordUnits & (1 << u));*/
1582 COPY_4V(tex, span->array->texcoords[u][col]);
1583 /*ASSERT(tex[0] != 0 || tex[1] != 0 || tex[2] != 0);*/
1584 }
1585 }
1586
1587 /* init condition codes */
1588 machine->CondCodes[0] = COND_EQ;
1589 machine->CondCodes[1] = COND_EQ;
1590 machine->CondCodes[2] = COND_EQ;
1591 machine->CondCodes[3] = COND_EQ;
1592
1593 /* init call stack */
1594 machine->StackDepth = 0;
1595 }
1596
1597
1598 /**
1599 * Run fragment program on the pixels in span from 'start' to 'end' - 1.
1600 */
1601 static void
1602 run_program(GLcontext *ctx, SWspan *span, GLuint start, GLuint end)
1603 {
1604 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1605 struct fp_machine machine;
1606 GLuint i;
1607
1608 CurrentMachine = &machine;
1609
1610 for (i = start; i < end; i++) {
1611 if (span->array->mask[i]) {
1612 init_machine(ctx, &machine, program, span, i);
1613
1614 if (execute_program(ctx, program, ~0, &machine, span, i)) {
1615 /* Store result color */
1616 COPY_4V(span->array->color.sz4.rgba[i],
1617 machine.Outputs[FRAG_RESULT_COLR]);
1618
1619 /* Store result depth/z */
1620 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1621 const GLfloat depth = machine.Outputs[FRAG_RESULT_DEPR][2];
1622 if (depth <= 0.0)
1623 span->array->z[i] = 0;
1624 else if (depth >= 1.0)
1625 span->array->z[i] = ctx->DrawBuffer->_DepthMax;
1626 else
1627 span->array->z[i] = IROUND(depth * ctx->DrawBuffer->_DepthMaxF);
1628 }
1629 }
1630 else {
1631 /* killed fragment */
1632 span->array->mask[i] = GL_FALSE;
1633 span->writeAll = GL_FALSE;
1634 }
1635 }
1636 }
1637
1638 CurrentMachine = NULL;
1639 }
1640
1641
1642 /**
1643 * Execute the current fragment program for all the fragments
1644 * in the given span.
1645 */
1646 void
1647 _swrast_exec_fragment_program( GLcontext *ctx, SWspan *span )
1648 {
1649 const struct gl_fragment_program *program = ctx->FragmentProgram._Current;
1650
1651 /* incoming colors should be floats */
1652 ASSERT(span->array->ChanType == GL_FLOAT);
1653
1654 ctx->_CurrentProgram = GL_FRAGMENT_PROGRAM_ARB; /* or NV, doesn't matter */
1655
1656 run_program(ctx, span, 0, span->end);
1657
1658 if (program->Base.OutputsWritten & (1 << FRAG_RESULT_DEPR)) {
1659 span->interpMask &= ~SPAN_Z;
1660 span->arrayMask |= SPAN_Z;
1661 }
1662
1663 ctx->_CurrentProgram = 0;
1664 }
1665