738c78274ef6f06bd87d43d593da6a7ca9dfa3b3
[mesa.git] / src / mesa / swrast / s_nvfragprog.c
1 /* $Id: s_nvfragprog.c,v 1.4 2003/02/25 19:29:43 brianp Exp $ */
2
3 /*
4 * Mesa 3-D graphics library
5 * Version: 5.1
6 *
7 * Copyright (C) 1999-2003 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28
29 #include "glheader.h"
30 #include "colormac.h"
31 #include "context.h"
32 #include "nvfragprog.h"
33 #include "macros.h"
34 #include "mmath.h"
35
36 #include "s_nvfragprog.h"
37
38
39
40 /**
41 * Fetch a texel.
42 */
43 static void
44 fetch_texel( GLcontext *ctx, const GLfloat texcoord[4], GLuint unit,
45 GLuint targetIndex, GLfloat color[4] )
46 {
47 const GLfloat *lambda = NULL;
48 GLchan rgba[4];
49 SWcontext *swrast = SWRAST_CONTEXT(ctx);
50 const struct gl_texture_object *texObj;
51
52 switch (targetIndex) {
53 case TEXTURE_1D_INDEX:
54 texObj = ctx->Texture.Unit[unit].Current1D;
55 break;
56 case TEXTURE_2D_INDEX:
57 texObj = ctx->Texture.Unit[unit].Current2D;
58 break;
59 case TEXTURE_3D_INDEX:
60 texObj = ctx->Texture.Unit[unit].Current3D;
61 break;
62 case TEXTURE_CUBE_INDEX:
63 texObj = ctx->Texture.Unit[unit].CurrentCubeMap;
64 break;
65 case TEXTURE_RECT_INDEX:
66 texObj = ctx->Texture.Unit[unit].CurrentRect;
67 break;
68 default:
69 _mesa_problem(ctx, "Invalid target in fetch_texel");
70 }
71
72 swrast->TextureSample[unit](ctx, unit, texObj, 1,
73 (const GLfloat (*)[4]) &texcoord,
74 lambda, &rgba);
75 }
76
77
78 /**
79 * Fetch a texel w/ partial derivatives.
80 */
81 static void
82 fetch_texel_deriv( GLcontext *ctx, const GLfloat texcoord[4],
83 const GLfloat dtdx[4], const GLfloat dtdy[4],
84 GLuint unit, GLuint targetIndex, GLfloat color[4] )
85 {
86 /* XXX to do */
87
88 }
89
90
91
92 /**
93 * Fetch a 4-element float vector from the given source register.
94 * Apply swizzling and negating as needed.
95 */
96 static void
97 fetch_vector4( const struct fp_src_register *source,
98 const struct fp_machine *machine,
99 GLfloat result[4] )
100 {
101 const GLfloat *src;
102
103 /*
104 if (source->RelAddr) {
105 GLint reg = source->Register + machine->AddressReg;
106 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
107 src = zero;
108 else
109 src = machine->Registers[reg];
110 }
111 else
112 */
113
114 src = machine->Registers[source->Register];
115
116 result[0] = src[source->Swizzle[0]];
117 result[1] = src[source->Swizzle[1]];
118 result[2] = src[source->Swizzle[2]];
119 result[3] = src[source->Swizzle[3]];
120
121 if (source->NegateBase) {
122 result[0] = -result[0];
123 result[1] = -result[1];
124 result[2] = -result[2];
125 result[3] = -result[3];
126 }
127 if (source->Abs) {
128 result[0] = FABSF(result[0]);
129 result[1] = FABSF(result[1]);
130 result[2] = FABSF(result[2]);
131 result[3] = FABSF(result[3]);
132 }
133 if (source->NegateAbs) {
134 result[0] = -result[0];
135 result[1] = -result[1];
136 result[2] = -result[2];
137 result[3] = -result[3];
138 }
139 }
140
141
142 /**
143 * As above, but only return result[0] element.
144 */
145 static void
146 fetch_vector1( const struct fp_src_register *source,
147 const struct fp_machine *machine,
148 GLfloat result[4] )
149 {
150 const GLfloat *src;
151
152 /*
153 if (source->RelAddr) {
154 GLint reg = source->Register + machine->AddressReg;
155 if (reg < VP_PROG_REG_START || reg > VP_PROG_REG_END)
156 src = zero;
157 else
158 src = machine->Registers[reg];
159 }
160 else
161 */
162
163 src = machine->Registers[source->Register];
164
165 result[0] = src[source->Swizzle[0]];
166
167 if (source->NegateBase) {
168 result[0] = -result[0];
169 }
170 if (source->Abs) {
171 result[0] = FABSF(result[0]);
172 }
173 if (source->NegateAbs) {
174 result[0] = -result[0];
175 }
176 }
177
178
179 /*
180 * Test value against zero and return GT, LT, EQ or UN if NaN.
181 */
182 static INLINE GLuint
183 generate_cc( float value )
184 {
185 if (value != value)
186 return COND_UN; /* NaN */
187 if (value > 0.0F)
188 return COND_GT;
189 if (value < 0.0F)
190 return COND_LT;
191 return COND_EQ;
192 }
193
194 /*
195 * Test if the ccMaskRule is satisfied by the given condition code.
196 * Used to mask destination writes according to the current condition codee.
197 */
198 static INLINE GLboolean
199 test_cc(GLuint condCode, GLuint ccMaskRule)
200 {
201 switch (ccMaskRule) {
202 case COND_EQ: return (condCode == COND_EQ);
203 case COND_NE: return (condCode != COND_EQ);
204 case COND_LT: return (condCode == COND_LT);
205 case COND_GE: return (condCode == COND_GT || condCode == COND_EQ);
206 case COND_LE: return (condCode == COND_LT || condCode == COND_EQ);
207 case COND_GT: return (condCode == COND_GT);
208 case COND_TR: return GL_TRUE;
209 case COND_FL: return GL_FALSE;
210 default: return GL_TRUE;
211 }
212 }
213
214
215 /**
216 * Store 4 floats into a register. Observe the instructions saturate and
217 * set-condition-code flags.
218 */
219 static void
220 store_vector4( const struct fp_instruction *inst,
221 struct fp_machine *machine,
222 const GLfloat value[4] )
223 {
224 const struct fp_dst_register *dest = &(inst->DstReg);
225 const GLboolean clamp = inst->Saturate;
226 const GLboolean updateCC = inst->UpdateCondRegister;
227 GLfloat *dstReg = machine->Registers[dest->Register];
228 GLfloat clampedValue[4];
229 const GLboolean *writeMask = dest->WriteMask;
230 GLboolean condWriteMask[4];
231
232 if (clamp) {
233 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F);
234 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F);
235 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F);
236 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F);
237 value = clampedValue;
238 }
239
240 if (dest->CondMask != COND_TR) {
241 condWriteMask[0] = writeMask[0]
242 && test_cc(machine->CondCodes[dest->CondSwizzle[0]], dest->CondMask);
243 condWriteMask[1] = writeMask[1]
244 && test_cc(machine->CondCodes[dest->CondSwizzle[1]], dest->CondMask);
245 condWriteMask[2] = writeMask[2]
246 && test_cc(machine->CondCodes[dest->CondSwizzle[2]], dest->CondMask);
247 condWriteMask[3] = writeMask[3]
248 && test_cc(machine->CondCodes[dest->CondSwizzle[3]], dest->CondMask);
249 writeMask = condWriteMask;
250 }
251
252 if (writeMask[0]) {
253 dstReg[0] = value[0];
254 if (updateCC)
255 machine->CondCodes[0] = generate_cc(value[0]);
256 }
257 if (writeMask[1]) {
258 dstReg[1] = value[1];
259 if (updateCC)
260 machine->CondCodes[1] = generate_cc(value[1]);
261 }
262 if (writeMask[2]) {
263 dstReg[2] = value[2];
264 if (updateCC)
265 machine->CondCodes[2] = generate_cc(value[2]);
266 }
267 if (writeMask[3]) {
268 dstReg[3] = value[3];
269 if (updateCC)
270 machine->CondCodes[3] = generate_cc(value[3]);
271 }
272 }
273
274
275 /**
276 * Execute the given vertex program
277 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
278 */
279 static GLboolean
280 execute_program(GLcontext *ctx, const struct fragment_program *program)
281 {
282 struct fp_machine *machine = &ctx->FragmentProgram.Machine;
283 const struct fp_instruction *inst;
284
285 for (inst = program->Instructions; inst->Opcode != FP_OPCODE_END; inst++) {
286 switch (inst->Opcode) {
287 case FP_OPCODE_ADD:
288 {
289 GLfloat a[4], b[4], result[4];
290 fetch_vector4( &inst->SrcReg[0], machine, a );
291 fetch_vector4( &inst->SrcReg[1], machine, b );
292 result[0] = a[0] + b[0];
293 result[1] = a[1] + b[1];
294 result[2] = a[2] + b[2];
295 result[3] = a[3] + b[3];
296 store_vector4( inst, machine, result );
297 }
298 break;
299 case FP_OPCODE_COS:
300 {
301 GLfloat a[4], result[4];
302 fetch_vector1( &inst->SrcReg[0], machine, a );
303 result[0] = result[1] = result[2] = result[3] = cos(a[0]);
304 store_vector4( inst, machine, result );
305 }
306 break;
307 case FP_OPCODE_DDX: /* Partial derivative with respect to X */
308 {
309 GLfloat a[4], result[4];
310 fetch_vector4( &inst->SrcReg[0], machine, a );
311 result[0] = 0; /* XXX fix */
312 result[1] = 0;
313 result[2] = 0;
314 result[3] = 0;
315 store_vector4( inst, machine, result );
316 }
317 break;
318 case FP_OPCODE_DDY: /* Partial derivative with respect to Y */
319 {
320 GLfloat a[4], result[4];
321 fetch_vector4( &inst->SrcReg[0], machine, a );
322 result[0] = 0; /* XXX fix */
323 result[1] = 0;
324 result[2] = 0;
325 result[3] = 0;
326 store_vector4( inst, machine, result );
327 }
328 break;
329 case FP_OPCODE_DP3:
330 {
331 GLfloat a[4], b[4], result[4];
332 fetch_vector4( &inst->SrcReg[0], machine, a );
333 fetch_vector4( &inst->SrcReg[1], machine, b );
334 result[0] = result[1] = result[2] = result[3] =
335 a[0] + b[0] + a[1] * b[1] + a[2] * b[2];
336 store_vector4( inst, machine, result );
337 }
338 break;
339 case FP_OPCODE_DP4:
340 {
341 GLfloat a[4], b[4], result[4];
342 fetch_vector4( &inst->SrcReg[0], machine, a );
343 fetch_vector4( &inst->SrcReg[1], machine, b );
344 result[0] = result[1] = result[2] = result[3] =
345 a[0] + b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3];
346 store_vector4( inst, machine, result );
347 }
348 break;
349 case FP_OPCODE_DST: /* Distance vector */
350 {
351 GLfloat a[4], b[4], result[4];
352 fetch_vector4( &inst->SrcReg[0], machine, a );
353 fetch_vector4( &inst->SrcReg[1], machine, b );
354 result[0] = 1.0F;
355 result[1] = a[1] * b[1];
356 result[2] = a[2];
357 result[3] = b[3];
358 store_vector4( inst, machine, result );
359 }
360 break;
361 case FP_OPCODE_EX2: /* Exponential base 2 */
362 {
363 GLfloat a[4], result[4];
364 fetch_vector1( &inst->SrcReg[0], machine, a );
365 result[0] = result[1] = result[2] = result[3] =
366 (GLfloat) pow(2.0, a[0]);
367 store_vector4( inst, machine, result );
368 }
369 break;
370 case FP_OPCODE_FLR:
371 {
372 GLfloat a[4], result[4];
373 fetch_vector4( &inst->SrcReg[0], machine, a );
374 result[0] = FLOORF(a[0]);
375 result[1] = FLOORF(a[1]);
376 result[2] = FLOORF(a[2]);
377 result[3] = FLOORF(a[3]);
378 store_vector4( inst, machine, result );
379 }
380 break;
381 case FP_OPCODE_FRC:
382 {
383 GLfloat a[4], result[4];
384 fetch_vector4( &inst->SrcReg[0], machine, a );
385 result[0] = a[0] - FLOORF(a[0]);
386 result[1] = a[1] - FLOORF(a[1]);
387 result[2] = a[2] - FLOORF(a[2]);
388 result[3] = a[3] - FLOORF(a[3]);
389 store_vector4( inst, machine, result );
390 }
391 break;
392 case FP_OPCODE_KIL:
393 {
394 const GLuint *swizzle = inst->DstReg.CondSwizzle;
395 const GLuint condMask = inst->DstReg.CondMask;
396 if (test_cc(machine->CondCodes[swizzle[0]], condMask) ||
397 test_cc(machine->CondCodes[swizzle[1]], condMask) ||
398 test_cc(machine->CondCodes[swizzle[2]], condMask) ||
399 test_cc(machine->CondCodes[swizzle[3]], condMask))
400 return GL_FALSE;
401 }
402 break;
403 case FP_OPCODE_LG2: /* log base 2 */
404 {
405 GLfloat a[4], result[4];
406 fetch_vector1( &inst->SrcReg[0], machine, a );
407 result[0] = result[1] = result[2] = result[3]
408 = LOG2(a[0]);
409 store_vector4( inst, machine, result );
410 }
411 break;
412 case FP_OPCODE_LIT:
413 {
414 GLfloat a[4], result[4];
415 fetch_vector4( &inst->SrcReg[0], machine, a );
416 if (a[0] < 0.0F)
417 a[0] = 0.0F;
418 if (a[1] < 0.0F)
419 a[1] = 0.0F;
420 result[0] = 1.0F;
421 result[1] = a[0];
422 result[2] = (a[0] > 0.0) ? pow(2.0, a[3]) : 0.0F;
423 result[3] = 1.0F;
424 store_vector4( inst, machine, result );
425 }
426 break;
427 case FP_OPCODE_LRP:
428 {
429 GLfloat a[4], b[4], c[4], result[4];
430 fetch_vector4( &inst->SrcReg[0], machine, a );
431 fetch_vector4( &inst->SrcReg[1], machine, b );
432 fetch_vector4( &inst->SrcReg[2], machine, c );
433 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0];
434 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1];
435 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2];
436 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3];
437 store_vector4( inst, machine, result );
438 }
439 break;
440 case FP_OPCODE_MAD:
441 {
442 GLfloat a[4], b[4], c[4], result[4];
443 fetch_vector4( &inst->SrcReg[0], machine, a );
444 fetch_vector4( &inst->SrcReg[1], machine, b );
445 fetch_vector4( &inst->SrcReg[2], machine, c );
446 result[0] = a[0] * b[0] + c[0];
447 result[1] = a[1] * b[1] + c[1];
448 result[2] = a[2] * b[2] + c[2];
449 result[3] = a[3] * b[3] + c[3];
450 store_vector4( inst, machine, result );
451 }
452 break;
453 case FP_OPCODE_MAX:
454 {
455 GLfloat a[4], b[4], result[4];
456 fetch_vector4( &inst->SrcReg[0], machine, a );
457 fetch_vector4( &inst->SrcReg[1], machine, b );
458 result[0] = MAX2(a[0], b[0]);
459 result[1] = MAX2(a[1], b[1]);
460 result[2] = MAX2(a[2], b[2]);
461 result[3] = MAX2(a[3], b[3]);
462 store_vector4( inst, machine, result );
463 }
464 break;
465 case FP_OPCODE_MIN:
466 {
467 GLfloat a[4], b[4], result[4];
468 fetch_vector4( &inst->SrcReg[0], machine, a );
469 fetch_vector4( &inst->SrcReg[1], machine, b );
470 result[0] = MIN2(a[0], b[0]);
471 result[1] = MIN2(a[1], b[1]);
472 result[2] = MIN2(a[2], b[2]);
473 result[3] = MIN2(a[3], b[3]);
474 store_vector4( inst, machine, result );
475 }
476 break;
477 case FP_OPCODE_MOV:
478 {
479 GLfloat result[4];
480 fetch_vector4( &inst->SrcReg[0], machine, result );
481 store_vector4( inst, machine, result );
482 }
483 break;
484 case FP_OPCODE_MUL:
485 {
486 GLfloat a[4], b[4], result[4];
487 fetch_vector4( &inst->SrcReg[0], machine, a );
488 fetch_vector4( &inst->SrcReg[1], machine, b );
489 result[0] = a[0] * b[0];
490 result[1] = a[1] * b[1];
491 result[2] = a[2] * b[2];
492 result[3] = a[3] * b[3];
493 store_vector4( inst, machine, result );
494 }
495 break;
496 case FP_OPCODE_PK2H: /* pack two 16-bit floats */
497 /* XXX this is probably wrong */
498 {
499 GLfloat a[4], result[4];
500 const GLuint *rawBits = (const GLuint *) a;
501 GLuint *rawResult = (GLuint *) result;
502 fetch_vector4( &inst->SrcReg[0], machine, a );
503 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
504 = rawBits[0] | (rawBits[1] << 16);
505 store_vector4( inst, machine, result );
506 }
507 break;
508 case FP_OPCODE_PK2US: /* pack two GLushorts */
509 {
510 GLfloat a[4], result[4];
511 GLuint usx, usy, *rawResult = (GLuint *) result;
512 fetch_vector4( &inst->SrcReg[0], machine, a );
513 a[0] = CLAMP(a[0], 0.0F, 1.0F);
514 a[1] = CLAMP(a[0], 0.0F, 1.0F);
515 usx = IROUND(a[0] * 65535.0F);
516 usy = IROUND(a[1] * 65535.0F);
517 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
518 = usx | (usy << 16);
519 store_vector4( inst, machine, result );
520 }
521 break;
522 case FP_OPCODE_PK4B: /* pack four GLbytes */
523 {
524 GLfloat a[4], result[4];
525 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
526 fetch_vector4( &inst->SrcReg[0], machine, a );
527 a[0] = CLAMP(a[0], -128.0F / 127.0F, 1.0F);
528 a[1] = CLAMP(a[1], -128.0F / 127.0F, 1.0F);
529 a[2] = CLAMP(a[2], -128.0F / 127.0F, 1.0F);
530 a[3] = CLAMP(a[3], -128.0F / 127.0F, 1.0F);
531 ubx = IROUND(127.0F * a[0] + 128.0F);
532 uby = IROUND(127.0F * a[1] + 128.0F);
533 ubz = IROUND(127.0F * a[2] + 128.0F);
534 ubw = IROUND(127.0F * a[3] + 128.0F);
535 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
536 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
537 store_vector4( inst, machine, result );
538 }
539 break;
540 case FP_OPCODE_PK4UB: /* pack four GLubytes */
541 {
542 GLfloat a[4], result[4];
543 GLuint ubx, uby, ubz, ubw, *rawResult = (GLuint *) result;
544 fetch_vector4( &inst->SrcReg[0], machine, a );
545 a[0] = CLAMP(a[0], 0.0F, 1.0F);
546 a[1] = CLAMP(a[1], 0.0F, 1.0F);
547 a[2] = CLAMP(a[2], 0.0F, 1.0F);
548 a[3] = CLAMP(a[3], 0.0F, 1.0F);
549 ubx = IROUND(255.0F * a[0]);
550 uby = IROUND(255.0F * a[1]);
551 ubz = IROUND(255.0F * a[2]);
552 ubw = IROUND(255.0F * a[3]);
553 rawResult[0] = rawResult[1] = rawResult[2] = rawResult[3]
554 = ubx | (uby << 8) | (ubz << 16) | (ubw << 24);
555 store_vector4( inst, machine, result );
556 }
557 break;
558 case FP_OPCODE_POW:
559 {
560 GLfloat a[4], b[4], result[4];
561 fetch_vector1( &inst->SrcReg[0], machine, a );
562 fetch_vector1( &inst->SrcReg[1], machine, b );
563 result[0] = result[1] = result[2] = result[3]
564 = pow(a[0], b[0]);
565 store_vector4( inst, machine, result );
566 }
567 break;
568 case FP_OPCODE_RCP:
569 {
570 GLfloat a[4], result[4];
571 fetch_vector1( &inst->SrcReg[0], machine, a );
572 result[0] = result[1] = result[2] = result[3]
573 = 1.0F / a[0];
574 store_vector4( inst, machine, result );
575 }
576 break;
577 case FP_OPCODE_RFL:
578 {
579 GLfloat axis[4], dir[4], result[4], tmp[4];
580 fetch_vector4( &inst->SrcReg[0], machine, axis );
581 fetch_vector4( &inst->SrcReg[1], machine, dir );
582 tmp[3] = axis[0] * axis[0]
583 + axis[1] * axis[1]
584 + axis[2] * axis[2];
585 tmp[0] = (2.0F * (axis[0] * dir[0] +
586 axis[1] * dir[1] +
587 axis[2] * dir[2])) / tmp[3];
588 result[0] = tmp[0] * axis[0] - dir[0];
589 result[1] = tmp[0] * axis[1] - dir[1];
590 result[2] = tmp[0] * axis[2] - dir[2];
591 /* result[3] is never written! XXX enforce in parser! */
592 store_vector4( inst, machine, result );
593 }
594 break;
595 case FP_OPCODE_RSQ: /* 1 / sqrt() */
596 {
597 GLfloat a[4], result[4];
598 fetch_vector1( &inst->SrcReg[0], machine, a );
599 result[0] = result[1] = result[2] = result[3]
600 = 1.0F / GL_SQRT(a[0]);
601 store_vector4( inst, machine, result );
602 }
603 break;
604 case FP_OPCODE_SEQ: /* set on equal */
605 {
606 GLfloat a[4], b[4], result[4];
607 fetch_vector4( &inst->SrcReg[0], machine, a );
608 fetch_vector4( &inst->SrcReg[1], machine, b );
609 result[0] = (a[0] == b[0]) ? 1.0F : 0.0F;
610 result[1] = (a[1] == b[1]) ? 1.0F : 0.0F;
611 result[2] = (a[2] == b[2]) ? 1.0F : 0.0F;
612 result[3] = (a[3] == b[3]) ? 1.0F : 0.0F;
613 store_vector4( inst, machine, result );
614 }
615 break;
616 case FP_OPCODE_SFL: /* set false, operands ignored */
617 {
618 static const GLfloat result[4] = { 0.0F, 0.0F, 0.0F, 0.0F };
619 store_vector4( inst, machine, result );
620 }
621 break;
622 case FP_OPCODE_SGE: /* set on greater or equal */
623 {
624 GLfloat a[4], b[4], result[4];
625 fetch_vector4( &inst->SrcReg[0], machine, a );
626 fetch_vector4( &inst->SrcReg[1], machine, b );
627 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F;
628 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F;
629 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F;
630 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F;
631 store_vector4( inst, machine, result );
632 }
633 break;
634 case FP_OPCODE_SGT: /* set on greater */
635 {
636 GLfloat a[4], b[4], result[4];
637 fetch_vector4( &inst->SrcReg[0], machine, a );
638 fetch_vector4( &inst->SrcReg[1], machine, b );
639 result[0] = (a[0] > b[0]) ? 1.0F : 0.0F;
640 result[1] = (a[1] > b[1]) ? 1.0F : 0.0F;
641 result[2] = (a[2] > b[2]) ? 1.0F : 0.0F;
642 result[3] = (a[3] > b[3]) ? 1.0F : 0.0F;
643 store_vector4( inst, machine, result );
644 }
645 break;
646 case FP_OPCODE_SIN:
647 {
648 GLfloat a[4], result[4];
649 fetch_vector1( &inst->SrcReg[0], machine, a );
650 result[0] = result[1] = result[2] = result[3] = sin(a[0]);
651 store_vector4( inst, machine, result );
652 }
653 break;
654 case FP_OPCODE_SLE: /* set on less or equal */
655 {
656 GLfloat a[4], b[4], result[4];
657 fetch_vector4( &inst->SrcReg[0], machine, a );
658 fetch_vector4( &inst->SrcReg[1], machine, b );
659 result[0] = (a[0] <= b[0]) ? 1.0F : 0.0F;
660 result[1] = (a[1] <= b[1]) ? 1.0F : 0.0F;
661 result[2] = (a[2] <= b[2]) ? 1.0F : 0.0F;
662 result[3] = (a[3] <= b[3]) ? 1.0F : 0.0F;
663 store_vector4( inst, machine, result );
664 }
665 break;
666 case FP_OPCODE_SLT: /* set on less */
667 {
668 GLfloat a[4], b[4], result[4];
669 fetch_vector4( &inst->SrcReg[0], machine, a );
670 fetch_vector4( &inst->SrcReg[1], machine, b );
671 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F;
672 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F;
673 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F;
674 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F;
675 store_vector4( inst, machine, result );
676 }
677 break;
678 case FP_OPCODE_SNE: /* set on not equal */
679 {
680 GLfloat a[4], b[4], result[4];
681 fetch_vector4( &inst->SrcReg[0], machine, a );
682 fetch_vector4( &inst->SrcReg[1], machine, b );
683 result[0] = (a[0] != b[0]) ? 1.0F : 0.0F;
684 result[1] = (a[1] != b[1]) ? 1.0F : 0.0F;
685 result[2] = (a[2] != b[2]) ? 1.0F : 0.0F;
686 result[3] = (a[3] != b[3]) ? 1.0F : 0.0F;
687 store_vector4( inst, machine, result );
688 }
689 break;
690 case FP_OPCODE_STR: /* set true, operands ignored */
691 {
692 static const GLfloat result[4] = { 1.0F, 1.0F, 1.0F, 1.0F };
693 store_vector4( inst, machine, result );
694 }
695 break;
696 case FP_OPCODE_SUB:
697 {
698 GLfloat a[4], b[4], result[4];
699 fetch_vector4( &inst->SrcReg[0], machine, a );
700 fetch_vector4( &inst->SrcReg[1], machine, b );
701 result[0] = a[0] - b[0];
702 result[1] = a[1] - b[1];
703 result[2] = a[2] - b[2];
704 result[3] = a[3] - b[3];
705 store_vector4( inst, machine, result );
706 }
707 break;
708 case FP_OPCODE_TEX:
709 /* Texel lookup */
710 {
711 GLfloat texcoord[4], color[4];
712 fetch_vector4( &inst->SrcReg[0], machine, texcoord );
713 fetch_texel( ctx, texcoord, inst->TexSrcUnit,
714 inst->TexSrcIndex, color );
715 store_vector4( inst, machine, color );
716 }
717 break;
718 case FP_OPCODE_TXD:
719 /* Texture lookup w/ partial derivatives for LOD */
720 {
721 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4];
722 fetch_vector4( &inst->SrcReg[0], machine, texcoord );
723 fetch_vector4( &inst->SrcReg[1], machine, dtdx );
724 fetch_vector4( &inst->SrcReg[2], machine, dtdy );
725 fetch_texel_deriv( ctx, texcoord, dtdx, dtdy, inst->TexSrcUnit,
726 inst->TexSrcIndex, color );
727 store_vector4( inst, machine, color );
728 }
729 break;
730 case FP_OPCODE_TXP:
731 /* Texture lookup w/ perspective divide */
732 {
733 GLfloat texcoord[4], color[4];
734 fetch_vector4( &inst->SrcReg[0], machine, texcoord );
735 texcoord[0] /= texcoord[3];
736 texcoord[1] /= texcoord[3];
737 texcoord[2] /= texcoord[3];
738 fetch_texel( ctx, texcoord, inst->TexSrcUnit,
739 inst->TexSrcIndex, color );
740 store_vector4( inst, machine, color );
741 }
742 break;
743 case FP_OPCODE_UP2H: /* unpack two 16-bit floats */
744 /* XXX this is probably wrong */
745 {
746 GLfloat a[4], result[4];
747 const GLuint *rawBits = (const GLuint *) a;
748 GLuint *rawResult = (GLuint *) result;
749 fetch_vector1( &inst->SrcReg[0], machine, a );
750 rawResult[0] = rawBits[0] & 0xffff;
751 rawResult[1] = (rawBits[0] >> 16) & 0xffff;
752 rawResult[2] = rawBits[0] & 0xffff;
753 rawResult[3] = (rawBits[0] >> 16) & 0xffff;
754 store_vector4( inst, machine, result );
755 }
756 break;
757 case FP_OPCODE_UP2US: /* unpack two GLushorts */
758 {
759 GLfloat a[4], result[4];
760 const GLuint *rawBits = (const GLuint *) a;
761 fetch_vector1( &inst->SrcReg[0], machine, a );
762 result[0] = (GLfloat) ((rawBits[0] >> 0) & 0xffff) / 65535.0F;
763 result[1] = (GLfloat) ((rawBits[0] >> 16) & 0xffff) / 65535.0F;
764 result[2] = result[0];
765 result[3] = result[1];
766 store_vector4( inst, machine, result );
767 }
768 break;
769 case FP_OPCODE_UP4B: /* unpack four GLbytes */
770 {
771 GLfloat a[4], result[4];
772 const GLuint *rawBits = (const GLuint *) a;
773 fetch_vector1( &inst->SrcReg[0], machine, a );
774 result[0] = (((rawBits[0] >> 0) & 0xff) - 128) / 127.0F;
775 result[0] = (((rawBits[0] >> 8) & 0xff) - 128) / 127.0F;
776 result[0] = (((rawBits[0] >> 16) & 0xff) - 128) / 127.0F;
777 result[0] = (((rawBits[0] >> 24) & 0xff) - 128) / 127.0F;
778 store_vector4( inst, machine, result );
779 }
780 break;
781 case FP_OPCODE_UP4UB: /* unpack four GLubytes */
782 {
783 GLfloat a[4], result[4];
784 const GLuint *rawBits = (const GLuint *) a;
785 fetch_vector1( &inst->SrcReg[0], machine, a );
786 result[0] = ((rawBits[0] >> 0) & 0xff) / 255.0F;
787 result[0] = ((rawBits[0] >> 8) & 0xff) / 255.0F;
788 result[0] = ((rawBits[0] >> 16) & 0xff) / 255.0F;
789 result[0] = ((rawBits[0] >> 24) & 0xff) / 255.0F;
790 store_vector4( inst, machine, result );
791 }
792 break;
793 case FP_OPCODE_X2D: /* 2-D matrix transform */
794 {
795 GLfloat a[4], b[4], c[4], result[4];
796 fetch_vector4( &inst->SrcReg[0], machine, a );
797 fetch_vector4( &inst->SrcReg[1], machine, b );
798 fetch_vector4( &inst->SrcReg[2], machine, c );
799 result[0] = a[0] + b[0] * c[0] + b[1] * c[1];
800 result[1] = a[1] + b[0] * c[2] + b[1] * c[3];
801 result[2] = a[2] + b[0] * c[0] + b[1] * c[1];
802 result[3] = a[3] + b[0] * c[2] + b[1] * c[3];
803 store_vector4( inst, machine, result );
804 }
805 break;
806 default:
807 _mesa_problem(ctx, "Bad opcode in _mesa_exec_fragment_program");
808 return GL_TRUE; /* return value doesn't matter */
809 }
810 }
811 return GL_TRUE;
812 }
813
814
815
816 void
817 _swrast_exec_nv_fragment_program( GLcontext *ctx, struct sw_span *span )
818 {
819 GLuint i;
820
821 for (i = 0; i < span->end; i++) {
822 if (span->array->mask[i]) {
823 GLfloat *wpos = ctx->FragmentProgram.Machine.Registers[0];
824 GLfloat *col0 = ctx->FragmentProgram.Machine.Registers[1];
825 GLfloat *col1 = ctx->FragmentProgram.Machine.Registers[2];
826 GLfloat *fogc = ctx->FragmentProgram.Machine.Registers[3];
827 const GLfloat *colOut = ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START];
828 GLuint j;
829
830 /* Clear temporary registers XXX use memzero() */
831 _mesa_bzero(ctx->FragmentProgram.Machine.Registers +FP_TEMP_REG_START,
832 MAX_NV_FRAGMENT_PROGRAM_TEMPS * 4 * sizeof(GLfloat));
833
834 /*
835 * Load input registers - yes this is all very inefficient for now.
836 */
837 wpos[0] = span->x + i;
838 wpos[1] = span->y + i;
839 wpos[2] = (GLfloat) span->array->z[i] / ctx->DepthMaxF;
840 wpos[3] = 1.0; /* XXX should be 1/w */
841
842 col0[0] = CHAN_TO_FLOAT(span->array->rgba[i][RCOMP]);
843 col0[1] = CHAN_TO_FLOAT(span->array->rgba[i][GCOMP]);
844 col0[2] = CHAN_TO_FLOAT(span->array->rgba[i][BCOMP]);
845 col0[3] = CHAN_TO_FLOAT(span->array->rgba[i][ACOMP]);
846
847 col1[0] = CHAN_TO_FLOAT(span->array->spec[i][RCOMP]);
848 col1[1] = CHAN_TO_FLOAT(span->array->spec[i][GCOMP]);
849 col1[2] = CHAN_TO_FLOAT(span->array->spec[i][BCOMP]);
850 col1[3] = CHAN_TO_FLOAT(span->array->spec[i][ACOMP]);
851
852 fogc[0] = span->array->fog[i];
853 fogc[1] = 0.0F;
854 fogc[2] = 0.0F;
855 fogc[3] = 0.0F;
856
857 for (j = 0; j < ctx->Const.MaxTextureCoordUnits; j++) {
858 if (ctx->Texture.Unit[j]._ReallyEnabled) {
859 COPY_4V(ctx->FragmentProgram.Machine.Registers[4 + j],
860 span->array->texcoords[j][i]);
861 }
862 else {
863 COPY_4V(ctx->FragmentProgram.Machine.Registers[4 + j],
864 ctx->Current.Attrib[VERT_ATTRIB_TEX0 + j]);
865 }
866 }
867
868 if (!execute_program(ctx, ctx->FragmentProgram.Current))
869 span->array->mask[i] = GL_FALSE; /* killed fragment */
870
871 /* Store output registers */
872 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
873 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
874 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
875 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
876 /* depth value */
877 if (ctx->FragmentProgram.Current->OutputsWritten & 2)
878 span->array->z[i] = IROUND(ctx->FragmentProgram.Machine.Registers[FP_OUTPUT_REG_START + 2][0] * ctx->DepthMaxF);
879 }
880 }
881 }
882