fix bug with ATI_fragment_shader in swrast (need to copy all 4 values due to later...
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 *
3 * Copyright (C) 2004 David Airlie All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "glheader.h"
24 #include "colormac.h"
25 #include "context.h"
26 #include "atifragshader.h"
27 #include "macros.h"
28 #include "program.h"
29
30 #include "s_atifragshader.h"
31 #include "s_nvfragprog.h"
32 #include "s_span.h"
33 #include "s_texture.h"
34
35 /**
36 * Fetch a texel.
37 */
38 static void
39 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
40 GLuint unit, GLfloat color[4])
41 {
42 GLchan rgba[4];
43 SWcontext *swrast = SWRAST_CONTEXT(ctx);
44
45 /* XXX use a float-valued TextureSample routine here!!! */
46 swrast->TextureSample[unit] (ctx, unit, ctx->Texture.Unit[unit]._Current,
47 1, (const GLfloat(*)[4]) texcoord,
48 &lambda, &rgba);
49 color[0] = CHAN_TO_FLOAT(rgba[0]);
50 color[1] = CHAN_TO_FLOAT(rgba[1]);
51 color[2] = CHAN_TO_FLOAT(rgba[2]);
52 color[3] = CHAN_TO_FLOAT(rgba[3]);
53 }
54
55 static void
56 apply_swizzle(GLfloat values[4], GLuint swizzle)
57 {
58 GLfloat s, t, r, q;
59
60 s = values[0];
61 t = values[1];
62 r = values[2];
63 q = values[3];
64
65 switch (swizzle) {
66 case GL_SWIZZLE_STR_ATI:
67 values[0] = s;
68 values[1] = t;
69 values[2] = r;
70 break;
71 case GL_SWIZZLE_STQ_ATI:
72 values[0] = s;
73 values[1] = t;
74 values[2] = q;
75 break;
76 case GL_SWIZZLE_STR_DR_ATI:
77 values[0] = s / r;
78 values[1] = t / r;
79 values[2] = 1 / r;
80 break;
81 case GL_SWIZZLE_STQ_DQ_ATI:
82 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
83 if (q == 0.0F) q = 0.000000001;
84 values[0] = s / q;
85 values[1] = t / q;
86 values[2] = 1 / q;
87 break;
88 }
89 values[3] = 0.0;
90 }
91
92 static void
93 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
94 {
95 GLint i;
96 GLint start, end;
97 if (!rep)
98 return;
99
100 start = optype ? 3 : 0;
101 end = optype ? 4 : 3;
102
103 for (i = start; i < end; i++) {
104 switch (rep) {
105 case GL_RED:
106 val[i] = val[0];
107 break;
108 case GL_GREEN:
109 val[i] = val[1];
110 break;
111 case GL_BLUE:
112 val[i] = val[2];
113 break;
114 case GL_ALPHA:
115 val[i] = val[3];
116 break;
117 }
118 }
119 }
120
121 static void
122 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
123 {
124 GLint i;
125 GLint start, end;
126
127 if (!mod)
128 return;
129
130 start = optype ? 3 : 0;
131 end = optype ? 4 : 3;
132
133 for (i = start; i < end; i++) {
134 if (mod & GL_COMP_BIT_ATI)
135 val[i] = 1 - val[i];
136
137 if (mod & GL_BIAS_BIT_ATI)
138 val[i] = val[i] - 0.5;
139
140 if (mod & GL_2X_BIT_ATI)
141 val[i] = 2 * val[i];
142
143 if (mod & GL_NEGATE_BIT_ATI)
144 val[i] = -val[i];
145 }
146 }
147
148 static void
149 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
150 {
151 GLint i;
152 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
153 GLint start, end;
154
155 mod &= ~GL_SATURATE_BIT_ATI;
156
157 start = optype ? 3 : 0;
158 end = optype ? 4 : 3;
159
160 for (i = start; i < end; i++) {
161 switch (mod) {
162 case GL_2X_BIT_ATI:
163 val[i] = 2 * val[i];
164 break;
165 case GL_4X_BIT_ATI:
166 val[i] = 4 * val[i];
167 break;
168 case GL_8X_BIT_ATI:
169 val[i] = 8 * val[i];
170 break;
171 case GL_HALF_BIT_ATI:
172 val[i] = val[i] * 0.5;
173 break;
174 case GL_QUARTER_BIT_ATI:
175 val[i] = val[i] * 0.25;
176 break;
177 case GL_EIGHTH_BIT_ATI:
178 val[i] = val[i] * 0.125;
179 break;
180 }
181
182 if (has_sat) {
183 if (val[i] < 0.0)
184 val[i] = 0;
185 else if (val[i] > 1.0)
186 val[i] = 1.0;
187 }
188 else {
189 if (val[i] < -8.0)
190 val[i] = -8.0;
191 else if (val[i] > 8.0)
192 val[i] = 8.0;
193 }
194 }
195 }
196
197
198 static void
199 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
200 GLfloat * dst)
201 {
202 GLint i;
203 apply_dst_mod(optype, mod, src);
204
205 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
206 if (mask) {
207 if (mask & GL_RED_BIT_ATI)
208 dst[0] = src[0];
209
210 if (mask & GL_GREEN_BIT_ATI)
211 dst[1] = src[1];
212
213 if (mask & GL_BLUE_BIT_ATI)
214 dst[2] = src[2];
215 }
216 else {
217 for (i = 0; i < 3; i++)
218 dst[i] = src[i];
219 }
220 }
221 else
222 dst[3] = src[3];
223 }
224
225 static void
226 finish_pass(struct atifs_machine *machine)
227 {
228 GLint i;
229
230 for (i = 0; i < 6; i++) {
231 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
232 }
233 }
234
/**
 * Documentation for execute_shader(), which is defined further below
 * (this comment does not describe the opcode table that follows it).
 *
 * Execute the given fragment shader for one fragment of a span.
 * NOTE: we do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param shader - the fragment shader to execute
 * \param maxInst - max number of instructions to execute
 * \param machine - machine state (register file)
 * \param span - the span the fragment belongs to
 * \param column - index of the fragment within the span
 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL.
 */
245
246 struct ati_fs_opcode_st ati_fs_opcodes[] = {
247 {GL_ADD_ATI, 2},
248 {GL_SUB_ATI, 2},
249 {GL_MUL_ATI, 2},
250 {GL_MAD_ATI, 3},
251 {GL_LERP_ATI, 3},
252 {GL_MOV_ATI, 1},
253 {GL_CND_ATI, 3},
254 {GL_CND0_ATI, 3},
255 {GL_DOT2_ADD_ATI, 3},
256 {GL_DOT3_ATI, 2},
257 {GL_DOT4_ATI, 2}
258 };
259
260
261
262 static void
263 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
264 const struct sw_span *span, GLuint column, GLuint idx)
265 {
266 GLuint swizzle = texinst->swizzle;
267 GLuint pass_tex = texinst->src;
268
269 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
270 pass_tex -= GL_TEXTURE0_ARB;
271 COPY_4V(machine->Registers[idx],
272 span->array->texcoords[pass_tex][column]);
273 }
274 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
275 pass_tex -= GL_REG_0_ATI;
276 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
277 }
278 apply_swizzle(machine->Registers[idx], swizzle);
279
280 }
281
282 static void
283 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
284 struct atifs_setupinst *texinst, const struct sw_span *span,
285 GLuint column, GLuint idx)
286 {
287 /* sample from unit idx using texinst->src as coords */
288 GLuint swizzle = texinst->swizzle;
289 GLuint coord_source = texinst->src;
290 GLfloat tex_coords[4];
291
292 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
293 coord_source -= GL_TEXTURE0_ARB;
294 COPY_4V(tex_coords, span->array->texcoords[coord_source][column]);
295 }
296 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
297 coord_source -= GL_REG_0_ATI;
298 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
299 }
300 apply_swizzle(tex_coords, swizzle);
301 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
302 }
303
/*
 * Copy the 4-component value x into source-operand slot i of the color
 * (optype 0) or alpha (optype 1) operand set.
 * Expects an array named `src` to be in scope where the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x)           \
   do {                                       \
      COPY_4V(src[(optype)][(i)], (x));       \
   } while (0)
308
309 static GLboolean
310 execute_shader(GLcontext * ctx,
311 const struct ati_fragment_shader *shader, GLuint maxInst,
312 struct atifs_machine *machine, const struct sw_span *span,
313 GLuint column)
314 {
315 GLuint pc;
316 struct atifs_instruction *inst;
317 struct atifs_setupinst *texinst;
318 GLint optype;
319 GLint i, j, pass;
320 GLint dstreg;
321 GLfloat src[2][3][4];
322 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
323 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
324 GLfloat dst[2][4], *dstp;
325
326 for (pass = 0; pass < shader->NumPasses; pass++) {
327 if (pass > 0)
328 finish_pass(machine);
329 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
330 texinst = &shader->SetupInst[pass][j];
331 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
332 handle_pass_op(machine, texinst, span, column, j);
333 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
334 handle_sample_op(ctx, machine, texinst, span, column, j);
335 }
336
337 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
338 inst = &shader->Instructions[pass][pc];
339
340 /* setup the source registers for color and alpha ops */
341 for (optype = 0; optype < 2; optype++) {
342 for (i = 0; i < inst->ArgCount[optype]; i++) {
343 GLint index = inst->SrcReg[optype][i].Index;
344
345 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
346 SETUP_SRC_REG(optype, i,
347 machine->Registers[index - GL_REG_0_ATI]);
348 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
349 if (shader->localConstDef & (1 << (index - GL_CON_0_ATI))) {
350 SETUP_SRC_REG(optype, i,
351 shader->Constants[index - GL_CON_0_ATI]);
352 } else {
353 SETUP_SRC_REG(optype, i,
354 ctx->ATIFragmentShader.globalConstants[index - GL_CON_0_ATI]);
355 }
356 }
357 else if (index == GL_ONE)
358 SETUP_SRC_REG(optype, i, ones);
359 else if (index == GL_ZERO)
360 SETUP_SRC_REG(optype, i, zeros);
361 else if (index == GL_PRIMARY_COLOR_EXT)
362 SETUP_SRC_REG(optype, i,
363 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
364 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
365 SETUP_SRC_REG(optype, i,
366 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
367
368 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
369 src[optype][i]);
370 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
371 src[optype][i]);
372 }
373 }
374
375 /* Execute the operations - color then alpha */
376 for (optype = 0; optype < 2; optype++) {
377 if (inst->Opcode[optype]) {
378 switch (inst->Opcode[optype]) {
379 case GL_ADD_ATI:
380 if (!optype)
381 for (i = 0; i < 3; i++) {
382 dst[optype][i] =
383 src[optype][0][i] + src[optype][1][i];
384 }
385 else
386 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
387 break;
388 case GL_SUB_ATI:
389 if (!optype)
390 for (i = 0; i < 3; i++) {
391 dst[optype][i] =
392 src[optype][0][i] - src[optype][1][i];
393 }
394 else
395 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
396 break;
397 case GL_MUL_ATI:
398 if (!optype)
399 for (i = 0; i < 3; i++) {
400 dst[optype][i] =
401 src[optype][0][i] * src[optype][1][i];
402 }
403 else
404 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
405 break;
406 case GL_MAD_ATI:
407 if (!optype)
408 for (i = 0; i < 3; i++) {
409 dst[optype][i] =
410 src[optype][0][i] * src[optype][1][i] +
411 src[optype][2][i];
412 }
413 else
414 dst[optype][3] =
415 src[optype][0][3] * src[optype][1][3] +
416 src[optype][2][3];
417 break;
418 case GL_LERP_ATI:
419 if (!optype)
420 for (i = 0; i < 3; i++) {
421 dst[optype][i] =
422 src[optype][0][i] * src[optype][1][i] + (1 -
423 src
424 [optype]
425 [0][i]) *
426 src[optype][2][i];
427 }
428 else
429 dst[optype][3] =
430 src[optype][0][3] * src[optype][1][3] + (1 -
431 src[optype]
432 [0][3]) *
433 src[optype][2][3];
434 break;
435
436 case GL_MOV_ATI:
437 if (!optype)
438 for (i = 0; i < 3; i++) {
439 dst[optype][i] = src[optype][0][i];
440 }
441 else
442 dst[optype][3] = src[optype][0][3];
443 break;
444 case GL_CND_ATI:
445 if (!optype) {
446 for (i = 0; i < 3; i++) {
447 dst[optype][i] =
448 (src[optype][2][i] >
449 0.5) ? src[optype][0][i] : src[optype][1][i];
450 }
451 }
452 else {
453 dst[optype][3] =
454 (src[optype][2][3] >
455 0.5) ? src[optype][0][3] : src[optype][1][3];
456 }
457 break;
458
459 case GL_CND0_ATI:
460 if (!optype)
461 for (i = 0; i < 3; i++) {
462 dst[optype][i] =
463 (src[optype][2][i] >=
464 0) ? src[optype][0][i] : src[optype][1][i];
465 }
466 else {
467 dst[optype][3] =
468 (src[optype][2][3] >=
469 0) ? src[optype][0][3] : src[optype][1][3];
470 }
471 break;
472 case GL_DOT2_ADD_ATI:
473 {
474 GLfloat result;
475
476 /* DOT 2 always uses the source from the color op */
477 result = src[0][0][0] * src[0][1][0] +
478 src[0][0][1] * src[0][1][1] + src[0][2][2];
479 if (!optype) {
480 for (i = 0; i < 3; i++) {
481 dst[optype][i] = result;
482 }
483 }
484 else
485 dst[optype][3] = result;
486
487 }
488 break;
489 case GL_DOT3_ATI:
490 {
491 GLfloat result;
492
493 /* DOT 3 always uses the source from the color op */
494 result = src[0][0][0] * src[0][1][0] +
495 src[0][0][1] * src[0][1][1] +
496 src[0][0][2] * src[0][1][2];
497
498 if (!optype) {
499 for (i = 0; i < 3; i++) {
500 dst[optype][i] = result;
501 }
502 }
503 else
504 dst[optype][3] = result;
505 }
506 break;
507 case GL_DOT4_ATI:
508 {
509 GLfloat result;
510
511 /* DOT 4 always uses the source from the color op */
512 result = src[optype][0][0] * src[0][1][0] +
513 src[0][0][1] * src[0][1][1] +
514 src[0][0][2] * src[0][1][2] +
515 src[0][0][3] * src[0][1][3];
516 if (!optype) {
517 for (i = 0; i < 3; i++) {
518 dst[optype][i] = result;
519 }
520 }
521 else
522 dst[optype][3] = result;
523 }
524 break;
525
526 }
527 }
528 }
529
530 /* write out the destination registers */
531 for (optype = 0; optype < 2; optype++) {
532 if (inst->Opcode[optype]) {
533 dstreg = inst->DstReg[optype].Index;
534 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
535
536 write_dst_addr(optype, inst->DstReg[optype].dstMod,
537 inst->DstReg[optype].dstMask, dst[optype],
538 dstp);
539 }
540 }
541 }
542 }
543 return GL_TRUE;
544 }
545
546 static void
547 init_machine(GLcontext * ctx, struct atifs_machine *machine,
548 const struct ati_fragment_shader *shader,
549 const struct sw_span *span, GLuint col)
550 {
551 GLint i, j;
552
553 for (i = 0; i < 6; i++) {
554 for (j = 0; j < 4; j++)
555 ctx->ATIFragmentShader.Machine.Registers[i][j] = 0.0;
556
557 }
558
559 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][0] =
560 CHAN_TO_FLOAT(span->array->rgba[col][0]);
561 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][1] =
562 CHAN_TO_FLOAT(span->array->rgba[col][1]);
563 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][2] =
564 CHAN_TO_FLOAT(span->array->rgba[col][2]);
565 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][3] =
566 CHAN_TO_FLOAT(span->array->rgba[col][3]);
567
568 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][0] =
569 CHAN_TO_FLOAT(span->array->spec[col][0]);
570 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][1] =
571 CHAN_TO_FLOAT(span->array->spec[col][1]);
572 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][2] =
573 CHAN_TO_FLOAT(span->array->spec[col][2]);
574 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][3] =
575 CHAN_TO_FLOAT(span->array->spec[col][3]);
576
577 ctx->ATIFragmentShader.Machine.pass = 0;
578 }
579
580
581
582 /**
583 * Execute the current fragment program, operating on the given span.
584 */
585 void
586 _swrast_exec_fragment_shader(GLcontext * ctx, struct sw_span *span)
587 {
588 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
589 GLuint i;
590
591 ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
592
593 for (i = 0; i < span->end; i++) {
594 if (span->array->mask[i]) {
595 init_machine(ctx, &ctx->ATIFragmentShader.Machine,
596 ctx->ATIFragmentShader.Current, span, i);
597
598 if (execute_shader(ctx, shader, ~0,
599 &ctx->ATIFragmentShader.Machine, span, i)) {
600 span->array->mask[i] = GL_FALSE;
601 }
602
603 {
604 const GLfloat *colOut =
605 ctx->ATIFragmentShader.Machine.Registers[0];
606
607 /*fprintf(stderr,"outputs %f %f %f %f\n", colOut[0], colOut[1], colOut[2], colOut[3]); */
608 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
609 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
610 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
611 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
612 }
613 }
614
615 }
616
617
618 ctx->_CurrentProgram = 0;
619
620 }