Merge branch 'master' of git+ssh://znh@git.freedesktop.org/git/mesa/mesa into 965...
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 *
3 * Copyright (C) 2004 David Airlie All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "glheader.h"
24 #include "colormac.h"
25 #include "context.h"
26 #include "macros.h"
27 #include "shader/program.h"
28 #include "shader/atifragshader.h"
29 #include "swrast/s_atifragshader.h"
30
31
32 /**
33 * State for executing ATI fragment shader.
34 */
35 struct atifs_machine
36 {
37 GLfloat Registers[6][4]; /** six temporary registers */
38 GLfloat PrevPassRegisters[6][4];
39 GLfloat Inputs[2][4]; /** Primary, secondary input colors */
40 };
41
42
43
44 /**
45 * Fetch a texel.
46 */
47 static void
48 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
49 GLuint unit, GLfloat color[4])
50 {
51 GLchan rgba[4];
52 SWcontext *swrast = SWRAST_CONTEXT(ctx);
53
54 /* XXX use a float-valued TextureSample routine here!!! */
55 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
56 1, (const GLfloat(*)[4]) texcoord,
57 &lambda, &rgba);
58 color[0] = CHAN_TO_FLOAT(rgba[0]);
59 color[1] = CHAN_TO_FLOAT(rgba[1]);
60 color[2] = CHAN_TO_FLOAT(rgba[2]);
61 color[3] = CHAN_TO_FLOAT(rgba[3]);
62 }
63
64 static void
65 apply_swizzle(GLfloat values[4], GLuint swizzle)
66 {
67 GLfloat s, t, r, q;
68
69 s = values[0];
70 t = values[1];
71 r = values[2];
72 q = values[3];
73
74 switch (swizzle) {
75 case GL_SWIZZLE_STR_ATI:
76 values[0] = s;
77 values[1] = t;
78 values[2] = r;
79 break;
80 case GL_SWIZZLE_STQ_ATI:
81 values[0] = s;
82 values[1] = t;
83 values[2] = q;
84 break;
85 case GL_SWIZZLE_STR_DR_ATI:
86 values[0] = s / r;
87 values[1] = t / r;
88 values[2] = 1 / r;
89 break;
90 case GL_SWIZZLE_STQ_DQ_ATI:
91 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
92 if (q == 0.0F) q = 0.000000001;
93 values[0] = s / q;
94 values[1] = t / q;
95 values[2] = 1 / q;
96 break;
97 }
98 values[3] = 0.0;
99 }
100
101 static void
102 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
103 {
104 GLint i;
105 GLint start, end;
106 if (!rep)
107 return;
108
109 start = optype ? 3 : 0;
110 end = 4;
111
112 for (i = start; i < end; i++) {
113 switch (rep) {
114 case GL_RED:
115 val[i] = val[0];
116 break;
117 case GL_GREEN:
118 val[i] = val[1];
119 break;
120 case GL_BLUE:
121 val[i] = val[2];
122 break;
123 case GL_ALPHA:
124 val[i] = val[3];
125 break;
126 }
127 }
128 }
129
130 static void
131 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
132 {
133 GLint i;
134 GLint start, end;
135
136 if (!mod)
137 return;
138
139 start = optype ? 3 : 0;
140 end = 4;
141
142 for (i = start; i < end; i++) {
143 if (mod & GL_COMP_BIT_ATI)
144 val[i] = 1 - val[i];
145
146 if (mod & GL_BIAS_BIT_ATI)
147 val[i] = val[i] - 0.5;
148
149 if (mod & GL_2X_BIT_ATI)
150 val[i] = 2 * val[i];
151
152 if (mod & GL_NEGATE_BIT_ATI)
153 val[i] = -val[i];
154 }
155 }
156
157 static void
158 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
159 {
160 GLint i;
161 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
162 GLint start, end;
163
164 mod &= ~GL_SATURATE_BIT_ATI;
165
166 start = optype ? 3 : 0;
167 end = optype ? 4 : 3;
168
169 for (i = start; i < end; i++) {
170 switch (mod) {
171 case GL_2X_BIT_ATI:
172 val[i] = 2 * val[i];
173 break;
174 case GL_4X_BIT_ATI:
175 val[i] = 4 * val[i];
176 break;
177 case GL_8X_BIT_ATI:
178 val[i] = 8 * val[i];
179 break;
180 case GL_HALF_BIT_ATI:
181 val[i] = val[i] * 0.5;
182 break;
183 case GL_QUARTER_BIT_ATI:
184 val[i] = val[i] * 0.25;
185 break;
186 case GL_EIGHTH_BIT_ATI:
187 val[i] = val[i] * 0.125;
188 break;
189 }
190
191 if (has_sat) {
192 if (val[i] < 0.0)
193 val[i] = 0;
194 else if (val[i] > 1.0)
195 val[i] = 1.0;
196 }
197 else {
198 if (val[i] < -8.0)
199 val[i] = -8.0;
200 else if (val[i] > 8.0)
201 val[i] = 8.0;
202 }
203 }
204 }
205
206
207 static void
208 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
209 GLfloat * dst)
210 {
211 GLint i;
212 apply_dst_mod(optype, mod, src);
213
214 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
215 if (mask) {
216 if (mask & GL_RED_BIT_ATI)
217 dst[0] = src[0];
218
219 if (mask & GL_GREEN_BIT_ATI)
220 dst[1] = src[1];
221
222 if (mask & GL_BLUE_BIT_ATI)
223 dst[2] = src[2];
224 }
225 else {
226 for (i = 0; i < 3; i++)
227 dst[i] = src[i];
228 }
229 }
230 else
231 dst[3] = src[3];
232 }
233
234 static void
235 finish_pass(struct atifs_machine *machine)
236 {
237 GLint i;
238
239 for (i = 0; i < 6; i++) {
240 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
241 }
242 }
243
244 struct ati_fs_opcode_st ati_fs_opcodes[] = {
245 {GL_ADD_ATI, 2},
246 {GL_SUB_ATI, 2},
247 {GL_MUL_ATI, 2},
248 {GL_MAD_ATI, 3},
249 {GL_LERP_ATI, 3},
250 {GL_MOV_ATI, 1},
251 {GL_CND_ATI, 3},
252 {GL_CND0_ATI, 3},
253 {GL_DOT2_ADD_ATI, 3},
254 {GL_DOT3_ATI, 2},
255 {GL_DOT4_ATI, 2}
256 };
257
258
259
260 static void
261 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
262 const SWspan *span, GLuint column, GLuint idx)
263 {
264 GLuint swizzle = texinst->swizzle;
265 GLuint pass_tex = texinst->src;
266
267 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
268 pass_tex -= GL_TEXTURE0_ARB;
269 COPY_4V(machine->Registers[idx],
270 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
271 }
272 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
273 pass_tex -= GL_REG_0_ATI;
274 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
275 }
276 apply_swizzle(machine->Registers[idx], swizzle);
277
278 }
279
280 static void
281 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
282 struct atifs_setupinst *texinst, const SWspan *span,
283 GLuint column, GLuint idx)
284 {
285 /* sample from unit idx using texinst->src as coords */
286 GLuint swizzle = texinst->swizzle;
287 GLuint coord_source = texinst->src;
288 GLfloat tex_coords[4];
289
290 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
291 coord_source -= GL_TEXTURE0_ARB;
292 COPY_4V(tex_coords,
293 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
294 }
295 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
296 coord_source -= GL_REG_0_ATI;
297 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
298 }
299 apply_swizzle(tex_coords, swizzle);
300 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
301 }
302
/**
 * Copy the four-component vector \p vec into source operand slot
 * src[op][arg].  An array named `src` must be in scope wherever this macro
 * is expanded.
 */
#define SETUP_SRC_REG(op, arg, vec)       \
do {                                      \
   COPY_4V(src[(op)][(arg)], (vec));      \
} while (0)
307
308
309
310 /**
311 * Execute the given fragment shader.
312 * NOTE: we do everything in single-precision floating point
313 * \param ctx - rendering context
314 * \param shader - the shader to execute
315 * \param machine - virtual machine state
316 * \param span - the SWspan we're operating on
317 * \param column - which pixel [i] we're operating on in the span
318 */
319 static void
320 execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
321 struct atifs_machine *machine, const SWspan *span,
322 GLuint column)
323 {
324 GLuint pc;
325 struct atifs_instruction *inst;
326 struct atifs_setupinst *texinst;
327 GLint optype;
328 GLuint i;
329 GLint j, pass;
330 GLint dstreg;
331 GLfloat src[2][3][4];
332 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
333 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
334 GLfloat dst[2][4], *dstp;
335
336 for (pass = 0; pass < shader->NumPasses; pass++) {
337 if (pass > 0)
338 finish_pass(machine);
339 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
340 texinst = &shader->SetupInst[pass][j];
341 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
342 handle_pass_op(machine, texinst, span, column, j);
343 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
344 handle_sample_op(ctx, machine, texinst, span, column, j);
345 }
346
347 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
348 inst = &shader->Instructions[pass][pc];
349
350 /* setup the source registers for color and alpha ops */
351 for (optype = 0; optype < 2; optype++) {
352 for (i = 0; i < inst->ArgCount[optype]; i++) {
353 GLint index = inst->SrcReg[optype][i].Index;
354
355 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
356 SETUP_SRC_REG(optype, i,
357 machine->Registers[index - GL_REG_0_ATI]);
358 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
359 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
360 SETUP_SRC_REG(optype, i,
361 shader->Constants[index - GL_CON_0_ATI]);
362 } else {
363 SETUP_SRC_REG(optype, i,
364 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
365 }
366 }
367 else if (index == GL_ONE)
368 SETUP_SRC_REG(optype, i, ones);
369 else if (index == GL_ZERO)
370 SETUP_SRC_REG(optype, i, zeros);
371 else if (index == GL_PRIMARY_COLOR_EXT)
372 SETUP_SRC_REG(optype, i,
373 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
374 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
375 SETUP_SRC_REG(optype, i,
376 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
377
378 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
379 src[optype][i]);
380 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
381 src[optype][i]);
382 }
383 }
384
385 /* Execute the operations - color then alpha */
386 for (optype = 0; optype < 2; optype++) {
387 if (inst->Opcode[optype]) {
388 switch (inst->Opcode[optype]) {
389 case GL_ADD_ATI:
390 if (!optype)
391 for (i = 0; i < 3; i++) {
392 dst[optype][i] =
393 src[optype][0][i] + src[optype][1][i];
394 }
395 else
396 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
397 break;
398 case GL_SUB_ATI:
399 if (!optype)
400 for (i = 0; i < 3; i++) {
401 dst[optype][i] =
402 src[optype][0][i] - src[optype][1][i];
403 }
404 else
405 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
406 break;
407 case GL_MUL_ATI:
408 if (!optype)
409 for (i = 0; i < 3; i++) {
410 dst[optype][i] =
411 src[optype][0][i] * src[optype][1][i];
412 }
413 else
414 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
415 break;
416 case GL_MAD_ATI:
417 if (!optype)
418 for (i = 0; i < 3; i++) {
419 dst[optype][i] =
420 src[optype][0][i] * src[optype][1][i] +
421 src[optype][2][i];
422 }
423 else
424 dst[optype][3] =
425 src[optype][0][3] * src[optype][1][3] +
426 src[optype][2][3];
427 break;
428 case GL_LERP_ATI:
429 if (!optype)
430 for (i = 0; i < 3; i++) {
431 dst[optype][i] =
432 src[optype][0][i] * src[optype][1][i] + (1 -
433 src
434 [optype]
435 [0][i]) *
436 src[optype][2][i];
437 }
438 else
439 dst[optype][3] =
440 src[optype][0][3] * src[optype][1][3] + (1 -
441 src[optype]
442 [0][3]) *
443 src[optype][2][3];
444 break;
445
446 case GL_MOV_ATI:
447 if (!optype)
448 for (i = 0; i < 3; i++) {
449 dst[optype][i] = src[optype][0][i];
450 }
451 else
452 dst[optype][3] = src[optype][0][3];
453 break;
454 case GL_CND_ATI:
455 if (!optype) {
456 for (i = 0; i < 3; i++) {
457 dst[optype][i] =
458 (src[optype][2][i] >
459 0.5) ? src[optype][0][i] : src[optype][1][i];
460 }
461 }
462 else {
463 dst[optype][3] =
464 (src[optype][2][3] >
465 0.5) ? src[optype][0][3] : src[optype][1][3];
466 }
467 break;
468
469 case GL_CND0_ATI:
470 if (!optype)
471 for (i = 0; i < 3; i++) {
472 dst[optype][i] =
473 (src[optype][2][i] >=
474 0) ? src[optype][0][i] : src[optype][1][i];
475 }
476 else {
477 dst[optype][3] =
478 (src[optype][2][3] >=
479 0) ? src[optype][0][3] : src[optype][1][3];
480 }
481 break;
482 case GL_DOT2_ADD_ATI:
483 {
484 GLfloat result;
485
486 /* DOT 2 always uses the source from the color op */
487 /* could save recalculation of dot products for alpha inst */
488 result = src[0][0][0] * src[0][1][0] +
489 src[0][0][1] * src[0][1][1] + src[0][2][2];
490 if (!optype) {
491 for (i = 0; i < 3; i++) {
492 dst[optype][i] = result;
493 }
494 }
495 else
496 dst[optype][3] = result;
497 }
498 break;
499 case GL_DOT3_ATI:
500 {
501 GLfloat result;
502
503 /* DOT 3 always uses the source from the color op */
504 result = src[0][0][0] * src[0][1][0] +
505 src[0][0][1] * src[0][1][1] +
506 src[0][0][2] * src[0][1][2];
507
508 if (!optype) {
509 for (i = 0; i < 3; i++) {
510 dst[optype][i] = result;
511 }
512 }
513 else
514 dst[optype][3] = result;
515 }
516 break;
517 case GL_DOT4_ATI:
518 {
519 GLfloat result;
520
521 /* DOT 4 always uses the source from the color op */
522 result = src[0][0][0] * src[0][1][0] +
523 src[0][0][1] * src[0][1][1] +
524 src[0][0][2] * src[0][1][2] +
525 src[0][0][3] * src[0][1][3];
526 if (!optype) {
527 for (i = 0; i < 3; i++) {
528 dst[optype][i] = result;
529 }
530 }
531 else
532 dst[optype][3] = result;
533 }
534 break;
535
536 }
537 }
538 }
539
540 /* write out the destination registers */
541 for (optype = 0; optype < 2; optype++) {
542 if (inst->Opcode[optype]) {
543 dstreg = inst->DstReg[optype].Index;
544 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
545
546 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
547 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
548 write_dst_addr(optype, inst->DstReg[optype].dstMod,
549 inst->DstReg[optype].dstMask, dst[optype],
550 dstp);
551 else
552 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
553 }
554 }
555 }
556 }
557 }
558
559
560 /**
561 * Init fragment shader virtual machine state.
562 */
563 static void
564 init_machine(GLcontext * ctx, struct atifs_machine *machine,
565 const struct ati_fragment_shader *shader,
566 const SWspan *span, GLuint col)
567 {
568 GLfloat (*inputs)[4] = machine->Inputs;
569 GLint i, j;
570
571 for (i = 0; i < 6; i++) {
572 for (j = 0; j < 4; j++)
573 machine->Registers[i][j] = 0.0;
574 }
575
576 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
577 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
578 }
579
580
581
582 /**
583 * Execute the current ATI shader program, operating on the given span.
584 */
585 void
586 _swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
587 {
588 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
589 struct atifs_machine machine;
590 GLuint i;
591
592 /* incoming colors should be floats */
593 ASSERT(span->array->ChanType == GL_FLOAT);
594
595 ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
596
597 for (i = 0; i < span->end; i++) {
598 if (span->array->mask[i]) {
599 init_machine(ctx, &machine, shader, span, i);
600
601 execute_shader(ctx, shader, &machine, span, i);
602
603 /* store result color */
604 {
605 const GLfloat *colOut = machine.Registers[0];
606 /*fprintf(stderr,"outputs %f %f %f %f\n",
607 colOut[0], colOut[1], colOut[2], colOut[3]); */
608 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
609 }
610 }
611 }
612
613 ctx->_CurrentProgram = 0;
614 }