Merge commit 'origin/gallium-0.1' into gallium-0.2
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 * Copyright (C) 2004 David Airlie All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 #include "main/glheader.h"
23 #include "main/colormac.h"
24 #include "main/context.h"
25 #include "main/macros.h"
26 #include "shader/program.h"
27 #include "shader/atifragshader.h"
28 #include "swrast/s_atifragshader.h"
29
30
31 /**
32 * State for executing ATI fragment shader.
33 */
34 struct atifs_machine
35 {
36 GLfloat Registers[6][4]; /** six temporary registers */
37 GLfloat PrevPassRegisters[6][4];
38 GLfloat Inputs[2][4]; /** Primary, secondary input colors */
39 };
40
41
42
43 /**
44 * Fetch a texel.
45 */
46 static void
47 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
48 GLuint unit, GLfloat color[4])
49 {
50 GLchan rgba[4];
51 SWcontext *swrast = SWRAST_CONTEXT(ctx);
52
53 /* XXX use a float-valued TextureSample routine here!!! */
54 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
55 1, (const GLfloat(*)[4]) texcoord,
56 &lambda, &rgba);
57 color[0] = CHAN_TO_FLOAT(rgba[0]);
58 color[1] = CHAN_TO_FLOAT(rgba[1]);
59 color[2] = CHAN_TO_FLOAT(rgba[2]);
60 color[3] = CHAN_TO_FLOAT(rgba[3]);
61 }
62
63 static void
64 apply_swizzle(GLfloat values[4], GLuint swizzle)
65 {
66 GLfloat s, t, r, q;
67
68 s = values[0];
69 t = values[1];
70 r = values[2];
71 q = values[3];
72
73 switch (swizzle) {
74 case GL_SWIZZLE_STR_ATI:
75 values[0] = s;
76 values[1] = t;
77 values[2] = r;
78 break;
79 case GL_SWIZZLE_STQ_ATI:
80 values[0] = s;
81 values[1] = t;
82 values[2] = q;
83 break;
84 case GL_SWIZZLE_STR_DR_ATI:
85 values[0] = s / r;
86 values[1] = t / r;
87 values[2] = 1 / r;
88 break;
89 case GL_SWIZZLE_STQ_DQ_ATI:
90 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
91 if (q == 0.0F) q = 0.000000001;
92 values[0] = s / q;
93 values[1] = t / q;
94 values[2] = 1 / q;
95 break;
96 }
97 values[3] = 0.0;
98 }
99
100 static void
101 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
102 {
103 GLint i;
104 GLint start, end;
105 if (!rep)
106 return;
107
108 start = optype ? 3 : 0;
109 end = 4;
110
111 for (i = start; i < end; i++) {
112 switch (rep) {
113 case GL_RED:
114 val[i] = val[0];
115 break;
116 case GL_GREEN:
117 val[i] = val[1];
118 break;
119 case GL_BLUE:
120 val[i] = val[2];
121 break;
122 case GL_ALPHA:
123 val[i] = val[3];
124 break;
125 }
126 }
127 }
128
129 static void
130 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
131 {
132 GLint i;
133 GLint start, end;
134
135 if (!mod)
136 return;
137
138 start = optype ? 3 : 0;
139 end = 4;
140
141 for (i = start; i < end; i++) {
142 if (mod & GL_COMP_BIT_ATI)
143 val[i] = 1 - val[i];
144
145 if (mod & GL_BIAS_BIT_ATI)
146 val[i] = val[i] - 0.5;
147
148 if (mod & GL_2X_BIT_ATI)
149 val[i] = 2 * val[i];
150
151 if (mod & GL_NEGATE_BIT_ATI)
152 val[i] = -val[i];
153 }
154 }
155
156 static void
157 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
158 {
159 GLint i;
160 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
161 GLint start, end;
162
163 mod &= ~GL_SATURATE_BIT_ATI;
164
165 start = optype ? 3 : 0;
166 end = optype ? 4 : 3;
167
168 for (i = start; i < end; i++) {
169 switch (mod) {
170 case GL_2X_BIT_ATI:
171 val[i] = 2 * val[i];
172 break;
173 case GL_4X_BIT_ATI:
174 val[i] = 4 * val[i];
175 break;
176 case GL_8X_BIT_ATI:
177 val[i] = 8 * val[i];
178 break;
179 case GL_HALF_BIT_ATI:
180 val[i] = val[i] * 0.5;
181 break;
182 case GL_QUARTER_BIT_ATI:
183 val[i] = val[i] * 0.25;
184 break;
185 case GL_EIGHTH_BIT_ATI:
186 val[i] = val[i] * 0.125;
187 break;
188 }
189
190 if (has_sat) {
191 if (val[i] < 0.0)
192 val[i] = 0;
193 else if (val[i] > 1.0)
194 val[i] = 1.0;
195 }
196 else {
197 if (val[i] < -8.0)
198 val[i] = -8.0;
199 else if (val[i] > 8.0)
200 val[i] = 8.0;
201 }
202 }
203 }
204
205
206 static void
207 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
208 GLfloat * dst)
209 {
210 GLint i;
211 apply_dst_mod(optype, mod, src);
212
213 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
214 if (mask) {
215 if (mask & GL_RED_BIT_ATI)
216 dst[0] = src[0];
217
218 if (mask & GL_GREEN_BIT_ATI)
219 dst[1] = src[1];
220
221 if (mask & GL_BLUE_BIT_ATI)
222 dst[2] = src[2];
223 }
224 else {
225 for (i = 0; i < 3; i++)
226 dst[i] = src[i];
227 }
228 }
229 else
230 dst[3] = src[3];
231 }
232
233 static void
234 finish_pass(struct atifs_machine *machine)
235 {
236 GLint i;
237
238 for (i = 0; i < 6; i++) {
239 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
240 }
241 }
242
243 struct ati_fs_opcode_st ati_fs_opcodes[] = {
244 {GL_ADD_ATI, 2},
245 {GL_SUB_ATI, 2},
246 {GL_MUL_ATI, 2},
247 {GL_MAD_ATI, 3},
248 {GL_LERP_ATI, 3},
249 {GL_MOV_ATI, 1},
250 {GL_CND_ATI, 3},
251 {GL_CND0_ATI, 3},
252 {GL_DOT2_ADD_ATI, 3},
253 {GL_DOT3_ATI, 2},
254 {GL_DOT4_ATI, 2}
255 };
256
257
258
259 static void
260 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
261 const SWspan *span, GLuint column, GLuint idx)
262 {
263 GLuint swizzle = texinst->swizzle;
264 GLuint pass_tex = texinst->src;
265
266 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
267 pass_tex -= GL_TEXTURE0_ARB;
268 COPY_4V(machine->Registers[idx],
269 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
270 }
271 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
272 pass_tex -= GL_REG_0_ATI;
273 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
274 }
275 apply_swizzle(machine->Registers[idx], swizzle);
276
277 }
278
279 static void
280 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
281 struct atifs_setupinst *texinst, const SWspan *span,
282 GLuint column, GLuint idx)
283 {
284 /* sample from unit idx using texinst->src as coords */
285 GLuint swizzle = texinst->swizzle;
286 GLuint coord_source = texinst->src;
287 GLfloat tex_coords[4];
288
289 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
290 coord_source -= GL_TEXTURE0_ARB;
291 COPY_4V(tex_coords,
292 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
293 }
294 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
295 coord_source -= GL_REG_0_ATI;
296 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
297 }
298 apply_swizzle(tex_coords, swizzle);
299 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
300 }
301
/*
 * Copy the four-component value x into slot [optype][i] of an array
 * named `src`.  The macro does not take that array as an argument, so a
 * suitable `src` must be in scope wherever the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x)   \
   do {                               \
      COPY_4V(src[optype][i], x);     \
   } while (0)
306
307
308
309 /**
310 * Execute the given fragment shader.
311 * NOTE: we do everything in single-precision floating point
312 * \param ctx - rendering context
313 * \param shader - the shader to execute
314 * \param machine - virtual machine state
315 * \param span - the SWspan we're operating on
316 * \param column - which pixel [i] we're operating on in the span
317 */
318 static void
319 execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
320 struct atifs_machine *machine, const SWspan *span,
321 GLuint column)
322 {
323 GLuint pc;
324 struct atifs_instruction *inst;
325 struct atifs_setupinst *texinst;
326 GLint optype;
327 GLuint i;
328 GLint j, pass;
329 GLint dstreg;
330 GLfloat src[2][3][4];
331 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
332 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
333 GLfloat dst[2][4], *dstp;
334
335 for (pass = 0; pass < shader->NumPasses; pass++) {
336 if (pass > 0)
337 finish_pass(machine);
338 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
339 texinst = &shader->SetupInst[pass][j];
340 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
341 handle_pass_op(machine, texinst, span, column, j);
342 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
343 handle_sample_op(ctx, machine, texinst, span, column, j);
344 }
345
346 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
347 inst = &shader->Instructions[pass][pc];
348
349 /* setup the source registers for color and alpha ops */
350 for (optype = 0; optype < 2; optype++) {
351 for (i = 0; i < inst->ArgCount[optype]; i++) {
352 GLint index = inst->SrcReg[optype][i].Index;
353
354 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
355 SETUP_SRC_REG(optype, i,
356 machine->Registers[index - GL_REG_0_ATI]);
357 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
358 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
359 SETUP_SRC_REG(optype, i,
360 shader->Constants[index - GL_CON_0_ATI]);
361 } else {
362 SETUP_SRC_REG(optype, i,
363 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
364 }
365 }
366 else if (index == GL_ONE)
367 SETUP_SRC_REG(optype, i, ones);
368 else if (index == GL_ZERO)
369 SETUP_SRC_REG(optype, i, zeros);
370 else if (index == GL_PRIMARY_COLOR_EXT)
371 SETUP_SRC_REG(optype, i,
372 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
373 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
374 SETUP_SRC_REG(optype, i,
375 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
376
377 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
378 src[optype][i]);
379 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
380 src[optype][i]);
381 }
382 }
383
384 /* Execute the operations - color then alpha */
385 for (optype = 0; optype < 2; optype++) {
386 if (inst->Opcode[optype]) {
387 switch (inst->Opcode[optype]) {
388 case GL_ADD_ATI:
389 if (!optype)
390 for (i = 0; i < 3; i++) {
391 dst[optype][i] =
392 src[optype][0][i] + src[optype][1][i];
393 }
394 else
395 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
396 break;
397 case GL_SUB_ATI:
398 if (!optype)
399 for (i = 0; i < 3; i++) {
400 dst[optype][i] =
401 src[optype][0][i] - src[optype][1][i];
402 }
403 else
404 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
405 break;
406 case GL_MUL_ATI:
407 if (!optype)
408 for (i = 0; i < 3; i++) {
409 dst[optype][i] =
410 src[optype][0][i] * src[optype][1][i];
411 }
412 else
413 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
414 break;
415 case GL_MAD_ATI:
416 if (!optype)
417 for (i = 0; i < 3; i++) {
418 dst[optype][i] =
419 src[optype][0][i] * src[optype][1][i] +
420 src[optype][2][i];
421 }
422 else
423 dst[optype][3] =
424 src[optype][0][3] * src[optype][1][3] +
425 src[optype][2][3];
426 break;
427 case GL_LERP_ATI:
428 if (!optype)
429 for (i = 0; i < 3; i++) {
430 dst[optype][i] =
431 src[optype][0][i] * src[optype][1][i] + (1 -
432 src
433 [optype]
434 [0][i]) *
435 src[optype][2][i];
436 }
437 else
438 dst[optype][3] =
439 src[optype][0][3] * src[optype][1][3] + (1 -
440 src[optype]
441 [0][3]) *
442 src[optype][2][3];
443 break;
444
445 case GL_MOV_ATI:
446 if (!optype)
447 for (i = 0; i < 3; i++) {
448 dst[optype][i] = src[optype][0][i];
449 }
450 else
451 dst[optype][3] = src[optype][0][3];
452 break;
453 case GL_CND_ATI:
454 if (!optype) {
455 for (i = 0; i < 3; i++) {
456 dst[optype][i] =
457 (src[optype][2][i] >
458 0.5) ? src[optype][0][i] : src[optype][1][i];
459 }
460 }
461 else {
462 dst[optype][3] =
463 (src[optype][2][3] >
464 0.5) ? src[optype][0][3] : src[optype][1][3];
465 }
466 break;
467
468 case GL_CND0_ATI:
469 if (!optype)
470 for (i = 0; i < 3; i++) {
471 dst[optype][i] =
472 (src[optype][2][i] >=
473 0) ? src[optype][0][i] : src[optype][1][i];
474 }
475 else {
476 dst[optype][3] =
477 (src[optype][2][3] >=
478 0) ? src[optype][0][3] : src[optype][1][3];
479 }
480 break;
481 case GL_DOT2_ADD_ATI:
482 {
483 GLfloat result;
484
485 /* DOT 2 always uses the source from the color op */
486 /* could save recalculation of dot products for alpha inst */
487 result = src[0][0][0] * src[0][1][0] +
488 src[0][0][1] * src[0][1][1] + src[0][2][2];
489 if (!optype) {
490 for (i = 0; i < 3; i++) {
491 dst[optype][i] = result;
492 }
493 }
494 else
495 dst[optype][3] = result;
496 }
497 break;
498 case GL_DOT3_ATI:
499 {
500 GLfloat result;
501
502 /* DOT 3 always uses the source from the color op */
503 result = src[0][0][0] * src[0][1][0] +
504 src[0][0][1] * src[0][1][1] +
505 src[0][0][2] * src[0][1][2];
506
507 if (!optype) {
508 for (i = 0; i < 3; i++) {
509 dst[optype][i] = result;
510 }
511 }
512 else
513 dst[optype][3] = result;
514 }
515 break;
516 case GL_DOT4_ATI:
517 {
518 GLfloat result;
519
520 /* DOT 4 always uses the source from the color op */
521 result = src[0][0][0] * src[0][1][0] +
522 src[0][0][1] * src[0][1][1] +
523 src[0][0][2] * src[0][1][2] +
524 src[0][0][3] * src[0][1][3];
525 if (!optype) {
526 for (i = 0; i < 3; i++) {
527 dst[optype][i] = result;
528 }
529 }
530 else
531 dst[optype][3] = result;
532 }
533 break;
534
535 }
536 }
537 }
538
539 /* write out the destination registers */
540 for (optype = 0; optype < 2; optype++) {
541 if (inst->Opcode[optype]) {
542 dstreg = inst->DstReg[optype].Index;
543 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
544
545 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
546 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
547 write_dst_addr(optype, inst->DstReg[optype].dstMod,
548 inst->DstReg[optype].dstMask, dst[optype],
549 dstp);
550 else
551 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
552 }
553 }
554 }
555 }
556 }
557
558
559 /**
560 * Init fragment shader virtual machine state.
561 */
562 static void
563 init_machine(GLcontext * ctx, struct atifs_machine *machine,
564 const struct ati_fragment_shader *shader,
565 const SWspan *span, GLuint col)
566 {
567 GLfloat (*inputs)[4] = machine->Inputs;
568 GLint i, j;
569
570 for (i = 0; i < 6; i++) {
571 for (j = 0; j < 4; j++)
572 machine->Registers[i][j] = 0.0;
573 }
574
575 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
576 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
577 }
578
579
580
581 /**
582 * Execute the current ATI shader program, operating on the given span.
583 */
584 void
585 _swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
586 {
587 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
588 struct atifs_machine machine;
589 GLuint i;
590
591 /* incoming colors should be floats */
592 ASSERT(span->array->ChanType == GL_FLOAT);
593
594 ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
595
596 for (i = 0; i < span->end; i++) {
597 if (span->array->mask[i]) {
598 init_machine(ctx, &machine, shader, span, i);
599
600 execute_shader(ctx, shader, &machine, span, i);
601
602 /* store result color */
603 {
604 const GLfloat *colOut = machine.Registers[0];
605 /*fprintf(stderr,"outputs %f %f %f %f\n",
606 colOut[0], colOut[1], colOut[2], colOut[3]); */
607 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
608 }
609 }
610 }
611
612 ctx->_CurrentProgram = 0;
613 }