bumpmap sample is correct now
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 *
3 * Copyright (C) 2004 David Airlie All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "glheader.h"
24 #include "colormac.h"
25 #include "context.h"
26 #include "atifragshader.h"
27 #include "macros.h"
28 #include "program.h"
29
30 #include "s_atifragshader.h"
31
32
/**
 * State for executing ATI fragment shader.
 *
 * A single instance is re-initialised for every fragment that is shaded;
 * each register is a four-component GLfloat vector.
 */
struct atifs_machine
{
   GLfloat Registers[6][4];         /** six temporary registers */
   GLfloat PrevPassRegisters[6][4]; /** copy of Registers made by finish_pass() */
   GLfloat Inputs[2][4];            /** Primary, secondary input colors */
};
42
43
44
45 /**
46 * Fetch a texel.
47 */
48 static void
49 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
50 GLuint unit, GLfloat color[4])
51 {
52 GLchan rgba[4];
53 SWcontext *swrast = SWRAST_CONTEXT(ctx);
54
55 /* XXX use a float-valued TextureSample routine here!!! */
56 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
57 1, (const GLfloat(*)[4]) texcoord,
58 &lambda, &rgba);
59 color[0] = CHAN_TO_FLOAT(rgba[0]);
60 color[1] = CHAN_TO_FLOAT(rgba[1]);
61 color[2] = CHAN_TO_FLOAT(rgba[2]);
62 color[3] = CHAN_TO_FLOAT(rgba[3]);
63 }
64
65 static void
66 apply_swizzle(GLfloat values[4], GLuint swizzle)
67 {
68 GLfloat s, t, r, q;
69
70 s = values[0];
71 t = values[1];
72 r = values[2];
73 q = values[3];
74
75 switch (swizzle) {
76 case GL_SWIZZLE_STR_ATI:
77 values[0] = s;
78 values[1] = t;
79 values[2] = r;
80 break;
81 case GL_SWIZZLE_STQ_ATI:
82 values[0] = s;
83 values[1] = t;
84 values[2] = q;
85 break;
86 case GL_SWIZZLE_STR_DR_ATI:
87 values[0] = s / r;
88 values[1] = t / r;
89 values[2] = 1 / r;
90 break;
91 case GL_SWIZZLE_STQ_DQ_ATI:
92 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
93 if (q == 0.0F) q = 0.000000001;
94 values[0] = s / q;
95 values[1] = t / q;
96 values[2] = 1 / q;
97 break;
98 }
99 values[3] = 0.0;
100 }
101
102 static void
103 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
104 {
105 GLint i;
106 GLint start, end;
107 if (!rep)
108 return;
109
110 start = optype ? 3 : 0;
111 end = 4;
112
113 for (i = start; i < end; i++) {
114 switch (rep) {
115 case GL_RED:
116 val[i] = val[0];
117 break;
118 case GL_GREEN:
119 val[i] = val[1];
120 break;
121 case GL_BLUE:
122 val[i] = val[2];
123 break;
124 case GL_ALPHA:
125 val[i] = val[3];
126 break;
127 }
128 }
129 }
130
131 static void
132 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
133 {
134 GLint i;
135 GLint start, end;
136
137 if (!mod)
138 return;
139
140 start = optype ? 3 : 0;
141 end = 4;
142
143 for (i = start; i < end; i++) {
144 if (mod & GL_COMP_BIT_ATI)
145 val[i] = 1 - val[i];
146
147 if (mod & GL_BIAS_BIT_ATI)
148 val[i] = val[i] - 0.5;
149
150 if (mod & GL_2X_BIT_ATI)
151 val[i] = 2 * val[i];
152
153 if (mod & GL_NEGATE_BIT_ATI)
154 val[i] = -val[i];
155 }
156 }
157
158 static void
159 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
160 {
161 GLint i;
162 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
163 GLint start, end;
164
165 mod &= ~GL_SATURATE_BIT_ATI;
166
167 start = optype ? 3 : 0;
168 end = optype ? 4 : 3;
169
170 for (i = start; i < end; i++) {
171 switch (mod) {
172 case GL_2X_BIT_ATI:
173 val[i] = 2 * val[i];
174 break;
175 case GL_4X_BIT_ATI:
176 val[i] = 4 * val[i];
177 break;
178 case GL_8X_BIT_ATI:
179 val[i] = 8 * val[i];
180 break;
181 case GL_HALF_BIT_ATI:
182 val[i] = val[i] * 0.5;
183 break;
184 case GL_QUARTER_BIT_ATI:
185 val[i] = val[i] * 0.25;
186 break;
187 case GL_EIGHTH_BIT_ATI:
188 val[i] = val[i] * 0.125;
189 break;
190 }
191
192 if (has_sat) {
193 if (val[i] < 0.0)
194 val[i] = 0;
195 else if (val[i] > 1.0)
196 val[i] = 1.0;
197 }
198 else {
199 if (val[i] < -8.0)
200 val[i] = -8.0;
201 else if (val[i] > 8.0)
202 val[i] = 8.0;
203 }
204 }
205 }
206
207
208 static void
209 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
210 GLfloat * dst)
211 {
212 GLint i;
213 apply_dst_mod(optype, mod, src);
214
215 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
216 if (mask) {
217 if (mask & GL_RED_BIT_ATI)
218 dst[0] = src[0];
219
220 if (mask & GL_GREEN_BIT_ATI)
221 dst[1] = src[1];
222
223 if (mask & GL_BLUE_BIT_ATI)
224 dst[2] = src[2];
225 }
226 else {
227 for (i = 0; i < 3; i++)
228 dst[i] = src[i];
229 }
230 }
231 else
232 dst[3] = src[3];
233 }
234
235 static void
236 finish_pass(struct atifs_machine *machine)
237 {
238 GLint i;
239
240 for (i = 0; i < 6; i++) {
241 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
242 }
243 }
244
/**
 * Table of the arithmetic opcodes, one entry per opcode token.
 * NOTE(review): the second field is presumably the number of source
 * arguments the opcode takes -- confirm against the declaration of
 * struct ati_fs_opcode_st.  The table is not referenced by the code
 * visible in this file.
 */
struct ati_fs_opcode_st ati_fs_opcodes[] = {
   {GL_ADD_ATI, 2},
   {GL_SUB_ATI, 2},
   {GL_MUL_ATI, 2},
   {GL_MAD_ATI, 3},
   {GL_LERP_ATI, 3},
   {GL_MOV_ATI, 1},
   {GL_CND_ATI, 3},
   {GL_CND0_ATI, 3},
   {GL_DOT2_ADD_ATI, 3},
   {GL_DOT3_ATI, 2},
   {GL_DOT4_ATI, 2}
};
258
259
260
261 static void
262 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
263 const SWspan *span, GLuint column, GLuint idx)
264 {
265 GLuint swizzle = texinst->swizzle;
266 GLuint pass_tex = texinst->src;
267
268 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
269 pass_tex -= GL_TEXTURE0_ARB;
270 COPY_4V(machine->Registers[idx],
271 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
272 }
273 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
274 pass_tex -= GL_REG_0_ATI;
275 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
276 }
277 apply_swizzle(machine->Registers[idx], swizzle);
278
279 }
280
281 static void
282 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
283 struct atifs_setupinst *texinst, const SWspan *span,
284 GLuint column, GLuint idx)
285 {
286 /* sample from unit idx using texinst->src as coords */
287 GLuint swizzle = texinst->swizzle;
288 GLuint coord_source = texinst->src;
289 GLfloat tex_coords[4];
290
291 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
292 coord_source -= GL_TEXTURE0_ARB;
293 COPY_4V(tex_coords,
294 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
295 }
296 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
297 coord_source -= GL_REG_0_ATI;
298 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
299 }
300 apply_swizzle(tex_coords, swizzle);
301 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
302 }
303
/**
 * Copy vector \p x (via COPY_4V) into source-operand slot \p i of op type
 * \p optype.  The expansion refers to an array named `src`, indexed as
 * src[optype][i], which must be in scope wherever the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x) \
do { \
   COPY_4V(src[optype][i], x); \
} while (0)
308
309
310
311 /**
312 * Execute the given fragment shader.
313 * NOTE: we do everything in single-precision floating point
314 * \param ctx - rendering context
315 * \param shader - the shader to execute
316 * \param machine - virtual machine state
317 * \param span - the SWspan we're operating on
318 * \param column - which pixel [i] we're operating on in the span
319 */
320 static void
321 execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
322 struct atifs_machine *machine, const SWspan *span,
323 GLuint column)
324 {
325 GLuint pc;
326 struct atifs_instruction *inst;
327 struct atifs_setupinst *texinst;
328 GLint optype;
329 GLuint i;
330 GLint j, pass;
331 GLint dstreg;
332 GLfloat src[2][3][4];
333 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
334 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
335 GLfloat dst[2][4], *dstp;
336
337 for (pass = 0; pass < shader->NumPasses; pass++) {
338 if (pass > 0)
339 finish_pass(machine);
340 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
341 texinst = &shader->SetupInst[pass][j];
342 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
343 handle_pass_op(machine, texinst, span, column, j);
344 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
345 handle_sample_op(ctx, machine, texinst, span, column, j);
346 }
347
348 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
349 inst = &shader->Instructions[pass][pc];
350
351 /* setup the source registers for color and alpha ops */
352 for (optype = 0; optype < 2; optype++) {
353 for (i = 0; i < inst->ArgCount[optype]; i++) {
354 GLint index = inst->SrcReg[optype][i].Index;
355
356 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
357 SETUP_SRC_REG(optype, i,
358 machine->Registers[index - GL_REG_0_ATI]);
359 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
360 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
361 SETUP_SRC_REG(optype, i,
362 shader->Constants[index - GL_CON_0_ATI]);
363 } else {
364 SETUP_SRC_REG(optype, i,
365 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
366 }
367 }
368 else if (index == GL_ONE)
369 SETUP_SRC_REG(optype, i, ones);
370 else if (index == GL_ZERO)
371 SETUP_SRC_REG(optype, i, zeros);
372 else if (index == GL_PRIMARY_COLOR_EXT)
373 SETUP_SRC_REG(optype, i,
374 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
375 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
376 SETUP_SRC_REG(optype, i,
377 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
378
379 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
380 src[optype][i]);
381 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
382 src[optype][i]);
383 }
384 }
385
386 /* Execute the operations - color then alpha */
387 for (optype = 0; optype < 2; optype++) {
388 if (inst->Opcode[optype]) {
389 switch (inst->Opcode[optype]) {
390 case GL_ADD_ATI:
391 if (!optype)
392 for (i = 0; i < 3; i++) {
393 dst[optype][i] =
394 src[optype][0][i] + src[optype][1][i];
395 }
396 else
397 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
398 break;
399 case GL_SUB_ATI:
400 if (!optype)
401 for (i = 0; i < 3; i++) {
402 dst[optype][i] =
403 src[optype][0][i] - src[optype][1][i];
404 }
405 else
406 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
407 break;
408 case GL_MUL_ATI:
409 if (!optype)
410 for (i = 0; i < 3; i++) {
411 dst[optype][i] =
412 src[optype][0][i] * src[optype][1][i];
413 }
414 else
415 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
416 break;
417 case GL_MAD_ATI:
418 if (!optype)
419 for (i = 0; i < 3; i++) {
420 dst[optype][i] =
421 src[optype][0][i] * src[optype][1][i] +
422 src[optype][2][i];
423 }
424 else
425 dst[optype][3] =
426 src[optype][0][3] * src[optype][1][3] +
427 src[optype][2][3];
428 break;
429 case GL_LERP_ATI:
430 if (!optype)
431 for (i = 0; i < 3; i++) {
432 dst[optype][i] =
433 src[optype][0][i] * src[optype][1][i] + (1 -
434 src
435 [optype]
436 [0][i]) *
437 src[optype][2][i];
438 }
439 else
440 dst[optype][3] =
441 src[optype][0][3] * src[optype][1][3] + (1 -
442 src[optype]
443 [0][3]) *
444 src[optype][2][3];
445 break;
446
447 case GL_MOV_ATI:
448 if (!optype)
449 for (i = 0; i < 3; i++) {
450 dst[optype][i] = src[optype][0][i];
451 }
452 else
453 dst[optype][3] = src[optype][0][3];
454 break;
455 case GL_CND_ATI:
456 if (!optype) {
457 for (i = 0; i < 3; i++) {
458 dst[optype][i] =
459 (src[optype][2][i] >
460 0.5) ? src[optype][0][i] : src[optype][1][i];
461 }
462 }
463 else {
464 dst[optype][3] =
465 (src[optype][2][3] >
466 0.5) ? src[optype][0][3] : src[optype][1][3];
467 }
468 break;
469
470 case GL_CND0_ATI:
471 if (!optype)
472 for (i = 0; i < 3; i++) {
473 dst[optype][i] =
474 (src[optype][2][i] >=
475 0) ? src[optype][0][i] : src[optype][1][i];
476 }
477 else {
478 dst[optype][3] =
479 (src[optype][2][3] >=
480 0) ? src[optype][0][3] : src[optype][1][3];
481 }
482 break;
483 case GL_DOT2_ADD_ATI:
484 {
485 GLfloat result;
486
487 /* DOT 2 always uses the source from the color op */
488 /* could save recalculation of dot products for alpha inst */
489 result = src[0][0][0] * src[0][1][0] +
490 src[0][0][1] * src[0][1][1] + src[0][2][2];
491 if (!optype) {
492 for (i = 0; i < 3; i++) {
493 dst[optype][i] = result;
494 }
495 }
496 else
497 dst[optype][3] = result;
498 }
499 break;
500 case GL_DOT3_ATI:
501 {
502 GLfloat result;
503
504 /* DOT 3 always uses the source from the color op */
505 result = src[0][0][0] * src[0][1][0] +
506 src[0][0][1] * src[0][1][1] +
507 src[0][0][2] * src[0][1][2];
508
509 if (!optype) {
510 for (i = 0; i < 3; i++) {
511 dst[optype][i] = result;
512 }
513 }
514 else
515 dst[optype][3] = result;
516 }
517 break;
518 case GL_DOT4_ATI:
519 {
520 GLfloat result;
521
522 /* DOT 4 always uses the source from the color op */
523 result = src[0][0][0] * src[0][1][0] +
524 src[0][0][1] * src[0][1][1] +
525 src[0][0][2] * src[0][1][2] +
526 src[0][0][3] * src[0][1][3];
527 if (!optype) {
528 for (i = 0; i < 3; i++) {
529 dst[optype][i] = result;
530 }
531 }
532 else
533 dst[optype][3] = result;
534 }
535 break;
536
537 }
538 }
539 }
540
541 /* write out the destination registers */
542 for (optype = 0; optype < 2; optype++) {
543 if (inst->Opcode[optype]) {
544 dstreg = inst->DstReg[optype].Index;
545 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
546
547 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
548 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
549 write_dst_addr(optype, inst->DstReg[optype].dstMod,
550 inst->DstReg[optype].dstMask, dst[optype],
551 dstp);
552 else
553 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
554 }
555 }
556 }
557 }
558 }
559
560
561 /**
562 * Init fragment shader virtual machine state.
563 */
564 static void
565 init_machine(GLcontext * ctx, struct atifs_machine *machine,
566 const struct ati_fragment_shader *shader,
567 const SWspan *span, GLuint col)
568 {
569 GLfloat (*inputs)[4] = machine->Inputs;
570 GLint i, j;
571
572 for (i = 0; i < 6; i++) {
573 for (j = 0; j < 4; j++)
574 machine->Registers[i][j] = 0.0;
575 }
576
577 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
578 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
579 }
580
581
582
583 /**
584 * Execute the current ATI shader program, operating on the given span.
585 */
586 void
587 _swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
588 {
589 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
590 struct atifs_machine machine;
591 GLuint i;
592
593 /* incoming colors should be floats */
594 ASSERT(span->array->ChanType == GL_FLOAT);
595
596 ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
597
598 for (i = 0; i < span->end; i++) {
599 if (span->array->mask[i]) {
600 init_machine(ctx, &machine, shader, span, i);
601
602 execute_shader(ctx, shader, &machine, span, i);
603
604 /* store result color */
605 {
606 const GLfloat *colOut = machine.Registers[0];
607 /*fprintf(stderr,"outputs %f %f %f %f\n",
608 colOut[0], colOut[1], colOut[2], colOut[3]); */
609 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
610 }
611 }
612 }
613
614 ctx->_CurrentProgram = 0;
615 }