Merge remote branch 'origin/opengl-es-v2'
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 * Copyright (C) 2004 David Airlie All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 #include "main/glheader.h"
23 #include "main/colormac.h"
24 #include "main/context.h"
25 #include "main/macros.h"
26 #include "shader/atifragshader.h"
27 #include "swrast/s_atifragshader.h"
28
29
30 /**
31 * State for executing ATI fragment shader.
32 */
33 struct atifs_machine
34 {
35 GLfloat Registers[6][4]; /** six temporary registers */
36 GLfloat PrevPassRegisters[6][4];
37 GLfloat Inputs[2][4]; /** Primary, secondary input colors */
38 };
39
40
41
42 /**
43 * Fetch a texel.
44 */
45 static void
46 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
47 GLuint unit, GLfloat color[4])
48 {
49 SWcontext *swrast = SWRAST_CONTEXT(ctx);
50
51 /* XXX use a float-valued TextureSample routine here!!! */
52 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
53 1, (const GLfloat(*)[4]) texcoord,
54 &lambda, (GLfloat (*)[4]) color);
55 }
56
57 static void
58 apply_swizzle(GLfloat values[4], GLuint swizzle)
59 {
60 GLfloat s, t, r, q;
61
62 s = values[0];
63 t = values[1];
64 r = values[2];
65 q = values[3];
66
67 switch (swizzle) {
68 case GL_SWIZZLE_STR_ATI:
69 values[0] = s;
70 values[1] = t;
71 values[2] = r;
72 break;
73 case GL_SWIZZLE_STQ_ATI:
74 values[0] = s;
75 values[1] = t;
76 values[2] = q;
77 break;
78 case GL_SWIZZLE_STR_DR_ATI:
79 values[0] = s / r;
80 values[1] = t / r;
81 values[2] = 1 / r;
82 break;
83 case GL_SWIZZLE_STQ_DQ_ATI:
84 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
85 if (q == 0.0F) q = 0.000000001;
86 values[0] = s / q;
87 values[1] = t / q;
88 values[2] = 1 / q;
89 break;
90 }
91 values[3] = 0.0;
92 }
93
94 static void
95 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
96 {
97 GLint i;
98 GLint start, end;
99 if (!rep)
100 return;
101
102 start = optype ? 3 : 0;
103 end = 4;
104
105 for (i = start; i < end; i++) {
106 switch (rep) {
107 case GL_RED:
108 val[i] = val[0];
109 break;
110 case GL_GREEN:
111 val[i] = val[1];
112 break;
113 case GL_BLUE:
114 val[i] = val[2];
115 break;
116 case GL_ALPHA:
117 val[i] = val[3];
118 break;
119 }
120 }
121 }
122
123 static void
124 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
125 {
126 GLint i;
127 GLint start, end;
128
129 if (!mod)
130 return;
131
132 start = optype ? 3 : 0;
133 end = 4;
134
135 for (i = start; i < end; i++) {
136 if (mod & GL_COMP_BIT_ATI)
137 val[i] = 1 - val[i];
138
139 if (mod & GL_BIAS_BIT_ATI)
140 val[i] = val[i] - 0.5;
141
142 if (mod & GL_2X_BIT_ATI)
143 val[i] = 2 * val[i];
144
145 if (mod & GL_NEGATE_BIT_ATI)
146 val[i] = -val[i];
147 }
148 }
149
150 static void
151 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
152 {
153 GLint i;
154 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
155 GLint start, end;
156
157 mod &= ~GL_SATURATE_BIT_ATI;
158
159 start = optype ? 3 : 0;
160 end = optype ? 4 : 3;
161
162 for (i = start; i < end; i++) {
163 switch (mod) {
164 case GL_2X_BIT_ATI:
165 val[i] = 2 * val[i];
166 break;
167 case GL_4X_BIT_ATI:
168 val[i] = 4 * val[i];
169 break;
170 case GL_8X_BIT_ATI:
171 val[i] = 8 * val[i];
172 break;
173 case GL_HALF_BIT_ATI:
174 val[i] = val[i] * 0.5;
175 break;
176 case GL_QUARTER_BIT_ATI:
177 val[i] = val[i] * 0.25;
178 break;
179 case GL_EIGHTH_BIT_ATI:
180 val[i] = val[i] * 0.125;
181 break;
182 }
183
184 if (has_sat) {
185 if (val[i] < 0.0)
186 val[i] = 0;
187 else if (val[i] > 1.0)
188 val[i] = 1.0;
189 }
190 else {
191 if (val[i] < -8.0)
192 val[i] = -8.0;
193 else if (val[i] > 8.0)
194 val[i] = 8.0;
195 }
196 }
197 }
198
199
200 static void
201 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
202 GLfloat * dst)
203 {
204 GLint i;
205 apply_dst_mod(optype, mod, src);
206
207 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
208 if (mask) {
209 if (mask & GL_RED_BIT_ATI)
210 dst[0] = src[0];
211
212 if (mask & GL_GREEN_BIT_ATI)
213 dst[1] = src[1];
214
215 if (mask & GL_BLUE_BIT_ATI)
216 dst[2] = src[2];
217 }
218 else {
219 for (i = 0; i < 3; i++)
220 dst[i] = src[i];
221 }
222 }
223 else
224 dst[3] = src[3];
225 }
226
227 static void
228 finish_pass(struct atifs_machine *machine)
229 {
230 GLint i;
231
232 for (i = 0; i < 6; i++) {
233 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
234 }
235 }
236
237 struct ati_fs_opcode_st ati_fs_opcodes[] = {
238 {GL_ADD_ATI, 2},
239 {GL_SUB_ATI, 2},
240 {GL_MUL_ATI, 2},
241 {GL_MAD_ATI, 3},
242 {GL_LERP_ATI, 3},
243 {GL_MOV_ATI, 1},
244 {GL_CND_ATI, 3},
245 {GL_CND0_ATI, 3},
246 {GL_DOT2_ADD_ATI, 3},
247 {GL_DOT3_ATI, 2},
248 {GL_DOT4_ATI, 2}
249 };
250
251
252
253 static void
254 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
255 const SWspan *span, GLuint column, GLuint idx)
256 {
257 GLuint swizzle = texinst->swizzle;
258 GLuint pass_tex = texinst->src;
259
260 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
261 pass_tex -= GL_TEXTURE0_ARB;
262 COPY_4V(machine->Registers[idx],
263 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
264 }
265 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
266 pass_tex -= GL_REG_0_ATI;
267 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
268 }
269 apply_swizzle(machine->Registers[idx], swizzle);
270
271 }
272
273 static void
274 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
275 struct atifs_setupinst *texinst, const SWspan *span,
276 GLuint column, GLuint idx)
277 {
278 /* sample from unit idx using texinst->src as coords */
279 GLuint swizzle = texinst->swizzle;
280 GLuint coord_source = texinst->src;
281 GLfloat tex_coords[4] = { 0 };
282
283 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
284 coord_source -= GL_TEXTURE0_ARB;
285 COPY_4V(tex_coords,
286 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
287 }
288 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
289 coord_source -= GL_REG_0_ATI;
290 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
291 }
292 apply_swizzle(tex_coords, swizzle);
293 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
294 }
295
/*
 * Copy the 4-component vector x into operand slot src[optype][i].
 * Expects an array named src to be in scope where the macro is used.
 */
#define SETUP_SRC_REG(optype, i, x)         \
   do {                                     \
      COPY_4V(src[(optype)][(i)], (x));     \
   } while (0)
300
301
302
303 /**
304 * Execute the given fragment shader.
305 * NOTE: we do everything in single-precision floating point
306 * \param ctx - rendering context
307 * \param shader - the shader to execute
308 * \param machine - virtual machine state
309 * \param span - the SWspan we're operating on
310 * \param column - which pixel [i] we're operating on in the span
311 */
312 static void
313 execute_shader(GLcontext *ctx, const struct ati_fragment_shader *shader,
314 struct atifs_machine *machine, const SWspan *span,
315 GLuint column)
316 {
317 GLuint pc;
318 struct atifs_instruction *inst;
319 struct atifs_setupinst *texinst;
320 GLint optype;
321 GLuint i;
322 GLint j, pass;
323 GLint dstreg;
324 GLfloat src[2][3][4];
325 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
326 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
327 GLfloat dst[2][4], *dstp;
328
329 for (pass = 0; pass < shader->NumPasses; pass++) {
330 if (pass > 0)
331 finish_pass(machine);
332 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
333 texinst = &shader->SetupInst[pass][j];
334 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
335 handle_pass_op(machine, texinst, span, column, j);
336 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
337 handle_sample_op(ctx, machine, texinst, span, column, j);
338 }
339
340 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
341 inst = &shader->Instructions[pass][pc];
342
343 /* setup the source registers for color and alpha ops */
344 for (optype = 0; optype < 2; optype++) {
345 for (i = 0; i < inst->ArgCount[optype]; i++) {
346 GLint index = inst->SrcReg[optype][i].Index;
347
348 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
349 SETUP_SRC_REG(optype, i,
350 machine->Registers[index - GL_REG_0_ATI]);
351 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
352 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
353 SETUP_SRC_REG(optype, i,
354 shader->Constants[index - GL_CON_0_ATI]);
355 } else {
356 SETUP_SRC_REG(optype, i,
357 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
358 }
359 }
360 else if (index == GL_ONE)
361 SETUP_SRC_REG(optype, i, ones);
362 else if (index == GL_ZERO)
363 SETUP_SRC_REG(optype, i, zeros);
364 else if (index == GL_PRIMARY_COLOR_EXT)
365 SETUP_SRC_REG(optype, i,
366 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
367 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
368 SETUP_SRC_REG(optype, i,
369 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
370
371 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
372 src[optype][i]);
373 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
374 src[optype][i]);
375 }
376 }
377
378 /* Execute the operations - color then alpha */
379 for (optype = 0; optype < 2; optype++) {
380 if (inst->Opcode[optype]) {
381 switch (inst->Opcode[optype]) {
382 case GL_ADD_ATI:
383 if (!optype)
384 for (i = 0; i < 3; i++) {
385 dst[optype][i] =
386 src[optype][0][i] + src[optype][1][i];
387 }
388 else
389 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
390 break;
391 case GL_SUB_ATI:
392 if (!optype)
393 for (i = 0; i < 3; i++) {
394 dst[optype][i] =
395 src[optype][0][i] - src[optype][1][i];
396 }
397 else
398 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
399 break;
400 case GL_MUL_ATI:
401 if (!optype)
402 for (i = 0; i < 3; i++) {
403 dst[optype][i] =
404 src[optype][0][i] * src[optype][1][i];
405 }
406 else
407 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
408 break;
409 case GL_MAD_ATI:
410 if (!optype)
411 for (i = 0; i < 3; i++) {
412 dst[optype][i] =
413 src[optype][0][i] * src[optype][1][i] +
414 src[optype][2][i];
415 }
416 else
417 dst[optype][3] =
418 src[optype][0][3] * src[optype][1][3] +
419 src[optype][2][3];
420 break;
421 case GL_LERP_ATI:
422 if (!optype)
423 for (i = 0; i < 3; i++) {
424 dst[optype][i] =
425 src[optype][0][i] * src[optype][1][i] + (1 -
426 src
427 [optype]
428 [0][i]) *
429 src[optype][2][i];
430 }
431 else
432 dst[optype][3] =
433 src[optype][0][3] * src[optype][1][3] + (1 -
434 src[optype]
435 [0][3]) *
436 src[optype][2][3];
437 break;
438
439 case GL_MOV_ATI:
440 if (!optype)
441 for (i = 0; i < 3; i++) {
442 dst[optype][i] = src[optype][0][i];
443 }
444 else
445 dst[optype][3] = src[optype][0][3];
446 break;
447 case GL_CND_ATI:
448 if (!optype) {
449 for (i = 0; i < 3; i++) {
450 dst[optype][i] =
451 (src[optype][2][i] >
452 0.5) ? src[optype][0][i] : src[optype][1][i];
453 }
454 }
455 else {
456 dst[optype][3] =
457 (src[optype][2][3] >
458 0.5) ? src[optype][0][3] : src[optype][1][3];
459 }
460 break;
461
462 case GL_CND0_ATI:
463 if (!optype)
464 for (i = 0; i < 3; i++) {
465 dst[optype][i] =
466 (src[optype][2][i] >=
467 0) ? src[optype][0][i] : src[optype][1][i];
468 }
469 else {
470 dst[optype][3] =
471 (src[optype][2][3] >=
472 0) ? src[optype][0][3] : src[optype][1][3];
473 }
474 break;
475 case GL_DOT2_ADD_ATI:
476 {
477 GLfloat result;
478
479 /* DOT 2 always uses the source from the color op */
480 /* could save recalculation of dot products for alpha inst */
481 result = src[0][0][0] * src[0][1][0] +
482 src[0][0][1] * src[0][1][1] + src[0][2][2];
483 if (!optype) {
484 for (i = 0; i < 3; i++) {
485 dst[optype][i] = result;
486 }
487 }
488 else
489 dst[optype][3] = result;
490 }
491 break;
492 case GL_DOT3_ATI:
493 {
494 GLfloat result;
495
496 /* DOT 3 always uses the source from the color op */
497 result = src[0][0][0] * src[0][1][0] +
498 src[0][0][1] * src[0][1][1] +
499 src[0][0][2] * src[0][1][2];
500
501 if (!optype) {
502 for (i = 0; i < 3; i++) {
503 dst[optype][i] = result;
504 }
505 }
506 else
507 dst[optype][3] = result;
508 }
509 break;
510 case GL_DOT4_ATI:
511 {
512 GLfloat result;
513
514 /* DOT 4 always uses the source from the color op */
515 result = src[0][0][0] * src[0][1][0] +
516 src[0][0][1] * src[0][1][1] +
517 src[0][0][2] * src[0][1][2] +
518 src[0][0][3] * src[0][1][3];
519 if (!optype) {
520 for (i = 0; i < 3; i++) {
521 dst[optype][i] = result;
522 }
523 }
524 else
525 dst[optype][3] = result;
526 }
527 break;
528
529 }
530 }
531 }
532
533 /* write out the destination registers */
534 for (optype = 0; optype < 2; optype++) {
535 if (inst->Opcode[optype]) {
536 dstreg = inst->DstReg[optype].Index;
537 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
538
539 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
540 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
541 write_dst_addr(optype, inst->DstReg[optype].dstMod,
542 inst->DstReg[optype].dstMask, dst[optype],
543 dstp);
544 else
545 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
546 }
547 }
548 }
549 }
550 }
551
552
553 /**
554 * Init fragment shader virtual machine state.
555 */
556 static void
557 init_machine(GLcontext * ctx, struct atifs_machine *machine,
558 const struct ati_fragment_shader *shader,
559 const SWspan *span, GLuint col)
560 {
561 GLfloat (*inputs)[4] = machine->Inputs;
562 GLint i, j;
563
564 for (i = 0; i < 6; i++) {
565 for (j = 0; j < 4; j++)
566 machine->Registers[i][j] = 0.0;
567 }
568
569 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
570 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
571 }
572
573
574
575 /**
576 * Execute the current ATI shader program, operating on the given span.
577 */
578 void
579 _swrast_exec_fragment_shader(GLcontext * ctx, SWspan *span)
580 {
581 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
582 struct atifs_machine machine;
583 GLuint i;
584
585 /* incoming colors should be floats */
586 ASSERT(span->array->ChanType == GL_FLOAT);
587
588 for (i = 0; i < span->end; i++) {
589 if (span->array->mask[i]) {
590 init_machine(ctx, &machine, shader, span, i);
591
592 execute_shader(ctx, shader, &machine, span, i);
593
594 /* store result color */
595 {
596 const GLfloat *colOut = machine.Registers[0];
597 /*fprintf(stderr,"outputs %f %f %f %f\n",
598 colOut[0], colOut[1], colOut[2], colOut[3]); */
599 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
600 }
601 }
602 }
603 }