Merge branch 'llvm-cliptest-viewport'
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 * Copyright (C) 2004 David Airlie All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 #include "main/glheader.h"
23 #include "main/colormac.h"
24 #include "main/macros.h"
25 #include "main/atifragshader.h"
26 #include "swrast/s_atifragshader.h"
27 #include "swrast/s_context.h"
28
29
30 /**
31 * State for executing ATI fragment shader.
32 */
33 struct atifs_machine
34 {
35 GLfloat Registers[6][4]; /** six temporary registers */
36 GLfloat PrevPassRegisters[6][4];
37 GLfloat Inputs[2][4]; /** Primary, secondary input colors */
38 };
39
40
41
42 /**
43 * Fetch a texel.
44 */
45 static void
46 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
47 GLuint unit, GLfloat color[4])
48 {
49 SWcontext *swrast = SWRAST_CONTEXT(ctx);
50
51 /* XXX use a float-valued TextureSample routine here!!! */
52 swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
53 1, (const GLfloat(*)[4]) texcoord,
54 &lambda, (GLfloat (*)[4]) color);
55 }
56
57 static void
58 apply_swizzle(GLfloat values[4], GLuint swizzle)
59 {
60 GLfloat s, t, r, q;
61
62 s = values[0];
63 t = values[1];
64 r = values[2];
65 q = values[3];
66
67 switch (swizzle) {
68 case GL_SWIZZLE_STR_ATI:
69 values[0] = s;
70 values[1] = t;
71 values[2] = r;
72 break;
73 case GL_SWIZZLE_STQ_ATI:
74 values[0] = s;
75 values[1] = t;
76 values[2] = q;
77 break;
78 case GL_SWIZZLE_STR_DR_ATI:
79 values[0] = s / r;
80 values[1] = t / r;
81 values[2] = 1 / r;
82 break;
83 case GL_SWIZZLE_STQ_DQ_ATI:
84 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
85 if (q == 0.0F)
86 q = 0.000000001F;
87 values[0] = s / q;
88 values[1] = t / q;
89 values[2] = 1.0F / q;
90 break;
91 }
92 values[3] = 0.0;
93 }
94
95 static void
96 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
97 {
98 GLint i;
99 GLint start, end;
100 if (!rep)
101 return;
102
103 start = optype ? 3 : 0;
104 end = 4;
105
106 for (i = start; i < end; i++) {
107 switch (rep) {
108 case GL_RED:
109 val[i] = val[0];
110 break;
111 case GL_GREEN:
112 val[i] = val[1];
113 break;
114 case GL_BLUE:
115 val[i] = val[2];
116 break;
117 case GL_ALPHA:
118 val[i] = val[3];
119 break;
120 }
121 }
122 }
123
124 static void
125 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
126 {
127 GLint i;
128 GLint start, end;
129
130 if (!mod)
131 return;
132
133 start = optype ? 3 : 0;
134 end = 4;
135
136 for (i = start; i < end; i++) {
137 if (mod & GL_COMP_BIT_ATI)
138 val[i] = 1 - val[i];
139
140 if (mod & GL_BIAS_BIT_ATI)
141 val[i] = val[i] - 0.5F;
142
143 if (mod & GL_2X_BIT_ATI)
144 val[i] = 2 * val[i];
145
146 if (mod & GL_NEGATE_BIT_ATI)
147 val[i] = -val[i];
148 }
149 }
150
151 static void
152 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
153 {
154 GLint i;
155 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
156 GLint start, end;
157
158 mod &= ~GL_SATURATE_BIT_ATI;
159
160 start = optype ? 3 : 0;
161 end = optype ? 4 : 3;
162
163 for (i = start; i < end; i++) {
164 switch (mod) {
165 case GL_2X_BIT_ATI:
166 val[i] = 2 * val[i];
167 break;
168 case GL_4X_BIT_ATI:
169 val[i] = 4 * val[i];
170 break;
171 case GL_8X_BIT_ATI:
172 val[i] = 8 * val[i];
173 break;
174 case GL_HALF_BIT_ATI:
175 val[i] = val[i] * 0.5F;
176 break;
177 case GL_QUARTER_BIT_ATI:
178 val[i] = val[i] * 0.25F;
179 break;
180 case GL_EIGHTH_BIT_ATI:
181 val[i] = val[i] * 0.125F;
182 break;
183 }
184
185 if (has_sat) {
186 if (val[i] < 0.0F)
187 val[i] = 0.0F;
188 else if (val[i] > 1.0F)
189 val[i] = 1.0F;
190 }
191 else {
192 if (val[i] < -8.0F)
193 val[i] = -8.0F;
194 else if (val[i] > 8.0F)
195 val[i] = 8.0F;
196 }
197 }
198 }
199
200
201 static void
202 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
203 GLfloat * dst)
204 {
205 GLint i;
206 apply_dst_mod(optype, mod, src);
207
208 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
209 if (mask) {
210 if (mask & GL_RED_BIT_ATI)
211 dst[0] = src[0];
212
213 if (mask & GL_GREEN_BIT_ATI)
214 dst[1] = src[1];
215
216 if (mask & GL_BLUE_BIT_ATI)
217 dst[2] = src[2];
218 }
219 else {
220 for (i = 0; i < 3; i++)
221 dst[i] = src[i];
222 }
223 }
224 else
225 dst[3] = src[3];
226 }
227
228 static void
229 finish_pass(struct atifs_machine *machine)
230 {
231 GLint i;
232
233 for (i = 0; i < 6; i++) {
234 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
235 }
236 }
237
238 struct ati_fs_opcode_st ati_fs_opcodes[] = {
239 {GL_ADD_ATI, 2},
240 {GL_SUB_ATI, 2},
241 {GL_MUL_ATI, 2},
242 {GL_MAD_ATI, 3},
243 {GL_LERP_ATI, 3},
244 {GL_MOV_ATI, 1},
245 {GL_CND_ATI, 3},
246 {GL_CND0_ATI, 3},
247 {GL_DOT2_ADD_ATI, 3},
248 {GL_DOT3_ATI, 2},
249 {GL_DOT4_ATI, 2}
250 };
251
252
253
254 static void
255 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
256 const SWspan *span, GLuint column, GLuint idx)
257 {
258 GLuint swizzle = texinst->swizzle;
259 GLuint pass_tex = texinst->src;
260
261 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
262 pass_tex -= GL_TEXTURE0_ARB;
263 COPY_4V(machine->Registers[idx],
264 span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
265 }
266 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
267 pass_tex -= GL_REG_0_ATI;
268 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
269 }
270 apply_swizzle(machine->Registers[idx], swizzle);
271
272 }
273
274 static void
275 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
276 struct atifs_setupinst *texinst, const SWspan *span,
277 GLuint column, GLuint idx)
278 {
279 /* sample from unit idx using texinst->src as coords */
280 GLuint swizzle = texinst->swizzle;
281 GLuint coord_source = texinst->src;
282 GLfloat tex_coords[4] = { 0 };
283
284 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
285 coord_source -= GL_TEXTURE0_ARB;
286 COPY_4V(tex_coords,
287 span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
288 }
289 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
290 coord_source -= GL_REG_0_ATI;
291 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
292 }
293 apply_swizzle(tex_coords, swizzle);
294 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
295 }
296
/**
 * Copy the 4-vector x into source-operand slot i of op type optype.
 * Expects a local array named 'src' to be in scope at the point of use.
 */
#define SETUP_SRC_REG(optype, i, x)          \
   do {                                      \
      COPY_4V(src[(optype)][(i)], (x));      \
   } while (0)
301
302
303
304 /**
305 * Execute the given fragment shader.
306 * NOTE: we do everything in single-precision floating point
307 * \param ctx - rendering context
308 * \param shader - the shader to execute
309 * \param machine - virtual machine state
310 * \param span - the SWspan we're operating on
311 * \param column - which pixel [i] we're operating on in the span
312 */
313 static void
314 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
315 struct atifs_machine *machine, const SWspan *span,
316 GLuint column)
317 {
318 GLuint pc;
319 struct atifs_instruction *inst;
320 struct atifs_setupinst *texinst;
321 GLint optype;
322 GLuint i;
323 GLint j, pass;
324 GLint dstreg;
325 GLfloat src[2][3][4];
326 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
327 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
328 GLfloat dst[2][4], *dstp;
329
330 for (pass = 0; pass < shader->NumPasses; pass++) {
331 if (pass > 0)
332 finish_pass(machine);
333 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
334 texinst = &shader->SetupInst[pass][j];
335 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
336 handle_pass_op(machine, texinst, span, column, j);
337 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
338 handle_sample_op(ctx, machine, texinst, span, column, j);
339 }
340
341 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
342 inst = &shader->Instructions[pass][pc];
343
344 /* setup the source registers for color and alpha ops */
345 for (optype = 0; optype < 2; optype++) {
346 for (i = 0; i < inst->ArgCount[optype]; i++) {
347 GLint index = inst->SrcReg[optype][i].Index;
348
349 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
350 SETUP_SRC_REG(optype, i,
351 machine->Registers[index - GL_REG_0_ATI]);
352 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
353 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
354 SETUP_SRC_REG(optype, i,
355 shader->Constants[index - GL_CON_0_ATI]);
356 } else {
357 SETUP_SRC_REG(optype, i,
358 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
359 }
360 }
361 else if (index == GL_ONE)
362 SETUP_SRC_REG(optype, i, ones);
363 else if (index == GL_ZERO)
364 SETUP_SRC_REG(optype, i, zeros);
365 else if (index == GL_PRIMARY_COLOR_EXT)
366 SETUP_SRC_REG(optype, i,
367 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
368 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
369 SETUP_SRC_REG(optype, i,
370 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
371
372 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
373 src[optype][i]);
374 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
375 src[optype][i]);
376 }
377 }
378
379 /* Execute the operations - color then alpha */
380 for (optype = 0; optype < 2; optype++) {
381 if (inst->Opcode[optype]) {
382 switch (inst->Opcode[optype]) {
383 case GL_ADD_ATI:
384 if (!optype)
385 for (i = 0; i < 3; i++) {
386 dst[optype][i] =
387 src[optype][0][i] + src[optype][1][i];
388 }
389 else
390 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
391 break;
392 case GL_SUB_ATI:
393 if (!optype)
394 for (i = 0; i < 3; i++) {
395 dst[optype][i] =
396 src[optype][0][i] - src[optype][1][i];
397 }
398 else
399 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
400 break;
401 case GL_MUL_ATI:
402 if (!optype)
403 for (i = 0; i < 3; i++) {
404 dst[optype][i] =
405 src[optype][0][i] * src[optype][1][i];
406 }
407 else
408 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
409 break;
410 case GL_MAD_ATI:
411 if (!optype)
412 for (i = 0; i < 3; i++) {
413 dst[optype][i] =
414 src[optype][0][i] * src[optype][1][i] +
415 src[optype][2][i];
416 }
417 else
418 dst[optype][3] =
419 src[optype][0][3] * src[optype][1][3] +
420 src[optype][2][3];
421 break;
422 case GL_LERP_ATI:
423 if (!optype)
424 for (i = 0; i < 3; i++) {
425 dst[optype][i] =
426 src[optype][0][i] * src[optype][1][i] + (1 -
427 src
428 [optype]
429 [0][i]) *
430 src[optype][2][i];
431 }
432 else
433 dst[optype][3] =
434 src[optype][0][3] * src[optype][1][3] + (1 -
435 src[optype]
436 [0][3]) *
437 src[optype][2][3];
438 break;
439
440 case GL_MOV_ATI:
441 if (!optype)
442 for (i = 0; i < 3; i++) {
443 dst[optype][i] = src[optype][0][i];
444 }
445 else
446 dst[optype][3] = src[optype][0][3];
447 break;
448 case GL_CND_ATI:
449 if (!optype) {
450 for (i = 0; i < 3; i++) {
451 dst[optype][i] =
452 (src[optype][2][i] >
453 0.5) ? src[optype][0][i] : src[optype][1][i];
454 }
455 }
456 else {
457 dst[optype][3] =
458 (src[optype][2][3] >
459 0.5) ? src[optype][0][3] : src[optype][1][3];
460 }
461 break;
462
463 case GL_CND0_ATI:
464 if (!optype)
465 for (i = 0; i < 3; i++) {
466 dst[optype][i] =
467 (src[optype][2][i] >=
468 0) ? src[optype][0][i] : src[optype][1][i];
469 }
470 else {
471 dst[optype][3] =
472 (src[optype][2][3] >=
473 0) ? src[optype][0][3] : src[optype][1][3];
474 }
475 break;
476 case GL_DOT2_ADD_ATI:
477 {
478 GLfloat result;
479
480 /* DOT 2 always uses the source from the color op */
481 /* could save recalculation of dot products for alpha inst */
482 result = src[0][0][0] * src[0][1][0] +
483 src[0][0][1] * src[0][1][1] + src[0][2][2];
484 if (!optype) {
485 for (i = 0; i < 3; i++) {
486 dst[optype][i] = result;
487 }
488 }
489 else
490 dst[optype][3] = result;
491 }
492 break;
493 case GL_DOT3_ATI:
494 {
495 GLfloat result;
496
497 /* DOT 3 always uses the source from the color op */
498 result = src[0][0][0] * src[0][1][0] +
499 src[0][0][1] * src[0][1][1] +
500 src[0][0][2] * src[0][1][2];
501
502 if (!optype) {
503 for (i = 0; i < 3; i++) {
504 dst[optype][i] = result;
505 }
506 }
507 else
508 dst[optype][3] = result;
509 }
510 break;
511 case GL_DOT4_ATI:
512 {
513 GLfloat result;
514
515 /* DOT 4 always uses the source from the color op */
516 result = src[0][0][0] * src[0][1][0] +
517 src[0][0][1] * src[0][1][1] +
518 src[0][0][2] * src[0][1][2] +
519 src[0][0][3] * src[0][1][3];
520 if (!optype) {
521 for (i = 0; i < 3; i++) {
522 dst[optype][i] = result;
523 }
524 }
525 else
526 dst[optype][3] = result;
527 }
528 break;
529
530 }
531 }
532 }
533
534 /* write out the destination registers */
535 for (optype = 0; optype < 2; optype++) {
536 if (inst->Opcode[optype]) {
537 dstreg = inst->DstReg[optype].Index;
538 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
539
540 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
541 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
542 write_dst_addr(optype, inst->DstReg[optype].dstMod,
543 inst->DstReg[optype].dstMask, dst[optype],
544 dstp);
545 else
546 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
547 }
548 }
549 }
550 }
551 }
552
553
554 /**
555 * Init fragment shader virtual machine state.
556 */
557 static void
558 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
559 const struct ati_fragment_shader *shader,
560 const SWspan *span, GLuint col)
561 {
562 GLfloat (*inputs)[4] = machine->Inputs;
563 GLint i, j;
564
565 for (i = 0; i < 6; i++) {
566 for (j = 0; j < 4; j++)
567 machine->Registers[i][j] = 0.0;
568 }
569
570 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
571 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
572 }
573
574
575
576 /**
577 * Execute the current ATI shader program, operating on the given span.
578 */
579 void
580 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
581 {
582 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
583 struct atifs_machine machine;
584 GLuint i;
585
586 /* incoming colors should be floats */
587 ASSERT(span->array->ChanType == GL_FLOAT);
588
589 for (i = 0; i < span->end; i++) {
590 if (span->array->mask[i]) {
591 init_machine(ctx, &machine, shader, span, i);
592
593 execute_shader(ctx, shader, &machine, span, i);
594
595 /* store result color */
596 {
597 const GLfloat *colOut = machine.Registers[0];
598 /*fprintf(stderr,"outputs %f %f %f %f\n",
599 colOut[0], colOut[1], colOut[2], colOut[3]); */
600 COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
601 }
602 }
603 }
604 }