mesa/formats: define the 2D ASTC formats
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 * Copyright (C) 2004 David Airlie All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 #include "main/glheader.h"
23 #include "main/macros.h"
24 #include "main/atifragshader.h"
25 #include "main/samplerobj.h"
26 #include "swrast/s_atifragshader.h"
27 #include "swrast/s_context.h"
28
29
30 /**
31 * State for executing ATI fragment shader.
32 */
33 struct atifs_machine
34 {
35 GLfloat Registers[6][4]; /** six temporary registers */
36 GLfloat PrevPassRegisters[6][4];
37 GLfloat Inputs[2][4]; /** Primary, secondary input colors */
38 };
39
40
41
42 /**
43 * Fetch a texel.
44 */
45 static void
46 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
47 GLuint unit, GLfloat color[4])
48 {
49 SWcontext *swrast = SWRAST_CONTEXT(ctx);
50
51 /* XXX use a float-valued TextureSample routine here!!! */
52 swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
53 ctx->Texture.Unit[unit]._Current,
54 1, (const GLfloat(*)[4]) texcoord,
55 &lambda, (GLfloat (*)[4]) color);
56 }
57
58 static void
59 apply_swizzle(GLfloat values[4], GLuint swizzle)
60 {
61 GLfloat s, t, r, q;
62
63 s = values[0];
64 t = values[1];
65 r = values[2];
66 q = values[3];
67
68 switch (swizzle) {
69 case GL_SWIZZLE_STR_ATI:
70 values[0] = s;
71 values[1] = t;
72 values[2] = r;
73 break;
74 case GL_SWIZZLE_STQ_ATI:
75 values[0] = s;
76 values[1] = t;
77 values[2] = q;
78 break;
79 case GL_SWIZZLE_STR_DR_ATI:
80 values[0] = s / r;
81 values[1] = t / r;
82 values[2] = 1 / r;
83 break;
84 case GL_SWIZZLE_STQ_DQ_ATI:
85 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
86 if (q == 0.0F)
87 q = 0.000000001F;
88 values[0] = s / q;
89 values[1] = t / q;
90 values[2] = 1.0F / q;
91 break;
92 }
93 values[3] = 0.0;
94 }
95
96 static void
97 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
98 {
99 GLint i;
100 GLint start, end;
101 if (!rep)
102 return;
103
104 start = optype ? 3 : 0;
105 end = 4;
106
107 for (i = start; i < end; i++) {
108 switch (rep) {
109 case GL_RED:
110 val[i] = val[0];
111 break;
112 case GL_GREEN:
113 val[i] = val[1];
114 break;
115 case GL_BLUE:
116 val[i] = val[2];
117 break;
118 case GL_ALPHA:
119 val[i] = val[3];
120 break;
121 }
122 }
123 }
124
125 static void
126 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
127 {
128 GLint i;
129 GLint start, end;
130
131 if (!mod)
132 return;
133
134 start = optype ? 3 : 0;
135 end = 4;
136
137 for (i = start; i < end; i++) {
138 if (mod & GL_COMP_BIT_ATI)
139 val[i] = 1 - val[i];
140
141 if (mod & GL_BIAS_BIT_ATI)
142 val[i] = val[i] - 0.5F;
143
144 if (mod & GL_2X_BIT_ATI)
145 val[i] = 2 * val[i];
146
147 if (mod & GL_NEGATE_BIT_ATI)
148 val[i] = -val[i];
149 }
150 }
151
152 static void
153 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
154 {
155 GLint i;
156 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
157 GLint start, end;
158
159 mod &= ~GL_SATURATE_BIT_ATI;
160
161 start = optype ? 3 : 0;
162 end = optype ? 4 : 3;
163
164 for (i = start; i < end; i++) {
165 switch (mod) {
166 case GL_2X_BIT_ATI:
167 val[i] = 2 * val[i];
168 break;
169 case GL_4X_BIT_ATI:
170 val[i] = 4 * val[i];
171 break;
172 case GL_8X_BIT_ATI:
173 val[i] = 8 * val[i];
174 break;
175 case GL_HALF_BIT_ATI:
176 val[i] = val[i] * 0.5F;
177 break;
178 case GL_QUARTER_BIT_ATI:
179 val[i] = val[i] * 0.25F;
180 break;
181 case GL_EIGHTH_BIT_ATI:
182 val[i] = val[i] * 0.125F;
183 break;
184 }
185
186 if (has_sat) {
187 if (val[i] < 0.0F)
188 val[i] = 0.0F;
189 else if (val[i] > 1.0F)
190 val[i] = 1.0F;
191 }
192 else {
193 if (val[i] < -8.0F)
194 val[i] = -8.0F;
195 else if (val[i] > 8.0F)
196 val[i] = 8.0F;
197 }
198 }
199 }
200
201
202 static void
203 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
204 GLfloat * dst)
205 {
206 GLint i;
207 apply_dst_mod(optype, mod, src);
208
209 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
210 if (mask) {
211 if (mask & GL_RED_BIT_ATI)
212 dst[0] = src[0];
213
214 if (mask & GL_GREEN_BIT_ATI)
215 dst[1] = src[1];
216
217 if (mask & GL_BLUE_BIT_ATI)
218 dst[2] = src[2];
219 }
220 else {
221 for (i = 0; i < 3; i++)
222 dst[i] = src[i];
223 }
224 }
225 else
226 dst[3] = src[3];
227 }
228
229 static void
230 finish_pass(struct atifs_machine *machine)
231 {
232 GLint i;
233
234 for (i = 0; i < 6; i++) {
235 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
236 }
237 }
238
239
240 static void
241 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
242 const SWspan *span, GLuint column, GLuint idx)
243 {
244 GLuint swizzle = texinst->swizzle;
245 GLuint pass_tex = texinst->src;
246
247 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
248 pass_tex -= GL_TEXTURE0_ARB;
249 COPY_4V(machine->Registers[idx],
250 span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]);
251 }
252 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
253 pass_tex -= GL_REG_0_ATI;
254 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
255 }
256 apply_swizzle(machine->Registers[idx], swizzle);
257
258 }
259
260 static void
261 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
262 struct atifs_setupinst *texinst, const SWspan *span,
263 GLuint column, GLuint idx)
264 {
265 /* sample from unit idx using texinst->src as coords */
266 GLuint swizzle = texinst->swizzle;
267 GLuint coord_source = texinst->src;
268 GLfloat tex_coords[4] = { 0 };
269
270 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
271 coord_source -= GL_TEXTURE0_ARB;
272 COPY_4V(tex_coords,
273 span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]);
274 }
275 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
276 coord_source -= GL_REG_0_ATI;
277 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
278 }
279 apply_swizzle(tex_coords, swizzle);
280 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
281 }
282
/**
 * Copy the 4-component value \p x into source-argument slot \p i of the
 * color (optype 0) or alpha (optype 1) operation.
 * Relies on a local array named `src` being in scope at the point of use.
 */
#define SETUP_SRC_REG(optype, i, x)       \
   do {                                   \
      COPY_4V(src[(optype)][(i)], (x));   \
   } while (0)
287
288
289
290 /**
291 * Execute the given fragment shader.
292 * NOTE: we do everything in single-precision floating point
293 * \param ctx - rendering context
294 * \param shader - the shader to execute
295 * \param machine - virtual machine state
296 * \param span - the SWspan we're operating on
297 * \param column - which pixel [i] we're operating on in the span
298 */
299 static void
300 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
301 struct atifs_machine *machine, const SWspan *span,
302 GLuint column)
303 {
304 GLuint pc;
305 struct atifs_instruction *inst;
306 struct atifs_setupinst *texinst;
307 GLint optype;
308 GLuint i;
309 GLint j, pass;
310 GLint dstreg;
311 GLfloat src[2][3][4];
312 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
313 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
314 GLfloat dst[2][4], *dstp;
315
316 for (pass = 0; pass < shader->NumPasses; pass++) {
317 if (pass > 0)
318 finish_pass(machine);
319 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
320 texinst = &shader->SetupInst[pass][j];
321 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
322 handle_pass_op(machine, texinst, span, column, j);
323 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
324 handle_sample_op(ctx, machine, texinst, span, column, j);
325 }
326
327 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
328 inst = &shader->Instructions[pass][pc];
329
330 /* setup the source registers for color and alpha ops */
331 for (optype = 0; optype < 2; optype++) {
332 for (i = 0; i < inst->ArgCount[optype]; i++) {
333 GLint index = inst->SrcReg[optype][i].Index;
334
335 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
336 SETUP_SRC_REG(optype, i,
337 machine->Registers[index - GL_REG_0_ATI]);
338 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
339 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
340 SETUP_SRC_REG(optype, i,
341 shader->Constants[index - GL_CON_0_ATI]);
342 } else {
343 SETUP_SRC_REG(optype, i,
344 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
345 }
346 }
347 else if (index == GL_ONE)
348 SETUP_SRC_REG(optype, i, ones);
349 else if (index == GL_ZERO)
350 SETUP_SRC_REG(optype, i, zeros);
351 else if (index == GL_PRIMARY_COLOR_EXT)
352 SETUP_SRC_REG(optype, i,
353 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
354 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
355 SETUP_SRC_REG(optype, i,
356 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
357
358 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
359 src[optype][i]);
360 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
361 src[optype][i]);
362 }
363 }
364
365 /* Execute the operations - color then alpha */
366 for (optype = 0; optype < 2; optype++) {
367 if (inst->Opcode[optype]) {
368 switch (inst->Opcode[optype]) {
369 case GL_ADD_ATI:
370 if (!optype)
371 for (i = 0; i < 3; i++) {
372 dst[optype][i] =
373 src[optype][0][i] + src[optype][1][i];
374 }
375 else
376 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
377 break;
378 case GL_SUB_ATI:
379 if (!optype)
380 for (i = 0; i < 3; i++) {
381 dst[optype][i] =
382 src[optype][0][i] - src[optype][1][i];
383 }
384 else
385 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
386 break;
387 case GL_MUL_ATI:
388 if (!optype)
389 for (i = 0; i < 3; i++) {
390 dst[optype][i] =
391 src[optype][0][i] * src[optype][1][i];
392 }
393 else
394 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
395 break;
396 case GL_MAD_ATI:
397 if (!optype)
398 for (i = 0; i < 3; i++) {
399 dst[optype][i] =
400 src[optype][0][i] * src[optype][1][i] +
401 src[optype][2][i];
402 }
403 else
404 dst[optype][3] =
405 src[optype][0][3] * src[optype][1][3] +
406 src[optype][2][3];
407 break;
408 case GL_LERP_ATI:
409 if (!optype)
410 for (i = 0; i < 3; i++) {
411 dst[optype][i] =
412 src[optype][0][i] * src[optype][1][i] + (1 -
413 src
414 [optype]
415 [0][i]) *
416 src[optype][2][i];
417 }
418 else
419 dst[optype][3] =
420 src[optype][0][3] * src[optype][1][3] + (1 -
421 src[optype]
422 [0][3]) *
423 src[optype][2][3];
424 break;
425
426 case GL_MOV_ATI:
427 if (!optype)
428 for (i = 0; i < 3; i++) {
429 dst[optype][i] = src[optype][0][i];
430 }
431 else
432 dst[optype][3] = src[optype][0][3];
433 break;
434 case GL_CND_ATI:
435 if (!optype) {
436 for (i = 0; i < 3; i++) {
437 dst[optype][i] =
438 (src[optype][2][i] >
439 0.5F) ? src[optype][0][i] : src[optype][1][i];
440 }
441 }
442 else {
443 dst[optype][3] =
444 (src[optype][2][3] >
445 0.5F) ? src[optype][0][3] : src[optype][1][3];
446 }
447 break;
448
449 case GL_CND0_ATI:
450 if (!optype)
451 for (i = 0; i < 3; i++) {
452 dst[optype][i] =
453 (src[optype][2][i] >=
454 0) ? src[optype][0][i] : src[optype][1][i];
455 }
456 else {
457 dst[optype][3] =
458 (src[optype][2][3] >=
459 0) ? src[optype][0][3] : src[optype][1][3];
460 }
461 break;
462 case GL_DOT2_ADD_ATI:
463 {
464 GLfloat result;
465
466 /* DOT 2 always uses the source from the color op */
467 /* could save recalculation of dot products for alpha inst */
468 result = src[0][0][0] * src[0][1][0] +
469 src[0][0][1] * src[0][1][1] + src[0][2][2];
470 if (!optype) {
471 for (i = 0; i < 3; i++) {
472 dst[optype][i] = result;
473 }
474 }
475 else
476 dst[optype][3] = result;
477 }
478 break;
479 case GL_DOT3_ATI:
480 {
481 GLfloat result;
482
483 /* DOT 3 always uses the source from the color op */
484 result = src[0][0][0] * src[0][1][0] +
485 src[0][0][1] * src[0][1][1] +
486 src[0][0][2] * src[0][1][2];
487
488 if (!optype) {
489 for (i = 0; i < 3; i++) {
490 dst[optype][i] = result;
491 }
492 }
493 else
494 dst[optype][3] = result;
495 }
496 break;
497 case GL_DOT4_ATI:
498 {
499 GLfloat result;
500
501 /* DOT 4 always uses the source from the color op */
502 result = src[0][0][0] * src[0][1][0] +
503 src[0][0][1] * src[0][1][1] +
504 src[0][0][2] * src[0][1][2] +
505 src[0][0][3] * src[0][1][3];
506 if (!optype) {
507 for (i = 0; i < 3; i++) {
508 dst[optype][i] = result;
509 }
510 }
511 else
512 dst[optype][3] = result;
513 }
514 break;
515
516 }
517 }
518 }
519
520 /* write out the destination registers */
521 for (optype = 0; optype < 2; optype++) {
522 if (inst->Opcode[optype]) {
523 dstreg = inst->DstReg[optype].Index;
524 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
525
526 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
527 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
528 write_dst_addr(optype, inst->DstReg[optype].dstMod,
529 inst->DstReg[optype].dstMask, dst[optype],
530 dstp);
531 else
532 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
533 }
534 }
535 }
536 }
537 }
538
539
540 /**
541 * Init fragment shader virtual machine state.
542 */
543 static void
544 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
545 const struct ati_fragment_shader *shader,
546 const SWspan *span, GLuint col)
547 {
548 GLfloat (*inputs)[4] = machine->Inputs;
549 GLint i, j;
550
551 for (i = 0; i < 6; i++) {
552 for (j = 0; j < 4; j++)
553 machine->Registers[i][j] = 0.0;
554 }
555
556 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]);
557 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]);
558 }
559
560
561
562 /**
563 * Execute the current ATI shader program, operating on the given span.
564 */
565 void
566 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
567 {
568 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
569 struct atifs_machine machine;
570 GLuint i;
571
572 /* incoming colors should be floats */
573 assert(span->array->ChanType == GL_FLOAT);
574
575 for (i = 0; i < span->end; i++) {
576 if (span->array->mask[i]) {
577 init_machine(ctx, &machine, shader, span, i);
578
579 execute_shader(ctx, shader, &machine, span, i);
580
581 /* store result color */
582 {
583 const GLfloat *colOut = machine.Registers[0];
584 /*fprintf(stderr,"outputs %f %f %f %f\n",
585 colOut[0], colOut[1], colOut[2], colOut[3]); */
586 COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut);
587 }
588 }
589 }
590 }