swrast: Use BITFIELD64_BIT for arrayAttribs.
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 * Copyright (C) 2004 David Airlie All Rights Reserved.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20 */
21
22 #include "main/glheader.h"
23 #include "main/colormac.h"
24 #include "main/macros.h"
25 #include "main/atifragshader.h"
26 #include "main/samplerobj.h"
27 #include "swrast/s_atifragshader.h"
28 #include "swrast/s_context.h"
29
30
31 /**
32 * State for executing ATI fragment shader.
33 */
34 struct atifs_machine
35 {
36 GLfloat Registers[6][4]; /** six temporary registers */
37 GLfloat PrevPassRegisters[6][4];
38 GLfloat Inputs[2][4]; /** Primary, secondary input colors */
39 };
40
41
42
43 /**
44 * Fetch a texel.
45 */
46 static void
47 fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
48 GLuint unit, GLfloat color[4])
49 {
50 SWcontext *swrast = SWRAST_CONTEXT(ctx);
51
52 /* XXX use a float-valued TextureSample routine here!!! */
53 swrast->TextureSample[unit](ctx, _mesa_get_samplerobj(ctx, unit),
54 ctx->Texture.Unit[unit]._Current,
55 1, (const GLfloat(*)[4]) texcoord,
56 &lambda, (GLfloat (*)[4]) color);
57 }
58
59 static void
60 apply_swizzle(GLfloat values[4], GLuint swizzle)
61 {
62 GLfloat s, t, r, q;
63
64 s = values[0];
65 t = values[1];
66 r = values[2];
67 q = values[3];
68
69 switch (swizzle) {
70 case GL_SWIZZLE_STR_ATI:
71 values[0] = s;
72 values[1] = t;
73 values[2] = r;
74 break;
75 case GL_SWIZZLE_STQ_ATI:
76 values[0] = s;
77 values[1] = t;
78 values[2] = q;
79 break;
80 case GL_SWIZZLE_STR_DR_ATI:
81 values[0] = s / r;
82 values[1] = t / r;
83 values[2] = 1 / r;
84 break;
85 case GL_SWIZZLE_STQ_DQ_ATI:
86 /* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
87 if (q == 0.0F)
88 q = 0.000000001F;
89 values[0] = s / q;
90 values[1] = t / q;
91 values[2] = 1.0F / q;
92 break;
93 }
94 values[3] = 0.0;
95 }
96
97 static void
98 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
99 {
100 GLint i;
101 GLint start, end;
102 if (!rep)
103 return;
104
105 start = optype ? 3 : 0;
106 end = 4;
107
108 for (i = start; i < end; i++) {
109 switch (rep) {
110 case GL_RED:
111 val[i] = val[0];
112 break;
113 case GL_GREEN:
114 val[i] = val[1];
115 break;
116 case GL_BLUE:
117 val[i] = val[2];
118 break;
119 case GL_ALPHA:
120 val[i] = val[3];
121 break;
122 }
123 }
124 }
125
126 static void
127 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
128 {
129 GLint i;
130 GLint start, end;
131
132 if (!mod)
133 return;
134
135 start = optype ? 3 : 0;
136 end = 4;
137
138 for (i = start; i < end; i++) {
139 if (mod & GL_COMP_BIT_ATI)
140 val[i] = 1 - val[i];
141
142 if (mod & GL_BIAS_BIT_ATI)
143 val[i] = val[i] - 0.5F;
144
145 if (mod & GL_2X_BIT_ATI)
146 val[i] = 2 * val[i];
147
148 if (mod & GL_NEGATE_BIT_ATI)
149 val[i] = -val[i];
150 }
151 }
152
153 static void
154 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
155 {
156 GLint i;
157 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
158 GLint start, end;
159
160 mod &= ~GL_SATURATE_BIT_ATI;
161
162 start = optype ? 3 : 0;
163 end = optype ? 4 : 3;
164
165 for (i = start; i < end; i++) {
166 switch (mod) {
167 case GL_2X_BIT_ATI:
168 val[i] = 2 * val[i];
169 break;
170 case GL_4X_BIT_ATI:
171 val[i] = 4 * val[i];
172 break;
173 case GL_8X_BIT_ATI:
174 val[i] = 8 * val[i];
175 break;
176 case GL_HALF_BIT_ATI:
177 val[i] = val[i] * 0.5F;
178 break;
179 case GL_QUARTER_BIT_ATI:
180 val[i] = val[i] * 0.25F;
181 break;
182 case GL_EIGHTH_BIT_ATI:
183 val[i] = val[i] * 0.125F;
184 break;
185 }
186
187 if (has_sat) {
188 if (val[i] < 0.0F)
189 val[i] = 0.0F;
190 else if (val[i] > 1.0F)
191 val[i] = 1.0F;
192 }
193 else {
194 if (val[i] < -8.0F)
195 val[i] = -8.0F;
196 else if (val[i] > 8.0F)
197 val[i] = 8.0F;
198 }
199 }
200 }
201
202
203 static void
204 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
205 GLfloat * dst)
206 {
207 GLint i;
208 apply_dst_mod(optype, mod, src);
209
210 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
211 if (mask) {
212 if (mask & GL_RED_BIT_ATI)
213 dst[0] = src[0];
214
215 if (mask & GL_GREEN_BIT_ATI)
216 dst[1] = src[1];
217
218 if (mask & GL_BLUE_BIT_ATI)
219 dst[2] = src[2];
220 }
221 else {
222 for (i = 0; i < 3; i++)
223 dst[i] = src[i];
224 }
225 }
226 else
227 dst[3] = src[3];
228 }
229
230 static void
231 finish_pass(struct atifs_machine *machine)
232 {
233 GLint i;
234
235 for (i = 0; i < 6; i++) {
236 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
237 }
238 }
239
240
241 static void
242 handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
243 const SWspan *span, GLuint column, GLuint idx)
244 {
245 GLuint swizzle = texinst->swizzle;
246 GLuint pass_tex = texinst->src;
247
248 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
249 pass_tex -= GL_TEXTURE0_ARB;
250 COPY_4V(machine->Registers[idx],
251 span->array->attribs[VARYING_SLOT_TEX0 + pass_tex][column]);
252 }
253 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
254 pass_tex -= GL_REG_0_ATI;
255 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
256 }
257 apply_swizzle(machine->Registers[idx], swizzle);
258
259 }
260
261 static void
262 handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
263 struct atifs_setupinst *texinst, const SWspan *span,
264 GLuint column, GLuint idx)
265 {
266 /* sample from unit idx using texinst->src as coords */
267 GLuint swizzle = texinst->swizzle;
268 GLuint coord_source = texinst->src;
269 GLfloat tex_coords[4] = { 0 };
270
271 if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
272 coord_source -= GL_TEXTURE0_ARB;
273 COPY_4V(tex_coords,
274 span->array->attribs[VARYING_SLOT_TEX0 + coord_source][column]);
275 }
276 else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
277 coord_source -= GL_REG_0_ATI;
278 COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
279 }
280 apply_swizzle(tex_coords, swizzle);
281 fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
282 }
283
/**
 * Copy the four-component vector \p x into source operand slot
 * src[optype][i].  An array named `src` must be visible where the macro
 * is expanded.
 */
#define SETUP_SRC_REG(optype, i, x)              \
   do {                                          \
      COPY_4V(src[(optype)][(i)], (x));          \
   } while (0)
288
289
290
291 /**
292 * Execute the given fragment shader.
293 * NOTE: we do everything in single-precision floating point
294 * \param ctx - rendering context
295 * \param shader - the shader to execute
296 * \param machine - virtual machine state
297 * \param span - the SWspan we're operating on
298 * \param column - which pixel [i] we're operating on in the span
299 */
300 static void
301 execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
302 struct atifs_machine *machine, const SWspan *span,
303 GLuint column)
304 {
305 GLuint pc;
306 struct atifs_instruction *inst;
307 struct atifs_setupinst *texinst;
308 GLint optype;
309 GLuint i;
310 GLint j, pass;
311 GLint dstreg;
312 GLfloat src[2][3][4];
313 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
314 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
315 GLfloat dst[2][4], *dstp;
316
317 for (pass = 0; pass < shader->NumPasses; pass++) {
318 if (pass > 0)
319 finish_pass(machine);
320 for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
321 texinst = &shader->SetupInst[pass][j];
322 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
323 handle_pass_op(machine, texinst, span, column, j);
324 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
325 handle_sample_op(ctx, machine, texinst, span, column, j);
326 }
327
328 for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
329 inst = &shader->Instructions[pass][pc];
330
331 /* setup the source registers for color and alpha ops */
332 for (optype = 0; optype < 2; optype++) {
333 for (i = 0; i < inst->ArgCount[optype]; i++) {
334 GLint index = inst->SrcReg[optype][i].Index;
335
336 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
337 SETUP_SRC_REG(optype, i,
338 machine->Registers[index - GL_REG_0_ATI]);
339 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
340 if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
341 SETUP_SRC_REG(optype, i,
342 shader->Constants[index - GL_CON_0_ATI]);
343 } else {
344 SETUP_SRC_REG(optype, i,
345 ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
346 }
347 }
348 else if (index == GL_ONE)
349 SETUP_SRC_REG(optype, i, ones);
350 else if (index == GL_ZERO)
351 SETUP_SRC_REG(optype, i, zeros);
352 else if (index == GL_PRIMARY_COLOR_EXT)
353 SETUP_SRC_REG(optype, i,
354 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
355 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
356 SETUP_SRC_REG(optype, i,
357 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
358
359 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
360 src[optype][i]);
361 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
362 src[optype][i]);
363 }
364 }
365
366 /* Execute the operations - color then alpha */
367 for (optype = 0; optype < 2; optype++) {
368 if (inst->Opcode[optype]) {
369 switch (inst->Opcode[optype]) {
370 case GL_ADD_ATI:
371 if (!optype)
372 for (i = 0; i < 3; i++) {
373 dst[optype][i] =
374 src[optype][0][i] + src[optype][1][i];
375 }
376 else
377 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
378 break;
379 case GL_SUB_ATI:
380 if (!optype)
381 for (i = 0; i < 3; i++) {
382 dst[optype][i] =
383 src[optype][0][i] - src[optype][1][i];
384 }
385 else
386 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
387 break;
388 case GL_MUL_ATI:
389 if (!optype)
390 for (i = 0; i < 3; i++) {
391 dst[optype][i] =
392 src[optype][0][i] * src[optype][1][i];
393 }
394 else
395 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
396 break;
397 case GL_MAD_ATI:
398 if (!optype)
399 for (i = 0; i < 3; i++) {
400 dst[optype][i] =
401 src[optype][0][i] * src[optype][1][i] +
402 src[optype][2][i];
403 }
404 else
405 dst[optype][3] =
406 src[optype][0][3] * src[optype][1][3] +
407 src[optype][2][3];
408 break;
409 case GL_LERP_ATI:
410 if (!optype)
411 for (i = 0; i < 3; i++) {
412 dst[optype][i] =
413 src[optype][0][i] * src[optype][1][i] + (1 -
414 src
415 [optype]
416 [0][i]) *
417 src[optype][2][i];
418 }
419 else
420 dst[optype][3] =
421 src[optype][0][3] * src[optype][1][3] + (1 -
422 src[optype]
423 [0][3]) *
424 src[optype][2][3];
425 break;
426
427 case GL_MOV_ATI:
428 if (!optype)
429 for (i = 0; i < 3; i++) {
430 dst[optype][i] = src[optype][0][i];
431 }
432 else
433 dst[optype][3] = src[optype][0][3];
434 break;
435 case GL_CND_ATI:
436 if (!optype) {
437 for (i = 0; i < 3; i++) {
438 dst[optype][i] =
439 (src[optype][2][i] >
440 0.5) ? src[optype][0][i] : src[optype][1][i];
441 }
442 }
443 else {
444 dst[optype][3] =
445 (src[optype][2][3] >
446 0.5) ? src[optype][0][3] : src[optype][1][3];
447 }
448 break;
449
450 case GL_CND0_ATI:
451 if (!optype)
452 for (i = 0; i < 3; i++) {
453 dst[optype][i] =
454 (src[optype][2][i] >=
455 0) ? src[optype][0][i] : src[optype][1][i];
456 }
457 else {
458 dst[optype][3] =
459 (src[optype][2][3] >=
460 0) ? src[optype][0][3] : src[optype][1][3];
461 }
462 break;
463 case GL_DOT2_ADD_ATI:
464 {
465 GLfloat result;
466
467 /* DOT 2 always uses the source from the color op */
468 /* could save recalculation of dot products for alpha inst */
469 result = src[0][0][0] * src[0][1][0] +
470 src[0][0][1] * src[0][1][1] + src[0][2][2];
471 if (!optype) {
472 for (i = 0; i < 3; i++) {
473 dst[optype][i] = result;
474 }
475 }
476 else
477 dst[optype][3] = result;
478 }
479 break;
480 case GL_DOT3_ATI:
481 {
482 GLfloat result;
483
484 /* DOT 3 always uses the source from the color op */
485 result = src[0][0][0] * src[0][1][0] +
486 src[0][0][1] * src[0][1][1] +
487 src[0][0][2] * src[0][1][2];
488
489 if (!optype) {
490 for (i = 0; i < 3; i++) {
491 dst[optype][i] = result;
492 }
493 }
494 else
495 dst[optype][3] = result;
496 }
497 break;
498 case GL_DOT4_ATI:
499 {
500 GLfloat result;
501
502 /* DOT 4 always uses the source from the color op */
503 result = src[0][0][0] * src[0][1][0] +
504 src[0][0][1] * src[0][1][1] +
505 src[0][0][2] * src[0][1][2] +
506 src[0][0][3] * src[0][1][3];
507 if (!optype) {
508 for (i = 0; i < 3; i++) {
509 dst[optype][i] = result;
510 }
511 }
512 else
513 dst[optype][3] = result;
514 }
515 break;
516
517 }
518 }
519 }
520
521 /* write out the destination registers */
522 for (optype = 0; optype < 2; optype++) {
523 if (inst->Opcode[optype]) {
524 dstreg = inst->DstReg[optype].Index;
525 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
526
527 if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
528 (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
529 write_dst_addr(optype, inst->DstReg[optype].dstMod,
530 inst->DstReg[optype].dstMask, dst[optype],
531 dstp);
532 else
533 write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
534 }
535 }
536 }
537 }
538 }
539
540
541 /**
542 * Init fragment shader virtual machine state.
543 */
544 static void
545 init_machine(struct gl_context * ctx, struct atifs_machine *machine,
546 const struct ati_fragment_shader *shader,
547 const SWspan *span, GLuint col)
548 {
549 GLfloat (*inputs)[4] = machine->Inputs;
550 GLint i, j;
551
552 for (i = 0; i < 6; i++) {
553 for (j = 0; j < 4; j++)
554 machine->Registers[i][j] = 0.0;
555 }
556
557 COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[VARYING_SLOT_COL0][col]);
558 COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[VARYING_SLOT_COL1][col]);
559 }
560
561
562
563 /**
564 * Execute the current ATI shader program, operating on the given span.
565 */
566 void
567 _swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
568 {
569 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
570 struct atifs_machine machine;
571 GLuint i;
572
573 /* incoming colors should be floats */
574 assert(span->array->ChanType == GL_FLOAT);
575
576 for (i = 0; i < span->end; i++) {
577 if (span->array->mask[i]) {
578 init_machine(ctx, &machine, shader, span, i);
579
580 execute_shader(ctx, shader, &machine, span, i);
581
582 /* store result color */
583 {
584 const GLfloat *colOut = machine.Registers[0];
585 /*fprintf(stderr,"outputs %f %f %f %f\n",
586 colOut[0], colOut[1], colOut[2], colOut[3]); */
587 COPY_4V(span->array->attribs[VARYING_SLOT_COL0][i], colOut);
588 }
589 }
590 }
591 }