Runtime generate sse/sse2 code for some vertex programs. Experimental
[mesa.git] / src / mesa / swrast / s_atifragshader.c
1 /*
2 *
3 * Copyright (C) 2004 David Airlie All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 */
22
23 #include "glheader.h"
24 #include "colormac.h"
25 #include "context.h"
26 #include "atifragshader.h"
27 #include "macros.h"
28 #include "program.h"
29
30 #include "s_atifragshader.h"
31 #include "s_nvfragprog.h"
32 #include "s_span.h"
33 #include "s_texture.h"
34
35 /**
36 * Fetch a texel.
37 */
38 static void
39 fetch_texel(GLcontext * ctx, const GLfloat texcoord[4], GLfloat lambda,
40 GLuint unit, GLfloat color[4])
41 {
42 GLchan rgba[4];
43 SWcontext *swrast = SWRAST_CONTEXT(ctx);
44
45 /* XXX use a float-valued TextureSample routine here!!! */
46 swrast->TextureSample[unit] (ctx, unit, ctx->Texture.Unit[unit]._Current,
47 1, (const GLfloat(*)[4]) texcoord,
48 &lambda, &rgba);
49 color[0] = CHAN_TO_FLOAT(rgba[0]);
50 color[1] = CHAN_TO_FLOAT(rgba[1]);
51 color[2] = CHAN_TO_FLOAT(rgba[2]);
52 color[3] = CHAN_TO_FLOAT(rgba[3]);
53 }
54
55 static void
56 apply_swizzle(struct atifs_machine *machine, GLuint reg, GLuint swizzle)
57 {
58 GLfloat s, t, r, q;
59
60 s = machine->Registers[reg][0];
61 t = machine->Registers[reg][1];
62 r = machine->Registers[reg][2];
63 q = machine->Registers[reg][3];
64
65 switch (swizzle) {
66 case GL_SWIZZLE_STR_ATI:
67 machine->Registers[reg][0] = s;
68 machine->Registers[reg][1] = t;
69 machine->Registers[reg][2] = r;
70 break;
71 case GL_SWIZZLE_STQ_ATI:
72 machine->Registers[reg][0] = s;
73 machine->Registers[reg][1] = t;
74 machine->Registers[reg][2] = q;
75 break;
76 case GL_SWIZZLE_STR_DR_ATI:
77 machine->Registers[reg][0] = s / r;
78 machine->Registers[reg][1] = t / r;
79 machine->Registers[reg][2] = 1 / r;
80 break;
81 case GL_SWIZZLE_STQ_DQ_ATI:
82 machine->Registers[reg][0] = s / q;
83 machine->Registers[reg][1] = t / q;
84 machine->Registers[reg][2] = 1 / q;
85 break;
86 }
87 machine->Registers[reg][3] = 0.0;
88 }
89
90 static void
91 apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
92 {
93 GLint i;
94 GLint start, end;
95 if (!rep)
96 return;
97
98 start = optype ? 3 : 0;
99 end = optype ? 4 : 3;
100
101 for (i = start; i < end; i++) {
102 switch (rep) {
103 case GL_RED:
104 val[i] = val[0];
105 break;
106 case GL_GREEN:
107 val[i] = val[1];
108 break;
109 case GL_BLUE:
110 val[i] = val[2];
111 break;
112 case GL_ALPHA:
113 val[i] = val[3];
114 break;
115 }
116 }
117 }
118
119 static void
120 apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
121 {
122 GLint i;
123 GLint start, end;
124
125 if (!mod)
126 return;
127
128 start = optype ? 3 : 0;
129 end = optype ? 4 : 3;
130
131 for (i = start; i < end; i++) {
132 if (mod & GL_COMP_BIT_ATI)
133 val[i] = 1 - val[i];
134
135 if (mod & GL_BIAS_BIT_ATI)
136 val[i] = val[i] - 0.5;
137
138 if (mod & GL_2X_BIT_ATI)
139 val[i] = 2 * val[i];
140
141 if (mod & GL_NEGATE_BIT_ATI)
142 val[i] = -val[i];
143 }
144 }
145
146 static void
147 apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
148 {
149 GLint i;
150 GLint has_sat = mod & GL_SATURATE_BIT_ATI;
151 GLint start, end;
152
153 mod &= ~GL_SATURATE_BIT_ATI;
154
155 start = optype ? 3 : 0;
156 end = optype ? 4 : 3;
157
158 for (i = start; i < end; i++) {
159 switch (mod) {
160 case GL_2X_BIT_ATI:
161 val[i] = 2 * val[i];
162 break;
163 case GL_4X_BIT_ATI:
164 val[i] = 4 * val[i];
165 break;
166 case GL_8X_BIT_ATI:
167 val[i] = 8 * val[i];
168 break;
169 case GL_HALF_BIT_ATI:
170 val[i] = val[i] * 0.5;
171 break;
172 case GL_QUARTER_BIT_ATI:
173 val[i] = val[i] * 0.25;
174 break;
175 case GL_EIGHTH_BIT_ATI:
176 val[i] = val[i] * 0.125;
177 break;
178 }
179
180 if (has_sat) {
181 if (val[i] < 0.0)
182 val[i] = 0;
183 else if (val[i] > 1.0)
184 val[i] = 1.0;
185 }
186 else {
187 if (val[i] < -8.0)
188 val[i] = -8.0;
189 else if (val[i] > 8.0)
190 val[i] = 8.0;
191 }
192 }
193 }
194
195
196 static void
197 write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
198 GLfloat * dst)
199 {
200 GLint i;
201 apply_dst_mod(optype, mod, src);
202
203 if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
204 if (mask) {
205 if (mask & GL_RED_BIT_ATI)
206 dst[0] = src[0];
207
208 if (mask & GL_GREEN_BIT_ATI)
209 dst[1] = src[1];
210
211 if (mask & GL_BLUE_BIT_ATI)
212 dst[2] = src[2];
213 }
214 else {
215 for (i = 0; i < 3; i++)
216 dst[i] = src[i];
217 }
218 }
219 else
220 dst[3] = src[3];
221 }
222
223 static void
224 finish_pass(struct atifs_machine *machine)
225 {
226 GLint i;
227
228 for (i = 0; i < 6; i++) {
229 COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
230 }
231 }
232
/*
 * NOTE: this comment documents execute_shader(), which is defined further
 * down in this file; it does not describe the opcode table that follows.
 *
 * Execute the given fragment shader.
 * We do everything in single-precision floating point; we don't
 * currently observe the single/half/fixed-precision qualifiers.
 * \param ctx - rendering context
 * \param program - the fragment shader to execute
 * \param machine - machine state (register file)
 * \param maxInst - max number of instructions to execute
 * \return GL_TRUE if the shader completed, or GL_FALSE if it executed KIL.
 */
243
/*
 * Table of the arithmetic opcodes handled by the switch in
 * execute_shader().  Each entry pairs an opcode token with a small integer.
 * NOTE(review): the integer looks like the number of source arguments the
 * opcode takes (MOV 1, ADD 2, MAD 3, ...) -- confirm against the
 * definition of struct ati_fs_opcode_st.  Nothing in this file reads the
 * table.
 */
struct ati_fs_opcode_st ati_fs_opcodes[] = {
   {GL_ADD_ATI, 2},
   {GL_SUB_ATI, 2},
   {GL_MUL_ATI, 2},
   {GL_MAD_ATI, 3},
   {GL_LERP_ATI, 3},
   {GL_MOV_ATI, 1},
   {GL_CND_ATI, 3},
   {GL_CND0_ATI, 3},
   {GL_DOT2_ADD_ATI, 3},
   {GL_DOT3_ATI, 2},
   {GL_DOT4_ATI, 2}
};
257
258
259
260 static void
261 handle_pass_op(struct atifs_machine *machine, struct atifs_instruction *inst,
262 const struct sw_span *span, GLuint column)
263 {
264 GLuint idx = inst->DstReg[0].Index - GL_REG_0_ATI;
265 GLuint swizzle = inst->DstReg[0].Swizzle;
266 GLuint pass_tex = inst->SrcReg[0][0].Index;
267
268 /* if we get here after passing pass one then we are starting pass two - backup the registers */
269 if (machine->pass == 1) {
270 finish_pass(machine);
271 machine->pass = 2;
272 }
273 if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
274 pass_tex -= GL_TEXTURE0_ARB;
275 COPY_4V(machine->Registers[idx],
276 span->array->texcoords[pass_tex][column]);
277 }
278 else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI
279 && machine->pass == 2) {
280 pass_tex -= GL_REG_0_ATI;
281 COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
282 }
283 apply_swizzle(machine, idx, swizzle);
284
285 }
286
287 static void
288 handle_sample_op(GLcontext * ctx, struct atifs_machine *machine,
289 struct atifs_instruction *inst, const struct sw_span *span,
290 GLuint column)
291 {
292 GLuint idx = inst->DstReg[0].Index - GL_REG_0_ATI;
293 GLuint swizzle = inst->DstReg[0].Swizzle;
294 GLuint sample_tex = inst->SrcReg[0][0].Index;
295
296 /* if we get here after passing pass one then we are starting pass two - backup the registers */
297 if (machine->pass == 1) {
298 finish_pass(machine);
299 machine->pass = 2;
300 }
301
302 if (sample_tex >= GL_TEXTURE0_ARB && sample_tex <= GL_TEXTURE7_ARB) {
303 sample_tex -= GL_TEXTURE0_ARB;
304 fetch_texel(ctx, span->array->texcoords[sample_tex][column], 0.0F,
305 sample_tex, machine->Registers[idx]);
306 }
307 else if (sample_tex >= GL_REG_0_ATI && sample_tex <= GL_REG_5_ATI) {
308 /* this is wrong... */
309 sample_tex -= GL_REG_0_ATI;
310 fetch_texel(ctx, machine->Registers[sample_tex], 0, sample_tex,
311 machine->Registers[idx]);
312 }
313
314 apply_swizzle(machine, idx, swizzle);
315 }
316
/*
 * Load source operand <x> (a four-component vector) into src[optype][i].
 * An array named "src", indexed [optype][argument][component], must be in
 * scope at the point of use.
 *
 * All four components are copied regardless of op type.  Copying only RGB
 * for colour ops and only A for alpha ops left the remaining components
 * uninitialised, yet source replication (e.g. GL_ALPHA on a colour op or
 * GL_RED on an alpha op) and GL_DOT4_ATI read them.
 */
#define SETUP_SRC_REG(optype, i, x) do {      \
   src[(optype)][(i)][0] = (x)[0];            \
   src[(optype)][(i)][1] = (x)[1];            \
   src[(optype)][(i)][2] = (x)[2];            \
   src[(optype)][(i)][3] = (x)[3];            \
} while (0)
323
324 static GLboolean
325 execute_shader(GLcontext * ctx,
326 const struct ati_fragment_shader *shader, GLuint maxInst,
327 struct atifs_machine *machine, const struct sw_span *span,
328 GLuint column)
329 {
330 GLuint pc;
331 struct atifs_instruction *inst;
332 GLint optype;
333 GLint i;
334 GLint dstreg;
335 GLfloat src[2][3][4];
336 GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
337 GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
338 GLfloat dst[2][4], *dstp;
339
340 for (pc = 0; pc < shader->Base.NumInstructions; pc++) {
341 inst = &shader->Instructions[pc];
342
343 if (inst->Opcode[0] == ATI_FRAGMENT_SHADER_PASS_OP)
344 handle_pass_op(machine, inst, span, column);
345 else if (inst->Opcode[0] == ATI_FRAGMENT_SHADER_SAMPLE_OP)
346 handle_sample_op(ctx, machine, inst, span, column);
347 else {
348 if (machine->pass == 0)
349 machine->pass = 1;
350
351 /* setup the source registers for color and alpha ops */
352 for (optype = 0; optype < 2; optype++) {
353 for (i = 0; i < inst->ArgCount[optype]; i++) {
354 GLint index = inst->SrcReg[optype][i].Index;
355
356 if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
357 SETUP_SRC_REG(optype, i,
358 machine->Registers[index - GL_REG_0_ATI]);
359 else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI)
360 SETUP_SRC_REG(optype, i,
361 shader->Constants[index - GL_CON_0_ATI]);
362 else if (index == GL_ONE)
363 SETUP_SRC_REG(optype, i, ones);
364 else if (index == GL_ZERO)
365 SETUP_SRC_REG(optype, i, zeros);
366 else if (index == GL_PRIMARY_COLOR_EXT)
367 SETUP_SRC_REG(optype, i,
368 machine->Inputs[ATI_FS_INPUT_PRIMARY]);
369 else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
370 SETUP_SRC_REG(optype, i,
371 machine->Inputs[ATI_FS_INPUT_SECONDARY]);
372
373 apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
374 src[optype][i]);
375 apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
376 src[optype][i]);
377 }
378 }
379
380 /* Execute the operations - color then alpha */
381 for (optype = 0; optype < 2; optype++) {
382 if (inst->Opcode[optype]) {
383 switch (inst->Opcode[optype]) {
384 case GL_ADD_ATI:
385 if (!optype)
386 for (i = 0; i < 3; i++) {
387 dst[optype][i] =
388 src[optype][0][i] + src[optype][1][i];
389 }
390 else
391 dst[optype][3] = src[optype][0][3] + src[optype][1][3];
392 break;
393 case GL_SUB_ATI:
394 if (!optype)
395 for (i = 0; i < 3; i++) {
396 dst[optype][i] =
397 src[optype][0][i] - src[optype][1][i];
398 }
399 else
400 dst[optype][3] = src[optype][0][3] - src[optype][1][3];
401 break;
402 case GL_MUL_ATI:
403 if (!optype)
404 for (i = 0; i < 3; i++) {
405 dst[optype][i] =
406 src[optype][0][i] * src[optype][1][i];
407 }
408 else
409 dst[optype][3] = src[optype][0][3] * src[optype][1][3];
410 break;
411 case GL_MAD_ATI:
412 if (!optype)
413 for (i = 0; i < 3; i++) {
414 dst[optype][i] =
415 src[optype][0][i] * src[optype][1][i] +
416 src[optype][2][i];
417 }
418 else
419 dst[optype][3] =
420 src[optype][0][3] * src[optype][1][3] +
421 src[optype][2][3];
422 break;
423 case GL_LERP_ATI:
424 if (!optype)
425 for (i = 0; i < 3; i++) {
426 dst[optype][i] =
427 src[optype][0][i] * src[optype][1][i] + (1 -
428 src
429 [optype]
430 [0][i]) *
431 src[optype][2][i];
432 }
433 else
434 dst[optype][3] =
435 src[optype][0][3] * src[optype][1][3] + (1 -
436 src[optype]
437 [0][3]) *
438 src[optype][2][3];
439 break;
440
441 case GL_MOV_ATI:
442 if (!optype)
443 for (i = 0; i < 3; i++) {
444 dst[optype][i] = src[optype][0][i];
445 }
446 else
447 dst[optype][3] = src[optype][0][3];
448 break;
449 case GL_CND_ATI:
450 if (!optype) {
451 for (i = 0; i < 3; i++) {
452 dst[optype][i] =
453 (src[optype][2][i] >
454 0.5) ? src[optype][0][i] : src[optype][1][i];
455 }
456 }
457 else {
458 dst[optype][3] =
459 (src[optype][2][3] >
460 0.5) ? src[optype][0][3] : src[optype][1][3];
461 }
462 break;
463
464 case GL_CND0_ATI:
465 if (!optype)
466 for (i = 0; i < 3; i++) {
467 dst[optype][i] =
468 (src[optype][2][i] >=
469 0) ? src[optype][0][i] : src[optype][1][i];
470 }
471 else {
472 dst[optype][3] =
473 (src[optype][2][3] >=
474 0) ? src[optype][0][3] : src[optype][1][3];
475 }
476 break;
477 case GL_DOT2_ADD_ATI:
478 {
479 GLfloat result;
480
481 /* DOT 2 always uses the source from the color op */
482 result = src[0][0][0] * src[0][1][0] +
483 src[0][0][1] * src[0][1][1] + src[0][2][2];
484 if (!optype) {
485 for (i = 0; i < 3; i++) {
486 dst[optype][i] = result;
487 }
488 }
489 else
490 dst[optype][3] = result;
491
492 }
493 break;
494 case GL_DOT3_ATI:
495 {
496 GLfloat result;
497
498 /* DOT 3 always uses the source from the color op */
499 result = src[0][0][0] * src[0][1][0] +
500 src[0][0][1] * src[0][1][1] +
501 src[0][0][2] * src[0][1][2];
502
503 if (!optype) {
504 for (i = 0; i < 3; i++) {
505 dst[optype][i] = result;
506 }
507 }
508 else
509 dst[optype][3] = result;
510 }
511 break;
512 case GL_DOT4_ATI:
513 {
514 GLfloat result;
515
516 /* DOT 4 always uses the source from the color op */
517 result = src[optype][0][0] * src[0][1][0] +
518 src[0][0][1] * src[0][1][1] +
519 src[0][0][2] * src[0][1][2] +
520 src[0][0][3] * src[0][1][3];
521 if (!optype) {
522 for (i = 0; i < 3; i++) {
523 dst[optype][i] = result;
524 }
525 }
526 else
527 dst[optype][3] = result;
528 }
529 break;
530
531 }
532 }
533 }
534
535 /* write out the destination registers */
536 for (optype = 0; optype < 2; optype++) {
537 if (inst->Opcode[optype]) {
538 dstreg = inst->DstReg[optype].Index;
539 dstp = machine->Registers[dstreg - GL_REG_0_ATI];
540
541 write_dst_addr(optype, inst->DstReg[optype].dstMod,
542 inst->DstReg[optype].dstMask, dst[optype],
543 dstp);
544 }
545 }
546 }
547 }
548 return GL_TRUE;
549 }
550
551 static void
552 init_machine(GLcontext * ctx, struct atifs_machine *machine,
553 const struct ati_fragment_shader *shader,
554 const struct sw_span *span, GLuint col)
555 {
556 GLint i, j;
557
558 for (i = 0; i < 6; i++) {
559 for (j = 0; j < 4; j++)
560 ctx->ATIFragmentShader.Machine.Registers[i][j] = 0.0;
561
562 }
563
564 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][0] =
565 CHAN_TO_FLOAT(span->array->rgba[col][0]);
566 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][1] =
567 CHAN_TO_FLOAT(span->array->rgba[col][1]);
568 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][2] =
569 CHAN_TO_FLOAT(span->array->rgba[col][2]);
570 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_PRIMARY][3] =
571 CHAN_TO_FLOAT(span->array->rgba[col][3]);
572
573 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][0] =
574 CHAN_TO_FLOAT(span->array->spec[col][0]);
575 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][1] =
576 CHAN_TO_FLOAT(span->array->spec[col][1]);
577 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][2] =
578 CHAN_TO_FLOAT(span->array->spec[col][2]);
579 ctx->ATIFragmentShader.Machine.Inputs[ATI_FS_INPUT_SECONDARY][3] =
580 CHAN_TO_FLOAT(span->array->spec[col][3]);
581
582 ctx->ATIFragmentShader.Machine.pass = 0;
583 }
584
585
586
587 /**
588 * Execute the current fragment program, operating on the given span.
589 */
590 void
591 _swrast_exec_fragment_shader(GLcontext * ctx, struct sw_span *span)
592 {
593 const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
594 GLuint i;
595
596 ctx->_CurrentProgram = GL_FRAGMENT_SHADER_ATI;
597
598 for (i = 0; i < span->end; i++) {
599 if (span->array->mask[i]) {
600 init_machine(ctx, &ctx->ATIFragmentShader.Machine,
601 ctx->ATIFragmentShader.Current, span, i);
602
603 if (execute_shader(ctx, shader, ~0,
604 &ctx->ATIFragmentShader.Machine, span, i)) {
605 span->array->mask[i] = GL_FALSE;
606 }
607
608 {
609 const GLfloat *colOut =
610 ctx->ATIFragmentShader.Machine.Registers[0];
611
612 /*fprintf(stderr,"outputs %f %f %f %f\n", colOut[0], colOut[1], colOut[2], colOut[3]); */
613 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][RCOMP], colOut[0]);
614 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][GCOMP], colOut[1]);
615 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][BCOMP], colOut[2]);
616 UNCLAMPED_FLOAT_TO_CHAN(span->array->rgba[i][ACOMP], colOut[3]);
617 }
618 }
619
620 }
621
622
623 ctx->_CurrentProgram = 0;
624
625 }