b649da2a3ed57e4349bdf5f1d7336d54108f4b15
[mesa.git] / src / mesa / swrast / s_fragprog_to_c.c
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.1
4 *
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 /* An amusing little utility to print ARB fragment programs out as a C
26 * function. Resulting code not tested except visually.
27 */
28
29
30 #include "glheader.h"
31 #include "colormac.h"
32 #include "context.h"
33 #include "nvfragprog.h"
34 #include "macros.h"
35 #include "program.h"
36
37 #include "s_nvfragprog.h"
38 #include "s_span.h"
39 #include "s_texture.h"
40
41
/* UREG - a way of representing an FP source register including
 * swizzling and negation in a single GLuint.  Major flaw is the
 * limitation to source->Index < 32.  Secondary flaw is the fact that
 * it's overkill & we could probably just pass around the original
 * datatypes instead.
 */
48
/* Register file tags stored in the top three bits of a ureg. */
#define UREG_TYPE_TEMP 0
#define UREG_TYPE_INTERP 1
#define UREG_TYPE_LOCAL_CONST 2
#define UREG_TYPE_ENV_CONST 3
#define UREG_TYPE_STATE_CONST 4
#define UREG_TYPE_PARAM 5
#define UREG_TYPE_OUTPUT 6
#define UREG_TYPE_MASK 0x7

/* Bit layout of a ureg: type in bits 29-31, register number in bits
 * 24-28, then one nibble per slot (X, Y, Z, W, ZERO, ONE) going down
 * from bit 20.  Within the X/Y/Z/W nibbles the low three bits select
 * the source channel and the top bit is the negate flag.
 */
#define UREG_TYPE_SHIFT 29
#define UREG_NR_SHIFT 24
#define UREG_NR_MASK 0x1f /* 31 */
#define UREG_CHANNEL_X_NEGATE_SHIFT 23
#define UREG_CHANNEL_X_SHIFT 20
#define UREG_CHANNEL_Y_NEGATE_SHIFT 19
#define UREG_CHANNEL_Y_SHIFT 16
#define UREG_CHANNEL_Z_NEGATE_SHIFT 15
#define UREG_CHANNEL_Z_SHIFT 12
#define UREG_CHANNEL_W_NEGATE_SHIFT 11
#define UREG_CHANNEL_W_SHIFT 8
#define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
#define UREG_CHANNEL_ZERO_SHIFT 4
#define UREG_CHANNEL_ONE_NEGATE_MBZ 1
#define UREG_CHANNEL_ONE_SHIFT 0

#define UREG_BAD 0xffffffff /* not a valid ureg */

/* Channel selectors.  These double as slot indices for
 * GET_CHANNEL_SRC(), which is why _ZERO and _ONE continue the sequence.
 */
#define _X 0
#define _Y 1
#define _Z 2
#define _W 3
#define _ZERO 4 /* NOTE! */
#define _ONE 5 /* NOTE! */


/* Construct a ureg with the identity swizzle (.xyzw) and no negation:
 */
#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \
                          ((nr) << UREG_NR_SHIFT) | \
                          (_X << UREG_CHANNEL_X_SHIFT) | \
                          (_Y << UREG_CHANNEL_Y_SHIFT) | \
                          (_Z << UREG_CHANNEL_Z_SHIFT) | \
                          (_W << UREG_CHANNEL_W_SHIFT) | \
                          (_ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
                          (_ONE << UREG_CHANNEL_ONE_SHIFT))

/* GET_CHANNEL_SRC moves the nibble stored in slot 'channel' up into the
 * X slot; CHANNEL_SRC moves a nibble from the X slot down into slot
 * 'channel'.  Arguments are fully parenthesised so that expression
 * arguments (e.g. "base + 1") scale and shift as a unit.
 */
#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & \
                                         (0xf<<UREG_CHANNEL_X_SHIFT))
#define CHANNEL_SRC( src, channel ) ((src)>>((channel)*4))

#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&UREG_TYPE_MASK)
#define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&UREG_NR_MASK)



/* Covers the X, Y, Z and W nibbles (selectors and negate flags). */
#define UREG_XYZW_CHANNEL_MASK 0x00ffff00

/* Replicate the contents of slot 'pos' into all four channels. */
#define deref(reg,pos) swizzle(reg, pos, pos, pos, pos)
107
108
109 static INLINE int is_swizzled( int reg )
110 {
111 return ((reg & UREG_XYZW_CHANNEL_MASK) !=
112 (UREG(0,0) & UREG_XYZW_CHANNEL_MASK));
113 }
114
115
116 /* One neat thing about the UREG representation:
117 */
118 static INLINE int swizzle( int reg, int x, int y, int z, int w )
119 {
120 return ((reg & ~UREG_XYZW_CHANNEL_MASK) |
121 CHANNEL_SRC( GET_CHANNEL_SRC( reg, x ), 0 ) |
122 CHANNEL_SRC( GET_CHANNEL_SRC( reg, y ), 1 ) |
123 CHANNEL_SRC( GET_CHANNEL_SRC( reg, z ), 2 ) |
124 CHANNEL_SRC( GET_CHANNEL_SRC( reg, w ), 3 ));
125 }
126
127 /* Another neat thing about the UREG representation:
128 */
129 static INLINE int negate( int reg, int x, int y, int z, int w )
130 {
131 return reg ^ (((x&1)<<UREG_CHANNEL_X_NEGATE_SHIFT)|
132 ((y&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT)|
133 ((z&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT)|
134 ((w&1)<<UREG_CHANNEL_W_NEGATE_SHIFT));
135 }
136
137
138
139 static GLuint src_reg_file( GLuint file )
140 {
141 switch (file) {
142 case PROGRAM_TEMPORARY: return UREG_TYPE_TEMP;
143 case PROGRAM_INPUT: return UREG_TYPE_INTERP;
144 case PROGRAM_LOCAL_PARAM: return UREG_TYPE_LOCAL_CONST;
145 case PROGRAM_ENV_PARAM: return UREG_TYPE_ENV_CONST;
146
147 case PROGRAM_STATE_VAR: return UREG_TYPE_STATE_CONST;
148 case PROGRAM_NAMED_PARAM: return UREG_TYPE_PARAM;
149 default: return UREG_BAD;
150 }
151 }
152
153
154 /**
155 * Retrieve a ureg for the given source register. Will emit
156 * constants, apply swizzling and negation as needed.
157 */
158 static GLuint src_vector( const struct fp_src_register *source )
159 {
160 GLuint src;
161
162 /* fprintf(stderr, "%s File %d, Index %d\n", */
163 /* __FUNCTION__, source->File, source->Index); */
164
165 assert(source->Index < 32); /* limitiation of UREG representation */
166
167 src = UREG( src_reg_file( source->File ), source->Index );
168
169 src = swizzle(src,
170 _X + source->Swizzle[0],
171 _X + source->Swizzle[1],
172 _X + source->Swizzle[2],
173 _X + source->Swizzle[3]);
174
175 if (source->NegateBase)
176 src = negate( src, 1,1,1,1 );
177
178 return src;
179 }
180
/* Print the prototype and opening of the generated run_program()
 * function, including the declaration of its temp[] register array.
 *
 * 'outputs' is declared as a pointer to 4-float rows because the
 * generated assignments index it as outputs[reg][channel], exactly
 * like temp[][].
 */
static void print_header( void )
{
   printf("static void run_program( const GLfloat (*local_param)[4], \n"
          " const GLfloat (*env_param)[4], \n"
          " const GLfloat (*state_param)[4], \n"
          " const GLfloat (*interp)[4], \n"
          " GLfloat (*outputs)[4])\n"
          "{\n"
          " GLfloat temp[32][4];\n"
          );
}
192
/* Print the closing brace of the generated function. */
static void print_footer( void )
{
   fputs("}\n", stdout);
}
197
198 static void print_dest_reg( const struct fp_instruction *inst )
199 {
200 switch (inst->DstReg.File) {
201 case PROGRAM_OUTPUT:
202 printf("outputs[%d]", inst->DstReg.Index);
203 break;
204 case PROGRAM_TEMPORARY:
205 printf("temp[%d]", inst->DstReg.Index);
206 break;
207 default:
208 break;
209 }
210 }
211
212 static void print_dest( const struct fp_instruction *inst,
213 GLuint idx )
214 {
215 print_dest_reg(inst);
216 printf("[%d]", idx);
217 }
218
219
220 #define UREG_SRC0(reg) (((reg)>>UREG_CHANNEL_X_SHIFT) & 0x7)
221
222 static void print_reg( GLuint arg )
223 {
224 switch (GET_UREG_TYPE(arg)) {
225 case UREG_TYPE_TEMP: printf("temp"); break;
226 case UREG_TYPE_INTERP: printf("interp"); break;
227 case UREG_TYPE_LOCAL_CONST: printf("local_const"); break;
228 case UREG_TYPE_ENV_CONST: printf("env_const"); break;
229 case UREG_TYPE_STATE_CONST: printf("state_const"); break;
230 case UREG_TYPE_PARAM: printf("param"); break;
231 };
232
233 printf("[%d]", GET_UREG_NR(arg));
234 }
235
236
237 static void print_arg( const struct fragment_program *p,
238 GLuint arg )
239 {
240 GLuint src = UREG_SRC0(arg);
241
242 if (src == _ZERO) {
243 printf("0");
244 return;
245 }
246
247 if (arg & (1<<UREG_CHANNEL_X_NEGATE_SHIFT))
248 printf("-");
249
250 if (src == _ONE) {
251 printf("1");
252 return;
253 }
254
255 if (GET_UREG_TYPE(arg) == UREG_TYPE_STATE_CONST) {
256 printf("%g", p->Parameters->Parameters[GET_UREG_NR(arg)].Values[src]);
257 return;
258 }
259
260 print_reg( arg );
261
262 switch (src){
263 case _X: printf("[0]"); break;
264 case _Y: printf("[1]"); break;
265 case _Z: printf("[2]"); break;
266 case _W: printf("[3]"); break;
267 }
268 }
269
270
271 /* This is where the handling of expressions breaks down into string
272 * processing:
273 */
274 static void print_expression( const struct fragment_program *p,
275 GLuint i,
276 const char *fmt,
277 va_list ap )
278 {
279 while (*fmt) {
280 if (*fmt == '%' && *(fmt+1) == 's') {
281 int reg = va_arg(ap, int);
282
283 /* Use of deref() is a bit of a hack:
284 */
285 print_arg( p, deref(reg, i) );
286 fmt += 2;
287 }
288 else {
289 putchar(*fmt);
290 fmt++;
291 }
292 }
293
294 printf(";\n");
295 }
296
297 static void do_tex_simple( const struct fragment_program *p,
298 const struct fp_instruction *inst,
299 const char *fn, GLuint texunit, GLuint arg )
300 {
301 printf(" %s( ctx, ", fn);
302 print_reg(arg);
303 printf(", %d, ", texunit );
304 print_dest_reg(inst);
305 printf(");\n");
306 }
307
308
309 static void do_tex( const struct fragment_program *p,
310 const struct fp_instruction *inst,
311 const char *fn, GLuint texunit, GLuint arg )
312 {
313 GLuint i;
314 GLboolean need_tex = GL_FALSE, need_result = GL_FALSE;
315
316 for (i = 0; i < 4; i++)
317 if (!inst->DstReg.WriteMask[i])
318 need_result = GL_TRUE;
319
320 if (is_swizzled(arg))
321 need_tex = GL_TRUE;
322
323 if (!need_tex && !need_result) {
324 do_tex_simple( p, inst, fn, texunit, arg );
325 return;
326 }
327
328 printf(" {\n");
329 printf(" GLfloat texcoord[4];\n");
330 printf(" GLfloat result[4];\n");
331
332 for (i = 0; i < 4; i++) {
333 printf(" texcoord[%d] = ", i);
334 print_arg( p, deref(arg, i) );
335 printf(";\n");
336 }
337
338 printf(" %s( ctx, texcoord, %d, result);\n", fn, texunit );
339
340 for (i = 0; i < 4; i++) {
341 if (inst->DstReg.WriteMask[i]) {
342 printf(" ");
343 print_dest(inst, i);
344 printf(" = result[%d];\n", i);
345 }
346 }
347
348 printf(" }\n");
349 }
350
351 static void assign_single( GLuint i,
352 const struct fragment_program *p,
353 const struct fp_instruction *inst,
354 const char *fmt,
355 ... )
356 {
357 va_list ap;
358 va_start( ap, fmt );
359
360 if (inst->DstReg.WriteMask[i]) {
361 printf(" ");
362 print_dest(inst, i);
363 printf(" = ");
364 print_expression( p, i, fmt, ap);
365 }
366
367 va_end( ap );
368 }
369
370 static void assign4( const struct fragment_program *p,
371 const struct fp_instruction *inst,
372 const char *fmt,
373 ... )
374 {
375 GLuint i;
376 va_list ap;
377 va_start( ap, fmt );
378
379 for (i = 0; i < 4; i++)
380 if (inst->DstReg.WriteMask[i]) {
381 printf(" ");
382 print_dest(inst, i);
383 printf(" = ");
384 print_expression( p, i, fmt, ap);
385 }
386
387 va_end( ap );
388 }
389
390 static void assign4_replicate( const struct fragment_program *p,
391 const struct fp_instruction *inst,
392 const char *fmt,
393 ... )
394 {
395 GLuint i;
396 GLboolean ok = 0;
397 va_list ap;
398
399 for (i = 0; i < 4; i++)
400 if (inst->DstReg.WriteMask[i])
401 ok = 1;
402
403 if (!ok) return;
404
405 va_start( ap, fmt );
406
407 printf(" ");
408
409 for (i = 0; i < 4; i++)
410 if (inst->DstReg.WriteMask[i]) {
411 print_dest(inst, i);
412 printf(" = ");
413 }
414
415 print_expression( p, 0, fmt, ap);
416
417 va_end( ap );
418 }
419
420
421
422
423
424
425 static GLuint nr_args( GLuint opcode )
426 {
427 switch (opcode) {
428 case FP_OPCODE_ABS: return 1;
429 case FP_OPCODE_ADD: return 2;
430 case FP_OPCODE_CMP: return 3;
431 case FP_OPCODE_COS: return 1;
432 case FP_OPCODE_DP3: return 2;
433 case FP_OPCODE_DP4: return 2;
434 case FP_OPCODE_DPH: return 2;
435 case FP_OPCODE_DST: return 2;
436 case FP_OPCODE_EX2: return 1;
437 case FP_OPCODE_FLR: return 1;
438 case FP_OPCODE_FRC: return 1;
439 case FP_OPCODE_KIL: return 1;
440 case FP_OPCODE_LG2: return 1;
441 case FP_OPCODE_LIT: return 1;
442 case FP_OPCODE_LRP: return 3;
443 case FP_OPCODE_MAD: return 3;
444 case FP_OPCODE_MAX: return 2;
445 case FP_OPCODE_MIN: return 2;
446 case FP_OPCODE_MOV: return 1;
447 case FP_OPCODE_MUL: return 2;
448 case FP_OPCODE_POW: return 2;
449 case FP_OPCODE_RCP: return 1;
450 case FP_OPCODE_RSQ: return 1;
451 case FP_OPCODE_SCS: return 1;
452 case FP_OPCODE_SGE: return 2;
453 case FP_OPCODE_SIN: return 1;
454 case FP_OPCODE_SLT: return 2;
455 case FP_OPCODE_SUB: return 2;
456 case FP_OPCODE_SWZ: return 1;
457 case FP_OPCODE_TEX: return 1;
458 case FP_OPCODE_TXB: return 1;
459 case FP_OPCODE_TXP: return 1;
460 case FP_OPCODE_XPD: return 2;
461 default: return 0;
462 }
463 }
464
465
466
467 static void upload_program( const struct fragment_program *p )
468 {
469 const struct fp_instruction *inst = p->Instructions;
470
471 for (; inst->Opcode != FP_OPCODE_END; inst++) {
472
473 GLuint src[3], i;
474 GLuint nr = nr_args( inst->Opcode );
475
476 for (i = 0; i < nr; i++)
477 src[i] = src_vector( &inst->SrcReg[i] );
478
479 /* Print the original program instruction string */
480 {
481 const char *s = (const char *) p->Base.String + inst->StringPos;
482 printf(" /* ");
483 while (*s != ';') {
484 putchar(*s);
485 s++;
486 }
487 printf("; */\n");
488 }
489
490 switch (inst->Opcode) {
491 case FP_OPCODE_ABS:
492 assign4(p, inst, "FABSF(%s)", src[0]);
493 break;
494
495 case FP_OPCODE_ADD:
496 assign4(p, inst, "%s + %s", src[0], src[1]);
497 break;
498
499 case FP_OPCODE_CMP:
500 assign4(p, inst, "%s < 0.0F ? %s : %s", src[0], src[1], src[2]);
501 break;
502
503 case FP_OPCODE_COS:
504 assign4_replicate(p, inst, "COS(%s)", src[0]);
505 break;
506
507 case FP_OPCODE_DP3:
508 assign4_replicate(p, inst,
509 "%s*%s + %s*%s + %s*%s",
510 deref(src[0],_X),
511 deref(src[1],_X),
512 deref(src[0],_Y),
513 deref(src[1],_Y),
514 deref(src[0],_Z),
515 deref(src[1],_Z));
516 break;
517
518 case FP_OPCODE_DP4:
519 assign4_replicate(p, inst,
520 "%s*%s + %s*%s + %s*%s + %s*%s",
521 deref(src[0],_X),
522 deref(src[1],_X),
523 deref(src[0],_Y),
524 deref(src[1],_Y),
525 deref(src[0],_Z),
526 deref(src[1],_Z));
527 break;
528
529 case FP_OPCODE_DPH:
530 assign4_replicate(p, inst,
531 "%s*%s + %s*%s + %s*%s + %s",
532 deref(src[0],_X),
533 deref(src[1],_X),
534 deref(src[0],_Y),
535 deref(src[1],_Y),
536 deref(src[1],_Z));
537 break;
538
539 case FP_OPCODE_DST:
540 /* result[0] = 1 * 1;
541 * result[1] = a[1] * b[1];
542 * result[2] = a[2] * 1;
543 * result[3] = 1 * b[3];
544 *
545 * Here we hope that the compiler can optimize away "x*1" to "x".
546 */
547 assign4(p, inst,
548 "%s*%s",
549 swizzle(src[0], _ONE, _Y, _Z, _ONE),
550 swizzle(src[1], _ONE, _Y, _ONE, _W ));
551 break;
552
553 case FP_OPCODE_EX2:
554 assign4_replicate(p, inst, "EX2(%s)", src[0]);
555 break;
556
557 case FP_OPCODE_FLR:
558 assign4_replicate(p, inst, "FLR(%s)", src[0]);
559 break;
560
561 case FP_OPCODE_FRC:
562 assign4_replicate(p, inst, "FRC(%s)", src[0]);
563 break;
564
565 case FP_OPCODE_KIL:
566 /* TODO */
567 break;
568
569 case FP_OPCODE_LG2:
570 assign4_replicate(p, inst, "LOG(%s)", deref(src[0], _X));
571 break;
572
573 case FP_OPCODE_LIT:
574 assign_single(0, p, inst, "1.0");
575 assign_single(1, p, inst, "MIN2(%s, 0)", deref(src[0], _X));
576 assign_single(2, p, inst, "(%s > 0.0) ? EXP(%s * MIN2(%s, 0)) : 0.0",
577 deref(src[0], _X),
578 deref(src[0], _Z),
579 deref(src[0], _Y));
580 assign_single(3, p, inst, "1.0");
581 break;
582
583 case FP_OPCODE_LRP:
584 assign4(p, inst,
585 "%s * %s + (1.0 - %s) * %s",
586 src[0], src[1], src[0], src[2]);
587 break;
588
589 case FP_OPCODE_MAD:
590 assign4(p, inst, "%s * %s + %s", src[0], src[1], src[2]);
591 break;
592
593 case FP_OPCODE_MAX:
594 assign4(p, inst, "MAX2(%s, %s)", src[0], src[1]);
595 break;
596
597 case FP_OPCODE_MIN:
598 assign4(p, inst, "MIN2(%s, %s)", src[0], src[1]);
599 break;
600
601 case FP_OPCODE_MOV:
602 assign4(p, inst, "%s", src[0]);
603 break;
604
605 case FP_OPCODE_MUL:
606 assign4(p, inst, "%s * %s", src[0], src[1]);
607 break;
608
609 case FP_OPCODE_POW:
610 assign4_replicate(p, inst, "POW(%s, %s)",
611 deref(src[0], _X),
612 deref(src[1], _X));
613 break;
614
615 case FP_OPCODE_RCP:
616 assign4_replicate(p, inst, "1.0/%s", deref(src[0], _X));
617 break;
618
619 case FP_OPCODE_RSQ:
620 assign4_replicate(p, inst, "INV_SQRTF(%s)", deref(src[0], _X));
621 break;
622
623 case FP_OPCODE_SCS:
624 if (inst->DstReg.WriteMask[0]) {
625 assign_single(0, p, inst, "COS(%s)", deref(src[0], _X));
626 }
627
628 if (inst->DstReg.WriteMask[1]) {
629 assign_single(1, p, inst, "SIN(%s)", deref(src[0], _X));
630 }
631 break;
632
633 case FP_OPCODE_SGE:
634 assign4(p, inst, "%s >= %s ? 1.0 : 0.0", src[0], src[1]);
635 break;
636
637 case FP_OPCODE_SIN:
638 assign4_replicate(p, inst, "SIN(%s)", deref(src[0], _X));
639 break;
640
641 case FP_OPCODE_SLT:
642 assign4(p, inst, "%s < %s ? 1.0 : 0.0", src[0], src[1]);
643 break;
644
645 case FP_OPCODE_SUB:
646 assign4(p, inst, "%s - %s", src[0], src[1]);
647 break;
648
649 case FP_OPCODE_SWZ: /* same implementation as MOV: */
650 assign4(p, inst, "%s", src[0]);
651 break;
652
653 case FP_OPCODE_TEX:
654 do_tex(p, inst, "TEX", inst->TexSrcUnit, src[0]);
655 break;
656
657 case FP_OPCODE_TXB:
658 do_tex(p, inst, "TXB", inst->TexSrcUnit, src[0]);
659 break;
660
661 case FP_OPCODE_TXP:
662 do_tex(p, inst, "TXP", inst->TexSrcUnit, src[0]);
663 break;
664
665 case FP_OPCODE_X2D:
666 /* Cross product:
667 * result.x = src[0].y * src[1].z - src[0].z * src[1].y;
668 * result.y = src[0].z * src[1].x - src[0].x * src[1].z;
669 * result.z = src[0].x * src[1].y - src[0].y * src[1].x;
670 * result.w = undef;
671 */
672 assign4(p, inst,
673 "%s * %s - %s * %s",
674 swizzle(src[0], _Y, _Z, _X, _ONE),
675 swizzle(src[1], _Z, _X, _Y, _ONE),
676 swizzle(src[0], _Z, _X, _Y, _ONE),
677 swizzle(src[1], _Y, _Z, _X, _ONE));
678 break;
679
680 default:
681 return;
682 }
683 }
684 }
685
686
687
688
689
690 void _swrast_translate_program( GLcontext *ctx )
691 {
692 if (ctx->FragmentProgram.Current) {
693 print_header();
694 upload_program( ctx->FragmentProgram.Current );
695 print_footer();
696 }
697 }
698