2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 /* An amusing little utility to print ARB fragment programs out as a C
26 * function. Resulting code not tested except visually.
33 #include "nvfragprog.h"
37 #include "s_nvfragprog.h"
39 #include "s_texture.h"
42 /* UREG - a way of representing an FP source register including
43 * swizzling and negation in a single GLuint. Major flaw is the
44 * limitation to source->Index < 32. Secondary flaw is the fact that
45 * it's overkill & we could probably just pass around the original
/* Register-file tags stored in the type field of a ureg.
 */
#define UREG_TYPE_TEMP           0
#define UREG_TYPE_INTERP         1
#define UREG_TYPE_LOCAL_CONST    2
#define UREG_TYPE_ENV_CONST      3
#define UREG_TYPE_STATE_CONST    4
#define UREG_TYPE_PARAM          5
#define UREG_TYPE_OUTPUT         6
#define UREG_TYPE_MASK           0x7

/* Bit layout of a ureg, most significant field first:
 *   [31:29] register type, [28:24] register number, then one nibble
 *   per channel (X at bits 20-23, Y at 16-19, Z at 12-15, W at 8-11).
 *   In each channel nibble the top bit is the negate flag and the low
 *   three bits select the source component.  UREG() stores the _ZERO
 *   and _ONE selectors in the two lowest nibbles.
 */
#define UREG_TYPE_SHIFT               29
#define UREG_NR_SHIFT                 24
#define UREG_NR_MASK                  0x1f	/* 31 */
#define UREG_CHANNEL_X_NEGATE_SHIFT   23
#define UREG_CHANNEL_X_SHIFT          20
#define UREG_CHANNEL_Y_NEGATE_SHIFT   19
#define UREG_CHANNEL_Y_SHIFT          16
#define UREG_CHANNEL_Z_NEGATE_SHIFT   15
#define UREG_CHANNEL_Z_SHIFT          12
#define UREG_CHANNEL_W_NEGATE_SHIFT   11
#define UREG_CHANNEL_W_SHIFT          8
#define UREG_CHANNEL_ZERO_NEGATE_MBZ  5
#define UREG_CHANNEL_ZERO_SHIFT       4
#define UREG_CHANNEL_ONE_NEGATE_MBZ   1
#define UREG_CHANNEL_ONE_SHIFT        0

#define UREG_BAD          0xffffffff	/* not a valid ureg */

/* The values 4 and 5 are the nibble positions (counted from X) where
 * UREG() stores these selectors, so GET_CHANNEL_SRC(reg, _ZERO) and
 * GET_CHANNEL_SRC(reg, _ONE) read back _ZERO and _ONE.
 */
#define _ZERO 4		/* NOTE! */
#define _ONE  5		/* NOTE! */

/* Construct a ureg with the identity swizzle and no negation:
 */
#define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) |		\
			  ((nr)  << UREG_NR_SHIFT) |		\
			  (_X    << UREG_CHANNEL_X_SHIFT) |	\
			  (_Y    << UREG_CHANNEL_Y_SHIFT) |	\
			  (_Z    << UREG_CHANNEL_Z_SHIFT) |	\
			  (_W    << UREG_CHANNEL_W_SHIFT) |	\
			  (_ZERO << UREG_CHANNEL_ZERO_SHIFT) |	\
			  (_ONE  << UREG_CHANNEL_ONE_SHIFT))

/* GET_CHANNEL_SRC moves the nibble found `channel` positions below X
 * up into the X slot; CHANNEL_SRC moves an X-slot nibble down by
 * `channel` positions.  Every argument is parenthesized so that an
 * expression such as `base + i` shifts by the intended amount.
 */
#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & \
					 (0xf<<UREG_CHANNEL_X_SHIFT))
#define CHANNEL_SRC( src, channel ) ((src)>>((channel)*4))

#define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&UREG_TYPE_MASK)
#define GET_UREG_NR(reg)   (((reg)>>UREG_NR_SHIFT)&UREG_NR_MASK)

/* The sixteen bits holding the X, Y, Z and W nibbles.
 */
#define UREG_XYZW_CHANNEL_MASK 0x00ffff00

/* Replicate the component at position `pos` into all four channels.
 */
#define deref(reg,pos) swizzle((reg), (pos), (pos), (pos), (pos))
109 static INLINE
int is_swizzled( int reg
)
111 return ((reg
& UREG_XYZW_CHANNEL_MASK
) !=
112 (UREG(0,0) & UREG_XYZW_CHANNEL_MASK
));
116 /* One neat thing about the UREG representation:
118 static INLINE
int swizzle( int reg
, int x
, int y
, int z
, int w
)
120 return ((reg
& ~UREG_XYZW_CHANNEL_MASK
) |
121 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, x
), 0 ) |
122 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, y
), 1 ) |
123 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, z
), 2 ) |
124 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, w
), 3 ));
127 /* Another neat thing about the UREG representation:
129 static INLINE
int negate( int reg
, int x
, int y
, int z
, int w
)
131 return reg
^ (((x
&1)<<UREG_CHANNEL_X_NEGATE_SHIFT
)|
132 ((y
&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT
)|
133 ((z
&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT
)|
134 ((w
&1)<<UREG_CHANNEL_W_NEGATE_SHIFT
));
139 static GLuint
src_reg_file( GLuint file
)
142 case PROGRAM_TEMPORARY
: return UREG_TYPE_TEMP
;
143 case PROGRAM_INPUT
: return UREG_TYPE_INTERP
;
144 case PROGRAM_LOCAL_PARAM
: return UREG_TYPE_LOCAL_CONST
;
145 case PROGRAM_ENV_PARAM
: return UREG_TYPE_ENV_CONST
;
147 case PROGRAM_STATE_VAR
: return UREG_TYPE_STATE_CONST
;
148 case PROGRAM_NAMED_PARAM
: return UREG_TYPE_PARAM
;
149 default: return UREG_BAD
;
153 static void emit( struct fragment_program
*p
,
160 p
->c_strlen
+= vsnprintf( p
->c_str
+ p
->c_strlen
,
161 sizeof(p
->c_str
) - p
->c_strlen
,
/* Store the single character `c` at offset p->c_strlen of p->c_str,
 * but only while that offset is still inside the buffer.
 * NOTE(review): no NUL terminator is written here; presumably a later
 * emit() call terminates the string -- confirm.
 */
167 static INLINE
void emit_char( struct fragment_program
*p
, char c
)
169 if (p
->c_strlen
< sizeof(p
->c_str
))
170 p
->c_str
[p
->c_strlen
] = c
;
177 * Retrieve a ureg for the given source register. Will emit
178 * constants, apply swizzling and negation as needed.
180 static GLuint
src_vector( const struct fp_src_register
*source
)
184 assert(source
->Index
< 32); /* limitiation of UREG representation */
186 src
= UREG( src_reg_file( source
->File
), source
->Index
);
189 _X
+ source
->Swizzle
[0],
190 _X
+ source
->Swizzle
[1],
191 _X
+ source
->Swizzle
[2],
192 _X
+ source
->Swizzle
[3]);
194 if (source
->NegateBase
)
195 src
= negate( src
, 1,1,1,1 );
/* Emit the preamble of the generated C source: prototypes for the
 * functions the generated code calls, the MIN2/MAX2 helper macros,
 * and the opening of run_program() up to its temp[] declaration.
 */
static void print_header( struct fragment_program *p )
{
   /* Texture samplers, not written yet:
    */
   emit(p, "extern void TEX( void *ctx, const float *txc, int unit, float *rslt );\n"
	   "extern void TXB( void *ctx, const float *txc, int unit, float *rslt );\n"
	   "extern void TXP( void *ctx, const float *txc, int unit, float *rslt );\n");

   /* Resort to the standard math library (float versions):
    */
   emit(p, "extern float fabsf( float );\n"
	   "extern float cosf( float );\n"
	   "extern float sinf( float );\n"
	   "extern float expf( float );\n"
	   "extern float powf( float, float );\n"
	   "extern float floorf( float );\n");

   /* These ones we have fast code in Mesa for:
    */
   emit(p, "extern float LOG2( float );\n"
	   "extern float _mesa_inv_sqrtf( float );\n");

   /* The usual macros, not really needed, but handy.  MAX2 has to
    * select the larger operand, i.e. use '>' where MIN2 uses '<'.
    */
   emit(p, "#define MIN2(x,y) ((x)<(y)?(x):(y))\n"
	   "#define MAX2(x,y) ((x)>(y)?(x):(y))\n");

   /* Signature and opening brace of the generated function:
    */
   emit(p, "void run_program( void *ctx, \n"
	   " const float (*local_param)[4], \n"
	   " const float (*env_param)[4], \n"
	   " const float (*state_param)[4], \n"
	   " const float (*interp)[4], \n"
	   " float (*outputs)[4])\n"
	   "{\n"
	   " float temp[32][4];\n");
}
/* NOTE(review): presumably emits the text that closes the
 * run_program() function opened by print_header() -- TODO confirm
 * against the function body.
 */
243 static void print_footer( struct fragment_program
*p
)
/* Emit the destination register of `inst` as a C array element,
 * selected by inst->DstReg.File: either "outputs[Index]" or, for
 * PROGRAM_TEMPORARY, "temp[Index]".
 */
248 static void print_dest_reg( struct fragment_program
*p
,
249 const struct fp_instruction
*inst
)
251 switch (inst
->DstReg
.File
) {
253 emit(p
, "outputs[%d]", inst
->DstReg
.Index
);
255 case PROGRAM_TEMPORARY
:
256 emit(p
, "temp[%d]", inst
->DstReg
.Index
);
/* Emit one component of the destination register of `inst`: the
 * register text from print_dest_reg() followed by an "[idx]"
 * subscript.
 */
263 static void print_dest( struct fragment_program
*p
,
264 const struct fp_instruction
*inst
,
267 print_dest_reg(p
, inst
);
268 emit(p
, "[%d]", idx
);
/* UREG_SRC0 extracts the low three bits of the X-channel nibble, i.e.
 * the component selector currently stored in the X slot.
 *
 * print_reg emits the array name that corresponds to the ureg type of
 * `arg`, followed by "[nr]" with the ureg's register number.
 * NOTE(review): "local_const", "env_const", "state_const" and "param"
 * do not match the run_program() parameters emitted by print_header()
 * (local_param, env_param, state_param), so the generated code refers
 * to undeclared names.
 */
272 #define UREG_SRC0(reg) (((reg)>>UREG_CHANNEL_X_SHIFT) & 0x7)
274 static void print_reg( struct fragment_program
*p
,
277 switch (GET_UREG_TYPE(arg
)) {
278 case UREG_TYPE_TEMP
: emit(p
, "temp"); break;
279 case UREG_TYPE_INTERP
: emit(p
, "interp"); break;
280 case UREG_TYPE_LOCAL_CONST
: emit(p
, "local_const"); break;
281 case UREG_TYPE_ENV_CONST
: emit(p
, "env_const"); break;
282 case UREG_TYPE_STATE_CONST
: emit(p
, "state_const"); break;
283 case UREG_TYPE_PARAM
: emit(p
, "param"); break;
286 emit(p
, "[%d]", GET_UREG_NR(arg
));
/* Emit one scalar source operand.  Only the X slot of `arg` is looked
 * at: UREG_SRC0 gives the selected component and the X negate bit is
 * tested separately.  Operands of type UREG_TYPE_STATE_CONST are
 * printed as a literal value ("%g") taken from p->Parameters; for the
 * selectors _X.._W a "[0]".."[3]" subscript is emitted.
 */
290 static void print_arg( struct fragment_program
*p
,
293 GLuint src
= UREG_SRC0(arg
);
300 if (arg
& (1<<UREG_CHANNEL_X_NEGATE_SHIFT
))
308 if (GET_UREG_TYPE(arg
) == UREG_TYPE_STATE_CONST
) {
309 emit(p
, "%g", p
->Parameters
->Parameters
[GET_UREG_NR(arg
)].Values
[src
]);
316 case _X
: emit(p
, "[0]"); break;
317 case _Y
: emit(p
, "[1]"); break;
318 case _Z
: emit(p
, "[2]"); break;
319 case _W
: emit(p
, "[3]"); break;
/* Expand an expression template for channel `i`.  Each "%s" in `fmt`
 * consumes one int ureg from `ap` and is printed with
 * print_arg( deref(reg, i) ), i.e. as the component of that operand
 * found at position `i`.
 * NOTE(review): handling of all other characters of `fmt` is not
 * visible here; presumably they are copied through unchanged.
 */
324 /* This is where the handling of expressions breaks down into string
327 static void print_expression( struct fragment_program
*p
,
333 if (*fmt
== '%' && *(fmt
+1) == 's') {
334 int reg
= va_arg(ap
, int);
336 /* Use of deref() is a bit of a hack:
338 print_arg( p
, deref(reg
, i
) );
/* Emit a direct call to the sampler function `fn`: the text
 * "<fn>( ctx, " then ", <texunit>, " then the destination register of
 * `inst`, so the sampler writes straight into the destination.
 */
350 static void do_tex_simple( struct fragment_program
*p
,
351 const struct fp_instruction
*inst
,
352 const char *fn
, GLuint texunit
, GLuint arg
)
354 emit(p
, " %s( ctx, ", fn
);
356 emit(p
, ", %d, ", texunit
);
357 print_dest_reg(p
, inst
);
/* Emit a texture lookup through sampler function `fn`.
 *
 * need_result becomes true when any destination channel is masked off
 * in inst->DstReg.WriteMask; is_swizzled(arg) is also tested
 * (presumably to set need_tex -- the assignment is not visible).
 * When neither flag is set the call is emitted by do_tex_simple().
 * Otherwise the generated code declares texcoord[4] and result[4],
 * fills texcoord[] one component at a time from `arg`, calls
 * fn( ctx, texcoord, texunit, result ), and copies result[i] to the
 * destination for each channel enabled in the write mask.
 */
362 static void do_tex( struct fragment_program
*p
,
363 const struct fp_instruction
*inst
,
364 const char *fn
, GLuint texunit
, GLuint arg
)
367 GLboolean need_tex
= GL_FALSE
, need_result
= GL_FALSE
;
369 for (i
= 0; i
< 4; i
++)
370 if (!inst
->DstReg
.WriteMask
[i
])
371 need_result
= GL_TRUE
;
373 if (is_swizzled(arg
))
376 if (!need_tex
&& !need_result
) {
377 do_tex_simple( p
, inst
, fn
, texunit
, arg
);
382 emit(p
, " float texcoord[4];\n");
383 emit(p
, " float result[4];\n");
385 for (i
= 0; i
< 4; i
++) {
386 emit(p
, " texcoord[%d] = ", i
);
387 print_arg( p
, deref(arg
, i
) );
391 emit(p
, " %s( ctx, texcoord, %d, result);\n", fn
, texunit
);
393 for (i
= 0; i
< 4; i
++) {
394 if (inst
->DstReg
.WriteMask
[i
]) {
396 print_dest(p
, inst
, i
);
397 emit(p
, " = result[%d];\n", i
);
/* Emit "<dest>[i] = CLAMPF( <dest>[i], 0.0, 1.0);" for component `i`
 * of the destination register of `inst`.
 * NOTE(review): CLAMPF is not among the macros or prototypes that
 * print_header() writes into the generated source.
 */
405 static void saturate( struct fragment_program
*p
,
406 const struct fp_instruction
*inst
,
410 print_dest(p
, inst
, i
);
411 emit(p
, " = CLAMPF( ");
412 print_dest(p
, inst
, i
);
413 emit(p
, ", 0.0, 1.0);\n");
/* Emit the assignment of one destination component.  Nothing is
 * emitted unless inst->DstReg.WriteMask[i] is set; otherwise the
 * destination component is printed, then the expression built from
 * `fmt` and its ureg arguments for channel `i`, then saturate().
 * NOTE(review): whether saturate() is guarded by a condition is not
 * visible here.
 */
416 static void assign_single( GLuint i
,
417 struct fragment_program
*p
,
418 const struct fp_instruction
*inst
,
425 if (inst
->DstReg
.WriteMask
[i
]) {
427 print_dest(p
, inst
, i
);
429 print_expression( p
, i
, fmt
, ap
);
431 saturate(p
, inst
, i
);
/* Emit a component-wise assignment: for each of the four channels
 * enabled in inst->DstReg.WriteMask, print the destination component,
 * the expression from `fmt` evaluated for that channel, and
 * saturate().
 * NOTE(review): whether saturate() is guarded by a condition is not
 * visible here.
 */
437 static void assign4( struct fragment_program
*p
,
438 const struct fp_instruction
*inst
,
446 for (i
= 0; i
< 4; i
++)
447 if (inst
->DstReg
.WriteMask
[i
]) {
449 print_dest(p
, inst
, i
);
451 print_expression( p
, i
, fmt
, ap
);
453 saturate(p
, inst
, i
);
/* Emit an assignment whose scalar result is replicated.  The
 * expression from `fmt` is printed once, for channel 0, into
 * destination component `first`, which is determined by a scan of
 * inst->DstReg.WriteMask.  Every later enabled component is then
 * assigned from destination component `first`.
 */
459 static void assign4_replicate( struct fragment_program
*p
,
460 const struct fp_instruction
*inst
,
468 for (i
= 0; i
< 4; i
++)
469 if (inst
->DstReg
.WriteMask
[i
]) {
481 print_dest(p
, inst
, first
);
483 print_expression( p
, 0, fmt
, ap
);
485 saturate(p
, inst
, first
);
488 for (i
= first
+1; i
< 4; i
++)
489 if (inst
->DstReg
.WriteMask
[i
]) {
491 print_dest(p
, inst
, i
);
493 print_dest(p
, inst
, first
);
/* Return the number of source operands (1, 2 or 3) that the given
 * FP_OPCODE_* instruction reads.
 */
501 static GLuint
nr_args( GLuint opcode
)
504 case FP_OPCODE_ABS
: return 1;
505 case FP_OPCODE_ADD
: return 2;
506 case FP_OPCODE_CMP
: return 3;
507 case FP_OPCODE_COS
: return 1;
508 case FP_OPCODE_DP3
: return 2;
509 case FP_OPCODE_DP4
: return 2;
510 case FP_OPCODE_DPH
: return 2;
511 case FP_OPCODE_DST
: return 2;
512 case FP_OPCODE_EX2
: return 1;
513 case FP_OPCODE_FLR
: return 1;
514 case FP_OPCODE_FRC
: return 1;
515 case FP_OPCODE_KIL
: return 1;
516 case FP_OPCODE_LG2
: return 1;
517 case FP_OPCODE_LIT
: return 1;
518 case FP_OPCODE_LRP
: return 3;
519 case FP_OPCODE_MAD
: return 3;
520 case FP_OPCODE_MAX
: return 2;
521 case FP_OPCODE_MIN
: return 2;
522 case FP_OPCODE_MOV
: return 1;
523 case FP_OPCODE_MUL
: return 2;
524 case FP_OPCODE_POW
: return 2;
525 case FP_OPCODE_RCP
: return 1;
526 case FP_OPCODE_RSQ
: return 1;
527 case FP_OPCODE_SCS
: return 1;
528 case FP_OPCODE_SGE
: return 2;
529 case FP_OPCODE_SIN
: return 1;
530 case FP_OPCODE_SLT
: return 2;
531 case FP_OPCODE_SUB
: return 2;
532 case FP_OPCODE_SWZ
: return 1;
533 case FP_OPCODE_TEX
: return 1;
534 case FP_OPCODE_TXB
: return 1;
535 case FP_OPCODE_TXP
: return 1;
536 case FP_OPCODE_XPD
: return 2;
/* Translate every instruction of p->Instructions, up to the
 * FP_OPCODE_END marker, into C statements.  For each instruction the
 * nr_args() source operands are converted to uregs with src_vector()
 * and the opcode switch emits the matching assignment, using assign4()
 * for component-wise results, assign4_replicate() for scalar results
 * and assign_single() for per-component special cases.
 *
 * NOTE(review): one template emits "COS(%s)", but print_header() only
 * declares cosf(); the SCS case uses "cosf(%s)".
 * NOTE(review): the templates using "MIN2(%s, 0)" next to expf() look
 * like the LIT instruction, whose y result is specified as
 * max(src.x, 0) -- presumably MAX2 was intended; confirm against the
 * ARB_fragment_program specification.
 */
543 static void translate_program( struct fragment_program
*p
)
545 const struct fp_instruction
*inst
= p
->Instructions
;
547 for (; inst
->Opcode
!= FP_OPCODE_END
; inst
++) {
550 GLuint nr
= nr_args( inst
->Opcode
);
552 for (i
= 0; i
< nr
; i
++)
553 src
[i
] = src_vector( &inst
->SrcReg
[i
] );
555 /* Print the original program instruction string */
558 const char *s
= (const char *) p
->Base
.String
+ inst
->StringPos
;
567 switch (inst
->Opcode
) {
569 assign4(p
, inst
, "fabsf(%s)", src
[0]);
573 assign4(p
, inst
, "%s + %s", src
[0], src
[1]);
577 assign4(p
, inst
, "%s < 0.0F ? %s : %s", src
[0], src
[1], src
[2]);
581 assign4_replicate(p
, inst
, "COS(%s)", src
[0]);
585 assign4_replicate(p
, inst
,
586 "%s*%s + %s*%s + %s*%s",
596 assign4_replicate(p
, inst
,
597 "%s*%s + %s*%s + %s*%s + %s*%s",
607 assign4_replicate(p
, inst
,
608 "%s*%s + %s*%s + %s*%s + %s",
617 /* result[0] = 1 * 1;
618 * result[1] = a[1] * b[1];
619 * result[2] = a[2] * 1;
620 * result[3] = 1 * b[3];
622 assign_single(0, p
, inst
, "1.0");
624 assign_single(1, p
, inst
, "%s * %s",
625 deref(src
[0], _Y
), deref(src
[1], _Y
));
627 assign_single(2, p
, inst
, "%s", deref(src
[0], _Z
));
628 assign_single(3, p
, inst
, "%s", deref(src
[1], _W
));
632 assign4_replicate(p
, inst
, "powf(2.0, %s)", src
[0]);
636 assign4_replicate(p
, inst
, "floorf(%s)", src
[0]);
640 assign4_replicate(p
, inst
, "%s - floorf(%s)", src
[0], src
[0]);
648 assign4_replicate(p
, inst
, "LOG2(%s)", src
[0]);
652 assign_single(0, p
, inst
, "1.0");
653 assign_single(1, p
, inst
, "MIN2(%s, 0)", deref(src
[0], _X
));
654 assign_single(2, p
, inst
, "(%s > 0.0) ? expf(%s * MIN2(%s, 0)) : 0.0",
658 assign_single(3, p
, inst
, "1.0");
663 "%s * %s + (1.0 - %s) * %s",
664 src
[0], src
[1], src
[0], src
[2]);
668 assign4(p
, inst
, "%s * %s + %s", src
[0], src
[1], src
[2]);
672 assign4(p
, inst
, "MAX2(%s, %s)", src
[0], src
[1]);
676 assign4(p
, inst
, "MIN2(%s, %s)", src
[0], src
[1]);
680 assign4(p
, inst
, "%s", src
[0]);
684 assign4(p
, inst
, "%s * %s", src
[0], src
[1]);
688 assign4_replicate(p
, inst
, "powf(%s, %s)", src
[0], src
[1]);
692 assign4_replicate(p
, inst
, "1.0/%s", src
[0]);
696 assign4_replicate(p
, inst
, "_mesa_inv_sqrtf(%s)", src
[0]);
700 if (inst
->DstReg
.WriteMask
[0]) {
701 assign_single(0, p
, inst
, "cosf(%s)", deref(src
[0], _X
));
704 if (inst
->DstReg
.WriteMask
[1]) {
705 assign_single(1, p
, inst
, "sinf(%s)", deref(src
[0], _X
));
710 assign4(p
, inst
, "%s >= %s ? 1.0 : 0.0", src
[0], src
[1]);
714 assign4_replicate(p
, inst
, "sinf(%s)", src
[0]);
718 assign4(p
, inst
, "%s < %s ? 1.0 : 0.0", src
[0], src
[1]);
722 assign4(p
, inst
, "%s - %s", src
[0], src
[1]);
725 case FP_OPCODE_SWZ
: /* same implementation as MOV: */
726 assign4(p
, inst
, "%s", src
[0]);
730 do_tex(p
, inst
, "TEX", inst
->TexSrcUnit
, src
[0]);
734 do_tex(p
, inst
, "TXB", inst
->TexSrcUnit
, src
[0]);
738 do_tex(p
, inst
, "TXP", inst
->TexSrcUnit
, src
[0]);
743 * result.x = src[0].y * src[1].z - src[0].z * src[1].y;
744 * result.y = src[0].z * src[1].x - src[0].x * src[1].z;
745 * result.z = src[0].x * src[1].y - src[0].y * src[1].x;
750 swizzle(src
[0], _Y
, _Z
, _X
, _ONE
),
751 swizzle(src
[1], _Z
, _X
, _Y
, _ONE
),
752 swizzle(src
[0], _Z
, _X
, _Y
, _ONE
),
753 swizzle(src
[1], _Y
, _Z
, _X
, _ONE
));
757 emit(p
, "BOGUS OPCODE\n");
/* Entry point: translate the context's current fragment program
 * (ctx->FragmentProgram.Current) with translate_program() and print
 * the recorded length p->c_strlen next to strlen(p->c_str).
 * NOTE(review): strlen() returns size_t but is printed with "%d";
 * the argument needs a cast or a matching conversion specifier.
 */
767 void _swrast_translate_program( GLcontext
*ctx
)
769 struct fragment_program
*p
= ctx
->FragmentProgram
.Current
;
775 translate_program( p
);
779 printf("C program length: %d/%d chars\n", p
->c_strlen
, strlen(p
->c_str
));