2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 /* An amusing little utility to print ARB fragment programs out as a C
26 * function. Resulting code not tested except visually.
33 #include "nvfragprog.h"
37 #include "s_nvfragprog.h"
39 #include "s_texture.h"
44 /* UREG - a way of representing an FP source register including
45 * swizzling and negation in a single GLuint. Major flaw is the
46 * limitation to source->Index < 32. Secondary flaw is the fact that
47 * it's overkill & we could probably just pass around the original
51 #define UREG_TYPE_TEMP 0
52 #define UREG_TYPE_INTERP 1
53 #define UREG_TYPE_LOCAL_CONST 2
54 #define UREG_TYPE_ENV_CONST 3
55 #define UREG_TYPE_STATE_CONST 4
56 #define UREG_TYPE_PARAM 5
57 #define UREG_TYPE_OUTPUT 6
58 #define UREG_TYPE_MASK 0x7
60 #define UREG_TYPE_SHIFT 29
61 #define UREG_NR_SHIFT 24
62 #define UREG_NR_MASK 0x1f /* 31 */
63 #define UREG_CHANNEL_X_NEGATE_SHIFT 23
64 #define UREG_CHANNEL_X_SHIFT 20
65 #define UREG_CHANNEL_Y_NEGATE_SHIFT 19
66 #define UREG_CHANNEL_Y_SHIFT 16
67 #define UREG_CHANNEL_Z_NEGATE_SHIFT 15
68 #define UREG_CHANNEL_Z_SHIFT 12
69 #define UREG_CHANNEL_W_NEGATE_SHIFT 11
70 #define UREG_CHANNEL_W_SHIFT 8
71 #define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
72 #define UREG_CHANNEL_ZERO_SHIFT 4
73 #define UREG_CHANNEL_ONE_NEGATE_MBZ 1
74 #define UREG_CHANNEL_ONE_SHIFT 0
76 #define UREG_BAD 0xffffffff /* not a valid ureg */
82 #define _ZERO 4 /* NOTE! */
83 #define _ONE 5 /* NOTE! */
88 #define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \
89 ((nr) << UREG_NR_SHIFT) | \
90 (_X << UREG_CHANNEL_X_SHIFT) | \
91 (_Y << UREG_CHANNEL_Y_SHIFT) | \
92 (_Z << UREG_CHANNEL_Z_SHIFT) | \
93 (_W << UREG_CHANNEL_W_SHIFT) | \
94 (_ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
95 (_ONE << UREG_CHANNEL_ONE_SHIFT))
/* GET_CHANNEL_SRC moves the 4-bit field (3-bit select + negate bit) of
 * the given channel of reg up into the X channel position.
 * CHANNEL_SRC moves a field sitting in the X channel position down into
 * the position of the given channel.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (e.g. "a + b" or "r | s") expand with the intended precedence.
 */
#define GET_CHANNEL_SRC( reg, channel ) (((reg)<<((channel)*4)) & \
                                         (0xf<<UREG_CHANNEL_X_SHIFT))
#define CHANNEL_SRC( src, channel )     ((src)>>((channel)*4))
101 #define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&UREG_TYPE_MASK)
102 #define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&UREG_NR_MASK)
106 #define UREG_XYZW_CHANNEL_MASK 0x00ffff00
108 #define deref(reg,pos) swizzle(reg, pos, pos, pos, pos)
111 static INLINE
int is_swizzled( int reg
)
113 return ((reg
& UREG_XYZW_CHANNEL_MASK
) !=
114 (UREG(0,0) & UREG_XYZW_CHANNEL_MASK
));
118 /* One neat thing about the UREG representation:
120 static INLINE
int swizzle( int reg
, int x
, int y
, int z
, int w
)
122 return ((reg
& ~UREG_XYZW_CHANNEL_MASK
) |
123 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, x
), 0 ) |
124 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, y
), 1 ) |
125 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, z
), 2 ) |
126 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, w
), 3 ));
129 /* Another neat thing about the UREG representation:
131 static INLINE
int negate( int reg
, int x
, int y
, int z
, int w
)
133 return reg
^ (((x
&1)<<UREG_CHANNEL_X_NEGATE_SHIFT
)|
134 ((y
&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT
)|
135 ((z
&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT
)|
136 ((w
&1)<<UREG_CHANNEL_W_NEGATE_SHIFT
));
141 static GLuint
src_reg_file( GLuint file
)
144 case PROGRAM_TEMPORARY
: return UREG_TYPE_TEMP
;
145 case PROGRAM_INPUT
: return UREG_TYPE_INTERP
;
146 case PROGRAM_LOCAL_PARAM
: return UREG_TYPE_LOCAL_CONST
;
147 case PROGRAM_ENV_PARAM
: return UREG_TYPE_ENV_CONST
;
149 case PROGRAM_STATE_VAR
: return UREG_TYPE_STATE_CONST
;
150 case PROGRAM_NAMED_PARAM
: return UREG_TYPE_PARAM
;
151 default: return UREG_BAD
;
155 static void emit( struct fragment_program
*p
,
162 if (p
->c_strlen
< sizeof(p
->c_str
))
163 p
->c_strlen
+= vsnprintf( p
->c_str
+ p
->c_strlen
,
164 sizeof(p
->c_str
) - p
->c_strlen
,
170 static INLINE
void emit_char( struct fragment_program
*p
, char c
)
172 if (p
->c_strlen
< sizeof(p
->c_str
)) {
173 p
->c_str
[p
->c_strlen
] = c
;
180 * Retrieve a ureg for the given source register. Will emit
181 * constants, apply swizzling and negation as needed.
183 static GLuint
src_vector( const struct fp_src_register
*source
)
187 assert(source
->Index
< 32); /* limitiation of UREG representation */
189 src
= UREG( src_reg_file( source
->File
), source
->Index
);
192 _X
+ source
->Swizzle
[0],
193 _X
+ source
->Swizzle
[1],
194 _X
+ source
->Swizzle
[2],
195 _X
+ source
->Swizzle
[3]);
197 if (source
->NegateBase
)
198 src
= negate( src
, 1,1,1,1 );
204 static void print_header( struct fragment_program
*p
)
208 /* Mesa's program_parameter struct:
211 "struct program_parameter\n"
213 " const char *Name;\n"
215 " int StateIndexes[6];\n"
216 " float Values[4];\n"
220 /* Texture samplers, not written yet:
222 emit(p
, "extern void TEX( void *ctx, const float *txc, int unit, float *rslt );\n"
223 "extern void TXB( void *ctx, const float *txc, int unit, float *rslt );\n"
224 "extern void TXP( void *ctx, const float *txc, int unit, float *rslt );\n");
226 /* Resort to the standard math library (float versions):
228 emit(p
, "extern float fabsf( float );\n"
229 "extern float cosf( float );\n"
230 "extern float sinf( float );\n"
231 "extern float expf( float );\n"
232 "extern float powf( float, float );\n"
233 "extern float floorf( float );\n");
235 /* These ones we have fast code in Mesa for:
237 emit(p
, "extern float LOG2( float );\n"
238 "extern float _mesa_inv_sqrtf( float );\n");
240 /* The usual macros, not really needed, but handy:
242 emit(p
, "#define MIN2(x,y) ((x)<(y)?(x):(y))\n"
243 "#define MAX2(x,y) ((x)<(y)?(x):(y))\n"
244 "#define SATURATE(x) ((x)>1.0?1.0:((x)<0.0?0.0:(x)))\n");
248 emit(p
, "int run_program( void *ctx, \n"
249 " const float (*local_param)[4], \n"
250 " const float (*env_param)[4], \n"
251 " const struct program_parameter *state_param, \n"
252 " const float (*interp)[4], \n"
253 " float (*outputs)[4])\n"
255 " float temp[32][4];\n"
259 static void print_footer( struct fragment_program
*p
)
261 emit(p
, " return 1;");
265 static void print_dest_reg( struct fragment_program
*p
,
266 const struct fp_instruction
*inst
)
268 switch (inst
->DstReg
.File
) {
270 emit(p
, "outputs[%d]", inst
->DstReg
.Index
);
272 case PROGRAM_TEMPORARY
:
273 emit(p
, "temp[%d]", inst
->DstReg
.Index
);
280 static void print_dest( struct fragment_program
*p
,
281 const struct fp_instruction
*inst
,
284 print_dest_reg(p
, inst
);
285 emit(p
, "[%d]", idx
);
289 #define UREG_SRC0(reg) (((reg)>>UREG_CHANNEL_X_SHIFT) & 0x7)
291 static void print_reg( struct fragment_program
*p
,
294 switch (GET_UREG_TYPE(arg
)) {
295 case UREG_TYPE_TEMP
: emit(p
, "temp"); break;
296 case UREG_TYPE_INTERP
: emit(p
, "interp"); break;
297 case UREG_TYPE_LOCAL_CONST
: emit(p
, "local_const"); break;
298 case UREG_TYPE_ENV_CONST
: emit(p
, "env_const"); break;
299 case UREG_TYPE_STATE_CONST
: emit(p
, "state_param"); break;
300 case UREG_TYPE_PARAM
: emit(p
, "local_param"); break;
303 emit(p
, "[%d]", GET_UREG_NR(arg
));
305 if (GET_UREG_TYPE(arg
) == UREG_TYPE_STATE_CONST
) {
311 static void print_arg( struct fragment_program
*p
,
314 GLuint src
= UREG_SRC0(arg
);
321 if (arg
& (1<<UREG_CHANNEL_X_NEGATE_SHIFT
))
329 if (GET_UREG_TYPE(arg
) == UREG_TYPE_STATE_CONST
&&
330 p
->Parameters
->Parameters
[GET_UREG_NR(arg
)].Type
== CONSTANT
) {
331 emit(p
, "%g", p
->Parameters
->Parameters
[GET_UREG_NR(arg
)].Values
[src
]);
338 case _X
: emit(p
, "[0]"); break;
339 case _Y
: emit(p
, "[1]"); break;
340 case _Z
: emit(p
, "[2]"); break;
341 case _W
: emit(p
, "[3]"); break;
346 /* This is where the handling of expressions breaks down into string
349 static void print_expression( struct fragment_program
*p
,
355 if (*fmt
== '%' && *(fmt
+1) == 's') {
356 int reg
= va_arg(ap
, int);
358 /* Use of deref() is a bit of a hack:
360 print_arg( p
, deref(reg
, i
) );
372 static void do_tex_kill( struct fragment_program
*p
,
373 const struct fp_instruction
*inst
,
380 for (i
= 0; i
< 4; i
++) {
381 print_arg( p
, deref(arg
, i
) );
388 emit(p
, " return 0;\n");
392 static void do_tex_simple( struct fragment_program
*p
,
393 const struct fp_instruction
*inst
,
394 const char *fn
, GLuint texunit
, GLuint arg
)
396 emit(p
, " %s( ctx, ", fn
);
398 emit(p
, ", %d, ", texunit
);
399 print_dest_reg(p
, inst
);
404 static void do_tex( struct fragment_program
*p
,
405 const struct fp_instruction
*inst
,
406 const char *fn
, GLuint texunit
, GLuint arg
)
409 GLboolean need_tex
= GL_FALSE
, need_result
= GL_FALSE
;
411 for (i
= 0; i
< 4; i
++)
412 if (!inst
->DstReg
.WriteMask
[i
])
413 need_result
= GL_TRUE
;
415 if (is_swizzled(arg
))
418 if (!need_tex
&& !need_result
) {
419 do_tex_simple( p
, inst
, fn
, texunit
, arg
);
424 emit(p
, " float texcoord[4];\n");
425 emit(p
, " float result[4];\n");
427 for (i
= 0; i
< 4; i
++) {
428 emit(p
, " texcoord[%d] = ", i
);
429 print_arg( p
, deref(arg
, i
) );
433 emit(p
, " %s( ctx, texcoord, %d, result);\n", fn
, texunit
);
435 for (i
= 0; i
< 4; i
++) {
436 if (inst
->DstReg
.WriteMask
[i
]) {
438 print_dest(p
, inst
, i
);
439 emit(p
, " = result[%d];\n", i
);
447 static void saturate( struct fragment_program
*p
,
448 const struct fp_instruction
*inst
,
452 print_dest(p
, inst
, i
);
453 emit(p
, " = SATURATE( ");
454 print_dest(p
, inst
, i
);
458 static void assign_single( GLuint i
,
459 struct fragment_program
*p
,
460 const struct fp_instruction
*inst
,
467 if (inst
->DstReg
.WriteMask
[i
]) {
469 print_dest(p
, inst
, i
);
471 print_expression( p
, i
, fmt
, ap
);
473 saturate(p
, inst
, i
);
479 static void assign4( struct fragment_program
*p
,
480 const struct fp_instruction
*inst
,
488 for (i
= 0; i
< 4; i
++)
489 if (inst
->DstReg
.WriteMask
[i
]) {
491 print_dest(p
, inst
, i
);
493 print_expression( p
, i
, fmt
, ap
);
495 saturate(p
, inst
, i
);
501 static void assign4_replicate( struct fragment_program
*p
,
502 const struct fp_instruction
*inst
,
510 for (i
= 0; i
< 4; i
++)
511 if (inst
->DstReg
.WriteMask
[i
]) {
523 print_dest(p
, inst
, first
);
525 print_expression( p
, 0, fmt
, ap
);
527 saturate(p
, inst
, first
);
530 for (i
= first
+1; i
< 4; i
++)
531 if (inst
->DstReg
.WriteMask
[i
]) {
533 print_dest(p
, inst
, i
);
535 print_dest(p
, inst
, first
);
543 static GLuint
nr_args( GLuint opcode
)
546 case FP_OPCODE_ABS
: return 1;
547 case FP_OPCODE_ADD
: return 2;
548 case FP_OPCODE_CMP
: return 3;
549 case FP_OPCODE_COS
: return 1;
550 case FP_OPCODE_DP3
: return 2;
551 case FP_OPCODE_DP4
: return 2;
552 case FP_OPCODE_DPH
: return 2;
553 case FP_OPCODE_DST
: return 2;
554 case FP_OPCODE_EX2
: return 1;
555 case FP_OPCODE_FLR
: return 1;
556 case FP_OPCODE_FRC
: return 1;
557 case FP_OPCODE_KIL
: return 1;
558 case FP_OPCODE_LG2
: return 1;
559 case FP_OPCODE_LIT
: return 1;
560 case FP_OPCODE_LRP
: return 3;
561 case FP_OPCODE_MAD
: return 3;
562 case FP_OPCODE_MAX
: return 2;
563 case FP_OPCODE_MIN
: return 2;
564 case FP_OPCODE_MOV
: return 1;
565 case FP_OPCODE_MUL
: return 2;
566 case FP_OPCODE_POW
: return 2;
567 case FP_OPCODE_RCP
: return 1;
568 case FP_OPCODE_RSQ
: return 1;
569 case FP_OPCODE_SCS
: return 1;
570 case FP_OPCODE_SGE
: return 2;
571 case FP_OPCODE_SIN
: return 1;
572 case FP_OPCODE_SLT
: return 2;
573 case FP_OPCODE_SUB
: return 2;
574 case FP_OPCODE_SWZ
: return 1;
575 case FP_OPCODE_TEX
: return 1;
576 case FP_OPCODE_TXB
: return 1;
577 case FP_OPCODE_TXP
: return 1;
578 case FP_OPCODE_XPD
: return 2;
585 static void translate_program( struct fragment_program
*p
)
587 const struct fp_instruction
*inst
= p
->Instructions
;
589 for (; inst
->Opcode
!= FP_OPCODE_END
; inst
++) {
592 GLuint nr
= nr_args( inst
->Opcode
);
594 for (i
= 0; i
< nr
; i
++)
595 src
[i
] = src_vector( &inst
->SrcReg
[i
] );
597 /* Print the original program instruction string */
600 const char *s
= (const char *) p
->Base
.String
+ inst
->StringPos
;
609 switch (inst
->Opcode
) {
611 assign4(p
, inst
, "fabsf(%s)", src
[0]);
615 assign4(p
, inst
, "%s + %s", src
[0], src
[1]);
619 assign4(p
, inst
, "%s < 0.0F ? %s : %s", src
[0], src
[1], src
[2]);
623 assign4_replicate(p
, inst
, "COS(%s)", src
[0]);
627 assign4_replicate(p
, inst
,
628 "%s*%s + %s*%s + %s*%s",
638 assign4_replicate(p
, inst
,
639 "%s*%s + %s*%s + %s*%s + %s*%s",
649 assign4_replicate(p
, inst
,
650 "%s*%s + %s*%s + %s*%s + %s",
659 /* result[0] = 1 * 1;
660 * result[1] = a[1] * b[1];
661 * result[2] = a[2] * 1;
662 * result[3] = 1 * b[3];
664 assign_single(0, p
, inst
, "1.0");
666 assign_single(1, p
, inst
, "%s * %s",
667 deref(src
[0], _Y
), deref(src
[1], _Y
));
669 assign_single(2, p
, inst
, "%s", deref(src
[0], _Z
));
670 assign_single(3, p
, inst
, "%s", deref(src
[1], _W
));
674 assign4_replicate(p
, inst
, "powf(2.0, %s)", src
[0]);
678 assign4_replicate(p
, inst
, "floorf(%s)", src
[0]);
682 assign4_replicate(p
, inst
, "%s - floorf(%s)", src
[0], src
[0]);
686 do_tex_kill(p
, inst
, src
[0]);
690 assign4_replicate(p
, inst
, "LOG2(%s)", src
[0]);
694 assign_single(0, p
, inst
, "1.0");
695 assign_single(1, p
, inst
, "MIN2(%s, 0)", deref(src
[0], _X
));
696 assign_single(2, p
, inst
, "(%s > 0.0) ? expf(%s * MIN2(%s, 0)) : 0.0",
700 assign_single(3, p
, inst
, "1.0");
705 "%s * %s + (1.0 - %s) * %s",
706 src
[0], src
[1], src
[0], src
[2]);
710 assign4(p
, inst
, "%s * %s + %s", src
[0], src
[1], src
[2]);
714 assign4(p
, inst
, "MAX2(%s, %s)", src
[0], src
[1]);
718 assign4(p
, inst
, "MIN2(%s, %s)", src
[0], src
[1]);
722 assign4(p
, inst
, "%s", src
[0]);
726 assign4(p
, inst
, "%s * %s", src
[0], src
[1]);
730 assign4_replicate(p
, inst
, "powf(%s, %s)", src
[0], src
[1]);
734 assign4_replicate(p
, inst
, "1.0/%s", src
[0]);
738 assign4_replicate(p
, inst
, "_mesa_inv_sqrtf(%s)", src
[0]);
742 if (inst
->DstReg
.WriteMask
[0]) {
743 assign_single(0, p
, inst
, "cosf(%s)", deref(src
[0], _X
));
746 if (inst
->DstReg
.WriteMask
[1]) {
747 assign_single(1, p
, inst
, "sinf(%s)", deref(src
[0], _X
));
752 assign4(p
, inst
, "%s >= %s ? 1.0 : 0.0", src
[0], src
[1]);
756 assign4_replicate(p
, inst
, "sinf(%s)", src
[0]);
760 assign4(p
, inst
, "%s < %s ? 1.0 : 0.0", src
[0], src
[1]);
764 assign4(p
, inst
, "%s - %s", src
[0], src
[1]);
767 case FP_OPCODE_SWZ
: /* same implementation as MOV: */
768 assign4(p
, inst
, "%s", src
[0]);
772 do_tex(p
, inst
, "TEX", inst
->TexSrcUnit
, src
[0]);
776 do_tex(p
, inst
, "TXB", inst
->TexSrcUnit
, src
[0]);
780 do_tex(p
, inst
, "TXP", inst
->TexSrcUnit
, src
[0]);
785 * result.x = src[0].y * src[1].z - src[0].z * src[1].y;
786 * result.y = src[0].z * src[1].x - src[0].x * src[1].z;
787 * result.z = src[0].x * src[1].y - src[0].y * src[1].x;
792 swizzle(src
[0], _Y
, _Z
, _X
, _ONE
),
793 swizzle(src
[1], _Z
, _X
, _Y
, _ONE
),
794 swizzle(src
[0], _Z
, _X
, _Y
, _ONE
),
795 swizzle(src
[1], _Y
, _Z
, _X
, _ONE
));
799 emit(p
, "BOGUS OPCODE\n");
809 void _swrast_translate_program( GLcontext
*ctx
)
811 struct fragment_program
*p
= ctx
->FragmentProgram
._Current
;
817 translate_program( p
);