2 * Mesa 3-D graphics library
5 * Copyright (C) 1999-2004 Brian Paul All Rights Reserved.
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 /* An amusing little utility to print ARB fragment programs out as a C
26 * function. Resulting code not tested except visually.
33 #include "nvfragprog.h"
37 #include "s_nvfragprog.h"
39 #include "s_texture.h"
/* UREG - a way of representing an FP source register including
 * swizzling and negation in a single GLuint.  Major flaw is the
 * limitation to source->Index < 32.  Secondary flaw is the fact that
 * it's overkill & we could probably just pass around the original
 * datatypes instead.
 */
49 #define UREG_TYPE_TEMP 0
50 #define UREG_TYPE_INTERP 1
51 #define UREG_TYPE_LOCAL_CONST 2
52 #define UREG_TYPE_ENV_CONST 3
53 #define UREG_TYPE_STATE_CONST 4
54 #define UREG_TYPE_PARAM 5
55 #define UREG_TYPE_OUTPUT 6
56 #define UREG_TYPE_MASK 0x7
58 #define UREG_TYPE_SHIFT 29
59 #define UREG_NR_SHIFT 24
60 #define UREG_NR_MASK 0x1f /* 31 */
61 #define UREG_CHANNEL_X_NEGATE_SHIFT 23
62 #define UREG_CHANNEL_X_SHIFT 20
63 #define UREG_CHANNEL_Y_NEGATE_SHIFT 19
64 #define UREG_CHANNEL_Y_SHIFT 16
65 #define UREG_CHANNEL_Z_NEGATE_SHIFT 15
66 #define UREG_CHANNEL_Z_SHIFT 12
67 #define UREG_CHANNEL_W_NEGATE_SHIFT 11
68 #define UREG_CHANNEL_W_SHIFT 8
69 #define UREG_CHANNEL_ZERO_NEGATE_MBZ 5
70 #define UREG_CHANNEL_ZERO_SHIFT 4
71 #define UREG_CHANNEL_ONE_NEGATE_MBZ 1
72 #define UREG_CHANNEL_ONE_SHIFT 0
74 #define UREG_BAD 0xffffffff /* not a valid ureg */
80 #define _ZERO 4 /* NOTE! */
81 #define _ONE 5 /* NOTE! */
86 #define UREG( type, nr ) (((type)<< UREG_TYPE_SHIFT) | \
87 ((nr) << UREG_NR_SHIFT) | \
88 (_X << UREG_CHANNEL_X_SHIFT) | \
89 (_Y << UREG_CHANNEL_Y_SHIFT) | \
90 (_Z << UREG_CHANNEL_Z_SHIFT) | \
91 (_W << UREG_CHANNEL_W_SHIFT) | \
92 (_ZERO << UREG_CHANNEL_ZERO_SHIFT) | \
93 (_ONE << UREG_CHANNEL_ONE_SHIFT))
95 #define GET_CHANNEL_SRC( reg, channel ) ((reg<<(channel*4)) & \
96 (0xf<<UREG_CHANNEL_X_SHIFT))
97 #define CHANNEL_SRC( src, channel ) (src>>(channel*4))
99 #define GET_UREG_TYPE(reg) (((reg)>>UREG_TYPE_SHIFT)&UREG_TYPE_MASK)
100 #define GET_UREG_NR(reg) (((reg)>>UREG_NR_SHIFT)&UREG_NR_MASK)
104 #define UREG_XYZW_CHANNEL_MASK 0x00ffff00
106 #define deref(reg,pos) swizzle(reg, pos, pos, pos, pos)
109 static INLINE
int is_swizzled( int reg
)
111 return ((reg
& UREG_XYZW_CHANNEL_MASK
) !=
112 (UREG(0,0) & UREG_XYZW_CHANNEL_MASK
));
116 /* One neat thing about the UREG representation:
118 static INLINE
int swizzle( int reg
, int x
, int y
, int z
, int w
)
120 return ((reg
& ~UREG_XYZW_CHANNEL_MASK
) |
121 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, x
), 0 ) |
122 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, y
), 1 ) |
123 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, z
), 2 ) |
124 CHANNEL_SRC( GET_CHANNEL_SRC( reg
, w
), 3 ));
127 /* Another neat thing about the UREG representation:
129 static INLINE
int negate( int reg
, int x
, int y
, int z
, int w
)
131 return reg
^ (((x
&1)<<UREG_CHANNEL_X_NEGATE_SHIFT
)|
132 ((y
&1)<<UREG_CHANNEL_Y_NEGATE_SHIFT
)|
133 ((z
&1)<<UREG_CHANNEL_Z_NEGATE_SHIFT
)|
134 ((w
&1)<<UREG_CHANNEL_W_NEGATE_SHIFT
));
139 static GLuint
src_reg_file( GLuint file
)
142 case PROGRAM_TEMPORARY
: return UREG_TYPE_TEMP
;
143 case PROGRAM_INPUT
: return UREG_TYPE_INTERP
;
144 case PROGRAM_LOCAL_PARAM
: return UREG_TYPE_LOCAL_CONST
;
145 case PROGRAM_ENV_PARAM
: return UREG_TYPE_ENV_CONST
;
147 case PROGRAM_STATE_VAR
: return UREG_TYPE_STATE_CONST
;
148 case PROGRAM_NAMED_PARAM
: return UREG_TYPE_PARAM
;
149 default: return UREG_BAD
;
155 * Retrieve a ureg for the given source register. Will emit
156 * constants, apply swizzling and negation as needed.
158 static GLuint
src_vector( const struct fp_src_register
*source
)
162 /* fprintf(stderr, "%s File %d, Index %d\n", */
163 /* __FUNCTION__, source->File, source->Index); */
165 assert(source
->Index
< 32); /* limitiation of UREG representation */
167 src
= UREG( src_reg_file( source
->File
), source
->Index
);
170 _X
+ source
->Swizzle
[0],
171 _X
+ source
->Swizzle
[1],
172 _X
+ source
->Swizzle
[2],
173 _X
+ source
->Swizzle
[3]);
175 if (source
->NegateBase
)
176 src
= negate( src
, 1,1,1,1 );
181 static void print_header( void )
183 printf("static void run_program( const GLfloat (*local_param)[4], \n"
184 " const GLfloat (*env_param)[4], \n"
185 " const GLfloat (*state_param)[4], \n"
186 " const GLfloat (*interp)[4], \n"
187 " GLfloat *outputs)\n"
189 " GLfloat temp[32][4];\n"
193 static void print_footer( void )
198 static void print_dest_reg( const struct fp_instruction
*inst
)
200 switch (inst
->DstReg
.File
) {
202 printf("outputs[%d]", inst
->DstReg
.Index
);
204 case PROGRAM_TEMPORARY
:
205 printf("temp[%d]", inst
->DstReg
.Index
);
212 static void print_dest( const struct fp_instruction
*inst
,
215 print_dest_reg(inst
);
220 #define UREG_SRC0(reg) (((reg)>>UREG_CHANNEL_X_SHIFT) & 0x7)
222 static void print_reg( GLuint arg
)
224 switch (GET_UREG_TYPE(arg
)) {
225 case UREG_TYPE_TEMP
: printf("temp"); break;
226 case UREG_TYPE_INTERP
: printf("interp"); break;
227 case UREG_TYPE_LOCAL_CONST
: printf("local_const"); break;
228 case UREG_TYPE_ENV_CONST
: printf("env_const"); break;
229 case UREG_TYPE_STATE_CONST
: printf("state_const"); break;
230 case UREG_TYPE_PARAM
: printf("param"); break;
233 printf("[%d]", GET_UREG_NR(arg
));
237 static void print_arg( const struct fragment_program
*p
,
240 GLuint src
= UREG_SRC0(arg
);
247 if (arg
& (1<<UREG_CHANNEL_X_NEGATE_SHIFT
))
255 if (GET_UREG_TYPE(arg
) == UREG_TYPE_STATE_CONST
) {
256 printf("%g", p
->Parameters
->Parameters
[GET_UREG_NR(arg
)].Values
[src
]);
263 case _X
: printf("[0]"); break;
264 case _Y
: printf("[1]"); break;
265 case _Z
: printf("[2]"); break;
266 case _W
: printf("[3]"); break;
271 /* This is where the handling of expressions breaks down into string
274 static void print_expression( const struct fragment_program
*p
,
280 if (*fmt
== '%' && *(fmt
+1) == 's') {
281 int reg
= va_arg(ap
, int);
283 /* Use of deref() is a bit of a hack:
285 print_arg( p
, deref(reg
, i
) );
297 static void do_tex_simple( const struct fragment_program
*p
,
298 const struct fp_instruction
*inst
,
299 const char *fn
, GLuint texunit
, GLuint arg
)
301 printf(" %s( ctx, ", fn
);
303 printf(", %d, ", texunit
);
304 print_dest_reg(inst
);
309 static void do_tex( const struct fragment_program
*p
,
310 const struct fp_instruction
*inst
,
311 const char *fn
, GLuint texunit
, GLuint arg
)
314 GLboolean need_tex
= GL_FALSE
, need_result
= GL_FALSE
;
316 for (i
= 0; i
< 4; i
++)
317 if (!inst
->DstReg
.WriteMask
[i
])
318 need_result
= GL_TRUE
;
320 if (is_swizzled(arg
))
323 if (!need_tex
&& !need_result
) {
324 do_tex_simple( p
, inst
, fn
, texunit
, arg
);
329 printf(" GLfloat texcoord[4];\n");
330 printf(" GLfloat result[4];\n");
332 for (i
= 0; i
< 4; i
++) {
333 printf(" texcoord[%d] = ", i
);
334 print_arg( p
, deref(arg
, i
) );
338 printf(" %s( ctx, texcoord, %d, result);\n", fn
, texunit
);
340 for (i
= 0; i
< 4; i
++) {
341 if (inst
->DstReg
.WriteMask
[i
]) {
344 printf(" = result[%d];\n", i
);
351 static void assign_single( GLuint i
,
352 const struct fragment_program
*p
,
353 const struct fp_instruction
*inst
,
360 if (inst
->DstReg
.WriteMask
[i
]) {
364 print_expression( p
, i
, fmt
, ap
);
370 static void assign4( const struct fragment_program
*p
,
371 const struct fp_instruction
*inst
,
379 for (i
= 0; i
< 4; i
++)
380 if (inst
->DstReg
.WriteMask
[i
]) {
384 print_expression( p
, i
, fmt
, ap
);
390 static void assign4_replicate( const struct fragment_program
*p
,
391 const struct fp_instruction
*inst
,
399 for (i
= 0; i
< 4; i
++)
400 if (inst
->DstReg
.WriteMask
[i
])
409 for (i
= 0; i
< 4; i
++)
410 if (inst
->DstReg
.WriteMask
[i
]) {
415 print_expression( p
, 0, fmt
, ap
);
425 static GLuint
nr_args( GLuint opcode
)
428 case FP_OPCODE_ABS
: return 1;
429 case FP_OPCODE_ADD
: return 2;
430 case FP_OPCODE_CMP
: return 3;
431 case FP_OPCODE_COS
: return 1;
432 case FP_OPCODE_DP3
: return 2;
433 case FP_OPCODE_DP4
: return 2;
434 case FP_OPCODE_DPH
: return 2;
435 case FP_OPCODE_DST
: return 2;
436 case FP_OPCODE_EX2
: return 1;
437 case FP_OPCODE_FLR
: return 1;
438 case FP_OPCODE_FRC
: return 1;
439 case FP_OPCODE_KIL
: return 1;
440 case FP_OPCODE_LG2
: return 1;
441 case FP_OPCODE_LIT
: return 1;
442 case FP_OPCODE_LRP
: return 3;
443 case FP_OPCODE_MAD
: return 3;
444 case FP_OPCODE_MAX
: return 2;
445 case FP_OPCODE_MIN
: return 2;
446 case FP_OPCODE_MOV
: return 1;
447 case FP_OPCODE_MUL
: return 2;
448 case FP_OPCODE_POW
: return 2;
449 case FP_OPCODE_RCP
: return 1;
450 case FP_OPCODE_RSQ
: return 1;
451 case FP_OPCODE_SCS
: return 1;
452 case FP_OPCODE_SGE
: return 2;
453 case FP_OPCODE_SIN
: return 1;
454 case FP_OPCODE_SLT
: return 2;
455 case FP_OPCODE_SUB
: return 2;
456 case FP_OPCODE_SWZ
: return 1;
457 case FP_OPCODE_TEX
: return 1;
458 case FP_OPCODE_TXB
: return 1;
459 case FP_OPCODE_TXP
: return 1;
460 case FP_OPCODE_XPD
: return 2;
467 static void upload_program( const struct fragment_program
*p
)
469 const struct fp_instruction
*inst
= p
->Instructions
;
471 for (; inst
->Opcode
!= FP_OPCODE_END
; inst
++) {
474 GLuint nr
= nr_args( inst
->Opcode
);
476 for (i
= 0; i
< nr
; i
++)
477 src
[i
] = src_vector( &inst
->SrcReg
[i
] );
479 /* Print the original program instruction string */
481 const char *s
= (const char *) p
->Base
.String
+ inst
->StringPos
;
490 switch (inst
->Opcode
) {
492 assign4(p
, inst
, "FABSF(%s)", src
[0]);
496 assign4(p
, inst
, "%s + %s", src
[0], src
[1]);
500 assign4(p
, inst
, "%s < 0.0F ? %s : %s", src
[0], src
[1], src
[2]);
504 assign4_replicate(p
, inst
, "COS(%s)", src
[0]);
508 assign4_replicate(p
, inst
,
509 "%s*%s + %s*%s + %s*%s",
519 assign4_replicate(p
, inst
,
520 "%s*%s + %s*%s + %s*%s + %s*%s",
530 assign4_replicate(p
, inst
,
531 "%s*%s + %s*%s + %s*%s + %s",
540 /* result[0] = 1 * 1;
541 * result[1] = a[1] * b[1];
542 * result[2] = a[2] * 1;
543 * result[3] = 1 * b[3];
545 * Here we hope that the compiler can optimize away "x*1" to "x".
549 swizzle(src
[0], _ONE
, _Y
, _Z
, _ONE
),
550 swizzle(src
[1], _ONE
, _Y
, _ONE
, _W
));
554 assign4_replicate(p
, inst
, "EX2(%s)", src
[0]);
558 assign4_replicate(p
, inst
, "FLR(%s)", src
[0]);
562 assign4_replicate(p
, inst
, "FRC(%s)", src
[0]);
570 assign4_replicate(p
, inst
, "LOG(%s)", deref(src
[0], _X
));
574 assign_single(0, p
, inst
, "1.0");
575 assign_single(1, p
, inst
, "MIN2(%s, 0)", deref(src
[0], _X
));
576 assign_single(2, p
, inst
, "(%s > 0.0) ? EXP(%s * MIN2(%s, 0)) : 0.0",
580 assign_single(3, p
, inst
, "1.0");
585 "%s * %s + (1.0 - %s) * %s",
586 src
[0], src
[1], src
[0], src
[2]);
590 assign4(p
, inst
, "%s * %s + %s", src
[0], src
[1], src
[2]);
594 assign4(p
, inst
, "MAX2(%s, %s)", src
[0], src
[1]);
598 assign4(p
, inst
, "MIN2(%s, %s)", src
[0], src
[1]);
602 assign4(p
, inst
, "%s", src
[0]);
606 assign4(p
, inst
, "%s * %s", src
[0], src
[1]);
610 assign4_replicate(p
, inst
, "POW(%s, %s)",
616 assign4_replicate(p
, inst
, "1.0/%s", deref(src
[0], _X
));
620 assign4_replicate(p
, inst
, "INV_SQRTF(%s)", deref(src
[0], _X
));
624 if (inst
->DstReg
.WriteMask
[0]) {
625 assign_single(0, p
, inst
, "COS(%s)", deref(src
[0], _X
));
628 if (inst
->DstReg
.WriteMask
[1]) {
629 assign_single(1, p
, inst
, "SIN(%s)", deref(src
[0], _X
));
634 assign4(p
, inst
, "%s >= %s ? 1.0 : 0.0", src
[0], src
[1]);
638 assign4_replicate(p
, inst
, "SIN(%s)", deref(src
[0], _X
));
642 assign4(p
, inst
, "%s < %s ? 1.0 : 0.0", src
[0], src
[1]);
646 assign4(p
, inst
, "%s - %s", src
[0], src
[1]);
649 case FP_OPCODE_SWZ
: /* same implementation as MOV: */
650 assign4(p
, inst
, "%s", src
[0]);
654 do_tex(p
, inst
, "TEX", inst
->TexSrcUnit
, src
[0]);
658 do_tex(p
, inst
, "TXB", inst
->TexSrcUnit
, src
[0]);
662 do_tex(p
, inst
, "TXP", inst
->TexSrcUnit
, src
[0]);
667 * result.x = src[0].y * src[1].z - src[0].z * src[1].y;
668 * result.y = src[0].z * src[1].x - src[0].x * src[1].z;
669 * result.z = src[0].x * src[1].y - src[0].y * src[1].x;
674 swizzle(src
[0], _Y
, _Z
, _X
, _ONE
),
675 swizzle(src
[1], _Z
, _X
, _Y
, _ONE
),
676 swizzle(src
[0], _Z
, _X
, _Y
, _ONE
),
677 swizzle(src
[1], _Y
, _Z
, _X
, _ONE
));
690 void _swrast_translate_program( GLcontext
*ctx
)
692 if (ctx
->FragmentProgram
.Current
) {
694 upload_program( ctx
->FragmentProgram
.Current
);