updated some printfs, added comment about sched_yield
[mesa.git] / src / mesa / tnl / t_vb_arbprogram.h
1 /*
2 * Mesa 3-D graphics library
3 * Version: 6.5
4 *
5 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11 * and/or sell copies of the Software, and to permit persons to whom the
12 * Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice shall be included
15 * in all copies or substantial portions of the Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
/**
 * \file t_vb_arbprogram.h
 * Compile vertex programs to an intermediate representation.
 * Execute vertex programs over a buffer of vertices.
 * \author Keith Whitwell, Brian Paul
 */
31
32
33 #ifndef _T_VB_ARBPROGRAM_H_
34 #define _T_VB_ARBPROGRAM_H_
35
36
/*
 * Opcodes used only internally.  They are numbered upward from
 * MAX_OPCODE, which is defined outside this header.
 *
 * RSW and MSK share their names with the 'rsw' and 'msk' layouts of
 * union instruction.  NOTE(review): REL is presumably relative
 * addressing -- confirm against the code that emits it.
 */
#define RSW   (MAX_OPCODE)
#define MSK   (MAX_OPCODE + 1)
#define REL   (MAX_OPCODE + 2)
42
/**
 * Register file selectors for vertex programs.
 *
 * There are four files, numbered 0..3, so a selector fits in a 2-bit
 * field.
 */
#define FILE_REG           0   /* temporaries */
#define FILE_LOCAL_PARAM   1   /* local parameters */
#define FILE_ENV_PARAM     2   /* global parameters */
#define FILE_STATE_PARAM   3   /* GL state references */
50
/*
 * Register indices, in ascending order:
 *
 *     0..4     REG_ARG0, REG_ARG1, REG_ARG2, REG_RES, REG_ADDR
 *     5..16    REG_TMP0 .. REG_TMP11   (12 slots)
 *    17..31    REG_OUT0 .. REG_OUT14   (15 slots)
 *    32..63    REG_IN0  .. REG_IN31    (32 slots)
 *    64..70    named registers; the vector noted beside each is its value
 *
 * REG_MAX is 128, i.e. the number of values a 7-bit index can take.
 */
#define REG_ARG0      0
#define REG_ARG1      1
#define REG_ARG2      2
#define REG_RES       3
#define REG_ADDR      4

#define REG_TMP0      5
#define REG_TMP11     16

#define REG_OUT0      17
#define REG_OUT14     31

#define REG_IN0       32
#define REG_IN31      63

#define REG_ID        64    /* 0,0,0,1 */
#define REG_ONES      65    /* 1,1,1,1 */
#define REG_SWZ       66    /* -1,1,0,0 */
#define REG_NEG       67    /* -1,-1,-1,-1 */
#define REG_LIT       68    /* 1,0,0,1 */
#define REG_LIT2      69    /* 1,0,0,1 */

/* Internal temporary.  XXX we can't actually use this because 70
 * doesn't fit in the 5-bit 'dst' instruction field!
 */
#define REG_SCRATCH   70

#define REG_UNDEF     127   /* special case - never used */
#define REG_MAX       128
#define REG_INVALID   (~0)
72
73 /* ARB_vp instructions are broken down into one or more of the
74 * following micro-instructions, each representable in a 64 bit packed
75 * structure.
76 */
77 struct reg {
78 GLuint file:2;
79 GLuint idx:7;
80 };
81
82
83 union instruction {
84 struct {
85 GLuint opcode:7;
86 GLuint dst:5;
87 GLuint file0:2;
88 GLuint idx0:7;
89 GLuint file1:2;
90 GLuint idx1:7;
91 GLuint pad:2;
92 GLuint pad2;
93 } alu;
94
95 struct {
96 GLuint opcode:7;
97 GLuint dst:5;
98 GLuint file0:2;
99 GLuint idx0:7;
100 GLuint neg:4;
101 GLuint swz:8; /* xyzw only */
102 } rsw;
103
104 struct {
105 GLuint opcode:7;
106 GLuint dst:5;
107 GLuint file:2;
108 GLuint idx:7;
109 GLuint mask:4;
110 GLuint pad:7;
111 GLuint pad2;
112 } msk;
113 };
114
115
/**
 * Reduced swizzle: four 2-bit selectors packed into 8 bits, component
 * 0 in the lowest two bits.  Only X/Y/Z/W (0..3) can be encoded, not
 * the constants 0/1.
 *
 * RSW_NOOP is the identity swizzle (selector i picks component i).
 * GET_RSW extracts the selector for component 'idx' from 'swz'.
 */
#define RSW_NOOP            (0x0 | (0x1 << 2) | (0x2 << 4) | (0x3 << 6))
#define GET_RSW(swz, idx)   (((swz) >> (2 * (idx))) & 0x3)
121
122
123 struct input {
124 GLuint idx;
125 GLfloat *data;
126 GLuint stride;
127 GLuint size;
128 };
129
130 struct output {
131 GLuint idx;
132 GLfloat *data;
133 };
134
135
136
137 /*--------------------------------------------------------------------------- */
/*
 * x87 FPU control-word values.
 *
 * Without USE_SSE_ASM both values are 0.  With it, RESTORE_FPU is
 * DEFAULT_X86_FPU when NO_FAST_MATH is defined and FAST_X86_FPU
 * otherwise; RND_NEG_FPU is that same value with bit 0x400 set.
 *
 * NOTE(review): 0x400 is bit 10, which lies in the control word's
 * rounding-control field; given the name, it presumably selects
 * rounding toward negative infinity -- confirm against the base
 * values.
 */
#if !defined(USE_SSE_ASM)
#  define RESTORE_FPU   0
#  define RND_NEG_FPU   0
#elif defined(NO_FAST_MATH)
#  define RESTORE_FPU   (DEFAULT_X86_FPU)
#  define RND_NEG_FPU   (DEFAULT_X86_FPU | 0x400)
#else
#  define RESTORE_FPU   (FAST_X86_FPU)
#  define RND_NEG_FPU   (FAST_X86_FPU | 0x400)
#endif
150
151
152 /**
153 * Private storage for the vertex program pipeline stage.
154 */
155 struct arb_vp_machine {
156 GLfloat (*File[4])[4]; /* All values referencable from the program. */
157
158 struct input input[_TNL_ATTRIB_MAX];
159 GLuint nr_inputs;
160
161 struct output output[VERT_RESULT_MAX];
162 GLuint nr_outputs;
163
164 GLvector4f attribs[VERT_RESULT_MAX]; /**< result vectors. */
165 GLvector4f ndcCoords; /**< normalized device coords */
166 GLubyte *clipmask; /**< clip flags */
167 GLubyte ormask, andmask; /**< for clipping */
168
169 GLuint vtx_nr; /**< loop counter */
170
171 struct vertex_buffer *VB;
172
173 GLshort fpucntl_rnd_neg; /* constant value */
174 GLshort fpucntl_restore; /* constant value */
175
176 GLboolean try_codegen;
177 };
178
179 struct tnl_compiled_program {
180 union instruction instructions[1024];
181 GLint nr_instructions;
182 void (*compiled_func)( struct arb_vp_machine * ); /**< codegen'd program */
183 };
184
185 void _tnl_program_string_change( struct vertex_program * );
186 void _tnl_program_destroy( struct vertex_program * );
187
188 void _tnl_disassem_vba_insn( union instruction op );
189
190 GLboolean _tnl_sse_codegen_vertex_program(struct tnl_compiled_program *p);
191
192 #endif