gallium: a lot more complete implementation of stream output
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos.h
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /* Authors: Keith Whitwell <keith@tungstengraphics.com>
29 */
30
31 #ifndef DRAW_VS_AOS_H
32 #define DRAW_VS_AOS_H
33
34 #include "pipe/p_config.h"
35
36 #ifdef PIPE_ARCH_X86
37
/* Forward declarations; the full definitions are not part of this header.
 * Note that struct x86_function is also used by value further down
 * (draw_vs_varient_aos_sse::func), which needs the complete type from the
 * rtasm include below.
 */
38 struct tgsi_token;
39 struct x86_function;
40
41 #include "pipe/p_state.h"
42 #include "rtasm/rtasm_x86sse.h"
43
44
45
46
47
/* Component indices: X, Y, Z, W map to 0..3.  The register arrays in
 * struct aos_machine are declared as [N][4], so these presumably select
 * one float within a 4-component register (confirm against the users).
 */
48 #define X 0
49 #define Y 1
50 #define Z 2
51 #define W 3
52
/* Capacities.  MAX_INPUTS, MAX_OUTPUTS, MAX_TEMPS and MAX_INTERNALS size
 * the input/output/temp/internal arrays of struct aos_machine.
 */
53 #define MAX_INPUTS PIPE_MAX_ATTRIBS
54 #define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS
55 #define MAX_TEMPS TGSI_EXEC_NUM_TEMPS
56 #define MAX_CONSTANTS 1024 /** only used for sanity checking */
57 #define MAX_IMMEDIATES 1024 /** only used for sanity checking */
58 #define MAX_INTERNALS 8 /** see IMM_x values below */
59
/* Extra register-file id, equal to TGSI_FILE_COUNT.
 * NOTE(review): presumably chosen so it cannot collide with a real TGSI
 * file id and refers to aos_machine::internal - confirm in the .c files.
 */
60 #define AOS_FILE_INTERNAL TGSI_FILE_COUNT
61
/* Values for the fpucntl fields (aos_machine::fpucntl is documented as
 * "one of FPU_* above").
 */
62 #define FPU_RND_NEG 1
63 #define FPU_RND_NEAREST 2
64
/* Forward declaration so the function-pointer type below can name it. */
65 struct aos_machine;
/* Pointer to a LIT helper routine using the PIPE_CDECL calling convention.
 * It receives the machine, a destination float pointer, a source float
 * pointer and a count, and returns nothing.
 * NOTE(review): the exact meaning of `count` and the number of floats
 * read/written are not visible in this header - confirm in the
 * implementation.
 */
66 typedef void (PIPE_CDECL *lit_func)( struct aos_machine *,
67 float *result,
68 const float *in,
69 unsigned count );
70
/* Function with the same parameter list as lit_func; defined outside this
 * header.
 */
71 void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
72 float *result,
73 const float *in,
74 unsigned count );
75
/* One table entry; struct aos_machine keeps MAX_SHINE_TAB of these and
 * struct lit_info points at one.
 * NOTE(review): presumably values[] caches a power curve for `exponent`
 * and last_used is a timestamp compared against aos_machine::now for
 * replacement - confirm in the LIT implementation.
 */
76 struct shine_tab {
77 float exponent;
78 float values[258];
79 unsigned last_used;
80 };
81
/* Pairs a LIT routine with the shine_tab it is associated with.
 * struct aos_machine holds an array of MAX_LIT_INFO of these.
 */
82 struct lit_info {
83 lit_func func; /* routine to call */
84 struct shine_tab *shine_tab; /* table used with func; ownership not visible here */
85 };
86
/* Array sizes for aos_machine::shine_tab and aos_machine::lit_info. */
87 #define MAX_SHINE_TAB 4
88 #define MAX_LIT_INFO 16
89
/* Description of one source buffer: a read-only base address, a stride,
 * and a working pointer.
 * NOTE(review): presumably one entry per vertex buffer, with `ptr`
 * advanced by `stride` for each vertex - confirm against the fetch code.
 */
90 struct aos_buffer {
91 const void *base_ptr;
92 unsigned stride;
93 void *ptr; /* updated per vertex */
94 };
95
96
97
98
99 /* This is the temporary storage used by all the aos_sse vs variants.
100 * TODO: should one be created per context and reused by passing a
101 * pointer in at vs_varient creation?
102 */
/* Run-time state block passed as the first argument to the lit_func and
 * vaos_run_*_func routines declared in this header.
 * NOTE(review): generated code presumably addresses these fields by
 * offset, so field order and sizes should be treated as fixed - confirm
 * before changing the layout.
 */
103 struct aos_machine {
104 float input [MAX_INPUTS ][4]; /* 4-float registers, one array per register file */
105 float output [MAX_OUTPUTS ][4];
106 float temp [MAX_TEMPS ][4];
107 float internal [MAX_INTERNALS ][4]; /* MAX_INTERNALS refers to the IMM_x values */
108
109 float scale[4]; /* viewport */
110 float translate[4]; /* viewport */
111
112 float tmp[2][4]; /* scratch space for LIT */
113
114 struct shine_tab shine_tab[MAX_SHINE_TAB];
115 struct lit_info lit_info[MAX_LIT_INFO];
116 unsigned now; /* NOTE(review): presumably the clock for shine_tab::last_used - confirm */
117
118
119 ushort fpu_rnd_nearest; /* NOTE(review): presumably FPU control-word values - confirm */
120 ushort fpu_rnd_neg_inf;
121 ushort fpu_restore;
122 ushort fpucntl; /* one of FPU_* above */
123
124 const float (*immediates)[4]; /* points to shader data */
125 const void *constants[PIPE_MAX_CONSTANT_BUFFERS]; /* points to draw data */
126
127 const struct aos_buffer *buffer; /* NOTE(review): owner undocumented; presumably draw_vs_varient_aos_sse::buffer - confirm */
128 };
129
130
131
132
/* State passed (as `cp`) to every aos_* helper declared in this header.
 * It carries the x86_function being worked on, the owning variant,
 * counters, per-register bookkeeping and an error flag (set by
 * AOS_ERROR).
 */
133 struct aos_compilation {
134 struct x86_function *func;
135 struct draw_vs_varient_aos_sse *vaos;
136
137 unsigned insn_counter;
138 unsigned num_immediates;
139 unsigned count;
140 unsigned lit_count;
141
/* Bookkeeping for eight XMM registers.  file/idx/dirty match the
 * parameters of aos_adopt_xmm_reg(); presumably they record which shader
 * register each XMM register currently holds - confirm.
 */
142 struct {
143 unsigned idx:16;
144 unsigned file:8;
145 unsigned dirty:8;
146 unsigned last_used;
147 } xmm[8];
148
149 unsigned x86_reg[2]; /* one of X86_* */
150
151 boolean input_fetched[PIPE_MAX_ATTRIBS];
/* NOTE(review): sized by PIPE_MAX_ATTRIBS although outputs elsewhere are
 * sized by MAX_OUTPUTS (PIPE_MAX_SHADER_OUTPUTS) - confirm the two limits
 * agree, or that this array cannot be indexed past PIPE_MAX_ATTRIBS.
 */
152 unsigned output_last_write[PIPE_MAX_ATTRIBS];
153
154 boolean have_sse2;
155 boolean error; /* set to 1 by AOS_ERROR() */
156 short fpucntl; /* NOTE(review): signed here, ushort in struct aos_machine - confirm intended */
157
158 /* these are actually known values, but putting them in a struct
159 * like this is helpful to keep them in sync across the file.
160 */
161 struct x86_reg tmp_EAX;
162 struct x86_reg idx_EBX; /* either start+i or &elt[i] */
163 struct x86_reg outbuf_ECX;
164 struct x86_reg machine_EDX;
165 struct x86_reg count_ESI; /* decrements to zero */
166 struct x86_reg temp_EBP;
167 struct x86_reg stack_ESP;
168 };
169
/* Helpers operating on a struct aos_compilation; all are defined outside
 * this header.  The descriptions below are inferred from names and
 * signatures only - confirm against the implementations.
 */

/* Returns an x86_reg; presumably obtains a free XMM register. */
170 struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp );
/* Takes an index; presumably into aos_compilation::xmm[]. */
171 void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx );
172
/* file/idx/dirty correspond to the bitfields of aos_compilation::xmm[]. */
173 void aos_adopt_xmm_reg( struct aos_compilation *cp,
174 struct x86_reg reg,
175 unsigned file,
176 unsigned idx,
177 unsigned dirty );
178
179 void aos_spill_all( struct aos_compilation *cp );
180
/* Returns an x86_reg for the shader register identified by (file, idx). */
181 struct x86_reg aos_get_shader_reg( struct aos_compilation *cp,
182 unsigned file,
183 unsigned idx );
184
/* Input handling; each takes a `linear` flag and returns a boolean.
 * NOTE(review): `linear` presumably distinguishes the linear run from the
 * elts run (see vaos_run_linear_func / vaos_run_elts_func) - confirm.
 */
185 boolean aos_init_inputs( struct aos_compilation *cp, boolean linear );
186 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear );
187 boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear );
188
189 boolean aos_emit_outputs( struct aos_compilation *cp );
190
191
/* Identifiers for the internal values; they run 0..7, matching
 * MAX_INTERNALS (8).  The trailing comments give each entry's four
 * components.
 */
192 #define IMM_ONES 0 /* 1, 1,1,1 */
193 #define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
194 #define IMM_IDENTITY 2 /* 0, 0,0,1 */
195 #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
196 #define IMM_255 4 /* 255, 255, 255, 255 */
197 #define IMM_NEGS 5 /* -1,-1,-1,-1 */
198 #define IMM_RSQ 6 /* -.5,1.5,_,_ */
199 #define IMM_PSIZE 7 /* not really an immediate - updated each run */
200
/* Both take an `imm` selector (presumably one of the IMM_* values above)
 * and return an x86_reg.
 * NOTE(review): the _xmm form presumably returns the value loaded into an
 * XMM register rather than a memory operand - confirm.
 */
201 struct x86_reg aos_get_internal( struct aos_compilation *cp,
202 unsigned imm );
203 struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp,
204 unsigned imm );
205
206
/**
 * Flag a compilation as failed.
 *
 * cp  - pointer expression whose target has an assignable `error` member
 *       (in this header: struct aos_compilation *); `error` is set to 1.
 * msg - reason string; it is only passed to debug_printf(), and that call
 *       sits behind a constant `if (0)`, so it is compiled (keeping the
 *       arguments type-checked) but never executed.
 *
 * Both arguments are parenthesized in the expansion so that pointer
 * expressions such as `&comp` or `p + 1` bind correctly before `->`.
 */
#define AOS_ERROR(cp, msg)                                                       \
do {                                                                             \
   if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, (msg)); \
   (cp)->error = 1;                                                              \
} while (0)
212
213
/* Tags stored in aos_compilation::x86_reg[] (documented there as
 * "one of X86_*").
 * NOTE(review): presumably each tag names what a general-purpose register
 * currently points at - confirm in the implementation.
 */
214 #define X86_NULL 0
215 #define X86_IMMEDIATES 1
216 #define X86_CONSTANTS 2
217 #define X86_BUFFERS 3
218
/* Returns an x86_reg for the given which_reg/value pair; defined outside
 * this header.  NOTE(review): the valid ranges of `which_reg` and `value`
 * are not visible here.
 */
219 struct x86_reg aos_get_x86( struct aos_compilation *cp,
220 unsigned which_reg,
221 unsigned value );
222
223
/* Entry-point type taking an array of element indices: machine state,
 * the index array, a count, and the output buffer.  Stored in
 * draw_vs_varient_aos_sse::gen_run_elts.
 */
224 typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *,
225 const unsigned *elts,
226 unsigned count,
227 void *output_buffer);
228
/* Entry-point type taking a start value instead of an index array:
 * machine state, start, a count, and the output buffer.  Stored in
 * draw_vs_varient_aos_sse::gen_run_linear.
 */
229 typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *,
230 unsigned start,
231 unsigned count,
232 void *output_buffer);
233
234
/* AOS/SSE vertex-shader variant.  It embeds struct draw_vs_varient as its
 * first member and adds the buffer table and the two run entry points.
 */
235 struct draw_vs_varient_aos_sse {
236 struct draw_vs_varient base; /* first member; presumably allows conversion from a base pointer - confirm */
237 struct draw_context *draw;
238
239 struct aos_buffer *buffer; /* NOTE(review): presumably an array of nr_vb entries - confirm */
240 unsigned nr_vb;
241
242 vaos_run_linear_func gen_run_linear;
243 vaos_run_elts_func gen_run_elts;
244
245
246 struct x86_function func[2]; /* NOTE(review): presumably one each for the linear and elts paths - confirm */
247 };
248
249
250 #endif
251
252 #endif
253