Merge tgsi/exec and tgsi/util directories.
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos_machine.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_config.h"
30
31
32 #include "pipe/p_util.h"
33 #include "pipe/p_shader_tokens.h"
34 #include "tgsi/tgsi_parse.h"
35 #include "tgsi/tgsi_util.h"
36 #include "tgsi/tgsi_exec.h"
37 #include "draw_vs.h"
38 #include "draw_vs_aos.h"
39 #include "draw_vertex.h"
40
41 #ifdef PIPE_ARCH_X86
42
43 #include "rtasm/rtasm_x86sse.h"
44
45
/*
 * Fields of the x87 FPU control word.  draw_vs_aos_machine() ORs these
 * together to build the control-word values it stores in the machine.
 */

/* Bits 0..5: one bit per floating-point exception class. */
#define X87_CW_EXCEPTION_INV_OP       (1<<0)
#define X87_CW_EXCEPTION_DENORM_OP    (1<<1)
#define X87_CW_EXCEPTION_ZERO_DIVIDE  (1<<2)
#define X87_CW_EXCEPTION_OVERFLOW     (1<<3)
#define X87_CW_EXCEPTION_UNDERFLOW    (1<<4)
#define X87_CW_EXCEPTION_PRECISION    (1<<5)

/* Bits 8..9: precision field (X87_CW_PRECISION_MASK covers both bits). */
#define X87_CW_PRECISION_SINGLE       (0<<8)
#define X87_CW_PRECISION_RESERVED     (1<<8)
#define X87_CW_PRECISION_DOUBLE       (2<<8)
#define X87_CW_PRECISION_DOUBLE_EXT   (3<<8)
#define X87_CW_PRECISION_MASK         (3<<8)

/* Bits 10..11: rounding field (X87_CW_ROUND_MASK covers both bits). */
#define X87_CW_ROUND_NEAREST          (0<<10)
#define X87_CW_ROUND_DOWN             (1<<10)
#define X87_CW_ROUND_UP               (2<<10)
#define X87_CW_ROUND_ZERO             (3<<10)
#define X87_CW_ROUND_MASK             (3<<10)

/* Bit 12: infinity control. */
#define X87_CW_INFINITY               (1<<12)
63
64
65 void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
66 float *result,
67 const float *in,
68 unsigned count )
69 {
70 if (in[0] > 0)
71 {
72 if (in[1] <= 0.0)
73 {
74 result[0] = 1.0F;
75 result[1] = in[0];
76 result[2] = 1.0;
77 result[3] = 1.0F;
78 }
79 else
80 {
81 const float epsilon = 1.0F / 256.0F;
82 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
83 result[0] = 1.0F;
84 result[1] = in[0];
85 result[2] = powf(in[1], exponent);
86 result[3] = 1.0;
87 }
88 }
89 else
90 {
91 result[0] = 1.0F;
92 result[1] = 0.0;
93 result[2] = 0.0;
94 result[3] = 1.0F;
95 }
96 }
97
98
99 static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
100 float *result,
101 const float *in,
102 unsigned count )
103 {
104 if (in[0] > 0)
105 {
106 if (in[1] <= 0.0)
107 {
108 result[0] = 1.0F;
109 result[1] = in[0];
110 result[2] = 1.0;
111 result[3] = 1.0F;
112 return;
113 }
114
115 if (machine->lit_info[count].shine_tab->exponent != in[3]) {
116 machine->lit_info[count].func = aos_do_lit;
117 goto no_luck;
118 }
119
120 if (in[1] <= 1.0)
121 {
122 const float *tab = machine->lit_info[count].shine_tab->values;
123 float f = in[1] * 256;
124 int k = (int)f;
125 float frac = f - (float)k;
126
127 result[0] = 1.0F;
128 result[1] = in[0];
129 result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
130 result[3] = 1.0;
131 return;
132 }
133
134 no_luck:
135 {
136 const float epsilon = 1.0F / 256.0F;
137 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
138 result[0] = 1.0F;
139 result[1] = in[0];
140 result[2] = powf(in[1], exponent);
141 result[3] = 1.0;
142 }
143 }
144 else
145 {
146 result[0] = 1.0F;
147 result[1] = 0.0;
148 result[2] = 0.0;
149 result[3] = 1.0F;
150 }
151 }
152
153
154 static void do_populate_lut( struct shine_tab *tab,
155 float unclamped_exponent )
156 {
157 const float epsilon = 1.0F / 256.0F;
158 float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
159 unsigned i;
160
161 tab->exponent = unclamped_exponent; /* for later comparison */
162
163 tab->values[0] = 0;
164 if (exponent == 0) {
165 for (i = 1; i < 258; i++) {
166 tab->values[i] = 1.0;
167 }
168 }
169 else {
170 for (i = 1; i < 258; i++) {
171 tab->values[i] = powf((float)i * epsilon, exponent);
172 }
173 }
174 }
175
176
177
178
179 static void PIPE_CDECL populate_lut( struct aos_machine *machine,
180 float *result,
181 const float *in,
182 unsigned count )
183 {
184 unsigned i, tab;
185
186 /* Search for an existing table for this value. Note that without
187 * static analysis we don't really know if in[3] will be constant,
188 * but it usually is...
189 */
190 for (tab = 0; tab < 4; tab++) {
191 if (machine->shine_tab[tab].exponent == in[3]) {
192 goto found;
193 }
194 }
195
196 for (tab = 0, i = 1; i < 4; i++) {
197 if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
198 tab = i;
199 }
200
201 if (machine->shine_tab[tab].last_used == machine->now) {
202 /* No unused tables (this is not a ffvertex program...). Just
203 * call pow each time:
204 */
205 machine->lit_info[count].func = aos_do_lit;
206 machine->lit_info[count].func( machine, result, in, count );
207 return;
208 }
209 else {
210 do_populate_lut( &machine->shine_tab[tab], in[3] );
211 }
212
213 found:
214 machine->shine_tab[tab].last_used = machine->now;
215 machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
216 machine->lit_info[count].func = do_lit_lut;
217 machine->lit_info[count].func( machine, result, in, count );
218 }
219
220
221 void draw_vs_aos_machine_constants( struct aos_machine *machine,
222 const float (*constants)[4] )
223 {
224 machine->constants = constants;
225
226 {
227 unsigned i;
228 for (i = 0; i < MAX_LIT_INFO; i++) {
229 machine->lit_info[i].func = populate_lut;
230 machine->now++;
231 }
232 }
233 }
234
235
236 void draw_vs_aos_machine_viewport( struct aos_machine *machine,
237 const struct pipe_viewport_state *viewport )
238 {
239 memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
240 memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
241 }
242
243
244
/**
 * Release a machine created by draw_vs_aos_machine().
 *
 * The machine is obtained from align_malloc(), so it is returned with
 * align_free().
 *
 * \param machine  machine to free
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   align_free( machine );
}
249
250 struct aos_machine *draw_vs_aos_machine( void )
251 {
252 struct aos_machine *machine;
253 unsigned i;
254 float inv = 1.0f/255.0f;
255 float f255 = 255.0f;
256
257 machine = align_malloc(sizeof(struct aos_machine), 16);
258 if (!machine)
259 return NULL;
260
261 memset(machine, 0, sizeof(*machine));
262
263 ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
264 *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
265
266 ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
267 ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
268 ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
269 ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
270 ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
271 ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
272
273
274 machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
275 X87_CW_EXCEPTION_DENORM_OP |
276 X87_CW_EXCEPTION_ZERO_DIVIDE |
277 X87_CW_EXCEPTION_OVERFLOW |
278 X87_CW_EXCEPTION_UNDERFLOW |
279 X87_CW_EXCEPTION_PRECISION |
280 (1<<6) |
281 X87_CW_ROUND_NEAREST |
282 X87_CW_PRECISION_DOUBLE_EXT);
283
284 assert(machine->fpu_rnd_nearest == 0x37f);
285
286 machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
287 X87_CW_EXCEPTION_DENORM_OP |
288 X87_CW_EXCEPTION_ZERO_DIVIDE |
289 X87_CW_EXCEPTION_OVERFLOW |
290 X87_CW_EXCEPTION_UNDERFLOW |
291 X87_CW_EXCEPTION_PRECISION |
292 (1<<6) |
293 X87_CW_ROUND_DOWN |
294 X87_CW_PRECISION_DOUBLE_EXT);
295
296 for (i = 0; i < MAX_SHINE_TAB; i++)
297 do_populate_lut( &machine->shine_tab[i], 1.0f );
298
299 return machine;
300 }
301
302 #else
303
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 *
 * \param machine   ignored
 * \param viewport  ignored
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   (void) machine;
   (void) viewport;
}
308
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 *
 * \param machine    ignored
 * \param constants  ignored
 */
void draw_vs_aos_machine_constants( struct aos_machine *machine,
                                    const float (*constants)[4] )
{
   (void) machine;
   (void) constants;
}
313
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 *
 * \param machine  ignored
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   (void) machine;
}
317
/**
 * Stub used when PIPE_ARCH_X86 is not defined: no machine is created.
 *
 * \return always NULL
 */
struct aos_machine *draw_vs_aos_machine( void )
{
   struct aos_machine *none = NULL;

   return none;
}
322 #endif
323