Merge remote branch 'upstream/gallium-0.1' into nouveau-gallium-0.1
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos_machine.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_config.h"
30
31
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_util.h"
37 #include "tgsi/tgsi_exec.h"
38 #include "draw_vs.h"
39 #include "draw_vs_aos.h"
40 #include "draw_vertex.h"
41
42 #ifdef PIPE_ARCH_X86
43
44 #include "rtasm/rtasm_x86sse.h"
45
46
/*
 * x87 FPU control word fields, expressed as ready-to-OR bit masks.
 */

/* Bits 0-5: exception mask bits. */
#define X87_CW_EXCEPTION_INV_OP       0x0001   /* bit 0 */
#define X87_CW_EXCEPTION_DENORM_OP    0x0002   /* bit 1 */
#define X87_CW_EXCEPTION_ZERO_DIVIDE  0x0004   /* bit 2 */
#define X87_CW_EXCEPTION_OVERFLOW     0x0008   /* bit 3 */
#define X87_CW_EXCEPTION_UNDERFLOW    0x0010   /* bit 4 */
#define X87_CW_EXCEPTION_PRECISION    0x0020   /* bit 5 */

/* Bits 8-9: precision control field. */
#define X87_CW_PRECISION_SINGLE       0x0000   /* field value 0 */
#define X87_CW_PRECISION_RESERVED     0x0100   /* field value 1 */
#define X87_CW_PRECISION_DOUBLE       0x0200   /* field value 2 */
#define X87_CW_PRECISION_DOUBLE_EXT   0x0300   /* field value 3 */
#define X87_CW_PRECISION_MASK         0x0300

/* Bits 10-11: rounding control field. */
#define X87_CW_ROUND_NEAREST          0x0000   /* field value 0 */
#define X87_CW_ROUND_DOWN             0x0400   /* field value 1 */
#define X87_CW_ROUND_UP               0x0800   /* field value 2 */
#define X87_CW_ROUND_ZERO             0x0C00   /* field value 3 */
#define X87_CW_ROUND_MASK             0x0C00

/* Bit 12: infinity control. */
#define X87_CW_INFINITY               0x1000
64
65
66 void PIPE_CDECL aos_do_lit( struct aos_machine *machine,
67 float *result,
68 const float *in,
69 unsigned count )
70 {
71 if (in[0] > 0)
72 {
73 if (in[1] <= 0.0)
74 {
75 result[0] = 1.0F;
76 result[1] = in[0];
77 result[2] = 1.0;
78 result[3] = 1.0F;
79 }
80 else
81 {
82 const float epsilon = 1.0F / 256.0F;
83 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
84 result[0] = 1.0F;
85 result[1] = in[0];
86 result[2] = powf(in[1], exponent);
87 result[3] = 1.0;
88 }
89 }
90 else
91 {
92 result[0] = 1.0F;
93 result[1] = 0.0;
94 result[2] = 0.0;
95 result[3] = 1.0F;
96 }
97 }
98
99
100 static void PIPE_CDECL do_lit_lut( struct aos_machine *machine,
101 float *result,
102 const float *in,
103 unsigned count )
104 {
105 if (in[0] > 0)
106 {
107 if (in[1] <= 0.0)
108 {
109 result[0] = 1.0F;
110 result[1] = in[0];
111 result[2] = 1.0;
112 result[3] = 1.0F;
113 return;
114 }
115
116 if (machine->lit_info[count].shine_tab->exponent != in[3]) {
117 machine->lit_info[count].func = aos_do_lit;
118 goto no_luck;
119 }
120
121 if (in[1] <= 1.0)
122 {
123 const float *tab = machine->lit_info[count].shine_tab->values;
124 float f = in[1] * 256;
125 int k = (int)f;
126 float frac = f - (float)k;
127
128 result[0] = 1.0F;
129 result[1] = in[0];
130 result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
131 result[3] = 1.0;
132 return;
133 }
134
135 no_luck:
136 {
137 const float epsilon = 1.0F / 256.0F;
138 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
139 result[0] = 1.0F;
140 result[1] = in[0];
141 result[2] = powf(in[1], exponent);
142 result[3] = 1.0;
143 }
144 }
145 else
146 {
147 result[0] = 1.0F;
148 result[1] = 0.0;
149 result[2] = 0.0;
150 result[3] = 1.0F;
151 }
152 }
153
154
155 static void do_populate_lut( struct shine_tab *tab,
156 float unclamped_exponent )
157 {
158 const float epsilon = 1.0F / 256.0F;
159 float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
160 unsigned i;
161
162 tab->exponent = unclamped_exponent; /* for later comparison */
163
164 tab->values[0] = 0;
165 if (exponent == 0) {
166 for (i = 1; i < 258; i++) {
167 tab->values[i] = 1.0;
168 }
169 }
170 else {
171 for (i = 1; i < 258; i++) {
172 tab->values[i] = powf((float)i * epsilon, exponent);
173 }
174 }
175 }
176
177
178
179
180 static void PIPE_CDECL populate_lut( struct aos_machine *machine,
181 float *result,
182 const float *in,
183 unsigned count )
184 {
185 unsigned i, tab;
186
187 /* Search for an existing table for this value. Note that without
188 * static analysis we don't really know if in[3] will be constant,
189 * but it usually is...
190 */
191 for (tab = 0; tab < 4; tab++) {
192 if (machine->shine_tab[tab].exponent == in[3]) {
193 goto found;
194 }
195 }
196
197 for (tab = 0, i = 1; i < 4; i++) {
198 if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
199 tab = i;
200 }
201
202 if (machine->shine_tab[tab].last_used == machine->now) {
203 /* No unused tables (this is not a ffvertex program...). Just
204 * call pow each time:
205 */
206 machine->lit_info[count].func = aos_do_lit;
207 machine->lit_info[count].func( machine, result, in, count );
208 return;
209 }
210 else {
211 do_populate_lut( &machine->shine_tab[tab], in[3] );
212 }
213
214 found:
215 machine->shine_tab[tab].last_used = machine->now;
216 machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
217 machine->lit_info[count].func = do_lit_lut;
218 machine->lit_info[count].func( machine, result, in, count );
219 }
220
221
222 void draw_vs_aos_machine_constants( struct aos_machine *machine,
223 const float (*constants)[4] )
224 {
225 machine->constants = constants;
226
227 {
228 unsigned i;
229 for (i = 0; i < MAX_LIT_INFO; i++) {
230 machine->lit_info[i].func = populate_lut;
231 machine->now++;
232 }
233 }
234 }
235
236
237 void draw_vs_aos_machine_viewport( struct aos_machine *machine,
238 const struct pipe_viewport_state *viewport )
239 {
240 memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
241 memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
242 }
243
244
245
/**
 * Release a machine obtained from draw_vs_aos_machine().
 *
 * \param machine  machine to free; NULL is accepted and ignored, since
 *                 draw_vs_aos_machine() returns NULL when allocation fails
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   /* Do not rely on align_free() tolerating NULL. */
   if (!machine) {
      return;
   }

   align_free(machine);
}
250
251 struct aos_machine *draw_vs_aos_machine( void )
252 {
253 struct aos_machine *machine;
254 unsigned i;
255 float inv = 1.0f/255.0f;
256 float f255 = 255.0f;
257
258 machine = align_malloc(sizeof(struct aos_machine), 16);
259 if (!machine)
260 return NULL;
261
262 memset(machine, 0, sizeof(*machine));
263
264 ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
265 *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
266
267 ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
268 ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
269 ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
270 ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
271 ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
272 ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
273
274
275 machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
276 X87_CW_EXCEPTION_DENORM_OP |
277 X87_CW_EXCEPTION_ZERO_DIVIDE |
278 X87_CW_EXCEPTION_OVERFLOW |
279 X87_CW_EXCEPTION_UNDERFLOW |
280 X87_CW_EXCEPTION_PRECISION |
281 (1<<6) |
282 X87_CW_ROUND_NEAREST |
283 X87_CW_PRECISION_DOUBLE_EXT);
284
285 assert(machine->fpu_rnd_nearest == 0x37f);
286
287 machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
288 X87_CW_EXCEPTION_DENORM_OP |
289 X87_CW_EXCEPTION_ZERO_DIVIDE |
290 X87_CW_EXCEPTION_OVERFLOW |
291 X87_CW_EXCEPTION_UNDERFLOW |
292 X87_CW_EXCEPTION_PRECISION |
293 (1<<6) |
294 X87_CW_ROUND_DOWN |
295 X87_CW_PRECISION_DOUBLE_EXT);
296
297 for (i = 0; i < MAX_SHINE_TAB; i++)
298 do_populate_lut( &machine->shine_tab[i], 1.0f );
299
300 return machine;
301 }
302
303 #else
304
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 */
void draw_vs_aos_machine_viewport( struct aos_machine *machine,
                                   const struct pipe_viewport_state *viewport )
{
   (void) machine;
   (void) viewport;
}
309
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 */
void draw_vs_aos_machine_constants( struct aos_machine *machine,
                                    const float (*constants)[4] )
{
   (void) machine;
   (void) constants;
}
314
/**
 * Stub used when PIPE_ARCH_X86 is not defined: does nothing.
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   (void) machine;
}
318
/**
 * Stub used when PIPE_ARCH_X86 is not defined.
 *
 * \return always NULL; no machine is created in this configuration.
 */
struct aos_machine *draw_vs_aos_machine( void )
{
   struct aos_machine *none = NULL;

   return none;
}
323 #endif
324