53e999b191e92d7a5004315a668752d95bd2c408
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos_machine.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_util.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/util/tgsi_parse.h"
32 #include "tgsi/util/tgsi_util.h"
33 #include "tgsi/exec/tgsi_exec.h"
34 #include "draw_vs.h"
35 #include "draw_vs_aos.h"
36 #include "draw_vertex.h"
37
38 #include "rtasm/rtasm_x86sse.h"
39
40
/* x87 FPU control-word fields, written out as the final bit patterns.
 *
 *   bits 0-5   : exception bits
 *   bits 8-9   : precision control
 *   bits 10-11 : rounding control
 *   bit  12    : infinity control
 */
#define X87_CW_EXCEPTION_INV_OP       0x0001
#define X87_CW_EXCEPTION_DENORM_OP    0x0002
#define X87_CW_EXCEPTION_ZERO_DIVIDE  0x0004
#define X87_CW_EXCEPTION_OVERFLOW     0x0008
#define X87_CW_EXCEPTION_UNDERFLOW    0x0010
#define X87_CW_EXCEPTION_PRECISION    0x0020
#define X87_CW_PRECISION_SINGLE       0x0000
#define X87_CW_PRECISION_RESERVED     0x0100
#define X87_CW_PRECISION_DOUBLE       0x0200
#define X87_CW_PRECISION_DOUBLE_EXT   0x0300
#define X87_CW_PRECISION_MASK         0x0300
#define X87_CW_ROUND_NEAREST          0x0000
#define X87_CW_ROUND_DOWN             0x0400
#define X87_CW_ROUND_UP               0x0800
#define X87_CW_ROUND_ZERO             0x0c00
#define X87_CW_ROUND_MASK             0x0c00
#define X87_CW_INFINITY               0x1000
58
59
60 PIPE_CDECL void aos_do_lit( struct aos_machine *machine,
61 float *result,
62 const float *in,
63 unsigned count )
64 {
65 if (in[0] > 0)
66 {
67 if (in[1] <= 0.0)
68 {
69 result[0] = 1.0F;
70 result[1] = in[0];
71 result[2] = 1.0;
72 result[3] = 1.0F;
73 }
74 else
75 {
76 const float epsilon = 1.0F / 256.0F;
77 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
78 result[0] = 1.0F;
79 result[1] = in[0];
80 result[2] = powf(in[1], exponent);
81 result[3] = 1.0;
82 }
83 }
84 else
85 {
86 result[0] = 1.0F;
87 result[1] = 0.0;
88 result[2] = 0.0;
89 result[3] = 1.0F;
90 }
91 }
92
93
94 static PIPE_CDECL void do_lit_lut( struct aos_machine *machine,
95 float *result,
96 const float *in,
97 unsigned count )
98 {
99 if (in[0] > 0)
100 {
101 if (in[1] <= 0.0)
102 {
103 result[0] = 1.0F;
104 result[1] = in[0];
105 result[2] = 1.0;
106 result[3] = 1.0F;
107 return;
108 }
109
110 if (machine->lit_info[count].shine_tab->exponent != in[3]) {
111 machine->lit_info[count].func = aos_do_lit;
112 goto no_luck;
113 }
114
115 if (in[1] <= 1.0)
116 {
117 const float *tab = machine->lit_info[count].shine_tab->values;
118 float f = in[1] * 256;
119 int k = (int)f;
120 float frac = f - (float)k;
121
122 result[0] = 1.0F;
123 result[1] = in[0];
124 result[2] = tab[k] + frac*(tab[k+1]-tab[k]);
125 result[3] = 1.0;
126 return;
127 }
128
129 no_luck:
130 {
131 const float epsilon = 1.0F / 256.0F;
132 float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon));
133 result[0] = 1.0F;
134 result[1] = in[0];
135 result[2] = powf(in[1], exponent);
136 result[3] = 1.0;
137 }
138 }
139 else
140 {
141 result[0] = 1.0F;
142 result[1] = 0.0;
143 result[2] = 0.0;
144 result[3] = 1.0F;
145 }
146 }
147
148
149 static void do_populate_lut( struct shine_tab *tab,
150 float unclamped_exponent )
151 {
152 const float epsilon = 1.0F / 256.0F;
153 float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon));
154 unsigned i;
155
156 tab->exponent = unclamped_exponent; /* for later comparison */
157
158 tab->values[0] = 0;
159 if (exponent == 0) {
160 for (i = 1; i < 258; i++) {
161 tab->values[i] = 1.0;
162 }
163 }
164 else {
165 for (i = 1; i < 258; i++) {
166 tab->values[i] = powf((float)i * epsilon, exponent);
167 }
168 }
169 }
170
171
172
173
174 static void PIPE_CDECL populate_lut( struct aos_machine *machine,
175 float *result,
176 const float *in,
177 unsigned count )
178 {
179 unsigned i, tab;
180
181 /* Search for an existing table for this value. Note that without
182 * static analysis we don't really know if in[3] will be constant,
183 * but it usually is...
184 */
185 for (tab = 0; tab < 4; tab++) {
186 if (machine->shine_tab[tab].exponent == in[3]) {
187 goto found;
188 }
189 }
190
191 for (tab = 0, i = 1; i < 4; i++) {
192 if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used)
193 tab = i;
194 }
195
196 if (machine->shine_tab[tab].last_used == machine->now) {
197 /* No unused tables (this is not a ffvertex program...). Just
198 * call pow each time:
199 */
200 machine->lit_info[count].func = aos_do_lit;
201 machine->lit_info[count].func( machine, result, in, count );
202 return;
203 }
204 else {
205 do_populate_lut( &machine->shine_tab[tab], in[3] );
206 }
207
208 found:
209 machine->shine_tab[tab].last_used = machine->now;
210 machine->lit_info[count].shine_tab = &machine->shine_tab[tab];
211 machine->lit_info[count].func = do_lit_lut;
212 machine->lit_info[count].func( machine, result, in, count );
213 }
214
215
216 void draw_vs_aos_machine_constants( struct aos_machine *machine,
217 const float (*constants)[4] )
218 {
219 machine->constants = constants;
220
221 {
222 unsigned i;
223 for (i = 0; i < MAX_LIT_INFO; i++) {
224 machine->lit_info[i].func = populate_lut;
225 machine->now++;
226 }
227 }
228 }
229
230
231 void draw_vs_aos_machine_viewport( struct aos_machine *machine,
232 const struct pipe_viewport_state *viewport )
233 {
234 memcpy(machine->scale, viewport->scale, 4 * sizeof(float));
235 memcpy(machine->translate, viewport->translate, 4 * sizeof(float));
236 }
237
238
239
/**
 * Free a machine previously returned by draw_vs_aos_machine().
 *
 * The pointer is handed straight to align_free(), the counterpart of
 * the align_malloc() used at creation.
 *
 * \param machine  machine to release
 */
void draw_vs_aos_machine_destroy( struct aos_machine *machine )
{
   align_free( machine );
}
244
245 struct aos_machine *draw_vs_aos_machine( void )
246 {
247 struct aos_machine *machine;
248 unsigned i;
249 float inv = 1.0f/255.0f;
250 float f255 = 255.0f;
251
252 machine = align_malloc(sizeof(struct aos_machine), 16);
253 if (!machine)
254 return NULL;
255
256 memset(machine, 0, sizeof(*machine));
257
258 ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f);
259 *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff;
260
261 ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f);
262 ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f);
263 ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f);
264 ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv);
265 ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255);
266 ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f);
267
268
269 machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP |
270 X87_CW_EXCEPTION_DENORM_OP |
271 X87_CW_EXCEPTION_ZERO_DIVIDE |
272 X87_CW_EXCEPTION_OVERFLOW |
273 X87_CW_EXCEPTION_UNDERFLOW |
274 X87_CW_EXCEPTION_PRECISION |
275 (1<<6) |
276 X87_CW_ROUND_NEAREST |
277 X87_CW_PRECISION_DOUBLE_EXT);
278
279 assert(machine->fpu_rnd_nearest == 0x37f);
280
281 machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP |
282 X87_CW_EXCEPTION_DENORM_OP |
283 X87_CW_EXCEPTION_ZERO_DIVIDE |
284 X87_CW_EXCEPTION_OVERFLOW |
285 X87_CW_EXCEPTION_UNDERFLOW |
286 X87_CW_EXCEPTION_PRECISION |
287 (1<<6) |
288 X87_CW_ROUND_DOWN |
289 X87_CW_PRECISION_DOUBLE_EXT);
290
291 for (i = 0; i < MAX_SHINE_TAB; i++)
292 do_populate_lut( &machine->shine_tab[i], 1.0f );
293
294 return machine;
295 }
296
297