1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_config.h"
32 #include "pipe/p_shader_tokens.h"
33 #include "util/u_math.h"
34 #include "util/u_memory.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "tgsi/tgsi_util.h"
37 #include "tgsi/tgsi_exec.h"
39 #include "draw_vs_aos.h"
40 #include "draw_vertex.h"
44 #include "rtasm/rtasm_x86sse.h"
/*
 * Bit-field constants used to build the 16-bit control-word values stored
 * in machine->fpu_rnd_nearest and machine->fpu_rnd_neg_inf.
 *
 * Layout, as given by the shifts below:
 *   bits 0-5   : one bit per exception kind (INV_OP .. PRECISION)
 *   bits 8-9   : two-bit precision field (PRECISION_MASK covers it)
 *   bits 10-11 : two-bit rounding field  (ROUND_MASK covers it)
 *   bit  12    : infinity bit
 *
 * NOTE(review): the X87_CW prefix suggests these mirror the x87 FPU
 * control word, where a set exception bit means "exception masked" --
 * confirm against the Intel SDM before changing any value.
 */
47 #define X87_CW_EXCEPTION_INV_OP (1<<0)
48 #define X87_CW_EXCEPTION_DENORM_OP (1<<1)
49 #define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2)
50 #define X87_CW_EXCEPTION_OVERFLOW (1<<3)
51 #define X87_CW_EXCEPTION_UNDERFLOW (1<<4)
52 #define X87_CW_EXCEPTION_PRECISION (1<<5)
53 #define X87_CW_PRECISION_SINGLE (0<<8)
54 #define X87_CW_PRECISION_RESERVED (1<<8)
55 #define X87_CW_PRECISION_DOUBLE (2<<8)
56 #define X87_CW_PRECISION_DOUBLE_EXT (3<<8)
57 #define X87_CW_PRECISION_MASK (3<<8)
58 #define X87_CW_ROUND_NEAREST (0<<10)
59 #define X87_CW_ROUND_DOWN (1<<10)
60 #define X87_CW_ROUND_UP (2<<10)
61 #define X87_CW_ROUND_ZERO (3<<10)
62 #define X87_CW_ROUND_MASK (3<<10)
63 #define X87_CW_INFINITY (1<<12)
/*
 * aos_do_lit -- lighting helper that calls powf() on every invocation
 * instead of using a lookup table.  (Fragment.)
 *
 * Only part of this definition is visible in this extract: the parameter
 * list is cut off after `machine` and most of the body is missing.  What
 * the visible lines do:
 *   - epsilon is 1/256;
 *   - in[3] is clamped to [-(128 - epsilon), +(128 - epsilon)] to form
 *     the exponent;
 *   - result[2] receives powf(in[1], exponent).
 *
 * NOTE(review): elsewhere in this file the function is stored in
 * lit_info[].func and invoked as func(machine, result, in, count);
 * presumably the full signature matches that call -- confirm against the
 * complete file.  How result[0], result[1] and result[3] are written, and
 * which conditions guard the powf() call, cannot be seen here.
 */
66 void PIPE_CDECL
aos_do_lit( struct aos_machine
*machine
,
82 const float epsilon
= 1.0F
/ 256.0F
;
83 float exponent
= CLAMP(in
[3], -(128.0F
- epsilon
), (128.0F
- epsilon
));
86 result
[2] = powf(in
[1], exponent
);
/*
 * do_lit_lut -- lighting helper that reads the power term from a
 * precomputed table (lit_info[count].shine_tab) and interpolates between
 * neighbouring entries.  (Fragment.)
 *
 * Only part of this definition is visible in this extract: the parameter
 * list is cut off after `machine`, and the declaration of `k`, the guard
 * conditions and the control flow between the pieces below are missing.
 */
100 static void PIPE_CDECL
do_lit_lut( struct aos_machine
*machine
,
/* The bound table was built for a different exponent than this call's
 * in[3]: rebind this slot to aos_do_lit so later calls go straight to
 * powf().  NOTE(review): presumably control then falls through to the
 * powf() path at the end of this function -- the jump is not visible.
 */
116 if (machine
->lit_info
[count
].shine_tab
->exponent
!= in
[3]) {
117 machine
->lit_info
[count
].func
= aos_do_lit
;
/* Table entries are spaced 1/256 apart (do_populate_lut stores
 * powf(i / 256, exponent) at index i), so in[1] * 256 is a fractional
 * table index.
 */
123 const float *tab
= machine
->lit_info
[count
].shine_tab
->values
;
124 float f
= in
[1] * 256;
/* NOTE(review): `k` is declared on a line missing from this extract;
 * presumably it is the integer part of f -- confirm.
 */
126 float frac
= f
- (float)k
;
/* Linear interpolation between entries k and k+1.  NOTE(review): this
 * reads tab[k+1]; do_populate_lut fills indices up to 257, so the access
 * is in range only while k <= 256 -- the guard that ensures this is not
 * visible here.
 */
130 result
[2] = tab
[k
] + frac
*(tab
[k
+1]-tab
[k
]);
/* Non-table path: same clamp-and-powf computation as aos_do_lit. */
137 const float epsilon
= 1.0F
/ 256.0F
;
138 float exponent
= CLAMP(in
[3], -(128.0F
- epsilon
), (128.0F
- epsilon
));
141 result
[2] = powf(in
[1], exponent
);
/*
 * do_populate_lut -- fill one shine table for a given exponent.
 * (Fragment.)
 *
 * tab                 table to fill; tab->values is written at indices
 *                     1..257, so it must hold at least 258 floats.
 * unclamped_exponent  exponent as supplied by the caller.  It is stored
 *                     unmodified in tab->exponent, while the table
 *                     contents are computed from the value clamped to
 *                     [-(128 - 1/256), +(128 - 1/256)].
 *
 * Two fill loops are visible: one writes 1.0 to every entry, the other
 * writes powf(i / 256, exponent).  NOTE(review): the condition choosing
 * between them, the declaration of `i`, and any write to values[0] are on
 * lines missing from this extract.
 */
155 static void do_populate_lut( struct shine_tab
*tab
,
156 float unclamped_exponent
)
158 const float epsilon
= 1.0F
/ 256.0F
;
159 float exponent
= CLAMP(unclamped_exponent
, -(128.0F
- epsilon
), (128.0F
- epsilon
));
162 tab
->exponent
= unclamped_exponent
; /* for later comparison */
166 for (i
= 1; i
< 258; i
++) {
167 tab
->values
[i
] = 1.0;
171 for (i
= 1; i
< 258; i
++) {
172 tab
->values
[i
] = powf((float)i
* epsilon
, exponent
);
/*
 * populate_lut -- first-call handler installed in lit_info[].func.  It
 * picks (or builds) a shine table for this call's exponent in[3], rebinds
 * lit_info[count].func to the handler to use from now on, and then
 * forwards the current call to that handler.  (Fragment.)
 *
 * Steps visible in this extract, in order:
 *   1. Scan shine_tab[0..3] for an entry whose stored exponent equals
 *      in[3].  What happens on a match is on lines missing here.
 *   2. Otherwise select the entry with the smallest last_used stamp.
 *   3. If even that entry has last_used == machine->now, no table is
 *      free: bind aos_do_lit to this slot and call it.
 *   4. Otherwise rebuild the selected table with do_populate_lut(), stamp
 *      it with machine->now, record it in lit_info[count].shine_tab, bind
 *      do_lit_lut to this slot and call it.
 *
 * NOTE(review): the table count is hard-coded as 4 in both loops, while
 * draw_vs_aos_machine() iterates with MAX_SHINE_TAB; presumably the two
 * agree -- confirm, and prefer the macro.
 * NOTE(review): the parameter list is cut off after `machine`, and the two
 * block comments inside this body have lost their closing lines in this
 * extract.
 */
180 static void PIPE_CDECL
populate_lut( struct aos_machine
*machine
,
187 /* Search for an existing table for this value. Note that without
188 * static analysis we don't really know if in[3] will be constant,
189 * but it usually is...
191 for (tab
= 0; tab
< 4; tab
++) {
192 if (machine
->shine_tab
[tab
].exponent
== in
[3]) {
197 for (tab
= 0, i
= 1; i
< 4; i
++) {
198 if (machine
->shine_tab
[i
].last_used
< machine
->shine_tab
[tab
].last_used
)
202 if (machine
->shine_tab
[tab
].last_used
== machine
->now
) {
203 /* No unused tables (this is not a ffvertex program...). Just
204 * call pow each time:
206 machine
->lit_info
[count
].func
= aos_do_lit
;
207 machine
->lit_info
[count
].func( machine
, result
, in
, count
);
211 do_populate_lut( &machine
->shine_tab
[tab
], in
[3] );
215 machine
->shine_tab
[tab
].last_used
= machine
->now
;
216 machine
->lit_info
[count
].shine_tab
= &machine
->shine_tab
[tab
];
217 machine
->lit_info
[count
].func
= do_lit_lut
;
218 machine
->lit_info
[count
].func( machine
, result
, in
, count
);
/*
 * draw_vs_aos_machine_constants -- point the machine at a new constant
 * buffer.
 *
 * machine    machine to update.
 * constants  array of float[4] rows; only the pointer is stored, nothing
 *            is copied.
 *
 * Every lit_info[] slot (0 .. MAX_LIT_INFO-1) is reset to populate_lut, so
 * the next call through each slot selects its shine table again.
 * NOTE(review): presumably this is because the exponents can come from the
 * constants just replaced -- confirm.  Lines between the pointer store and
 * the loop are missing from this extract.
 */
222 void draw_vs_aos_machine_constants( struct aos_machine
*machine
,
223 const float (*constants
)[4] )
225 machine
->constants
= constants
;
229 for (i
= 0; i
< MAX_LIT_INFO
; i
++) {
230 machine
->lit_info
[i
].func
= populate_lut
;
/*
 * draw_vs_aos_machine_viewport -- copy the viewport transform into the
 * machine.
 *
 * machine   destination; machine->scale and machine->translate are
 *           overwritten.
 * viewport  source; exactly four floats are copied from viewport->scale
 *           and four from viewport->translate.
 */
237 void draw_vs_aos_machine_viewport( struct aos_machine
*machine
,
238 const struct pipe_viewport_state
*viewport
)
240 memcpy(machine
->scale
, viewport
->scale
, 4 * sizeof(float));
241 memcpy(machine
->translate
, viewport
->translate
, 4 * sizeof(float));
/*
 * draw_vs_aos_machine_destroy -- release a machine.
 *
 * Only the signature is visible in this extract.  NOTE(review):
 * draw_vs_aos_machine() in this file obtains the machine from
 * align_malloc(); presumably the body frees it with the matching aligned
 * free -- confirm.
 */
246 void draw_vs_aos_machine_destroy( struct aos_machine
*machine
)
/*
 * draw_vs_aos_machine -- allocate and initialise a machine.  (Fragment.)
 *
 * Visible steps: allocate the struct with 16-byte alignment, zero it, load
 * the internal immediate vectors, compute two FPU control-word values, and
 * fill every shine table for exponent 1.0.
 *
 * NOTE(review): the declarations of `i` and `f255`, any check of the
 * align_malloc() result before the memset, and the return statement are
 * on lines missing from this extract.
 */
251 struct aos_machine
*draw_vs_aos_machine( void )
253 struct aos_machine
*machine
;
255 float inv
= 1.0f
/255.0f
;
258 machine
= align_malloc(sizeof(struct aos_machine
), 16);
262 memset(machine
, 0, sizeof(*machine
));
264 ASSIGN_4V(machine
->internal
[IMM_SWZ
], 1.0f
, -1.0f
, 0.0f
, 1.0f
);
/* Overwrites the fourth component just assigned with an all-ones bit
 * pattern.  NOTE(review): writing a float through an unsigned pointer is
 * a strict-aliasing violation in ISO C; memcpy of a uint32_t would be the
 * portable form.
 */
265 *(unsigned *)&machine
->internal
[IMM_SWZ
][3] = 0xffffffff;
267 ASSIGN_4V(machine
->internal
[IMM_ONES
], 1.0f
, 1.0f
, 1.0f
, 1.0f
);
268 ASSIGN_4V(machine
->internal
[IMM_NEGS
], -1.0f
, -1.0f
, -1.0f
, -1.0f
);
269 ASSIGN_4V(machine
->internal
[IMM_IDENTITY
], 0.0f
, 0.0f
, 0.0f
, 1.0f
);
270 ASSIGN_4V(machine
->internal
[IMM_INV_255
], inv
, inv
, inv
, inv
);
271 ASSIGN_4V(machine
->internal
[IMM_255
], f255
, f255
, f255
, f255
);
272 ASSIGN_4V(machine
->internal
[IMM_RSQ
], -.5f
, 1.5f
, 0.0f
, 0.0f
);
/* Control word for round-to-nearest: all six exception bits, nearest
 * rounding, double-extended precision.  NOTE(review): the terms visible
 * here OR to 0x33f, while the assert below expects 0x37f; the difference
 * is bit 6, which presumably comes from the line missing between the
 * PRECISION and ROUND_NEAREST terms -- confirm.
 */
275 machine
->fpu_rnd_nearest
= (X87_CW_EXCEPTION_INV_OP
|
276 X87_CW_EXCEPTION_DENORM_OP
|
277 X87_CW_EXCEPTION_ZERO_DIVIDE
|
278 X87_CW_EXCEPTION_OVERFLOW
|
279 X87_CW_EXCEPTION_UNDERFLOW
|
280 X87_CW_EXCEPTION_PRECISION
|
282 X87_CW_ROUND_NEAREST
|
283 X87_CW_PRECISION_DOUBLE_EXT
);
285 assert(machine
->fpu_rnd_nearest
== 0x37f);
/* Control word for the second rounding mode.  NOTE(review): no rounding
 * term is visible in this extract (two lines are missing before the
 * precision term); going by the field name it is presumably
 * X87_CW_ROUND_DOWN -- confirm.
 */
287 machine
->fpu_rnd_neg_inf
= (X87_CW_EXCEPTION_INV_OP
|
288 X87_CW_EXCEPTION_DENORM_OP
|
289 X87_CW_EXCEPTION_ZERO_DIVIDE
|
290 X87_CW_EXCEPTION_OVERFLOW
|
291 X87_CW_EXCEPTION_UNDERFLOW
|
292 X87_CW_EXCEPTION_PRECISION
|
295 X87_CW_PRECISION_DOUBLE_EXT
);
/* Give every shine table valid contents (exponent 1.0) up front. */
297 for (i
= 0; i
< MAX_SHINE_TAB
; i
++)
298 do_populate_lut( &machine
->shine_tab
[i
], 1.0f
);
/*
 * Second set of definitions for the four public entry points
 * (viewport, constants, destroy, create).  Only the signatures are
 * visible in this extract.
 *
 * NOTE(review): these names are already defined earlier in the file, and
 * both sets cannot be compiled into one translation unit.  Presumably this
 * set sits in the alternative arm of a preprocessor conditional that is
 * not visible here (for example a build without the x86 code path) --
 * confirm, and keep the two sets of signatures in sync.
 */
305 void draw_vs_aos_machine_viewport( struct aos_machine
*machine
,
306 const struct pipe_viewport_state
*viewport
)
310 void draw_vs_aos_machine_constants( struct aos_machine
*machine
,
311 const float (*constants
)[4] )
315 void draw_vs_aos_machine_destroy( struct aos_machine
*machine
)
319 struct aos_machine
*draw_vs_aos_machine( void )