1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "util/u_memory.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
33 #include "tgsi/tgsi_exec.h"
35 #include "draw_vs_aos.h"
36 #include "draw_vertex.h"
38 #include "rtasm/rtasm_x86sse.h"
/* Note - don't yet have to worry about interacting with the code in
 * draw_vs_aos.c as there is no intermingling of generated code...
 * That may have to change, we'll see.
 */
46 static void emit_load_R32G32B32A32( struct aos_compilation
*cp
,
48 struct x86_reg src_ptr
)
50 sse_movups(cp
->func
, data
, src_ptr
);
53 static void emit_load_R32G32B32( struct aos_compilation
*cp
,
55 struct x86_reg src_ptr
)
58 sse_movss(cp
->func
, data
, x86_make_disp(src_ptr
, 8));
60 sse_shufps(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
), SHUF(X
,Y
,Z
,W
) );
62 sse_shufps(cp
->func
, data
, data
, SHUF(Y
,Z
,X
,W
) );
64 sse_movlps(cp
->func
, data
, src_ptr
);
67 sse_movups(cp
->func
, data
, src_ptr
);
69 sse2_pshufd(cp
->func
, data
, data
, SHUF(W
,X
,Y
,Z
) );
71 sse_movss(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_ONES
) );
73 sse2_pshufd(cp
->func
, data
, data
, SHUF(Y
,Z
,W
,X
) );
78 static void emit_load_R32G32( struct aos_compilation
*cp
,
80 struct x86_reg src_ptr
)
82 sse_movups(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
) );
83 sse_movlps(cp
->func
, data
, src_ptr
);
87 static void emit_load_R32( struct aos_compilation
*cp
,
89 struct x86_reg src_ptr
)
91 sse_movss(cp
->func
, data
, src_ptr
);
92 sse_orps(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
) );
96 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation
*cp
,
98 struct x86_reg src_ptr
)
100 sse_movss(cp
->func
, data
, src_ptr
);
101 sse2_punpcklbw(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
));
102 sse2_punpcklbw(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
));
103 sse2_cvtdq2ps(cp
->func
, data
, data
);
104 sse_mulps(cp
->func
, data
, aos_get_internal(cp
, IMM_INV_255
));
/* Extended swizzles?  Maybe later.
 */
111 static void emit_swizzle( struct aos_compilation
*cp
,
116 sse_shufps(cp
->func
, dest
, src
, shuffle
);
121 static boolean
get_buffer_ptr( struct aos_compilation
*cp
,
127 struct x86_reg buf
= x86_make_disp(aos_get_x86( cp
, 0, X86_BUFFERS
),
128 buf_idx
* sizeof(struct aos_buffer
));
130 struct x86_reg buf_stride
= x86_make_disp(buf
,
131 Offset(struct aos_buffer
, stride
));
133 struct x86_reg buf_ptr
= x86_make_disp(buf
,
134 Offset(struct aos_buffer
, ptr
));
137 /* Calculate pointer to current attrib:
139 x86_mov(cp
->func
, ptr
, buf_ptr
);
140 x86_mov(cp
->func
, elt
, buf_stride
);
141 x86_add(cp
->func
, elt
, ptr
);
142 if (buf_idx
== 0) sse_prefetchnta(cp
->func
, x86_make_disp(elt
, 192));
143 x86_mov(cp
->func
, buf_ptr
, elt
);
146 struct x86_reg buf_base_ptr
= x86_make_disp(buf
,
147 Offset(struct aos_buffer
, base_ptr
));
150 /* Calculate pointer to current attrib:
152 x86_mov(cp
->func
, ptr
, buf_stride
);
153 x86_imul(cp
->func
, ptr
, elt
);
154 x86_add(cp
->func
, ptr
, buf_base_ptr
);
163 static boolean
load_input( struct aos_compilation
*cp
,
165 struct x86_reg bufptr
)
167 unsigned format
= cp
->vaos
->base
.key
.element
[idx
].in
.format
;
168 unsigned offset
= cp
->vaos
->base
.key
.element
[idx
].in
.offset
;
169 struct x86_reg dataXMM
= aos_get_xmm_reg(cp
);
171 /* Figure out source pointer address:
173 struct x86_reg src
= x86_make_disp(bufptr
, offset
);
175 aos_adopt_xmm_reg( cp
,
182 case PIPE_FORMAT_R32_FLOAT
:
183 emit_load_R32(cp
, dataXMM
, src
);
185 case PIPE_FORMAT_R32G32_FLOAT
:
186 emit_load_R32G32(cp
, dataXMM
, src
);
188 case PIPE_FORMAT_R32G32B32_FLOAT
:
189 emit_load_R32G32B32(cp
, dataXMM
, src
);
191 case PIPE_FORMAT_R32G32B32A32_FLOAT
:
192 emit_load_R32G32B32A32(cp
, dataXMM
, src
);
194 case PIPE_FORMAT_A8R8G8B8_UNORM
:
195 emit_load_R8G8B8A8_UNORM(cp
, dataXMM
, src
);
196 emit_swizzle(cp
, dataXMM
, dataXMM
, SHUF(Z
,Y
,X
,W
));
198 case PIPE_FORMAT_R8G8B8A8_UNORM
:
199 emit_load_R8G8B8A8_UNORM(cp
, dataXMM
, src
);
202 AOS_ERROR(cp
, "unhandled input format");
209 static boolean
load_inputs( struct aos_compilation
*cp
,
215 for (i
= 0; i
< cp
->vaos
->base
.key
.nr_inputs
; i
++) {
216 if (cp
->vaos
->base
.key
.element
[i
].in
.buffer
== buffer
) {
218 if (!load_input( cp
, i
, ptr
))
228 boolean
aos_init_inputs( struct aos_compilation
*cp
, boolean linear
)
231 for (i
= 0; i
< cp
->vaos
->nr_vb
; i
++) {
232 struct x86_reg buf
= x86_make_disp(aos_get_x86( cp
, 0, X86_BUFFERS
),
233 i
* sizeof(struct aos_buffer
));
235 struct x86_reg buf_base_ptr
= x86_make_disp(buf
,
236 Offset(struct aos_buffer
, base_ptr
));
238 if (cp
->vaos
->base
.key
.const_vbuffers
& (1<<i
)) {
239 struct x86_reg ptr
= cp
->tmp_EAX
;
241 x86_mov(cp
->func
, ptr
, buf_base_ptr
);
243 /* Load all inputs for this constant vertex buffer
245 load_inputs( cp
, i
, x86_deref(ptr
) );
247 /* Then just force them out to aos_machine.input[]
254 struct x86_reg elt
= cp
->idx_EBX
;
255 struct x86_reg ptr
= cp
->tmp_EAX
;
257 struct x86_reg buf_stride
= x86_make_disp(buf
,
258 Offset(struct aos_buffer
, stride
));
260 struct x86_reg buf_ptr
= x86_make_disp(buf
,
261 Offset(struct aos_buffer
, ptr
));
264 /* Calculate pointer to current attrib:
266 x86_mov(cp
->func
, ptr
, buf_stride
);
267 x86_imul(cp
->func
, ptr
, elt
);
268 x86_add(cp
->func
, ptr
, buf_base_ptr
);
271 /* In the linear case, keep the buffer pointer instead of the
274 if (cp
->vaos
->nr_vb
== 1)
275 x86_mov( cp
->func
, elt
, ptr
);
277 x86_mov( cp
->func
, buf_ptr
, ptr
);
286 boolean
aos_fetch_inputs( struct aos_compilation
*cp
, boolean linear
)
290 for (j
= 0; j
< cp
->vaos
->nr_vb
; j
++) {
291 if (cp
->vaos
->base
.key
.const_vbuffers
& (1<<j
)) {
292 /* just retreive pre-transformed input */
294 else if (linear
&& cp
->vaos
->nr_vb
== 1) {
295 load_inputs( cp
, 0, cp
->idx_EBX
);
298 struct x86_reg elt
= linear
? cp
->idx_EBX
: x86_deref(cp
->idx_EBX
);
299 struct x86_reg ptr
= cp
->tmp_EAX
;
301 if (!get_buffer_ptr( cp
, linear
, j
, elt
, ptr
))
304 if (!load_inputs( cp
, j
, ptr
))
312 boolean
aos_incr_inputs( struct aos_compilation
*cp
, boolean linear
)
314 if (linear
&& cp
->vaos
->nr_vb
== 1) {
315 struct x86_reg stride
= x86_make_disp(aos_get_x86( cp
, 0, X86_BUFFERS
),
316 (0 * sizeof(struct aos_buffer
) +
317 Offset(struct aos_buffer
, stride
)));
319 x86_add(cp
->func
, cp
->idx_EBX
, stride
);
320 sse_prefetchnta(cp
->func
, x86_make_disp(cp
->idx_EBX
, 192));
326 x86_lea(cp
->func
, cp
->idx_EBX
, x86_make_disp(cp
->idx_EBX
, 4));
337 static void emit_store_R32G32B32A32( struct aos_compilation
*cp
,
338 struct x86_reg dst_ptr
,
339 struct x86_reg dataXMM
)
341 sse_movups(cp
->func
, dst_ptr
, dataXMM
);
344 static void emit_store_R32G32B32( struct aos_compilation
*cp
,
345 struct x86_reg dst_ptr
,
346 struct x86_reg dataXMM
)
348 sse_movlps(cp
->func
, dst_ptr
, dataXMM
);
349 sse_shufps(cp
->func
, dataXMM
, dataXMM
, SHUF(Z
,Z
,Z
,Z
) ); /* NOTE! destructive */
350 sse_movss(cp
->func
, x86_make_disp(dst_ptr
,8), dataXMM
);
353 static void emit_store_R32G32( struct aos_compilation
*cp
,
354 struct x86_reg dst_ptr
,
355 struct x86_reg dataXMM
)
357 sse_movlps(cp
->func
, dst_ptr
, dataXMM
);
360 static void emit_store_R32( struct aos_compilation
*cp
,
361 struct x86_reg dst_ptr
,
362 struct x86_reg dataXMM
)
364 sse_movss(cp
->func
, dst_ptr
, dataXMM
);
369 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation
*cp
,
370 struct x86_reg dst_ptr
,
371 struct x86_reg dataXMM
)
373 sse_mulps(cp
->func
, dataXMM
, aos_get_internal(cp
, IMM_255
));
374 sse2_cvtps2dq(cp
->func
, dataXMM
, dataXMM
);
375 sse2_packssdw(cp
->func
, dataXMM
, dataXMM
);
376 sse2_packuswb(cp
->func
, dataXMM
, dataXMM
);
377 sse_movss(cp
->func
, dst_ptr
, dataXMM
);
384 static boolean
emit_output( struct aos_compilation
*cp
,
386 struct x86_reg dataXMM
,
392 emit_store_R32(cp
, ptr
, dataXMM
);
395 emit_store_R32G32(cp
, ptr
, dataXMM
);
398 emit_store_R32G32B32(cp
, ptr
, dataXMM
);
401 emit_store_R32G32B32A32(cp
, ptr
, dataXMM
);
405 emit_swizzle(cp
, dataXMM
, dataXMM
, SHUF(Z
,Y
,X
,W
));
406 emit_store_R8G8B8A8_UNORM(cp
, ptr
, dataXMM
);
409 emit_store_R8G8B8A8_UNORM(cp
, ptr
, dataXMM
);
413 AOS_ERROR(cp
, "unhandled output format");
422 boolean
aos_emit_outputs( struct aos_compilation
*cp
)
426 for (i
= 0; i
< cp
->vaos
->base
.key
.nr_outputs
; i
++) {
427 unsigned format
= cp
->vaos
->base
.key
.element
[i
].out
.format
;
428 unsigned offset
= cp
->vaos
->base
.key
.element
[i
].out
.offset
;
429 unsigned vs_output
= cp
->vaos
->base
.key
.element
[i
].out
.vs_output
;
433 if (format
== EMIT_1F_PSIZE
) {
434 data
= aos_get_internal_xmm( cp
, IMM_PSIZE
);
437 data
= aos_get_shader_reg( cp
,
442 if (data
.file
!= file_XMM
) {
443 struct x86_reg tmp
= aos_get_xmm_reg( cp
);
444 sse_movaps(cp
->func
, tmp
, data
);
448 if (!emit_output( cp
,
449 x86_make_disp( cp
->outbuf_ECX
, offset
),
454 aos_release_xmm_reg( cp
, data
.idx
);