1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
29 #include "pipe/p_util.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/util/tgsi_parse.h"
32 #include "tgsi/util/tgsi_util.h"
33 #include "tgsi/exec/tgsi_exec.h"
35 #include "draw_vs_aos.h"
37 #include "rtasm/rtasm_x86sse.h"
/* Note - we don't yet have to worry about interacting with the code in
 * draw_vs_aos.c, as there is no intermingling of generated code...
 * That may have to change, we'll see.
 */
45 static void emit_load_R32G32B32A32( struct aos_compilation
*cp
,
47 struct x86_reg src_ptr
)
49 sse_movups(cp
->func
, data
, src_ptr
);
52 static void emit_load_R32G32B32( struct aos_compilation
*cp
,
54 struct x86_reg src_ptr
)
56 sse_movss(cp
->func
, data
, x86_make_disp(src_ptr
, 8));
57 sse_shufps(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
), SHUF(X
,Y
,Z
,W
) );
58 sse_shufps(cp
->func
, data
, data
, SHUF(Y
,Z
,X
,W
) );
59 sse_movlps(cp
->func
, data
, src_ptr
);
62 static void emit_load_R32G32( struct aos_compilation
*cp
,
64 struct x86_reg src_ptr
)
66 sse_movups(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
) );
67 sse_movlps(cp
->func
, data
, src_ptr
);
71 static void emit_load_R32( struct aos_compilation
*cp
,
73 struct x86_reg src_ptr
)
75 sse_movss(cp
->func
, data
, src_ptr
);
76 sse_orps(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
) );
80 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation
*cp
,
82 struct x86_reg src_ptr
)
84 sse_movss(cp
->func
, data
, src_ptr
);
85 sse2_punpcklbw(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
));
86 sse2_punpcklbw(cp
->func
, data
, aos_get_internal_xmm( cp
, IMM_IDENTITY
));
87 sse2_cvtdq2ps(cp
->func
, data
, data
);
88 sse_mulps(cp
->func
, data
, aos_get_internal(cp
, IMM_INV_255
));
/* Emit integer code that leaves in `src` the address of the current element
 * of attribute `a`:
 *
 *    src = attrib[a].input_stride * elt + attrib[a].input_ptr
 *
 * Both fields are read from the struct aos_machine addressed by `machine`,
 * using Offset() displacements.  Code is appended to `func`.
 *
 * NOTE(review): the declarations of `src`, `elt` and `a`, and the function's
 * braces, are not visible in this excerpt - confirm against the full file.
 */
93 static void get_src_ptr( struct x86_function
*func
,
95 struct x86_reg machine
,
99 struct x86_reg input_ptr
=
100 x86_make_disp(machine
,
101 Offset(struct aos_machine
, attrib
[a
].input_ptr
));
103 struct x86_reg input_stride
=
104 x86_make_disp(machine
,
105 Offset(struct aos_machine
, attrib
[a
].input_stride
));
107 /* Calculate pointer to current attrib:
109 x86_mov(func
, src
, input_stride
);
110 x86_imul(func
, src
, elt
);
111 x86_add(func
, src
, input_ptr
);
/* Extended swizzles?  Maybe later.
 */
/* Emit a single shufps on cp->func with operands (dest, src, shuffle).
 *
 * Callers in this file pass the same register for dest and src together
 * with a SHUF(...) selector to reorder components in place.
 *
 * NOTE(review): the declarations of `dest`, `src` and `shuffle` are not
 * visible in this excerpt - confirm their types against the full file.
 */
117 static void emit_swizzle( struct aos_compilation
*cp
,
122 sse_shufps(cp
->func
, dest
, src
, shuffle
);
/* Emit code to fetch vertex element `idx` into a freshly obtained XMM
 * register.
 *
 * Steps visible here:
 *   - read the element's input format from the vertex shader key;
 *   - compute the source address into cp->tmp_EAX via get_src_ptr(), using
 *     cp->idx_EBX directly when `linear` is set and x86_deref(cp->idx_EBX)
 *     otherwise, then dereference it;
 *   - hand the XMM register to aos_adopt_xmm_reg();
 *   - select the emit_load_* helper matching the format.  B8G8R8A8_UNORM is
 *     loaded as R8G8B8A8_UNORM and then swizzled with SHUF(Z,Y,X,W);
 *   - report "unhandled input format" through ERROR() otherwise.
 *
 * NOTE(review): the `idx`/`linear` parameter declarations, the remaining
 * arguments of get_src_ptr() and aos_adopt_xmm_reg(), the switch/break
 * lines and the return statements are not visible in this excerpt.
 */
126 static boolean
load_input( struct aos_compilation
*cp
,
130 unsigned format
= cp
->vaos
->base
.key
.element
[idx
].in
.format
;
131 struct x86_reg src
= cp
->tmp_EAX
;
132 struct x86_reg dataXMM
= aos_get_xmm_reg(cp
);
134 /* Figure out source pointer address:
136 get_src_ptr(cp
->func
,
139 linear
? cp
->idx_EBX
: x86_deref(cp
->idx_EBX
),
142 src
= x86_deref(src
);
144 aos_adopt_xmm_reg( cp
,
151 case PIPE_FORMAT_R32_FLOAT
:
152 emit_load_R32(cp
, dataXMM
, src
);
154 case PIPE_FORMAT_R32G32_FLOAT
:
155 emit_load_R32G32(cp
, dataXMM
, src
);
157 case PIPE_FORMAT_R32G32B32_FLOAT
:
158 emit_load_R32G32B32(cp
, dataXMM
, src
);
160 case PIPE_FORMAT_R32G32B32A32_FLOAT
:
161 emit_load_R32G32B32A32(cp
, dataXMM
, src
);
163 case PIPE_FORMAT_B8G8R8A8_UNORM
:
164 emit_load_R8G8B8A8_UNORM(cp
, dataXMM
, src
);
165 emit_swizzle(cp
, dataXMM
, dataXMM
, SHUF(Z
,Y
,X
,W
));
167 case PIPE_FORMAT_R8G8B8A8_UNORM
:
168 emit_load_R8G8B8A8_UNORM(cp
, dataXMM
, src
);
171 ERROR(cp
, "unhandled input format");
/* Emit fetch code for every vertex input: calls load_input(cp, i, linear)
 * for i in [0, key.nr_inputs) and tests each call for failure.
 *
 * NOTE(review): the declaration of `i`, the body of the failure branch and
 * the return statements are not visible in this excerpt; presumably the
 * function returns FALSE on the first failed load - confirm.
 */
179 boolean
aos_fetch_inputs( struct aos_compilation
*cp
, boolean linear
)
183 for (i
= 0; i
< cp
->vaos
->base
.key
.nr_inputs
; i
++) {
184 if (!load_input( cp
, i
, linear
))
199 static void emit_store_R32G32B32A32( struct aos_compilation
*cp
,
200 struct x86_reg dst_ptr
,
201 struct x86_reg dataXMM
)
203 sse_movups(cp
->func
, dst_ptr
, dataXMM
);
206 static void emit_store_R32G32B32( struct aos_compilation
*cp
,
207 struct x86_reg dst_ptr
,
208 struct x86_reg dataXMM
)
210 sse_movlps(cp
->func
, dst_ptr
, dataXMM
);
211 sse_shufps(cp
->func
, dataXMM
, dataXMM
, SHUF(Z
,Z
,Z
,Z
) ); /* NOTE! destructive */
212 sse_movss(cp
->func
, x86_make_disp(dst_ptr
,8), dataXMM
);
215 static void emit_store_R32G32( struct aos_compilation
*cp
,
216 struct x86_reg dst_ptr
,
217 struct x86_reg dataXMM
)
219 sse_movlps(cp
->func
, dst_ptr
, dataXMM
);
222 static void emit_store_R32( struct aos_compilation
*cp
,
223 struct x86_reg dst_ptr
,
224 struct x86_reg dataXMM
)
226 sse_movss(cp
->func
, dst_ptr
, dataXMM
);
231 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation
*cp
,
232 struct x86_reg dst_ptr
,
233 struct x86_reg dataXMM
)
235 sse_mulps(cp
->func
, dataXMM
, aos_get_internal(cp
, IMM_255
));
236 sse2_cvtps2dq(cp
->func
, dataXMM
, dataXMM
);
237 sse2_packssdw(cp
->func
, dataXMM
, dataXMM
);
238 sse2_packuswb(cp
->func
, dataXMM
, dataXMM
);
239 sse_movss(cp
->func
, dst_ptr
, dataXMM
);
/* Emit code to store the register `dataXMM` to `ptr`, choosing the
 * emit_store_* helper that matches the output format.
 *
 * B8G8R8A8_UNORM first swizzles dataXMM in place with SHUF(Z,Y,X,W) and then
 * uses the R8G8B8A8_UNORM store, so dataXMM is modified on that path (the
 * R32G32B32 and R8G8B8A8 stores called here also clobber it).  Any other
 * format is reported through ERROR() as "unhandled output format".
 *
 * NOTE(review): the `ptr` and format parameter declarations, the
 * switch/break lines and the return statements are not visible in this
 * excerpt - confirm against the full file.
 */
246 static boolean
emit_output( struct aos_compilation
*cp
,
248 struct x86_reg dataXMM
,
252 case PIPE_FORMAT_R32_FLOAT
:
253 emit_store_R32(cp
, ptr
, dataXMM
);
255 case PIPE_FORMAT_R32G32_FLOAT
:
256 emit_store_R32G32(cp
, ptr
, dataXMM
);
258 case PIPE_FORMAT_R32G32B32_FLOAT
:
259 emit_store_R32G32B32(cp
, ptr
, dataXMM
);
261 case PIPE_FORMAT_R32G32B32A32_FLOAT
:
262 emit_store_R32G32B32A32(cp
, ptr
, dataXMM
);
264 case PIPE_FORMAT_B8G8R8A8_UNORM
:
265 emit_swizzle(cp
, dataXMM
, dataXMM
, SHUF(Z
,Y
,X
,W
));
266 emit_store_R8G8B8A8_UNORM(cp
, ptr
, dataXMM
);
268 case PIPE_FORMAT_R8G8B8A8_UNORM
:
269 emit_store_R8G8B8A8_UNORM(cp
, ptr
, dataXMM
);
272 ERROR(cp
, "unhandled output format");
/* Emit store code for every vertex output.
 *
 * For each i in [0, key.nr_outputs) the visible code:
 *   - reads the element's out.format, out.offset and out.vs_output from the
 *     vertex shader key;
 *   - looks up the shader register via aos_get_shader_reg();
 *   - if that register is not in file_XMM, obtains an XMM register and
 *     copies the value into it with movups;
 *   - calls emit_output() with the destination
 *     x86_make_disp(cp->outbuf_ECX, offset) and tests it for failure;
 *   - releases the XMM register data.idx.
 *
 * NOTE(review): the declaration of `i`, the remaining arguments of
 * aos_get_shader_reg() and emit_output(), the statement that presumably
 * re-points `data` at `tmp`, and the return statements are not visible in
 * this excerpt - confirm against the full file.
 */
281 boolean
aos_emit_outputs( struct aos_compilation
*cp
)
285 for (i
= 0; i
< cp
->vaos
->base
.key
.nr_outputs
; i
++) {
286 unsigned format
= cp
->vaos
->base
.key
.element
[i
].out
.format
;
287 unsigned offset
= cp
->vaos
->base
.key
.element
[i
].out
.offset
;
288 unsigned vs_output
= cp
->vaos
->base
.key
.element
[i
].out
.vs_output
;
290 struct x86_reg data
= aos_get_shader_reg( cp
,
294 if (data
.file
!= file_XMM
) {
295 struct x86_reg tmp
= aos_get_xmm_reg( cp
);
296 sse_movups(cp
->func
, tmp
, data
);
300 if (!emit_output( cp
,
301 x86_make_disp( cp
->outbuf_ECX
, offset
),
306 aos_release_xmm_reg( cp
, data
.idx
);