Merge commit 'origin/master' into gallium-0.2
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos_io.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_memory.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
33 #include "tgsi/tgsi_exec.h"
34 #include "draw_vs.h"
35 #include "draw_vs_aos.h"
36 #include "draw_vertex.h"
37
38 #include "rtasm/rtasm_x86sse.h"
39
40 #ifdef PIPE_ARCH_X86
41
42 /* Note - don't yet have to worry about interacting with the code in
43 * draw_vs_aos.c as there is no intermingling of generated code...
44 * That may have to change, we'll see.
45 */
46 static void emit_load_R32G32B32A32( struct aos_compilation *cp,
47 struct x86_reg data,
48 struct x86_reg src_ptr )
49 {
50 sse_movups(cp->func, data, src_ptr);
51 }
52
53 static void emit_load_R32G32B32( struct aos_compilation *cp,
54 struct x86_reg data,
55 struct x86_reg src_ptr )
56 {
57 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
58 /* data = z ? ? ? */
59 sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
60 /* data = z ? 0 1 */
61 sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
62 /* data = ? 0 z 1 */
63 sse_movlps(cp->func, data, src_ptr);
64 /* data = x y z 1 */
65 }
66
67 static void emit_load_R32G32( struct aos_compilation *cp,
68 struct x86_reg data,
69 struct x86_reg src_ptr )
70 {
71 sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
72 sse_movlps(cp->func, data, src_ptr);
73 }
74
75
76 static void emit_load_R32( struct aos_compilation *cp,
77 struct x86_reg data,
78 struct x86_reg src_ptr )
79 {
80 sse_movss(cp->func, data, src_ptr);
81 sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
82 }
83
84
85 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
86 struct x86_reg data,
87 struct x86_reg src_ptr )
88 {
89 sse_movss(cp->func, data, src_ptr);
90 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
91 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
92 sse2_cvtdq2ps(cp->func, data, data);
93 sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
94 }
95
96
97
98 static void get_src_ptr( struct aos_compilation *cp,
99 struct x86_reg src,
100 struct x86_reg elt,
101 unsigned a )
102 {
103 struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ),
104 a * sizeof(struct aos_attrib));
105
106 struct x86_reg input_ptr = x86_make_disp(attrib,
107 Offset(struct aos_attrib, input_ptr));
108
109 struct x86_reg input_stride = x86_make_disp(attrib,
110 Offset(struct aos_attrib, input_stride));
111
112 /* Calculate pointer to current attrib:
113 */
114 x86_mov(cp->func, src, input_stride);
115 x86_imul(cp->func, src, elt);
116 x86_add(cp->func, src, input_ptr);
117 }
118
119
120 /* Extended swizzles? Maybe later.
121 */
122 static void emit_swizzle( struct aos_compilation *cp,
123 struct x86_reg dest,
124 struct x86_reg src,
125 ubyte shuffle )
126 {
127 sse_shufps(cp->func, dest, src, shuffle);
128 }
129
130
131 static boolean load_input( struct aos_compilation *cp,
132 unsigned idx,
133 boolean linear )
134 {
135 unsigned format = cp->vaos->base.key.element[idx].in.format;
136 struct x86_reg src = cp->tmp_EAX;
137 struct x86_reg dataXMM = aos_get_xmm_reg(cp);
138
139 /* Figure out source pointer address:
140 */
141 get_src_ptr(cp,
142 src,
143 linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
144 idx);
145
146 src = x86_deref(src);
147
148 aos_adopt_xmm_reg( cp,
149 dataXMM,
150 TGSI_FILE_INPUT,
151 idx,
152 TRUE );
153
154 switch (format) {
155 case PIPE_FORMAT_R32_FLOAT:
156 emit_load_R32(cp, dataXMM, src);
157 break;
158 case PIPE_FORMAT_R32G32_FLOAT:
159 emit_load_R32G32(cp, dataXMM, src);
160 break;
161 case PIPE_FORMAT_R32G32B32_FLOAT:
162 emit_load_R32G32B32(cp, dataXMM, src);
163 break;
164 case PIPE_FORMAT_R32G32B32A32_FLOAT:
165 emit_load_R32G32B32A32(cp, dataXMM, src);
166 break;
167 case PIPE_FORMAT_B8G8R8A8_UNORM:
168 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
169 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
170 break;
171 case PIPE_FORMAT_R8G8B8A8_UNORM:
172 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
173 break;
174 default:
175 ERROR(cp, "unhandled input format");
176 return FALSE;
177 }
178
179 return TRUE;
180 }
181
182
183 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
184 {
185 unsigned i;
186
187 for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
188 if (!load_input( cp, i, linear ))
189 return FALSE;
190 cp->insn_counter++;
191 }
192
193 return TRUE;
194 }
195
196
197
198
199
200
201
202 static void emit_store_R32G32B32A32( struct aos_compilation *cp,
203 struct x86_reg dst_ptr,
204 struct x86_reg dataXMM )
205 {
206 sse_movups(cp->func, dst_ptr, dataXMM);
207 }
208
209 static void emit_store_R32G32B32( struct aos_compilation *cp,
210 struct x86_reg dst_ptr,
211 struct x86_reg dataXMM )
212 {
213 sse_movlps(cp->func, dst_ptr, dataXMM);
214 sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
215 sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
216 }
217
218 static void emit_store_R32G32( struct aos_compilation *cp,
219 struct x86_reg dst_ptr,
220 struct x86_reg dataXMM )
221 {
222 sse_movlps(cp->func, dst_ptr, dataXMM);
223 }
224
225 static void emit_store_R32( struct aos_compilation *cp,
226 struct x86_reg dst_ptr,
227 struct x86_reg dataXMM )
228 {
229 sse_movss(cp->func, dst_ptr, dataXMM);
230 }
231
232
233
234 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
235 struct x86_reg dst_ptr,
236 struct x86_reg dataXMM )
237 {
238 sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
239 sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
240 sse2_packssdw(cp->func, dataXMM, dataXMM);
241 sse2_packuswb(cp->func, dataXMM, dataXMM);
242 sse_movss(cp->func, dst_ptr, dataXMM);
243 }
244
245
246
247
248
249 static boolean emit_output( struct aos_compilation *cp,
250 struct x86_reg ptr,
251 struct x86_reg dataXMM,
252 unsigned format )
253 {
254 switch (format) {
255 case EMIT_1F:
256 case EMIT_1F_PSIZE:
257 emit_store_R32(cp, ptr, dataXMM);
258 break;
259 case EMIT_2F:
260 emit_store_R32G32(cp, ptr, dataXMM);
261 break;
262 case EMIT_3F:
263 emit_store_R32G32B32(cp, ptr, dataXMM);
264 break;
265 case EMIT_4F:
266 emit_store_R32G32B32A32(cp, ptr, dataXMM);
267 break;
268 case EMIT_4UB:
269 if (1) {
270 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
271 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
272 }
273 else {
274 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
275 }
276 break;
277 default:
278 ERROR(cp, "unhandled output format");
279 return FALSE;
280 }
281
282 return TRUE;
283 }
284
285
286
287 boolean aos_emit_outputs( struct aos_compilation *cp )
288 {
289 unsigned i;
290
291 for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
292 unsigned format = cp->vaos->base.key.element[i].out.format;
293 unsigned offset = cp->vaos->base.key.element[i].out.offset;
294 unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
295
296 struct x86_reg data;
297
298 if (format == EMIT_1F_PSIZE) {
299 data = aos_get_internal_xmm( cp, IMM_PSIZE );
300 }
301 else {
302 data = aos_get_shader_reg( cp,
303 TGSI_FILE_OUTPUT,
304 vs_output );
305 }
306
307 if (data.file != file_XMM) {
308 struct x86_reg tmp = aos_get_xmm_reg( cp );
309 sse_movups(cp->func, tmp, data);
310 data = tmp;
311 }
312
313 if (!emit_output( cp,
314 x86_make_disp( cp->outbuf_ECX, offset ),
315 data,
316 format ))
317 return FALSE;
318
319 aos_release_xmm_reg( cp, data.idx );
320
321 cp->insn_counter++;
322 }
323
324 return TRUE;
325 }
326
327 #endif