8e08b9285f96203e54479e6d7c8b2870c5279a2d
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos_io.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_memory.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/tgsi_parse.h"
32 #include "tgsi/tgsi_util.h"
33 #include "tgsi/tgsi_exec.h"
34 #include "draw_vs.h"
35 #include "draw_vs_aos.h"
36 #include "draw_vertex.h"
37
38 #include "rtasm/rtasm_x86sse.h"
39
40 #ifdef PIPE_ARCH_X86
41
42 /* Note - don't yet have to worry about interacting with the code in
43 * draw_vs_aos.c as there is no intermingling of generated code...
44 * That may have to change, we'll see.
45 */
46 static void emit_load_R32G32B32A32( struct aos_compilation *cp,
47 struct x86_reg data,
48 struct x86_reg src_ptr )
49 {
50 sse_movups(cp->func, data, src_ptr);
51 }
52
53 static void emit_load_R32G32B32( struct aos_compilation *cp,
54 struct x86_reg data,
55 struct x86_reg src_ptr )
56 {
57 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
58 /* data = z ? ? ? */
59 sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
60 /* data = z ? 0 1 */
61 sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
62 /* data = ? 0 z 1 */
63 sse_movlps(cp->func, data, src_ptr);
64 /* data = x y z 1 */
65 }
66
67 static void emit_load_R32G32( struct aos_compilation *cp,
68 struct x86_reg data,
69 struct x86_reg src_ptr )
70 {
71 sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
72 sse_movlps(cp->func, data, src_ptr);
73 }
74
75
76 static void emit_load_R32( struct aos_compilation *cp,
77 struct x86_reg data,
78 struct x86_reg src_ptr )
79 {
80 sse_movss(cp->func, data, src_ptr);
81 sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
82 }
83
84
85 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
86 struct x86_reg data,
87 struct x86_reg src_ptr )
88 {
89 sse_movss(cp->func, data, src_ptr);
90 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
91 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
92 sse2_cvtdq2ps(cp->func, data, data);
93 sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
94 }
95
96
97
98 /* Extended swizzles? Maybe later.
99 */
100 static void emit_swizzle( struct aos_compilation *cp,
101 struct x86_reg dest,
102 struct x86_reg src,
103 ubyte shuffle )
104 {
105 sse_shufps(cp->func, dest, src, shuffle);
106 }
107
108
109
110 static boolean get_buffer_ptr( struct aos_compilation *cp,
111 unsigned buf_idx,
112 struct x86_reg elt,
113 struct x86_reg ptr)
114 {
115 struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
116 buf_idx * sizeof(struct aos_buffer));
117
118 struct x86_reg buf_base_ptr = x86_make_disp(buf,
119 Offset(struct aos_buffer, base_ptr));
120
121 struct x86_reg buf_stride = x86_make_disp(buf,
122 Offset(struct aos_buffer, stride));
123
124 /* Calculate pointer to current attrib:
125 */
126 x86_mov(cp->func, ptr, buf_stride);
127 x86_imul(cp->func, ptr, elt);
128 x86_add(cp->func, ptr, buf_base_ptr);
129
130 return TRUE;
131 }
132
133
134
135
136 static boolean load_input( struct aos_compilation *cp,
137 unsigned idx,
138 struct x86_reg bufptr )
139 {
140 unsigned format = cp->vaos->base.key.element[idx].in.format;
141 unsigned offset = cp->vaos->base.key.element[idx].in.offset;
142 struct x86_reg dataXMM = aos_get_xmm_reg(cp);
143
144 /* Figure out source pointer address:
145 */
146 struct x86_reg src = x86_make_disp(bufptr, offset);
147
148 aos_adopt_xmm_reg( cp,
149 dataXMM,
150 TGSI_FILE_INPUT,
151 idx,
152 TRUE );
153
154 switch (format) {
155 case PIPE_FORMAT_R32_FLOAT:
156 emit_load_R32(cp, dataXMM, src);
157 break;
158 case PIPE_FORMAT_R32G32_FLOAT:
159 emit_load_R32G32(cp, dataXMM, src);
160 break;
161 case PIPE_FORMAT_R32G32B32_FLOAT:
162 emit_load_R32G32B32(cp, dataXMM, src);
163 break;
164 case PIPE_FORMAT_R32G32B32A32_FLOAT:
165 emit_load_R32G32B32A32(cp, dataXMM, src);
166 break;
167 case PIPE_FORMAT_B8G8R8A8_UNORM:
168 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
169 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
170 break;
171 case PIPE_FORMAT_R8G8B8A8_UNORM:
172 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
173 break;
174 default:
175 ERROR(cp, "unhandled input format");
176 return FALSE;
177 }
178
179 return TRUE;
180 }
181
182 static boolean load_inputs( struct aos_compilation *cp,
183 unsigned buffer,
184 struct x86_reg ptr )
185 {
186 unsigned i;
187
188 for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
189 if (cp->vaos->base.key.element[i].in.buffer == buffer) {
190
191 if (!load_input( cp, i, ptr ))
192 return FALSE;
193
194 cp->insn_counter++;
195 }
196 }
197
198 return TRUE;
199 }
200
201 boolean aos_init_inputs( struct aos_compilation *cp, boolean linear )
202 {
203 if (linear && cp->vaos->nr_vb == 1) {
204
205 struct x86_reg elt = cp->idx_EBX;
206 struct x86_reg ptr = cp->tmp_EAX;
207
208 if (!get_buffer_ptr( cp, 0, elt, ptr ))
209 return FALSE;
210
211 /* In the linear, single buffer case, keep the buffer pointer
212 * instead of the index number.
213 */
214 x86_mov( cp->func, elt, ptr );
215 }
216
217 return TRUE;
218 }
219
220 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
221 {
222 if (linear && cp->vaos->nr_vb == 1) {
223
224 load_inputs( cp, 0, cp->idx_EBX );
225
226 }
227 else {
228 struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX);
229 unsigned j;
230
231 for (j = 0; j < cp->vaos->nr_vb; j++) {
232 struct x86_reg ptr = cp->tmp_EAX;
233
234 if (!get_buffer_ptr( cp, j, elt, ptr ))
235 return FALSE;
236
237 cp->insn_counter++;
238
239 if (!load_inputs( cp, j, ptr ))
240 return FALSE;
241 }
242 }
243
244 return TRUE;
245 }
246
247 boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear )
248 {
249 if (linear && cp->vaos->nr_vb == 1) {
250 struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ),
251 (0 * sizeof(struct aos_buffer) +
252 Offset(struct aos_buffer, stride)));
253
254 x86_add(cp->func, cp->idx_EBX, stride);
255 }
256 else if (linear) {
257 x86_inc(cp->func, cp->idx_EBX);
258 }
259 else {
260 x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4));
261 }
262 }
263
264
265
266
267
268
269 static void emit_store_R32G32B32A32( struct aos_compilation *cp,
270 struct x86_reg dst_ptr,
271 struct x86_reg dataXMM )
272 {
273 sse_movups(cp->func, dst_ptr, dataXMM);
274 }
275
276 static void emit_store_R32G32B32( struct aos_compilation *cp,
277 struct x86_reg dst_ptr,
278 struct x86_reg dataXMM )
279 {
280 sse_movlps(cp->func, dst_ptr, dataXMM);
281 sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
282 sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
283 }
284
285 static void emit_store_R32G32( struct aos_compilation *cp,
286 struct x86_reg dst_ptr,
287 struct x86_reg dataXMM )
288 {
289 sse_movlps(cp->func, dst_ptr, dataXMM);
290 }
291
292 static void emit_store_R32( struct aos_compilation *cp,
293 struct x86_reg dst_ptr,
294 struct x86_reg dataXMM )
295 {
296 sse_movss(cp->func, dst_ptr, dataXMM);
297 }
298
299
300
301 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
302 struct x86_reg dst_ptr,
303 struct x86_reg dataXMM )
304 {
305 sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
306 sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
307 sse2_packssdw(cp->func, dataXMM, dataXMM);
308 sse2_packuswb(cp->func, dataXMM, dataXMM);
309 sse_movss(cp->func, dst_ptr, dataXMM);
310 }
311
312
313
314
315
316 static boolean emit_output( struct aos_compilation *cp,
317 struct x86_reg ptr,
318 struct x86_reg dataXMM,
319 unsigned format )
320 {
321 switch (format) {
322 case EMIT_1F:
323 case EMIT_1F_PSIZE:
324 emit_store_R32(cp, ptr, dataXMM);
325 break;
326 case EMIT_2F:
327 emit_store_R32G32(cp, ptr, dataXMM);
328 break;
329 case EMIT_3F:
330 emit_store_R32G32B32(cp, ptr, dataXMM);
331 break;
332 case EMIT_4F:
333 emit_store_R32G32B32A32(cp, ptr, dataXMM);
334 break;
335 case EMIT_4UB:
336 if (1) {
337 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
338 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
339 }
340 else {
341 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
342 }
343 break;
344 default:
345 ERROR(cp, "unhandled output format");
346 return FALSE;
347 }
348
349 return TRUE;
350 }
351
352
353
354 boolean aos_emit_outputs( struct aos_compilation *cp )
355 {
356 unsigned i;
357
358 for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
359 unsigned format = cp->vaos->base.key.element[i].out.format;
360 unsigned offset = cp->vaos->base.key.element[i].out.offset;
361 unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
362
363 struct x86_reg data;
364
365 if (format == EMIT_1F_PSIZE) {
366 data = aos_get_internal_xmm( cp, IMM_PSIZE );
367 }
368 else {
369 data = aos_get_shader_reg( cp,
370 TGSI_FILE_OUTPUT,
371 vs_output );
372 }
373
374 if (data.file != file_XMM) {
375 struct x86_reg tmp = aos_get_xmm_reg( cp );
376 sse_movups(cp->func, tmp, data);
377 data = tmp;
378 }
379
380 if (!emit_output( cp,
381 x86_make_disp( cp->outbuf_ECX, offset ),
382 data,
383 format ))
384 return FALSE;
385
386 aos_release_xmm_reg( cp, data.idx );
387
388 cp->insn_counter++;
389 }
390
391 return TRUE;
392 }
393
394 #endif