draw: ensure vs outputs mapped correctly to vinfo attribs
[mesa.git] / src / gallium / auxiliary / draw / draw_vs_aos_io.c
1 /**************************************************************************
2 *
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include "pipe/p_util.h"
30 #include "pipe/p_shader_tokens.h"
31 #include "tgsi/util/tgsi_parse.h"
32 #include "tgsi/util/tgsi_util.h"
33 #include "tgsi/exec/tgsi_exec.h"
34 #include "draw_vs.h"
35 #include "draw_vs_aos.h"
36
37 #include "rtasm/rtasm_x86sse.h"
38
39 #ifdef PIPE_ARCH_X86
40
41 /* Note - don't yet have to worry about interacting with the code in
42 * draw_vs_aos.c as there is no intermingling of generated code...
43 * That may have to change, we'll see.
44 */
45 static void emit_load_R32G32B32A32( struct aos_compilation *cp,
46 struct x86_reg data,
47 struct x86_reg src_ptr )
48 {
49 sse_movups(cp->func, data, src_ptr);
50 }
51
52 static void emit_load_R32G32B32( struct aos_compilation *cp,
53 struct x86_reg data,
54 struct x86_reg src_ptr )
55 {
56 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8));
57 sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) );
58 sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) );
59 sse_movlps(cp->func, data, src_ptr);
60 }
61
62 static void emit_load_R32G32( struct aos_compilation *cp,
63 struct x86_reg data,
64 struct x86_reg src_ptr )
65 {
66 sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
67 sse_movlps(cp->func, data, src_ptr);
68 }
69
70
71 static void emit_load_R32( struct aos_compilation *cp,
72 struct x86_reg data,
73 struct x86_reg src_ptr )
74 {
75 sse_movss(cp->func, data, src_ptr);
76 sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) );
77 }
78
79
80 static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp,
81 struct x86_reg data,
82 struct x86_reg src_ptr )
83 {
84 sse_movss(cp->func, data, src_ptr);
85 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
86 sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ));
87 sse2_cvtdq2ps(cp->func, data, data);
88 sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255));
89 }
90
91
92
93 static void get_src_ptr( struct x86_function *func,
94 struct x86_reg src,
95 struct x86_reg machine,
96 struct x86_reg elt,
97 unsigned a )
98 {
99 struct x86_reg input_ptr =
100 x86_make_disp(machine,
101 Offset(struct aos_machine, attrib[a].input_ptr));
102
103 struct x86_reg input_stride =
104 x86_make_disp(machine,
105 Offset(struct aos_machine, attrib[a].input_stride));
106
107 /* Calculate pointer to current attrib:
108 */
109 x86_mov(func, src, input_stride);
110 x86_imul(func, src, elt);
111 x86_add(func, src, input_ptr);
112 }
113
114
115 /* Extended swizzles? Maybe later.
116 */
117 static void emit_swizzle( struct aos_compilation *cp,
118 struct x86_reg dest,
119 struct x86_reg src,
120 unsigned shuffle )
121 {
122 sse_shufps(cp->func, dest, src, shuffle);
123 }
124
125
126 static boolean load_input( struct aos_compilation *cp,
127 unsigned idx,
128 boolean linear )
129 {
130 unsigned format = cp->vaos->base.key.element[idx].in.format;
131 struct x86_reg src = cp->tmp_EAX;
132 struct x86_reg dataXMM = aos_get_xmm_reg(cp);
133
134 /* Figure out source pointer address:
135 */
136 get_src_ptr(cp->func,
137 src,
138 cp->machine_EDX,
139 linear ? cp->idx_EBX : x86_deref(cp->idx_EBX),
140 idx);
141
142 src = x86_deref(src);
143
144 aos_adopt_xmm_reg( cp,
145 dataXMM,
146 TGSI_FILE_INPUT,
147 idx,
148 TRUE );
149
150 switch (format) {
151 case PIPE_FORMAT_R32_FLOAT:
152 emit_load_R32(cp, dataXMM, src);
153 break;
154 case PIPE_FORMAT_R32G32_FLOAT:
155 emit_load_R32G32(cp, dataXMM, src);
156 break;
157 case PIPE_FORMAT_R32G32B32_FLOAT:
158 emit_load_R32G32B32(cp, dataXMM, src);
159 break;
160 case PIPE_FORMAT_R32G32B32A32_FLOAT:
161 emit_load_R32G32B32A32(cp, dataXMM, src);
162 break;
163 case PIPE_FORMAT_B8G8R8A8_UNORM:
164 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
165 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
166 break;
167 case PIPE_FORMAT_R8G8B8A8_UNORM:
168 emit_load_R8G8B8A8_UNORM(cp, dataXMM, src);
169 break;
170 default:
171 ERROR(cp, "unhandled input format");
172 return FALSE;
173 }
174
175 return TRUE;
176 }
177
178
179 boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear )
180 {
181 unsigned i;
182
183 for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) {
184 if (!load_input( cp, i, linear ))
185 return FALSE;
186 cp->insn_counter++;
187 debug_printf("\n");
188 }
189
190 return TRUE;
191 }
192
193
194
195
196
197
198
199 static void emit_store_R32G32B32A32( struct aos_compilation *cp,
200 struct x86_reg dst_ptr,
201 struct x86_reg dataXMM )
202 {
203 sse_movups(cp->func, dst_ptr, dataXMM);
204 }
205
206 static void emit_store_R32G32B32( struct aos_compilation *cp,
207 struct x86_reg dst_ptr,
208 struct x86_reg dataXMM )
209 {
210 sse_movlps(cp->func, dst_ptr, dataXMM);
211 sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
212 sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM);
213 }
214
215 static void emit_store_R32G32( struct aos_compilation *cp,
216 struct x86_reg dst_ptr,
217 struct x86_reg dataXMM )
218 {
219 sse_movlps(cp->func, dst_ptr, dataXMM);
220 }
221
222 static void emit_store_R32( struct aos_compilation *cp,
223 struct x86_reg dst_ptr,
224 struct x86_reg dataXMM )
225 {
226 sse_movss(cp->func, dst_ptr, dataXMM);
227 }
228
229
230
231 static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp,
232 struct x86_reg dst_ptr,
233 struct x86_reg dataXMM )
234 {
235 sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255));
236 sse2_cvtps2dq(cp->func, dataXMM, dataXMM);
237 sse2_packssdw(cp->func, dataXMM, dataXMM);
238 sse2_packuswb(cp->func, dataXMM, dataXMM);
239 sse_movss(cp->func, dst_ptr, dataXMM);
240 }
241
242
243
244
245
246 static boolean emit_output( struct aos_compilation *cp,
247 struct x86_reg ptr,
248 struct x86_reg dataXMM,
249 unsigned format )
250 {
251 switch (format) {
252 case PIPE_FORMAT_R32_FLOAT:
253 emit_store_R32(cp, ptr, dataXMM);
254 break;
255 case PIPE_FORMAT_R32G32_FLOAT:
256 emit_store_R32G32(cp, ptr, dataXMM);
257 break;
258 case PIPE_FORMAT_R32G32B32_FLOAT:
259 emit_store_R32G32B32(cp, ptr, dataXMM);
260 break;
261 case PIPE_FORMAT_R32G32B32A32_FLOAT:
262 emit_store_R32G32B32A32(cp, ptr, dataXMM);
263 break;
264 case PIPE_FORMAT_B8G8R8A8_UNORM:
265 emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W));
266 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
267 break;
268 case PIPE_FORMAT_R8G8B8A8_UNORM:
269 emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM);
270 break;
271 default:
272 ERROR(cp, "unhandled output format");
273 return FALSE;
274 }
275
276 return TRUE;
277 }
278
279
280
281 boolean aos_emit_outputs( struct aos_compilation *cp )
282 {
283 unsigned i;
284
285 for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) {
286 unsigned format = cp->vaos->base.key.element[i].out.format;
287 unsigned offset = cp->vaos->base.key.element[i].out.offset;
288 unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output;
289
290 struct x86_reg data = aos_get_shader_reg( cp,
291 TGSI_FILE_OUTPUT,
292 vs_output );
293
294 if (data.file != file_XMM) {
295 struct x86_reg tmp = aos_get_xmm_reg( cp );
296 sse_movups(cp->func, tmp, data);
297 data = tmp;
298 }
299
300 if (!emit_output( cp,
301 x86_make_disp( cp->outbuf_ECX, offset ),
302 data,
303 format ))
304 return FALSE;
305
306 aos_release_xmm_reg( cp, data.idx );
307
308 cp->insn_counter++;
309 debug_printf("\n");
310 }
311
312 return TRUE;
313 }
314
315 #endif