translate_sse: remove useless generated function wrappers
[mesa.git] / src/gallium/auxiliary/translate/translate_sse.c
/*
 * Copyright 2003 Tungsten Graphics, inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Keith Whitwell <keithw@tungstengraphics.com>
 */


#include "pipe/p_config.h"
#include "pipe/p_compiler.h"
#include "util/u_memory.h"
#include "util/u_math.h"

#include "translate.h"


#if defined(PIPE_ARCH_X86)

#include "rtasm/rtasm_cpu.h"
#include "rtasm/rtasm_x86sse.h"


#define X    0
#define Y    1
#define Z    2
#define W    3
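
/* A note for readers: SHUF(x,y,z,w) (from rtasm_x86sse.h) packs four
 * 2-bit lane selectors into the immediate byte that shufps expects,
 * roughly (x | y << 2 | z << 4 | w << 6).  SHUF(Z,Y,X,W) therefore
 * swaps the first and third lanes, which is how the BGRA <-> RGBA
 * swizzles below are expressed.
 */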


struct translate_buffer {
   const void *base_ptr;
   unsigned stride;
   unsigned max_index;
};

struct translate_buffer_varient {
   unsigned buffer_index;
   unsigned instance_divisor;
   void *ptr;                    /* updated either per vertex or per instance */
};


#define ELEMENT_BUFFER_INSTANCE_ID  1001


struct translate_sse {
   struct translate translate;

   struct x86_function linear_func;
   struct x86_function elt_func;
   struct x86_function *func;

   boolean loaded_identity;
   boolean loaded_255;
   boolean loaded_inv_255;

   float identity[4];
   float float_255[4];
   float inv_255[4];

   struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_buffers;

   /* Multiple buffer varients can map to a single buffer. */
   struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
   unsigned nr_buffer_varients;

   /* Multiple elements can map to a single buffer varient. */
   unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];

   boolean use_instancing;
   unsigned instance_id;

   /* these are actually known values, but putting them in a struct
    * like this is helpful to keep them in sync across the file.
    */
   struct x86_reg tmp_EAX;
   struct x86_reg idx_EBX;     /* either start+i or &elt[i] */
   struct x86_reg outbuf_ECX;
   struct x86_reg machine_EDX;
   struct x86_reg count_ESI;   /* decrements to zero */
};

static int get_offset( const void *a, const void *b )
{
   return (const char *)b - (const char *)a;
}
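
/* The generated code reaches all of the constants and runtime state
 * above as displacements from machine_EDX, which holds the address of
 * this struct translate_sse while the compiled function runs.  A call
 * such as
 *
 *    sse_movups(p->func, reg,
 *               x86_make_disp(p->machine_EDX,
 *                             get_offset(p, &p->identity[0])));
 *
 * assembles to something like "movups xmm6, [edx + OFFSET]", where
 * OFFSET is effectively offsetof(struct translate_sse, identity),
 * computed at code-generation time by pointer arithmetic on p itself.
 */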



static struct x86_reg get_identity( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 6);

   if (!p->loaded_identity) {
      p->loaded_identity = TRUE;
      p->identity[0] = 0;
      p->identity[1] = 0;
      p->identity[2] = 0;
      p->identity[3] = 1;

      sse_movups(p->func, reg,
                 x86_make_disp(p->machine_EDX,
                               get_offset(p, &p->identity[0])));
   }

   return reg;
}

static struct x86_reg get_255( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 7);

   if (!p->loaded_255) {
      p->loaded_255 = TRUE;
      p->float_255[0] =
         p->float_255[1] =
         p->float_255[2] =
         p->float_255[3] = 255.0f;

      sse_movups(p->func, reg,
                 x86_make_disp(p->machine_EDX,
                               get_offset(p, &p->float_255[0])));
   }

   return reg;
}

static struct x86_reg get_inv_255( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 5);

   if (!p->loaded_inv_255) {
      p->loaded_inv_255 = TRUE;
      p->inv_255[0] =
         p->inv_255[1] =
         p->inv_255[2] =
         p->inv_255[3] = 1.0f / 255.0f;

      sse_movups(p->func, reg,
                 x86_make_disp(p->machine_EDX,
                               get_offset(p, &p->inv_255[0])));
   }

   return reg;
}


static void emit_load_R32G32B32A32( struct translate_sse *p,
                                    struct x86_reg data,
                                    struct x86_reg arg0 )
{
   sse_movups(p->func, data, arg0);
}

static void emit_load_R32G32B32( struct translate_sse *p,
                                 struct x86_reg data,
                                 struct x86_reg arg0 )
{
   /* Have to jump through some hoops:
    *
    * c 0 0 0
    * c 0 0 1
    * 0 0 c 1
    * a b c 1
    */
   sse_movss(p->func, data, x86_make_disp(arg0, 8));
   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
   sse_movlps(p->func, data, arg0);
}

static void emit_load_R32G32( struct translate_sse *p,
                              struct x86_reg data,
                              struct x86_reg arg0 )
{
   /* 0 0 0 1
    * a b 0 1
    */
   sse_movups(p->func, data, get_identity(p) );
   sse_movlps(p->func, data, arg0);
}


static void emit_load_R32( struct translate_sse *p,
                           struct x86_reg data,
                           struct x86_reg arg0 )
{
   /* a 0 0 0
    * a 0 0 1
    */
   sse_movss(p->func, data, arg0);
   sse_orps(p->func, data, get_identity(p) );
}


static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
                                      struct x86_reg data,
                                      struct x86_reg src )
{

   /* Load and unpack twice:
    */
   sse_movss(p->func, data, src);
   sse2_punpcklbw(p->func, data, get_identity(p));
   sse2_punpcklbw(p->func, data, get_identity(p));

   /* Convert to float:
    */
   sse2_cvtdq2ps(p->func, data, data);


   /* Scale by 1/255.0
    */
   sse_mulps(p->func, data, get_inv_255(p));
}
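
/* Why get_identity() serves as the zero source above: identity holds
 * (0.0f, 0.0f, 0.0f, 1.0f), and punpcklbw reads only the low eight
 * bytes of its second operand -- the bit patterns of the two 0.0f
 * floats, i.e. all zeroes.  Interleaving the four source bytes with
 * zero bytes twice widens them u8 -> u16 -> u32:
 *
 *    bytes:  b0 b1 b2 b3
 *    once:   b0 00 b1 00 b2 00 b3 00
 *    twice:  b0 00 00 00 b1 00 00 00 ...  (four zero-extended dwords)
 *
 * after which cvtdq2ps and the 1/255 multiply yield normalized floats.
 */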



static void emit_store_R32G32B32A32( struct translate_sse *p,
                                     struct x86_reg dest,
                                     struct x86_reg dataXMM )
{
   sse_movups(p->func, dest, dataXMM);
}

static void emit_store_R32G32B32( struct translate_sse *p,
                                  struct x86_reg dest,
                                  struct x86_reg dataXMM )
{
   /* Emit two, shuffle, emit one.
    */
   sse_movlps(p->func, dest, dataXMM);
   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
}

static void emit_store_R32G32( struct translate_sse *p,
                               struct x86_reg dest,
                               struct x86_reg dataXMM )
{
   sse_movlps(p->func, dest, dataXMM);
}

static void emit_store_R32( struct translate_sse *p,
                            struct x86_reg dest,
                            struct x86_reg dataXMM )
{
   sse_movss(p->func, dest, dataXMM);
}



static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
                                       struct x86_reg dest,
                                       struct x86_reg dataXMM )
{
   /* Scale by 255.0
    */
   sse_mulps(p->func, dataXMM, get_255(p));

   /* Pack and emit:
    */
   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
   sse2_packssdw(p->func, dataXMM, dataXMM);
   sse2_packuswb(p->func, dataXMM, dataXMM);
   sse_movss(p->func, dest, dataXMM);
}
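
/* The pack sequence narrows with saturation at each step: cvtps2dq
 * rounds the scaled floats to int32, packssdw clamps int32 -> int16
 * (signed saturation), and packuswb clamps int16 -> uint8 (unsigned
 * saturation).  Out-of-range values are therefore pinned to 0 or 255
 * rather than wrapping, and the final movss writes the four resulting
 * bytes as a single dword.
 */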




/* Extended swizzles?  Maybe later.
 */
static void emit_swizzle( struct translate_sse *p,
                          struct x86_reg dest,
                          struct x86_reg src,
                          unsigned char shuffle )
{
   sse_shufps(p->func, dest, src, shuffle);
}


static boolean translate_attr( struct translate_sse *p,
                               const struct translate_element *a,
                               struct x86_reg srcECX,
                               struct x86_reg dstEAX)
{
   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);

   switch (a->input_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      break;
   default:
      return FALSE;
   }

   switch (a->output_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_store_R32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_store_R32G32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_store_R32G32B32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_store_R32G32B32A32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   default:
      return FALSE;
   }

   return TRUE;
}
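
/* For orientation, the code generated for one element is equivalent to
 * roughly the following scalar C (a hypothetical sketch, not compiled
 * into this file), here for the R8G8B8A8_UNORM ->
 * R32G32B32A32_FLOAT case:
 *
 *    const uint8_t *in = src + a->input_offset;
 *    float *out = (float *)(dst + a->output_offset);
 *    for (c = 0; c < 4; c++)
 *       out[c] = in[c] * (1.0f / 255.0f);
 *
 * The BGRA cases additionally swap lanes 0 and 2 via the ZYXW
 * shuffle, and the narrower float loads default missing components
 * to (0,0,0,1) using the identity constant.
 */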


static boolean init_inputs( struct translate_sse *p,
                            boolean linear )
{
   unsigned i;
   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
                                              get_offset(p, &p->instance_id));

   for (i = 0; i < p->nr_buffer_varients; i++) {
      struct translate_buffer_varient *varient = &p->buffer_varient[i];
      struct translate_buffer *buffer = &p->buffer[varient->buffer_index];

      if (linear || varient->instance_divisor) {
         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->stride));
         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &varient->ptr));
         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->base_ptr));
         struct x86_reg elt = p->idx_EBX;
         struct x86_reg tmp_EAX = p->tmp_EAX;

         /* Calculate pointer to first attrib:
          *   base_ptr + stride * index, where index depends on instance divisor
          */
         if (varient->instance_divisor) {
            /* Our index is instance ID divided by instance divisor.
             */
            x86_mov(p->func, tmp_EAX, instance_id);

            if (varient->instance_divisor != 1) {
               struct x86_reg tmp_EDX = p->machine_EDX;
               struct x86_reg tmp_ECX = p->outbuf_ECX;

               /* TODO: Add x86_shr() to rtasm and use it whenever
                *       instance divisor is power of two.
                */

               x86_push(p->func, tmp_EDX);
               x86_push(p->func, tmp_ECX);
               x86_xor(p->func, tmp_EDX, tmp_EDX);
               x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
               x86_pop(p->func, tmp_ECX);
               x86_pop(p->func, tmp_EDX);
            }
         } else {
            x86_mov(p->func, tmp_EAX, elt);
         }

         /*
          * TODO: Respect translate_buffer::max_index.
          */

         x86_imul(p->func, tmp_EAX, buf_stride);
         x86_add(p->func, tmp_EAX, buf_base_ptr);


         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (linear && p->nr_buffer_varients == 1)
            x86_mov(p->func, elt, tmp_EAX);
         else
            x86_mov(p->func, buf_ptr, tmp_EAX);
      }
   }

   return TRUE;
}
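
/* In C terms, the code emitted above computes, per buffer varient
 * (a sketch only; "start" is the second argument of the linear run()):
 *
 *    unsigned index = varient->instance_divisor
 *       ? instance_id / varient->instance_divisor
 *       : start;
 *    varient->ptr = (char *)buffer->base_ptr + buffer->stride * index;
 *
 * EDX and ECX are pushed around the unsigned div because div writes
 * the remainder to EDX, and the machine pointer and output pointer
 * live in those registers; both are restored immediately afterwards.
 */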


static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                      boolean linear,
                                      unsigned var_idx,
                                      struct x86_reg elt )
{
   if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
      return x86_make_disp(p->machine_EDX,
                           get_offset(p, &p->instance_id));
   }
   if (linear && p->nr_buffer_varients == 1) {
      return p->idx_EBX;
   }
   else if (linear || p->buffer_varient[var_idx].instance_divisor) {
      struct x86_reg ptr = p->tmp_EAX;
      struct x86_reg buf_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer_varient[var_idx].ptr));

      x86_mov(p->func, ptr, buf_ptr);
      return ptr;
   }
   else {
      struct x86_reg ptr = p->tmp_EAX;
      const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];

      struct x86_reg buf_stride =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].stride));

      struct x86_reg buf_base_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].base_ptr));



      /* Calculate pointer to current attrib:
       */
      x86_mov(p->func, ptr, buf_stride);
      x86_imul(p->func, ptr, elt);
      x86_add(p->func, ptr, buf_base_ptr);
      return ptr;
   }
}



static boolean incr_inputs( struct translate_sse *p,
                            boolean linear )
{
   if (linear && p->nr_buffer_varients == 1) {
      struct x86_reg stride = x86_make_disp(p->machine_EDX,
                                            get_offset(p, &p->buffer[0].stride));

      if (p->buffer_varient[0].instance_divisor == 0) {
         x86_add(p->func, p->idx_EBX, stride);
         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
      }
   }
   else if (linear) {
      unsigned i;

      /* Is this worthwhile??
       */
      for (i = 0; i < p->nr_buffer_varients; i++) {
         struct translate_buffer_varient *varient = &p->buffer_varient[i];
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
                                                get_offset(p, &varient->ptr));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
                                                   get_offset(p, &p->buffer[varient->buffer_index].stride));

         if (varient->instance_divisor == 0) {
            x86_mov(p->func, p->tmp_EAX, buf_ptr);
            x86_add(p->func, p->tmp_EAX, buf_stride);
            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
            x86_mov(p->func, buf_ptr, p->tmp_EAX);
         }
      }
   }
   else {
      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
   }

   return TRUE;
}
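
/* The prefetchnta displacement of 192 bytes fetches three cache lines
 * ahead of the vertex being read (assuming the usual 64-byte line
 * size) -- a heuristic streaming distance rather than a tuned
 * constant.  In the indexed case there is nothing to prefetch: idx_EBX
 * walks the element list, so it simply advances by
 * sizeof(unsigned) == 4 to the next index.
 */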


/* Build run( struct translate *machine,
 *            unsigned start,
 *            unsigned count,
 *            unsigned instance_id,
 *            void *output_buffer )
 * or
 *    run_elts( struct translate *machine,
 *              unsigned *elts,
 *              unsigned count,
 *              unsigned instance_id,
 *              void *output_buffer )
 *
 * Lots of hardcoding
 *
 * ECX -- pointer to current output vertex
 * EAX -- pointer to current attribute
 *
 * (translate_attr's srcECX/dstEAX parameter names are historic and no
 * longer match the registers actually used.)
 */
static boolean build_vertex_emit( struct translate_sse *p,
                                  struct x86_function *func,
                                  boolean linear )
{
   int fixup, label;
   unsigned j;

   p->tmp_EAX     = x86_make_reg(file_REG32, reg_AX);
   p->idx_EBX     = x86_make_reg(file_REG32, reg_BX);
   p->outbuf_ECX  = x86_make_reg(file_REG32, reg_CX);
   p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
   p->count_ESI   = x86_make_reg(file_REG32, reg_SI);

   p->func = func;
   p->loaded_inv_255 = FALSE;
   p->loaded_255 = FALSE;
   p->loaded_identity = FALSE;

   x86_init_func(p->func);

   /* Push a few regs?
    */
   x86_push(p->func, p->idx_EBX);
   x86_push(p->func, p->count_ESI);

   /* Load arguments into regs:
    */
   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));

   /* Load instance ID.
    */
   if (p->use_instancing) {
      x86_mov(p->func,
              p->tmp_EAX,
              x86_fn_arg(p->func, 4));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
              p->tmp_EAX);
   }

   /* Get vertex count, compare to zero
    */
   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
   fixup = x86_jcc_forward(p->func, cc_E);

   /* always load, needed or not:
    */
   init_inputs(p, linear);

   /* Note address for loop jump
    */
   label = x86_get_label(p->func);
   {
      struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
      int last_varient = -1;
      struct x86_reg vb;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned varient = p->element_to_buffer_varient[j];

         /* Figure out source pointer address:
          */
         if (varient != last_varient) {
            last_varient = varient;
            vb = get_buffer_ptr(p, linear, varient, elt);
         }

         if (!translate_attr( p, a,
                              x86_make_disp(vb, a->input_offset),
                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
            return FALSE;
      }

      /* Next output vertex:
       */
      x86_lea(p->func,
              p->outbuf_ECX,
              x86_make_disp(p->outbuf_ECX,
                            p->translate.key.output_stride));

      /* Incr index
       */
      incr_inputs( p, linear );
   }

   /* decr count, loop if not zero
    */
   x86_dec(p->func, p->count_ESI);
   x86_jcc(p->func, cc_NZ, label);

   /* Exit mmx state?
    */
   if (p->func->need_emms)
      mmx_emms(p->func);

   /* Land forward jump here:
    */
   x86_fixup_fwd_jump(p->func, fixup);

   /* Pop regs and return
    */

   x86_pop(p->func, p->count_ESI);
   x86_pop(p->func, p->idx_EBX);
   x86_ret(p->func);

   return TRUE;
}
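
/* Overall shape of the generated function, as a hypothetical C sketch
 * (linear case with a single buffer varient; not part of this file):
 *
 *    void run(struct translate *machine, unsigned start, unsigned count,
 *             unsigned instance_id, void *output_buffer)
 *    {
 *       const char *src = base_ptr + stride * start;
 *       char *dst = output_buffer;
 *       while (count--) {
 *          // for each element j:
 *          //    convert(src + input_offset[j], dst + output_offset[j]);
 *          dst += output_stride;
 *          src += stride;
 *       }
 *    }
 *
 * The indexed variant walks an element array instead, dereferencing
 * EBX for the current index and advancing it by 4 per vertex.
 */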



static void translate_sse_set_buffer( struct translate *translate,
                                      unsigned buf,
                                      const void *ptr,
                                      unsigned stride,
                                      unsigned max_index )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   if (buf < p->nr_buffers) {
      p->buffer[buf].base_ptr = (char *)ptr;
      p->buffer[buf].stride = stride;
      p->buffer[buf].max_index = max_index;
   }

   if (0) debug_printf("%s %d/%d: %p %d\n",
                       __FUNCTION__, buf,
                       p->nr_buffers,
                       ptr, stride);
}


static void translate_sse_release( struct translate *translate )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   x86_release_func( &p->linear_func );
   x86_release_func( &p->elt_func );

   FREE(p);
}


struct translate *translate_sse2_create( const struct translate_key *key )
{
   struct translate_sse *p = NULL;
   unsigned i;

   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
      goto fail;

   p = CALLOC_STRUCT( translate_sse );
   if (p == NULL)
      goto fail;

   p->translate.key = *key;
   p->translate.release = translate_sse_release;
   p->translate.set_buffer = translate_sse_set_buffer;

   for (i = 0; i < key->nr_elements; i++) {
      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
         unsigned j;

         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);

         if (key->element[i].instance_divisor) {
            p->use_instancing = TRUE;
         }

         /*
          * Map vertex element to vertex buffer varient.
          */
         for (j = 0; j < p->nr_buffer_varients; j++) {
            if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
                p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
               break;
            }
         }
         if (j == p->nr_buffer_varients) {
            p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
            p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
            p->nr_buffer_varients++;
         }
         p->element_to_buffer_varient[i] = j;
      } else {
         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);

         p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
      }
   }

   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);

   if (!build_vertex_emit(p, &p->linear_func, TRUE))
      goto fail;

   if (!build_vertex_emit(p, &p->elt_func, FALSE))
      goto fail;

   p->translate.run = (void*)x86_get_func(&p->linear_func);
   if (p->translate.run == NULL)
      goto fail;

   p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
   if (p->translate.run_elts == NULL)
      goto fail;

   return &p->translate;

 fail:
   if (p)
      translate_sse_release( &p->translate );

   return NULL;
}
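
/* Typical caller flow, sketched for orientation (field and member
 * names as used elsewhere in this file and translate.h; treat the
 * exact values as illustrative):
 *
 *    struct translate_key key;
 *    memset(&key, 0, sizeof key);
 *    key.output_stride = 16;
 *    key.nr_elements = 1;
 *    key.element[0].type = TRANSLATE_ELEMENT_NORMAL;
 *    key.element[0].input_format = PIPE_FORMAT_R8G8B8A8_UNORM;
 *    key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
 *
 *    struct translate *t = translate_sse2_create(&key);
 *    if (t) {
 *       t->set_buffer(t, 0, vertices, vertex_stride, max_index);
 *       t->run(t, 0, count, 0, out);   // start=0, instance_id=0
 *       t->release(t);
 *    }
 */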



#else

struct translate *translate_sse2_create( const struct translate_key *key )
{
   return NULL;
}

#endif