translate: add support for 8/16-bit indices
[mesa.git] / src / gallium / auxiliary / translate / translate_sse.c
1 /*
2 * Copyright 2003 Tungsten Graphics, inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Keith Whitwell <keithw@tungstengraphics.com>
26 */
27
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_compiler.h"
31 #include "util/u_memory.h"
32 #include "util/u_math.h"
33
34 #include "translate.h"
35
36
37 #if defined(PIPE_ARCH_X86)
38
39 #include "rtasm/rtasm_cpu.h"
40 #include "rtasm/rtasm_x86sse.h"
41
42
43 #define X 0
44 #define Y 1
45 #define Z 2
46 #define W 3
47
48
/* One source vertex buffer as supplied by the state tracker. */
struct translate_buffer {
   const void *base_ptr;   /* start of the buffer's vertex data */
   unsigned stride;        /* byte distance between successive vertices */
   unsigned max_index;     /* highest valid index; currently unenforced by the
                            * generated code (see TODO in init_inputs) */
};
54
/* A (buffer, instance divisor) pair.  Several vertex elements may read the
 * same buffer with different divisors, so each combination gets its own
 * varient with its own running pointer.
 */
struct translate_buffer_varient {
   unsigned buffer_index;       /* index into translate_sse::buffer[] */
   unsigned instance_divisor;   /* 0 = per-vertex, N = advance once per N instances */
   void *ptr;                   /* updated either per vertex or per instance */
};
60
61
62 #define ELEMENT_BUFFER_INSTANCE_ID 1001
63
64
struct translate_sse {
   struct translate translate;

   /* Generated machine-code entry points: one for linear (non-indexed)
    * runs, and one each for 32-, 16- and 8-bit element (index) arrays.
    */
   struct x86_function linear_func;
   struct x86_function elt_func;
   struct x86_function elt16_func;
   struct x86_function elt8_func;
   struct x86_function *func;    /* the function currently being emitted */

   /* Track whether each lazily-materialized XMM constant has already
    * been loaded into its register for the function being built.
    */
   boolean loaded_identity;
   boolean loaded_255;
   boolean loaded_inv_255;

   /* Constant data read by the generated code through displacements
    * off the machine pointer (EDX), so it must live inside this struct.
    */
   float identity[4];
   float float_255[4];
   float inv_255[4];

   struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_buffers;

   /* Multiple buffer varients can map to a single buffer. */
   struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
   unsigned nr_buffer_varients;

   /* Multiple elements can map to a single buffer varient. */
   unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];

   boolean use_instancing;   /* any element has a non-zero instance divisor */
   unsigned instance_id;     /* stashed here so generated code can read it */

   /* these are actually known values, but putting them in a struct
    * like this is helpful to keep them in sync across the file.
    */
   struct x86_reg tmp_EAX;
   struct x86_reg idx_EBX;     /* either start+i or &elt[i] */
   struct x86_reg outbuf_ECX;
   struct x86_reg machine_EDX;
   struct x86_reg count_ESI;   /* decrements to zero */
};
104
/* Byte offset of field b from base object a, used to build displacement
 * addressing off the machine pointer register.
 */
static int get_offset( const void *a, const void *b )
{
   const char *base = (const char *) a;
   const char *field = (const char *) b;

   return (int) (field - base);
}
109
110
111
112 static struct x86_reg get_identity( struct translate_sse *p )
113 {
114 struct x86_reg reg = x86_make_reg(file_XMM, 6);
115
116 if (!p->loaded_identity) {
117 p->loaded_identity = TRUE;
118 p->identity[0] = 0;
119 p->identity[1] = 0;
120 p->identity[2] = 0;
121 p->identity[3] = 1;
122
123 sse_movups(p->func, reg,
124 x86_make_disp(p->machine_EDX,
125 get_offset(p, &p->identity[0])));
126 }
127
128 return reg;
129 }
130
131 static struct x86_reg get_255( struct translate_sse *p )
132 {
133 struct x86_reg reg = x86_make_reg(file_XMM, 7);
134
135 if (!p->loaded_255) {
136 p->loaded_255 = TRUE;
137 p->float_255[0] =
138 p->float_255[1] =
139 p->float_255[2] =
140 p->float_255[3] = 255.0f;
141
142 sse_movups(p->func, reg,
143 x86_make_disp(p->machine_EDX,
144 get_offset(p, &p->float_255[0])));
145 }
146
147 return reg;
148 }
149
150 static struct x86_reg get_inv_255( struct translate_sse *p )
151 {
152 struct x86_reg reg = x86_make_reg(file_XMM, 5);
153
154 if (!p->loaded_inv_255) {
155 p->loaded_inv_255 = TRUE;
156 p->inv_255[0] =
157 p->inv_255[1] =
158 p->inv_255[2] =
159 p->inv_255[3] = 1.0f / 255.0f;
160
161 sse_movups(p->func, reg,
162 x86_make_disp(p->machine_EDX,
163 get_offset(p, &p->inv_255[0])));
164 }
165
166 return reg;
167 }
168
169
/* Load four floats straight into the XMM register (unaligned). */
static void emit_load_R32G32B32A32( struct translate_sse *p,
                                    struct x86_reg data,
                                    struct x86_reg arg0 )
{
   sse_movups(p->func, data, arg0);
}
176
/* Load three floats and synthesize W=1, without reading past the source.
 * The identity constant supplies the trailing 1.
 */
static void emit_load_R32G32B32( struct translate_sse *p,
                                 struct x86_reg data,
                                 struct x86_reg arg0 )
{
   /* Have to jump through some hoops:
    *
    * c 0 0 0
    * c 0 0 1
    * 0 0 c 1
    * a b c 1
    */
   sse_movss(p->func, data, x86_make_disp(arg0, 8));
   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
   sse_movlps(p->func, data, arg0);
}
193
/* Load two floats into the low half; upper half comes from the identity
 * constant so the result is {a, b, 0, 1}.
 */
static void emit_load_R32G32( struct translate_sse *p, 
                              struct x86_reg data,
                              struct x86_reg arg0 )
{
   /* 0 0 0 1
    * a b 0 1
    */
   sse_movups(p->func, data, get_identity(p) );
   sse_movlps(p->func, data, arg0);
}
204
205
/* Load one float; OR with identity {0,0,0,1} yields {a, 0, 0, 1}.
 * (Safe because the OR only sets bits in lanes movss zeroed.)
 */
static void emit_load_R32( struct translate_sse *p, 
                           struct x86_reg data,
                           struct x86_reg arg0 )
{
   /* a 0 0 0
    * a 0 0 1
    */
   sse_movss(p->func, data, arg0);
   sse_orps(p->func, data, get_identity(p) );
}
216
217
/* Load four unorm bytes and expand to four normalized floats in [0,1]:
 * bytes -> dwords (two unpacks), int -> float, then scale by 1/255.
 */
static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
				       struct x86_reg data,
				       struct x86_reg src )
{

   /* Load and unpack twice:
    */
   sse_movss(p->func, data, src);
   sse2_punpcklbw(p->func, data, get_identity(p));
   sse2_punpcklbw(p->func, data, get_identity(p));

   /* Convert to float:
    */
   sse2_cvtdq2ps(p->func, data, data);


   /* Scale by 1/255.0
    */
   sse_mulps(p->func, data, get_inv_255(p));
}
238
239
240
241
/* Store all four floats (unaligned). */
static void emit_store_R32G32B32A32( struct translate_sse *p,
				     struct x86_reg dest,
				     struct x86_reg dataXMM )
{
   sse_movups(p->func, dest, dataXMM);
}
248
/* Store exactly three floats: low pair, then Z alone.
 * NOTE: clobbers dataXMM via the shuffle.
 */
static void emit_store_R32G32B32( struct translate_sse *p, 
				  struct x86_reg dest,
				  struct x86_reg dataXMM )
{
   /* Emit two, shuffle, emit one.
    */
   sse_movlps(p->func, dest, dataXMM);
   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
}
259
/* Store the low two floats. */
static void emit_store_R32G32( struct translate_sse *p, 
			       struct x86_reg dest,
			       struct x86_reg dataXMM )
{
   sse_movlps(p->func, dest, dataXMM);
}
266
/* Store the single low float. */
static void emit_store_R32( struct translate_sse *p, 
			    struct x86_reg dest,
			    struct x86_reg dataXMM )
{
   sse_movss(p->func, dest, dataXMM);
}
273
274
275
/* Convert four floats in [0,1] back to four unorm bytes:
 * scale by 255, float -> int, then saturating narrow twice.
 * NOTE: clobbers dataXMM.
 */
static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
				       struct x86_reg dest,
				       struct x86_reg dataXMM )
{
   /* Scale by 255.0
    */
   sse_mulps(p->func, dataXMM, get_255(p));

   /* Pack and emit:
    */
   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
   sse2_packssdw(p->func, dataXMM, dataXMM);
   sse2_packuswb(p->func, dataXMM, dataXMM);
   sse_movss(p->func, dest, dataXMM);
}
291
292
293
294
295
296 /* Extended swizzles? Maybe later.
297 */
/* Emit a shufps.  Only a full arbitrary swizzle when dest == src, since
 * shufps takes its low two lanes from dest and high two lanes from src.
 */
static void emit_swizzle( struct translate_sse *p,
			  struct x86_reg dest,
			  struct x86_reg src,
			  unsigned char shuffle )  
{
   sse_shufps(p->func, dest, src, shuffle);
}
305
306
307 static boolean translate_attr( struct translate_sse *p,
308 const struct translate_element *a,
309 struct x86_reg srcECX,
310 struct x86_reg dstEAX)
311 {
312 struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);
313
314 switch (a->input_format) {
315 case PIPE_FORMAT_R32_FLOAT:
316 emit_load_R32(p, dataXMM, srcECX);
317 break;
318 case PIPE_FORMAT_R32G32_FLOAT:
319 emit_load_R32G32(p, dataXMM, srcECX);
320 break;
321 case PIPE_FORMAT_R32G32B32_FLOAT:
322 emit_load_R32G32B32(p, dataXMM, srcECX);
323 break;
324 case PIPE_FORMAT_R32G32B32A32_FLOAT:
325 emit_load_R32G32B32A32(p, dataXMM, srcECX);
326 break;
327 case PIPE_FORMAT_B8G8R8A8_UNORM:
328 emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
329 emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
330 break;
331 case PIPE_FORMAT_R8G8B8A8_UNORM:
332 emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
333 break;
334 default:
335 return FALSE;
336 }
337
338 switch (a->output_format) {
339 case PIPE_FORMAT_R32_FLOAT:
340 emit_store_R32(p, dstEAX, dataXMM);
341 break;
342 case PIPE_FORMAT_R32G32_FLOAT:
343 emit_store_R32G32(p, dstEAX, dataXMM);
344 break;
345 case PIPE_FORMAT_R32G32B32_FLOAT:
346 emit_store_R32G32B32(p, dstEAX, dataXMM);
347 break;
348 case PIPE_FORMAT_R32G32B32A32_FLOAT:
349 emit_store_R32G32B32A32(p, dstEAX, dataXMM);
350 break;
351 case PIPE_FORMAT_B8G8R8A8_UNORM:
352 emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
353 emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
354 break;
355 case PIPE_FORMAT_R8G8B8A8_UNORM:
356 emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
357 break;
358 default:
359 return FALSE;
360 }
361
362 return TRUE;
363 }
364
365
/* Emit the per-run/per-instance setup code: compute the starting source
 * pointer for every buffer varient that is walked linearly (no index
 * buffer) or advanced per-instance, and stash it in varient->ptr.
 * Indexed, non-instanced varients need no setup: their pointers are
 * recomputed from the element value each iteration in get_buffer_ptr().
 */
static boolean init_inputs( struct translate_sse *p,
                            unsigned index_size )
{
   unsigned i;
   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
                                              get_offset(p, &p->instance_id));

   for (i = 0; i < p->nr_buffer_varients; i++) {
      struct translate_buffer_varient *varient = &p->buffer_varient[i];
      struct translate_buffer *buffer = &p->buffer[varient->buffer_index];

      if (!index_size || varient->instance_divisor) {
         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->stride));
         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &varient->ptr));
         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->base_ptr));
         struct x86_reg elt = p->idx_EBX;
         struct x86_reg tmp_EAX = p->tmp_EAX;

         /* Calculate pointer to first attrib:
          *   base_ptr + stride * index, where index depends on instance divisor
          */
         if (varient->instance_divisor) {
            /* Our index is instance ID divided by instance divisor.
             */
            x86_mov(p->func, tmp_EAX, instance_id);

            if (varient->instance_divisor != 1) {
               struct x86_reg tmp_EDX = p->machine_EDX;
               struct x86_reg tmp_ECX = p->outbuf_ECX;

               /* TODO: Add x86_shr() to rtasm and use it whenever
                *       instance divisor is power of two.
                */

               /* EDX and ECX are live (machine ptr / outbuf), so save
                * them around the unsigned divide, which uses EDX:EAX.
                */
               x86_push(p->func, tmp_EDX);
               x86_push(p->func, tmp_ECX);
               x86_xor(p->func, tmp_EDX, tmp_EDX);    /* zero-extend dividend */
               x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
               x86_pop(p->func, tmp_ECX);
               x86_pop(p->func, tmp_EDX);
            }
         } else {
            /* Linear walk: index is simply the start value in EBX. */
            x86_mov(p->func, tmp_EAX, elt);
         }

         /*
          * TODO: Respect translate_buffer::max_index.
          */

         x86_imul(p->func, tmp_EAX, buf_stride);
         x86_add(p->func, tmp_EAX, buf_base_ptr);


         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (!index_size && p->nr_buffer_varients == 1)
            x86_mov(p->func, elt, tmp_EAX);
         else
            x86_mov(p->func, buf_ptr, tmp_EAX);
      }
   }

   return TRUE;
}
435
436
/* Return an x86_reg addressing the current vertex's data for buffer
 * varient var_idx.  Depending on the mode this is:
 *  - a displacement onto instance_id (INSTANCE_ID pseudo-element),
 *  - EBX itself (single linear buffer, pointer kept in the index reg),
 *  - the precomputed varient->ptr (linear multi-buffer or instanced), or
 *  - base_ptr + stride * elt recomputed into EAX (indexed path), with
 *    the element value zero-extended from 8/16 bits as needed.
 */
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                      unsigned index_size,
                                      unsigned var_idx,
                                      struct x86_reg elt )
{
   if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
      return x86_make_disp(p->machine_EDX,
                           get_offset(p, &p->instance_id));
   }
   if (!index_size && p->nr_buffer_varients == 1) {
      /* Fast path: EBX already holds the buffer pointer (see init_inputs). */
      return p->idx_EBX;
   }
   else if (!index_size || p->buffer_varient[var_idx].instance_divisor) {
      /* Pointer was precomputed per run / per instance; just load it. */
      struct x86_reg ptr = p->tmp_EAX;
      struct x86_reg buf_ptr = 
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer_varient[var_idx].ptr));

      x86_mov(p->func, ptr, buf_ptr);
      return ptr;
   }
   else {
      struct x86_reg ptr = p->tmp_EAX;
      const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];

      struct x86_reg buf_stride = 
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].stride));

      struct x86_reg buf_base_ptr = 
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].base_ptr));



      /* Calculate pointer to current attrib:
       */
      switch(index_size)
      {
      case 1:
         /* 8-bit elements: zero-extend the byte index. */
         x86_movzx8(p->func, ptr, elt);
         break;
      case 2:
         /* 16-bit elements: zero-extend the word index. */
         x86_movzx16(p->func, ptr, elt);
         break;
      case 4:
         x86_mov(p->func, ptr, elt);
         break;
      }
      x86_imul(p->func, ptr, buf_stride);
      x86_add(p->func, ptr, buf_base_ptr);
      return ptr;
   }
}
491
492
493
/* Emit the per-vertex pointer/index increment at the bottom of the loop:
 * advance the single linear pointer in EBX, or each per-vertex varient
 * pointer, or step the element pointer by the index size.  Instanced
 * varients are never advanced here (they change per instance, not per
 * vertex).
 */
static boolean incr_inputs( struct translate_sse *p, 
                            unsigned index_size )
{
   if (!index_size && p->nr_buffer_varients == 1) {
      struct x86_reg stride = x86_make_disp(p->machine_EDX,
                                            get_offset(p, &p->buffer[0].stride));

      if (p->buffer_varient[0].instance_divisor == 0) {
         x86_add(p->func, p->idx_EBX, stride);
         /* Prefetch a couple of cache lines ahead of the read position. */
         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
      }
   }
   else if (!index_size) {
      unsigned i;

      /* Is this worthwhile??
       */
      for (i = 0; i < p->nr_buffer_varients; i++) {
         struct translate_buffer_varient *varient = &p->buffer_varient[i];
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
                                                get_offset(p, &varient->ptr));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
                                                   get_offset(p, &p->buffer[varient->buffer_index].stride));

         if (varient->instance_divisor == 0) {
            x86_mov(p->func, p->tmp_EAX, buf_ptr);
            x86_add(p->func, p->tmp_EAX, buf_stride);
            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
            x86_mov(p->func, buf_ptr, p->tmp_EAX);
         }
      }
   } 
   else {
      /* Indexed path: step EBX to the next element of index_size bytes. */
      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, index_size));
   }
   
   return TRUE;
}
532
533
534 /* Build run( struct translate *machine,
535 * unsigned start,
536 * unsigned count,
537 * void *output_buffer )
538 * or
539 * run_elts( struct translate *machine,
540 * unsigned *elts,
541 * unsigned count,
542 * void *output_buffer )
543 *
544 * Lots of hardcoding
545 *
546 * EAX -- pointer to current output vertex
547 * ECX -- pointer to current attribute
548 *
549 */
/* Generate one complete translate function into *func.
 * index_size selects the variant: 0 = linear (arg 2 is the start index),
 * 1/2/4 = indexed with 8/16/32-bit elements (arg 2 is the element array).
 * Returns FALSE if any element uses a format translate_attr() can't handle.
 */
static boolean build_vertex_emit( struct translate_sse *p,
                                  struct x86_function *func,
                                  unsigned index_size )
{
   int fixup, label;
   unsigned j;

   p->tmp_EAX       = x86_make_reg(file_REG32, reg_AX);
   p->idx_EBX       = x86_make_reg(file_REG32, reg_BX);
   p->outbuf_ECX    = x86_make_reg(file_REG32, reg_CX);
   p->machine_EDX   = x86_make_reg(file_REG32, reg_DX);
   p->count_ESI     = x86_make_reg(file_REG32, reg_SI);

   p->func = func;
   /* Constants are per-function: reset the lazy-load flags. */
   p->loaded_inv_255 = FALSE;
   p->loaded_255 = FALSE;
   p->loaded_identity = FALSE;

   x86_init_func(p->func);

   /* Push a few regs?
    */
   x86_push(p->func, p->idx_EBX);
   x86_push(p->func, p->count_ESI);

   /* Load arguments into regs:
    */
   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));

   /* Load instance ID.
    */
   if (p->use_instancing) {
      x86_mov(p->func,
              p->tmp_EAX,
              x86_fn_arg(p->func, 4));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
              p->tmp_EAX);
   }

   /* Get vertex count, compare to zero
    */
   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
   fixup = x86_jcc_forward(p->func, cc_E);    /* skip loop when count == 0 */

   /* always load, needed or not:
    */
   init_inputs(p, index_size);

   /* Note address for loop jump
    */
   label = x86_get_label(p->func);
   {
      /* Linear: EBX is the index/pointer itself; indexed: dereference
       * EBX to get the current element value.
       */
      struct x86_reg elt = !index_size ? p->idx_EBX : x86_deref(p->idx_EBX);
      int last_varient = -1;
      struct x86_reg vb;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned varient = p->element_to_buffer_varient[j];

         /* Figure out source pointer address:
          */
         if (varient != last_varient) {
            /* Reuse the pointer while consecutive elements share a varient. */
            last_varient = varient;
            vb = get_buffer_ptr(p, index_size, varient, elt);
         }
         
         if (!translate_attr( p, a, 
                              x86_make_disp(vb, a->input_offset), 
                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
            return FALSE;
      }

      /* Next output vertex:
       */
      x86_lea(p->func, 
              p->outbuf_ECX, 
              x86_make_disp(p->outbuf_ECX, 
                            p->translate.key.output_stride));

      /* Incr index
       */ 
      incr_inputs( p, index_size );
   }

   /* decr count, loop if not zero
    */
   x86_dec(p->func, p->count_ESI);
   x86_jcc(p->func, cc_NZ, label);

   /* Exit mmx state?
    */
   if (p->func->need_emms)
      mmx_emms(p->func);

   /* Land forward jump here:
    */
   x86_fixup_fwd_jump(p->func, fixup);

   /* Pop regs and return
    */
   
   x86_pop(p->func, p->count_ESI);
   x86_pop(p->func, p->idx_EBX);
   x86_ret(p->func);

   return TRUE;
}
663
664
665
666
667
668
669
670 static void translate_sse_set_buffer( struct translate *translate,
671 unsigned buf,
672 const void *ptr,
673 unsigned stride,
674 unsigned max_index )
675 {
676 struct translate_sse *p = (struct translate_sse *)translate;
677
678 if (buf < p->nr_buffers) {
679 p->buffer[buf].base_ptr = (char *)ptr;
680 p->buffer[buf].stride = stride;
681 p->buffer[buf].max_index = max_index;
682 }
683
684 if (0) debug_printf("%s %d/%d: %p %d\n",
685 __FUNCTION__, buf,
686 p->nr_buffers,
687 ptr, stride);
688 }
689
690
691 static void translate_sse_release( struct translate *translate )
692 {
693 struct translate_sse *p = (struct translate_sse *)translate;
694
695 x86_release_func( &p->linear_func );
696 x86_release_func( &p->elt_func );
697
698 FREE(p);
699 }
700
701
/* Create an SSE2-backed translate object for the given key, or return
 * NULL if SSE/SSE2 is unavailable, allocation fails, or any element
 * format is unsupported (caller then falls back to the generic path).
 * Builds four code variants: linear plus 32/16/8-bit indexed.
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   struct translate_sse *p = NULL;
   unsigned i;

   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
      goto fail;

   p = CALLOC_STRUCT( translate_sse );
   if (p == NULL) 
      goto fail;

   p->translate.key = *key;
   p->translate.release = translate_sse_release;
   p->translate.set_buffer = translate_sse_set_buffer;

   for (i = 0; i < key->nr_elements; i++) {
      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
         unsigned j;

         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);

         if (key->element[i].instance_divisor) {
            p->use_instancing = TRUE;
         }

         /*
          * Map vertex element to vertex buffer varient: reuse an existing
          * (buffer, divisor) pair if one matches, else append a new one.
          */
         for (j = 0; j < p->nr_buffer_varients; j++) {
            if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
                p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
               break;
            }
         }
         if (j == p->nr_buffer_varients) {
            p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
            p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
            p->nr_buffer_varients++;
         }
         p->element_to_buffer_varient[i] = j;
      } else {
         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);

         /* Sentinel: element reads the instance ID, not a buffer. */
         p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
      }
   }

   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);

   /* Generate the four code variants (index_size 0, 4, 2, 1). */
   if (!build_vertex_emit(p, &p->linear_func, 0))
      goto fail;

   if (!build_vertex_emit(p, &p->elt_func, 4))
      goto fail;

   if (!build_vertex_emit(p, &p->elt16_func, 2))
      goto fail;

   if (!build_vertex_emit(p, &p->elt8_func, 1))
      goto fail;

   /* Resolve entry points; x86_get_func returns NULL on emit failure. */
   p->translate.run = (void*)x86_get_func(&p->linear_func);
   if (p->translate.run == NULL)
      goto fail;

   p->translate.run_elts = (void*)x86_get_func(&p->elt_func);
   if (p->translate.run_elts == NULL)
      goto fail;

   p->translate.run_elts16 = (void*)x86_get_func(&p->elt16_func);
   if (p->translate.run_elts16 == NULL)
      goto fail;

   p->translate.run_elts8 = (void*)x86_get_func(&p->elt8_func);
   if (p->translate.run_elts8 == NULL)
      goto fail;

   return &p->translate;

 fail:
   /* release frees whatever was successfully built plus p itself. */
   if (p)
      translate_sse_release( &p->translate );

   return NULL;
}
788
789
790
791 #else
792
/* Non-x86 build: SSE2 codegen is unavailable; returning NULL makes the
 * caller fall back to the generic translate implementation.
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   return NULL;
}
797
798 #endif