Implement instanced indexed draw.
[mesa.git] / src / gallium / auxiliary / translate / translate_sse.c
1 /*
2 * Copyright 2003 Tungsten Graphics, inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Keith Whitwell <keithw@tungstengraphics.com>
26 */
27
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_compiler.h"
31 #include "util/u_memory.h"
32 #include "util/u_math.h"
33
34 #include "translate.h"
35
36
37 #if defined(PIPE_ARCH_X86)
38
39 #include "rtasm/rtasm_cpu.h"
40 #include "rtasm/rtasm_x86sse.h"
41
42
43 #define X 0
44 #define Y 1
45 #define Z 2
46 #define W 3
47
48
/* Signature of the generated (JIT-compiled) function that translates a
 * linear run of vertices [start, start+count).
 */
typedef void (PIPE_CDECL *run_func)( struct translate *translate,
                                     unsigned start,
                                     unsigned count,
                                     unsigned instance_id,
                                     void *output_buffer );

/* Signature of the generated function that translates the vertices named
 * by the elts[] index array.
 */
typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
                                          const unsigned *elts,
                                          unsigned count,
                                          unsigned instance_id,
                                          void *output_buffer );
60
/* One source vertex buffer: base address plus per-vertex stride in bytes. */
struct translate_buffer {
   const void *base_ptr;
   unsigned stride;
};

/* A (buffer, instance divisor) pair.  Several varients may reference the
 * same buffer with different divisors.  (The "varient" spelling is the
 * convention used throughout this file.)
 */
struct translate_buffer_varient {
   unsigned buffer_index;
   unsigned instance_divisor;
   void *ptr;                    /* updated either per vertex or per instance */
};
71
72
/* Private translate subclass: holds the two generated functions plus all
 * state the generated code reads at runtime, addressed relative to the
 * struct pointer the generated prologue keeps in EDX.
 */
struct translate_sse {
   struct translate translate;

   struct x86_function linear_func;
   struct x86_function elt_func;
   struct x86_function *func;         /* the function currently being built */

   /* Lazy-load flags for the XMM constants below; reset at the start of
    * each build_vertex_emit() so each generated function loads them at
    * most once.
    */
   boolean loaded_identity;
   boolean loaded_255;
   boolean loaded_inv_255;

   /* Constants referenced by the generated code.  They live inside this
    * struct so they can be addressed with a displacement off EDX.
    */
   float identity[4];
   float float_255[4];
   float inv_255[4];

   struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
   unsigned nr_buffers;

   /* Multiple buffer varients can map to a single buffer. */
   struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
   unsigned nr_buffer_varients;

   /* Multiple elements can map to a single buffer varient. */
   unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];

   boolean use_instancing;            /* any element with a nonzero divisor? */
   unsigned instance_id;              /* stored here by the generated prologue */

   run_func gen_run;
   run_elts_func gen_run_elts;

   /* these are actually known values, but putting them in a struct
    * like this is helpful to keep them in sync across the file.
    */
   struct x86_reg tmp_EAX;
   struct x86_reg idx_EBX;            /* either start+i or &elt[i] */
   struct x86_reg outbuf_ECX;
   struct x86_reg machine_EDX;
   struct x86_reg count_ESI;          /* decrements to zero */
};
113
/* Byte distance from a to b.  Used to compute member offsets within the
 * translate_sse struct for EDX-relative addressing in generated code.
 */
static int get_offset( const void *a, const void *b )
{
   const char *base = (const char *) a;
   const char *member = (const char *) b;
   return (int) (member - base);
}
118
119
120
/* Return XMM6 holding the constant { 0, 0, 0, 1 }.
 *
 * Loaded lazily: the first call per generated function stores the
 * constant into p->identity[] (at codegen time) and emits a single
 * EDX-relative movups; subsequent calls just reuse the register.
 */
static struct x86_reg get_identity( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 6);

   if (!p->loaded_identity) {
      p->loaded_identity = TRUE;
      p->identity[0] = 0;
      p->identity[1] = 0;
      p->identity[2] = 0;
      p->identity[3] = 1;

      sse_movups(p->func, reg,
                 x86_make_disp(p->machine_EDX,
                               get_offset(p, &p->identity[0])));
   }

   return reg;
}
139
/* Return XMM7 holding { 255, 255, 255, 255 }, for scaling normalized
 * bytes on store.  Lazily loaded, same scheme as get_identity().
 */
static struct x86_reg get_255( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 7);

   if (!p->loaded_255) {
      p->loaded_255 = TRUE;
      p->float_255[0] =
         p->float_255[1] =
         p->float_255[2] =
         p->float_255[3] = 255.0f;

      sse_movups(p->func, reg,
                 x86_make_disp(p->machine_EDX,
                               get_offset(p, &p->float_255[0])));
   }

   return reg;
}
158
/* Return XMM5 holding { 1/255, 1/255, 1/255, 1/255 }, for un-scaling
 * normalized bytes on load.  Lazily loaded, same scheme as get_identity().
 */
static struct x86_reg get_inv_255( struct translate_sse *p )
{
   struct x86_reg reg = x86_make_reg(file_XMM, 5);

   if (!p->loaded_inv_255) {
      p->loaded_inv_255 = TRUE;
      p->inv_255[0] =
         p->inv_255[1] =
         p->inv_255[2] =
         p->inv_255[3] = 1.0f / 255.0f;

      sse_movups(p->func, reg,
                 x86_make_disp(p->machine_EDX,
                               get_offset(p, &p->inv_255[0])));
   }

   return reg;
}
177
178
/* Load four floats from arg0 into data with a single (unaligned-safe)
 * movups.
 */
static void emit_load_R32G32B32A32( struct translate_sse *p,
                                    struct x86_reg data,
                                    struct x86_reg arg0 )
{
   sse_movups(p->func, data, arg0);
}
185
/* Load three floats {a,b,c} and synthesize W=1, producing {a,b,c,1}.
 * The source is only 12 bytes, so a straight movups could over-read.
 */
static void emit_load_R32G32B32( struct translate_sse *p,
                                 struct x86_reg data,
                                 struct x86_reg arg0 )
{
   /* Have to jump through some hoops:
    *
    * c 0 0 0      movss from src+8
    * c 0 0 1      shufps with identity pulls in W=1
    * 0 0 c 1      rotate c into the Z slot
    * a b c 1      movlps fills the low two floats from src
    */
   sse_movss(p->func, data, x86_make_disp(arg0, 8));
   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
   sse_movlps(p->func, data, arg0);
}
202
/* Load two floats {a,b} and fill the rest from identity, producing
 * {a,b,0,1}.
 */
static void emit_load_R32G32( struct translate_sse *p,
                              struct x86_reg data,
                              struct x86_reg arg0 )
{
   /* 0 0 0 1      start from the identity constant
    * a b 0 1      movlps overwrites the low two floats
    */
   sse_movups(p->func, data, get_identity(p) );
   sse_movlps(p->func, data, arg0);
}
213
214
/* Load one float {a}, producing {a,0,0,1}.  The bitwise OR with identity
 * ({0,0,0,1.0f}) leaves a untouched and sets the W bit pattern to 1.0f.
 */
static void emit_load_R32( struct translate_sse *p,
                           struct x86_reg data,
                           struct x86_reg arg0 )
{
   /* a 0 0 0
    * a 0 0 1
    */
   sse_movss(p->func, data, arg0);
   sse_orps(p->func, data, get_identity(p) );
}
225
226
/* Load four normalized ubytes and expand to four floats in [0,1].
 *
 * The low 8 bytes of the identity constant are all zero (two 0.0f
 * floats), so interleaving against it with punpcklbw twice zero-extends
 * each byte first to a word and then to a dword.
 */
static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
                                      struct x86_reg data,
                                      struct x86_reg src )
{

   /* Load and unpack twice:
    */
   sse_movss(p->func, data, src);
   sse2_punpcklbw(p->func, data, get_identity(p));
   sse2_punpcklbw(p->func, data, get_identity(p));

   /* Convert to float:
    */
   sse2_cvtdq2ps(p->func, data, data);


   /* Scale by 1/255.0
    */
   sse_mulps(p->func, data, get_inv_255(p));
}
247
248
249
250
/* Store all four floats with a single (unaligned-safe) movups. */
static void emit_store_R32G32B32A32( struct translate_sse *p,
                                     struct x86_reg dest,
                                     struct x86_reg dataXMM )
{
   sse_movups(p->func, dest, dataXMM);
}
257
/* Store the low three floats (12 bytes) without writing past the end. */
static void emit_store_R32G32B32( struct translate_sse *p,
                                  struct x86_reg dest,
                                  struct x86_reg dataXMM )
{
   /* Emit two, shuffle, emit one.
    */
   sse_movlps(p->func, dest, dataXMM);
   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
}
268
/* Store the low two floats (8 bytes). */
static void emit_store_R32G32( struct translate_sse *p,
                               struct x86_reg dest,
                               struct x86_reg dataXMM )
{
   sse_movlps(p->func, dest, dataXMM);
}
275
/* Store the low float (4 bytes). */
static void emit_store_R32( struct translate_sse *p,
                            struct x86_reg dest,
                            struct x86_reg dataXMM )
{
   sse_movss(p->func, dest, dataXMM);
}
282
283
284
/* Convert four floats in [0,1] to normalized ubytes and store 4 bytes.
 * NOTE: destructive -- dataXMM is clobbered by the scale and packs.
 */
static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
                                       struct x86_reg dest,
                                       struct x86_reg dataXMM )
{
   /* Scale by 255.0
    */
   sse_mulps(p->func, dataXMM, get_255(p));

   /* Pack and emit:
    * cvtps2dq (float->int32), then narrow dword->word->byte and store
    * the low 4 bytes via movss.
    */
   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
   sse2_packssdw(p->func, dataXMM, dataXMM);
   sse2_packuswb(p->func, dataXMM, dataXMM);
   sse_movss(p->func, dest, dataXMM);
}
300
301
302
303
304
/* Extended swizzles?  Maybe later.
 */
/* Emit a shufps applying the given 2-bit-per-component shuffle. */
static void emit_swizzle( struct translate_sse *p,
                          struct x86_reg dest,
                          struct x86_reg src,
                          unsigned char shuffle )
{
   sse_shufps(p->func, dest, src, shuffle);
}
314
315
/* Emit code that fetches one vertex element from srcECX, converts it to
 * four floats in XMM0, then stores it at dstEAX in the requested output
 * format.  Returns FALSE if either format is unsupported by this backend.
 */
static boolean translate_attr( struct translate_sse *p,
                               const struct translate_element *a,
                               struct x86_reg srcECX,
                               struct x86_reg dstEAX)
{
   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);

   switch (a->input_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Load as RGBA, then swap X/Z to undo the BGRA byte order. */
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      break;
   default:
      return FALSE;
   }

   switch (a->output_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_store_R32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_store_R32G32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_store_R32G32B32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_store_R32G32B32A32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Swap X/Z back into BGRA order before packing. */
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   default:
      return FALSE;
   }

   return TRUE;
}
373
374
/* Emit code that runs once, before the vertex loop, to set up the input
 * pointer for each buffer varient.
 *
 * A starting address (base_ptr + stride * index) can be precomputed for
 * any varient whose index is known up front: every varient on the linear
 * path, and instanced varients (divisor != 0) on either path.  The index
 * is the start value in EBX for per-vertex data, or
 * instance_id / instance_divisor for per-instance data.  Indexed
 * per-vertex varients are skipped here; get_buffer_ptr() recomputes
 * their address from the element list on every iteration.
 */
static boolean init_inputs( struct translate_sse *p,
                            boolean linear )
{
   unsigned i;
   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
                                              get_offset(p, &p->instance_id));

   for (i = 0; i < p->nr_buffer_varients; i++) {
      struct translate_buffer_varient *varient = &p->buffer_varient[i];
      struct translate_buffer *buffer = &p->buffer[varient->buffer_index];

      if (linear || varient->instance_divisor) {
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
                                                   get_offset(p, &buffer->stride));
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
                                                get_offset(p, &varient->ptr));
         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->base_ptr));
         struct x86_reg elt = p->idx_EBX;
         struct x86_reg tmp_EAX = p->tmp_EAX;

         /* Calculate pointer to first attrib:
          * base_ptr + stride * index, where index depends on instance divisor
          */
         if (varient->instance_divisor) {
            /* Our index is instance ID divided by instance divisor.
             */
            x86_mov(p->func, tmp_EAX, instance_id);

            if (varient->instance_divisor != 1) {
               struct x86_reg tmp_EDX = p->machine_EDX;
               struct x86_reg tmp_ECX = p->outbuf_ECX;

               /* TODO: Add x86_shr() to rtasm and use it whenever
                * instance divisor is power of two.
                */

               /* x86 div computes EDX:EAX / ECX, so save EDX (machine
                * pointer) and ECX (output pointer) around it.
                */
               x86_push(p->func, tmp_EDX);
               x86_push(p->func, tmp_ECX);
               x86_xor(p->func, tmp_EDX, tmp_EDX);
               x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
               x86_pop(p->func, tmp_ECX);
               x86_pop(p->func, tmp_EDX);
            }
         } else {
            x86_mov(p->func, tmp_EAX, elt);
         }
         x86_imul(p->func, tmp_EAX, buf_stride);
         x86_add(p->func, tmp_EAX, buf_base_ptr);


         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (linear && p->nr_buffer_varients == 1)
            x86_mov(p->func, elt, tmp_EAX);
         else
            x86_mov(p->func, buf_ptr, tmp_EAX);
      }
   }

   return TRUE;
}
439
440
/* Return a register holding the address of the current vertex in buffer
 * varient var_idx.  Three cases:
 *
 *  - single linear varient: EBX itself is the advancing pointer
 *    (see init_inputs/incr_inputs), so return it directly;
 *  - pointer precomputed by init_inputs (linear, or instanced): reload
 *    it from the machine struct into EAX;
 *  - indexed per-vertex data: compute base_ptr + stride * elt into EAX.
 */
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                      boolean linear,
                                      unsigned var_idx,
                                      struct x86_reg elt )
{
   if (linear && p->nr_buffer_varients == 1) {
      return p->idx_EBX;
   }
   else if (linear || p->buffer_varient[var_idx].instance_divisor) {
      struct x86_reg ptr = p->tmp_EAX;
      struct x86_reg buf_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer_varient[var_idx].ptr));

      x86_mov(p->func, ptr, buf_ptr);
      return ptr;
   }
   else {
      struct x86_reg ptr = p->tmp_EAX;
      const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];

      struct x86_reg buf_stride =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].stride));

      struct x86_reg buf_base_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].base_ptr));



      /* Calculate pointer to current attrib:
       */
      x86_mov(p->func, ptr, buf_stride);
      x86_imul(p->func, ptr, elt);
      x86_add(p->func, ptr, buf_base_ptr);
      return ptr;
   }
}
480
481
482
/* Emit the per-iteration advance at the bottom of the vertex loop.
 *
 * Linear single-varient: EBX holds the buffer pointer itself; bump it by
 * the stride (unless the varient is per-instance, which never advances
 * per vertex).  Linear multi-varient: bump each per-vertex pointer
 * stored in the machine struct.  Indexed: advance EBX to the next
 * element in the elts[] array.
 */
static boolean incr_inputs( struct translate_sse *p,
                            boolean linear )
{
   if (linear && p->nr_buffer_varients == 1) {
      struct x86_reg stride = x86_make_disp(p->machine_EDX,
                                            get_offset(p, &p->buffer[0].stride));

      if (p->buffer_varient[0].instance_divisor == 0) {
         x86_add(p->func, p->idx_EBX, stride);
         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
      }
   }
   else if (linear) {
      unsigned i;

      /* Is this worthwhile??
       */
      for (i = 0; i < p->nr_buffer_varients; i++) {
         struct translate_buffer_varient *varient = &p->buffer_varient[i];
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
                                                get_offset(p, &varient->ptr));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
                                                   get_offset(p, &p->buffer[varient->buffer_index].stride));

         if (varient->instance_divisor == 0) {
            x86_mov(p->func, p->tmp_EAX, buf_ptr);
            x86_add(p->func, p->tmp_EAX, buf_stride);
            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
            x86_mov(p->func, buf_ptr, p->tmp_EAX);
         }
      }
   }
   else {
      /* Indexed path: step to the next unsigned element index. */
      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
   }

   return TRUE;
}
521
522
/* Build run( struct translate *machine,
 *            unsigned start,
 *            unsigned count,
 *            unsigned instance_id,
 *            void *output_buffer )
 * or
 *    run_elts( struct translate *machine,
 *              unsigned *elts,
 *              unsigned count,
 *              unsigned instance_id,
 *              void *output_buffer )
 *
 * Lots of hardcoding.  Register assignments (see the x86_make_reg calls
 * below):
 *
 * EAX -- scratch
 * EBX -- linear index (start+i) or pointer into elts[]
 * ECX -- pointer to current output vertex
 * EDX -- pointer to the translate_sse machine struct
 * ESI -- remaining vertex count, decremented to zero
 */
static boolean build_vertex_emit( struct translate_sse *p,
                                  struct x86_function *func,
                                  boolean linear )
{
   int fixup, label;
   unsigned j;

   p->tmp_EAX = x86_make_reg(file_REG32, reg_AX);
   p->idx_EBX = x86_make_reg(file_REG32, reg_BX);
   p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX);
   p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
   p->count_ESI = x86_make_reg(file_REG32, reg_SI);

   p->func = func;
   /* Force each XMM constant to be reloaded in this function. */
   p->loaded_inv_255 = FALSE;
   p->loaded_255 = FALSE;
   p->loaded_identity = FALSE;

   x86_init_func(p->func);

   /* Push a few regs?
    */
   x86_push(p->func, p->idx_EBX);
   x86_push(p->func, p->count_ESI);

   /* Load arguments into regs:
    */
   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));

   /* Load instance ID (arg 4) and stash it in the machine struct so
    * init_inputs() can address it relative to EDX.
    */
   if (p->use_instancing) {
      x86_mov(p->func,
              p->tmp_EAX,
              x86_fn_arg(p->func, 4));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
              p->tmp_EAX);
   }

   /* Get vertex count, compare to zero
    */
   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
   fixup = x86_jcc_forward(p->func, cc_E);

   /* always load, needed or not:
    */
   init_inputs(p, linear);

   /* Note address for loop jump
    */
   label = x86_get_label(p->func);
   {
      /* elt: the linear index register itself, or a dereference of the
       * current elts[] pointer on the indexed path.
       */
      struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
      int last_varient = -1;
      struct x86_reg vb;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned varient = p->element_to_buffer_varient[j];

         /* Figure out source pointer address; consecutive elements from
          * the same varient reuse the pointer computed for the first.
          */
         if (varient != last_varient) {
            last_varient = varient;
            vb = get_buffer_ptr(p, linear, varient, elt);
         }

         if (!translate_attr( p, a,
                              x86_make_disp(vb, a->input_offset),
                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
            return FALSE;
      }

      /* Next output vertex:
       */
      x86_lea(p->func,
              p->outbuf_ECX,
              x86_make_disp(p->outbuf_ECX,
                            p->translate.key.output_stride));

      /* Incr index
       */
      incr_inputs( p, linear );
   }

   /* decr count, loop if not zero
    */
   x86_dec(p->func, p->count_ESI);
   x86_jcc(p->func, cc_NZ, label);

   /* Exit mmx state?
    */
   if (p->func->need_emms)
      mmx_emms(p->func);

   /* Land forward jump here:
    */
   x86_fixup_fwd_jump(p->func, fixup);

   /* Pop regs and return
    */

   x86_pop(p->func, p->count_ESI);
   x86_pop(p->func, p->idx_EBX);
   x86_ret(p->func);

   return TRUE;
}
652
653
654
655
656
657
658
/* translate::set_buffer vtable entry -- record the base pointer and
 * stride for vertex buffer 'buf'.  Out-of-range buffer indices are
 * silently ignored.
 */
static void translate_sse_set_buffer( struct translate *translate,
                                      unsigned buf,
                                      const void *ptr,
                                      unsigned stride )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   if (buf < p->nr_buffers) {
      p->buffer[buf].base_ptr = (char *)ptr;
      p->buffer[buf].stride = stride;
   }

   if (0) debug_printf("%s %d/%d: %p %d\n",
                       __FUNCTION__, buf,
                       p->nr_buffers,
                       ptr, stride);
}
676
677
/* translate::release vtable entry -- free both generated functions and
 * the machine struct itself.
 */
static void translate_sse_release( struct translate *translate )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   x86_release_func( &p->linear_func );
   x86_release_func( &p->elt_func );

   FREE(p);
}
687
/* translate::run_elts vtable entry -- forward to the generated indexed
 * translation function.
 */
static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
                                               const unsigned *elts,
                                               unsigned count,
                                               unsigned instance_id,
                                               void *output_buffer )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   p->gen_run_elts( translate,
                    elts,
                    count,
                    instance_id,
                    output_buffer );
}
702
/* translate::run vtable entry -- forward to the generated linear
 * translation function.
 */
static void PIPE_CDECL translate_sse_run( struct translate *translate,
                                          unsigned start,
                                          unsigned count,
                                          unsigned instance_id,
                                          void *output_buffer )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   p->gen_run( translate,
               start,
               count,
               instance_id,
               output_buffer );
}
717
718
/* Create an SSE2-based translate object for the given key, or return
 * NULL if the CPU lacks SSE/SSE2, allocation fails, an element format is
 * unsupported, or code generation fails.
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   struct translate_sse *p = NULL;
   unsigned i;

   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
      goto fail;

   p = CALLOC_STRUCT( translate_sse );
   if (p == NULL)
      goto fail;

   p->translate.key = *key;
   p->translate.release = translate_sse_release;
   p->translate.set_buffer = translate_sse_set_buffer;
   p->translate.run_elts = translate_sse_run_elts;
   p->translate.run = translate_sse_run;

   for (i = 0; i < key->nr_elements; i++) {
      unsigned j;

      p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 );

      if (key->element[i].instance_divisor) {
         p->use_instancing = TRUE;
      }

      /*
       * Map vertex element to vertex buffer varient: reuse an existing
       * varient with the same (buffer, divisor) pair, else append a new
       * one.  nr_elements is bounded by PIPE_MAX_ATTRIBS, so the varient
       * array cannot overflow.
       */
      for (j = 0; j < p->nr_buffer_varients; j++) {
         if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
             p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
            break;
         }
      }
      if (j == p->nr_buffer_varients) {
         p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
         p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
         p->nr_buffer_varients++;
      }
      p->element_to_buffer_varient[i] = j;
   }

   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);

   /* Generate both the linear and the indexed translation functions. */
   if (!build_vertex_emit(p, &p->linear_func, TRUE))
      goto fail;

   if (!build_vertex_emit(p, &p->elt_func, FALSE))
      goto fail;

   p->gen_run = (run_func)x86_get_func(&p->linear_func);
   if (p->gen_run == NULL)
      goto fail;

   p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
   if (p->gen_run_elts == NULL)
      goto fail;

   return &p->translate;

 fail:
   if (p)
      translate_sse_release( &p->translate );

   return NULL;
}
787
788
789
790 #else
791
792 struct translate *translate_sse2_create( const struct translate_key *key )
793 {
794 return NULL;
795 }
796
797 #endif