Merge branch 'gallium-nopointsizeminmax'
[mesa.git] / src / gallium / auxiliary / translate / translate_sse.c
1 /*
2 * Copyright 2003 Tungsten Graphics, inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * Authors:
25 * Keith Whitwell <keithw@tungstengraphics.com>
26 */
27
28
29 #include "pipe/p_config.h"
30 #include "pipe/p_compiler.h"
31 #include "util/u_memory.h"
32 #include "util/u_math.h"
33
34 #include "translate.h"
35
36
37 #if defined(PIPE_ARCH_X86)
38
39 #include "rtasm/rtasm_cpu.h"
40 #include "rtasm/rtasm_x86sse.h"
41
42
43 #define X 0
44 #define Y 1
45 #define Z 2
46 #define W 3
47
48
49 typedef void (PIPE_CDECL *run_func)( struct translate *translate,
50 unsigned start,
51 unsigned count,
52 unsigned instance_id,
53 void *output_buffer);
54
55 typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate,
56 const unsigned *elts,
57 unsigned count,
58 unsigned instance_id,
59 void *output_buffer);
60
61 struct translate_buffer {
62 const void *base_ptr;
63 unsigned stride;
64 };
65
66 struct translate_buffer_varient {
67 unsigned buffer_index;
68 unsigned instance_divisor;
69 void *ptr; /* updated either per vertex or per instance */
70 };
71
72
73 #define ELEMENT_BUFFER_INSTANCE_ID 1001
74
75
76 struct translate_sse {
77 struct translate translate;
78
79 struct x86_function linear_func;
80 struct x86_function elt_func;
81 struct x86_function *func;
82
83 boolean loaded_identity;
84 boolean loaded_255;
85 boolean loaded_inv_255;
86
87 float identity[4];
88 float float_255[4];
89 float inv_255[4];
90
91 struct translate_buffer buffer[PIPE_MAX_ATTRIBS];
92 unsigned nr_buffers;
93
94 /* Multiple buffer varients can map to a single buffer. */
95 struct translate_buffer_varient buffer_varient[PIPE_MAX_ATTRIBS];
96 unsigned nr_buffer_varients;
97
98 /* Multiple elements can map to a single buffer varient. */
99 unsigned element_to_buffer_varient[PIPE_MAX_ATTRIBS];
100
101 boolean use_instancing;
102 unsigned instance_id;
103
104 run_func gen_run;
105 run_elts_func gen_run_elts;
106
107 /* these are actually known values, but putting them in a struct
108 * like this is helpful to keep them in sync across the file.
109 */
110 struct x86_reg tmp_EAX;
111 struct x86_reg idx_EBX; /* either start+i or &elt[i] */
112 struct x86_reg outbuf_ECX;
113 struct x86_reg machine_EDX;
114 struct x86_reg count_ESI; /* decrements to zero */
115 };
116
/* Byte distance from struct address 'a' to member address 'b'.
 * Used to build EDX-relative displacements into struct translate_sse
 * so the generated code can address its own machine state.
 */
static int get_offset( const void *a, const void *b )
{
   const char *base = (const char *) a;
   const char *member = (const char *) b;

   return member - base;
}
121
122
123
124 static struct x86_reg get_identity( struct translate_sse *p )
125 {
126 struct x86_reg reg = x86_make_reg(file_XMM, 6);
127
128 if (!p->loaded_identity) {
129 p->loaded_identity = TRUE;
130 p->identity[0] = 0;
131 p->identity[1] = 0;
132 p->identity[2] = 0;
133 p->identity[3] = 1;
134
135 sse_movups(p->func, reg,
136 x86_make_disp(p->machine_EDX,
137 get_offset(p, &p->identity[0])));
138 }
139
140 return reg;
141 }
142
143 static struct x86_reg get_255( struct translate_sse *p )
144 {
145 struct x86_reg reg = x86_make_reg(file_XMM, 7);
146
147 if (!p->loaded_255) {
148 p->loaded_255 = TRUE;
149 p->float_255[0] =
150 p->float_255[1] =
151 p->float_255[2] =
152 p->float_255[3] = 255.0f;
153
154 sse_movups(p->func, reg,
155 x86_make_disp(p->machine_EDX,
156 get_offset(p, &p->float_255[0])));
157 }
158
159 return reg;
160 }
161
162 static struct x86_reg get_inv_255( struct translate_sse *p )
163 {
164 struct x86_reg reg = x86_make_reg(file_XMM, 5);
165
166 if (!p->loaded_inv_255) {
167 p->loaded_inv_255 = TRUE;
168 p->inv_255[0] =
169 p->inv_255[1] =
170 p->inv_255[2] =
171 p->inv_255[3] = 1.0f / 255.0f;
172
173 sse_movups(p->func, reg,
174 x86_make_disp(p->machine_EDX,
175 get_offset(p, &p->inv_255[0])));
176 }
177
178 return reg;
179 }
180
181
/* Load a full 4-float attribute straight into an XMM register
 * (unaligned load, source may have arbitrary alignment).
 */
static void emit_load_R32G32B32A32( struct translate_sse *p,
                                    struct x86_reg data,
                                    struct x86_reg arg0 )
{
   sse_movups(p->func, data, arg0);
}
188
/* Load a 3-float attribute and synthesize w = 1 in the fourth slot. */
static void emit_load_R32G32B32( struct translate_sse *p,
                                 struct x86_reg data,
                                 struct x86_reg arg0 )
{
   /* Have to jump through some hoops:
    *
    * c 0 0 0      movss of the third source float
    * c 0 0 1      shufps pulls w=1 from the identity register
    * 0 0 c 1      rotate c into the z slot
    * a b c 1      movlps overwrites the low two slots from memory
    */
   sse_movss(p->func, data, x86_make_disp(arg0, 8));
   sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) );
   sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) );
   sse_movlps(p->func, data, arg0);
}
205
/* Load a 2-float attribute and fill the upper slots with (0, 1). */
static void emit_load_R32G32( struct translate_sse *p,
                              struct x86_reg data,
                              struct x86_reg arg0 )
{
   /* 0 0 0 1     copy the whole identity constant
    * a b 0 1     movlps overwrites the low two slots from memory
    */
   sse_movups(p->func, data, get_identity(p) );
   sse_movlps(p->func, data, arg0);
}
216
217
/* Load a single float and expand to (a, 0, 0, 1).
 * The OR with the identity (0,0,0,1) is a no-op on the low three
 * lanes (identity x/y/z are all-zero bit patterns) and deposits the
 * 1.0f bit pattern into w.
 */
static void emit_load_R32( struct translate_sse *p,
                           struct x86_reg data,
                           struct x86_reg arg0 )
{
   /* a 0 0 0
    * a 0 0 1
    */
   sse_movss(p->func, data, arg0);
   sse_orps(p->func, data, get_identity(p) );
}
228
229
/* Load 4 unsigned bytes and convert to 4 normalized floats in [0,1].
 *
 * The identity register's low eight bytes are all zero (they hold the
 * float values 0.0f, 0.0f), so interleaving against it with punpcklbw
 * zero-extends: first bytes -> words, then words -> dwords.
 */
static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p,
                                      struct x86_reg data,
                                      struct x86_reg src )
{

   /* Load and unpack twice (bytes -> words -> dwords):
    */
   sse_movss(p->func, data, src);
   sse2_punpcklbw(p->func, data, get_identity(p));
   sse2_punpcklbw(p->func, data, get_identity(p));

   /* Convert to float:
    */
   sse2_cvtdq2ps(p->func, data, data);


   /* Scale by 1/255.0 to normalize into [0,1]:
    */
   sse_mulps(p->func, data, get_inv_255(p));
}
250
251
252
253
/* Store all 4 floats (unaligned store, destination may have
 * arbitrary alignment).
 */
static void emit_store_R32G32B32A32( struct translate_sse *p,
                                     struct x86_reg dest,
                                     struct x86_reg dataXMM )
{
   sse_movups(p->func, dest, dataXMM);
}
260
/* Store 3 floats: low two via movlps, then z via movss after a
 * broadcast shuffle.  Clobbers dataXMM (caller's register), which is
 * fine because the store is the last use of the value.
 */
static void emit_store_R32G32B32( struct translate_sse *p,
                                  struct x86_reg dest,
                                  struct x86_reg dataXMM )
{
   /* Emit two, shuffle, emit one.
    */
   sse_movlps(p->func, dest, dataXMM);
   sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */
   sse_movss(p->func, x86_make_disp(dest,8), dataXMM);
}
271
/* Store the low 2 floats only. */
static void emit_store_R32G32( struct translate_sse *p,
                               struct x86_reg dest,
                               struct x86_reg dataXMM )
{
   sse_movlps(p->func, dest, dataXMM);
}
278
/* Store the low float only. */
static void emit_store_R32( struct translate_sse *p,
                            struct x86_reg dest,
                            struct x86_reg dataXMM )
{
   sse_movss(p->func, dest, dataXMM);
}
285
286
287
/* Convert 4 floats in [0,1] to 4 unsigned normalized bytes and store
 * them as a single dword.  Clobbers dataXMM (scale and pack are done
 * in place); the value is dead after the store.
 */
static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p,
                                       struct x86_reg dest,
                                       struct x86_reg dataXMM )
{
   /* Scale by 255.0
    */
   sse_mulps(p->func, dataXMM, get_255(p));

   /* Pack and emit: float -> dword -> signed word -> unsigned byte,
    * the saturating packs clamp out-of-range values for free.
    */
   sse2_cvtps2dq(p->func, dataXMM, dataXMM);
   sse2_packssdw(p->func, dataXMM, dataXMM);
   sse2_packuswb(p->func, dataXMM, dataXMM);
   sse_movss(p->func, dest, dataXMM);
}
303
304
305
306
307
308 /* Extended swizzles? Maybe later.
309 */
/* Permute the lanes of src into dest according to 'shuffle'
 * (a SHUF(...) immediate).  Used for the RGBA <-> BGRA channel swap.
 *
 * Extended swizzles?  Maybe later.
 */
static void emit_swizzle( struct translate_sse *p,
                          struct x86_reg dest,
                          struct x86_reg src,
                          unsigned char shuffle )
{
   sse_shufps(p->func, dest, src, shuffle);
}
317
318
/* Emit code that fetches one attribute from *srcECX in
 * a->input_format, converts it to 4 floats in XMM0, and stores it to
 * *dstEAX in a->output_format.
 *
 * Returns FALSE if either format is unsupported, which aborts the
 * whole codegen attempt (caller falls back to a non-SSE path).
 */
static boolean translate_attr( struct translate_sse *p,
                               const struct translate_element *a,
                               struct x86_reg srcECX,
                               struct x86_reg dstEAX)
{
   struct x86_reg dataXMM = x86_make_reg(file_XMM, 0);

   /* Load + normalize into XMM0 as 4 floats:
    */
   switch (a->input_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_load_R32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_load_R32G32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_load_R32G32B32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_load_R32G32B32A32(p, dataXMM, srcECX);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Same unpack as RGBA, then swap R and B into place. */
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX);
      break;
   default:
      return FALSE;
   }

   /* Convert from 4 floats and store:
    */
   switch (a->output_format) {
   case PIPE_FORMAT_R32_FLOAT:
      emit_store_R32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32_FLOAT:
      emit_store_R32G32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32_FLOAT:
      emit_store_R32G32B32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R32G32B32A32_FLOAT:
      emit_store_R32G32B32A32(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_B8G8R8A8_UNORM:
      /* Swap R and B before the byte pack. */
      emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W));
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   case PIPE_FORMAT_R8G8B8A8_UNORM:
      emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM);
      break;
   default:
      return FALSE;
   }

   return TRUE;
}
376
377
/* Emit prologue code that initializes the per-buffer-variant source
 * pointers before the vertex loop.
 *
 * In the linear case, and for any instanced variant, the first
 * attribute address is base_ptr + stride * index.  Indexed (elt)
 * variants without an instance divisor need no setup here: their
 * pointer is recomputed per element in get_buffer_ptr().
 */
static boolean init_inputs( struct translate_sse *p,
                            boolean linear )
{
   unsigned i;
   struct x86_reg instance_id = x86_make_disp(p->machine_EDX,
                                              get_offset(p, &p->instance_id));

   for (i = 0; i < p->nr_buffer_varients; i++) {
      struct translate_buffer_varient *varient = &p->buffer_varient[i];
      struct translate_buffer *buffer = &p->buffer[varient->buffer_index];

      if (linear || varient->instance_divisor) {
         struct x86_reg buf_stride   = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->stride));
         struct x86_reg buf_ptr      = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &varient->ptr));
         struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX,
                                                     get_offset(p, &buffer->base_ptr));
         struct x86_reg elt = p->idx_EBX;
         struct x86_reg tmp_EAX = p->tmp_EAX;

         /* Calculate pointer to first attrib:
          *   base_ptr + stride * index, where index depends on instance divisor
          */
         if (varient->instance_divisor) {
            /* Our index is instance ID divided by instance divisor.
             */
            x86_mov(p->func, tmp_EAX, instance_id);

            if (varient->instance_divisor != 1) {
               struct x86_reg tmp_EDX = p->machine_EDX;
               struct x86_reg tmp_ECX = p->outbuf_ECX;

               /* TODO: Add x86_shr() to rtasm and use it whenever
                *       instance divisor is power of two.
                */

               /* div clobbers EDX (machine pointer) and we need ECX
                * (output pointer) for the divisor, so save/restore both.
                */
               x86_push(p->func, tmp_EDX);
               x86_push(p->func, tmp_ECX);
               x86_xor(p->func, tmp_EDX, tmp_EDX);
               x86_mov_reg_imm(p->func, tmp_ECX, varient->instance_divisor);
               x86_div(p->func, tmp_ECX);    /* EAX = EDX:EAX / ECX */
               x86_pop(p->func, tmp_ECX);
               x86_pop(p->func, tmp_EDX);
            }
         } else {
            x86_mov(p->func, tmp_EAX, elt);
         }
         x86_imul(p->func, tmp_EAX, buf_stride);
         x86_add(p->func, tmp_EAX, buf_base_ptr);


         /* In the linear case, keep the buffer pointer instead of the
          * index number.
          */
         if (linear && p->nr_buffer_varients == 1)
            x86_mov(p->func, elt, tmp_EAX);
         else
            x86_mov(p->func, buf_ptr, tmp_EAX);
      }
   }

   return TRUE;
}
442
443
/* Emit code that yields a register (or memory operand) holding the
 * address of the current vertex's data for buffer variant 'var_idx'.
 *
 * Four cases:
 *  - instance-ID pseudo element: address of the stored instance_id;
 *  - single linear buffer: EBX already holds the walking pointer;
 *  - linear or instanced variant: reload the pointer cached by
 *    init_inputs()/incr_inputs() from the machine struct;
 *  - indexed fetch: compute base_ptr + stride * elt on the spot.
 */
static struct x86_reg get_buffer_ptr( struct translate_sse *p,
                                      boolean linear,
                                      unsigned var_idx,
                                      struct x86_reg elt )
{
   if (var_idx == ELEMENT_BUFFER_INSTANCE_ID) {
      return x86_make_disp(p->machine_EDX,
                           get_offset(p, &p->instance_id));
   }
   if (linear && p->nr_buffer_varients == 1) {
      return p->idx_EBX;
   }
   else if (linear || p->buffer_varient[var_idx].instance_divisor) {
      struct x86_reg ptr = p->tmp_EAX;
      struct x86_reg buf_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer_varient[var_idx].ptr));

      x86_mov(p->func, ptr, buf_ptr);
      return ptr;
   }
   else {
      struct x86_reg ptr = p->tmp_EAX;
      const struct translate_buffer_varient *varient = &p->buffer_varient[var_idx];

      struct x86_reg buf_stride =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].stride));

      struct x86_reg buf_base_ptr =
         x86_make_disp(p->machine_EDX,
                       get_offset(p, &p->buffer[varient->buffer_index].base_ptr));



      /* Calculate pointer to current attrib:
       */
      x86_mov(p->func, ptr, buf_stride);
      x86_imul(p->func, ptr, elt);
      x86_add(p->func, ptr, buf_base_ptr);
      return ptr;
   }
}
487
488
489
/* Emit per-iteration code that advances the input pointers to the
 * next vertex.  Instanced variants are deliberately left alone: their
 * pointer is fixed for the whole draw call.
 */
static boolean incr_inputs( struct translate_sse *p,
                            boolean linear )
{
   if (linear && p->nr_buffer_varients == 1) {
      /* Fast path: EBX itself is the walking buffer pointer. */
      struct x86_reg stride = x86_make_disp(p->machine_EDX,
                                            get_offset(p, &p->buffer[0].stride));

      if (p->buffer_varient[0].instance_divisor == 0) {
         x86_add(p->func, p->idx_EBX, stride);
         sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192));
      }
   }
   else if (linear) {
      unsigned i;

      /* Is this worthwhile??
       */
      for (i = 0; i < p->nr_buffer_varients; i++) {
         struct translate_buffer_varient *varient = &p->buffer_varient[i];
         struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX,
                                                get_offset(p, &varient->ptr));
         struct x86_reg buf_stride = x86_make_disp(p->machine_EDX,
                                                   get_offset(p, &p->buffer[varient->buffer_index].stride));

         if (varient->instance_divisor == 0) {
            /* ptr += stride, via EAX; prefetch only the first stream. */
            x86_mov(p->func, p->tmp_EAX, buf_ptr);
            x86_add(p->func, p->tmp_EAX, buf_stride);
            if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192));
            x86_mov(p->func, buf_ptr, p->tmp_EAX);
         }
      }
   }
   else {
      /* Indexed path: EBX walks the elt array, 4 bytes per element. */
      x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4));
   }

   return TRUE;
}
528
529
530 /* Build run( struct translate *machine,
531 * unsigned start,
532 * unsigned count,
533 * void *output_buffer )
534 * or
535 * run_elts( struct translate *machine,
536 * unsigned *elts,
537 * unsigned count,
538 * void *output_buffer )
539 *
540 * Lots of hardcoding
541 *
542 * EAX -- pointer to current output vertex
543 * ECX -- pointer to current attribute
544 *
545 */
/* Build run( struct translate *machine,
 *            unsigned start,
 *            unsigned count,
 *            void *output_buffer )
 * or
 *    run_elts( struct translate *machine,
 *              unsigned *elts,
 *              unsigned count,
 *              void *output_buffer )
 *
 * Lots of hardcoding
 *
 * EAX -- scratch / pointer to current attribute
 * EBX -- start+i (linear) or &elt[i] (indexed)
 * ECX -- pointer to current output vertex
 * EDX -- pointer to the translate_sse machine struct
 * ESI -- remaining vertex count, decrements to zero
 *
 * Returns FALSE if any element uses an unsupported format.
 */
static boolean build_vertex_emit( struct translate_sse *p,
                                  struct x86_function *func,
                                  boolean linear )
{
   int fixup, label;
   unsigned j;

   p->tmp_EAX     = x86_make_reg(file_REG32, reg_AX);
   p->idx_EBX     = x86_make_reg(file_REG32, reg_BX);
   p->outbuf_ECX  = x86_make_reg(file_REG32, reg_CX);
   p->machine_EDX = x86_make_reg(file_REG32, reg_DX);
   p->count_ESI   = x86_make_reg(file_REG32, reg_SI);

   p->func = func;
   /* Constants must be re-materialized in each generated function. */
   p->loaded_inv_255 = FALSE;
   p->loaded_255 = FALSE;
   p->loaded_identity = FALSE;

   x86_init_func(p->func);

   /* Push a few regs?
    */
   x86_push(p->func, p->idx_EBX);
   x86_push(p->func, p->count_ESI);

   /* Load arguments into regs:
    */
   x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1));
   x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2));
   x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3));
   x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 5));

   /* Load instance ID.
    */
   if (p->use_instancing) {
      x86_mov(p->func,
              p->tmp_EAX,
              x86_fn_arg(p->func, 4));
      x86_mov(p->func,
              x86_make_disp(p->machine_EDX, get_offset(p, &p->instance_id)),
              p->tmp_EAX);
   }

   /* Get vertex count, compare to zero
    */
   x86_xor(p->func, p->tmp_EAX, p->tmp_EAX);
   x86_cmp(p->func, p->count_ESI, p->tmp_EAX);
   fixup = x86_jcc_forward(p->func, cc_E);

   /* always load, needed or not:
    */
   init_inputs(p, linear);

   /* Note address for loop jump
    */
   label = x86_get_label(p->func);
   {
      struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX);
      int last_varient = -1;
      struct x86_reg vb;

      for (j = 0; j < p->translate.key.nr_elements; j++) {
         const struct translate_element *a = &p->translate.key.element[j];
         unsigned varient = p->element_to_buffer_varient[j];

         /* Figure out source pointer address; elements sharing a
          * variant reuse the pointer computed for the previous one.
          * (last_varient starts at -1, so vb is always set on the
          * first iteration before use.)
          */
         if (varient != last_varient) {
            last_varient = varient;
            vb = get_buffer_ptr(p, linear, varient, elt);
         }

         if (!translate_attr( p, a,
                              x86_make_disp(vb, a->input_offset),
                              x86_make_disp(p->outbuf_ECX, a->output_offset)))
            return FALSE;
      }

      /* Next output vertex:
       */
      x86_lea(p->func,
              p->outbuf_ECX,
              x86_make_disp(p->outbuf_ECX,
                            p->translate.key.output_stride));

      /* Incr index
       */
      incr_inputs( p, linear );
   }

   /* decr count, loop if not zero
    */
   x86_dec(p->func, p->count_ESI);
   x86_jcc(p->func, cc_NZ, label);

   /* Exit mmx state?
    */
   if (p->func->need_emms)
      mmx_emms(p->func);

   /* Land forward jump here:
    */
   x86_fixup_fwd_jump(p->func, fixup);

   /* Pop regs and return
    */

   x86_pop(p->func, p->count_ESI);
   x86_pop(p->func, p->idx_EBX);
   x86_ret(p->func);

   return TRUE;
}
659
660
661
662
663
664
665
666 static void translate_sse_set_buffer( struct translate *translate,
667 unsigned buf,
668 const void *ptr,
669 unsigned stride )
670 {
671 struct translate_sse *p = (struct translate_sse *)translate;
672
673 if (buf < p->nr_buffers) {
674 p->buffer[buf].base_ptr = (char *)ptr;
675 p->buffer[buf].stride = stride;
676 }
677
678 if (0) debug_printf("%s %d/%d: %p %d\n",
679 __FUNCTION__, buf,
680 p->nr_buffers,
681 ptr, stride);
682 }
683
684
685 static void translate_sse_release( struct translate *translate )
686 {
687 struct translate_sse *p = (struct translate_sse *)translate;
688
689 x86_release_func( &p->linear_func );
690 x86_release_func( &p->elt_func );
691
692 FREE(p);
693 }
694
/* Indexed entry point: trampoline into the code generated by
 * build_vertex_emit() for the elt path.
 */
static void PIPE_CDECL translate_sse_run_elts( struct translate *translate,
                                               const unsigned *elts,
                                               unsigned count,
                                               unsigned instance_id,
                                               void *output_buffer )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   p->gen_run_elts( translate,
                    elts,
                    count,
                    instance_id,
                    output_buffer);
}
709
/* Linear entry point: trampoline into the code generated by
 * build_vertex_emit() for the linear path.
 */
static void PIPE_CDECL translate_sse_run( struct translate *translate,
                                          unsigned start,
                                          unsigned count,
                                          unsigned instance_id,
                                          void *output_buffer )
{
   struct translate_sse *p = (struct translate_sse *)translate;

   p->gen_run( translate,
               start,
               count,
               instance_id,
               output_buffer);
}
724
725
/* Create an SSE2-backed translate machine for 'key', or return NULL
 * if the CPU lacks SSE/SSE2, allocation fails, or any element format
 * is unsupported by the code generator (caller then falls back to
 * the generic translate implementation).
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   struct translate_sse *p = NULL;
   unsigned i;

   if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2())
      goto fail;

   p = CALLOC_STRUCT( translate_sse );
   if (p == NULL)
      goto fail;

   p->translate.key = *key;
   p->translate.release = translate_sse_release;
   p->translate.set_buffer = translate_sse_set_buffer;
   p->translate.run_elts = translate_sse_run_elts;
   p->translate.run = translate_sse_run;

   for (i = 0; i < key->nr_elements; i++) {
      if (key->element[i].type == TRANSLATE_ELEMENT_NORMAL) {
         unsigned j;

         p->nr_buffers = MAX2(p->nr_buffers, key->element[i].input_buffer + 1);

         if (key->element[i].instance_divisor) {
            p->use_instancing = TRUE;
         }

         /*
          * Map vertex element to vertex buffer varient: elements that
          * share both input buffer and instance divisor share a
          * varient, so the generated code computes each source
          * pointer only once.
          */
         for (j = 0; j < p->nr_buffer_varients; j++) {
            if (p->buffer_varient[j].buffer_index == key->element[i].input_buffer &&
                p->buffer_varient[j].instance_divisor == key->element[i].instance_divisor) {
               break;
            }
         }
         if (j == p->nr_buffer_varients) {
            /* No match found: append a new varient. */
            p->buffer_varient[j].buffer_index = key->element[i].input_buffer;
            p->buffer_varient[j].instance_divisor = key->element[i].instance_divisor;
            p->nr_buffer_varients++;
         }
         p->element_to_buffer_varient[i] = j;
      } else {
         assert(key->element[i].type == TRANSLATE_ELEMENT_INSTANCE_ID);

         /* Sentinel: this element reads the instance ID, not a buffer. */
         p->element_to_buffer_varient[i] = ELEMENT_BUFFER_INSTANCE_ID;
      }
   }

   if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers);

   /* Generate both entry points; either failing aborts creation. */
   if (!build_vertex_emit(p, &p->linear_func, TRUE))
      goto fail;

   if (!build_vertex_emit(p, &p->elt_func, FALSE))
      goto fail;

   p->gen_run = (run_func)x86_get_func(&p->linear_func);
   if (p->gen_run == NULL)
      goto fail;

   p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func);
   if (p->gen_run_elts == NULL)
      goto fail;

   return &p->translate;

 fail:
   if (p)
      translate_sse_release( &p->translate );

   return NULL;
}
800
801
802
803 #else
804
/* Non-x86 build: SSE translate is unavailable; returning NULL makes
 * the caller fall back to the generic translate implementation.
 */
struct translate *translate_sse2_create( const struct translate_key *key )
{
   return NULL;
}
809
810 #endif