ae4033b60861012f09cfe27881b84c87657bfc83
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_swizzle.c
1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Helper functions for swizzling/shuffling.
31 *
32 * @author Jose Fonseca <jfonseca@vmware.com>
33 */
34
35 #include <inttypes.h> /* for PRIx64 macro */
36 #include "util/u_debug.h"
37
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_init.h"
41 #include "lp_bld_logic.h"
42 #include "lp_bld_swizzle.h"
43 #include "lp_bld_pack.h"
44
45
46 LLVMValueRef
47 lp_build_broadcast(struct gallivm_state *gallivm,
48 LLVMTypeRef vec_type,
49 LLVMValueRef scalar)
50 {
51 LLVMValueRef res;
52
53 if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
54 /* scalar */
55 assert(vec_type == LLVMTypeOf(scalar));
56 res = scalar;
57 } else {
58 LLVMBuilderRef builder = gallivm->builder;
59 const unsigned length = LLVMGetVectorSize(vec_type);
60 LLVMValueRef undef = LLVMGetUndef(vec_type);
61 LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
62
63 assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
64
65 if (HAVE_LLVM >= 0x207) {
66 /* The shuffle vector is always made of int32 elements */
67 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
68 res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
69 res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
70 } else {
71 /* XXX: The above path provokes a bug in LLVM 2.6 */
72 unsigned i;
73 res = undef;
74 for(i = 0; i < length; ++i) {
75 LLVMValueRef index = lp_build_const_int32(gallivm, i);
76 res = LLVMBuildInsertElement(builder, res, scalar, index, "");
77 }
78 }
79 }
80
81 return res;
82 }
83
84
85 /**
86 * Broadcast
87 */
88 LLVMValueRef
89 lp_build_broadcast_scalar(struct lp_build_context *bld,
90 LLVMValueRef scalar)
91 {
92 assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
93
94 return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
95 }
96
97
98 /**
99 * Combined extract and broadcast (mere shuffle in most cases)
100 */
101 LLVMValueRef
102 lp_build_extract_broadcast(struct gallivm_state *gallivm,
103 struct lp_type src_type,
104 struct lp_type dst_type,
105 LLVMValueRef vector,
106 LLVMValueRef index)
107 {
108 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
109 LLVMValueRef res;
110
111 assert(src_type.floating == dst_type.floating);
112 assert(src_type.width == dst_type.width);
113
114 assert(lp_check_value(src_type, vector));
115 assert(LLVMTypeOf(index) == i32t);
116
117 if (src_type.length == 1) {
118 if (dst_type.length == 1) {
119 /*
120 * Trivial scalar -> scalar.
121 */
122
123 res = vector;
124 }
125 else {
126 /*
127 * Broadcast scalar -> vector.
128 */
129
130 res = lp_build_broadcast(gallivm,
131 lp_build_vec_type(gallivm, dst_type),
132 vector);
133 }
134 }
135 else {
136 if (dst_type.length > 1) {
137 /*
138 * shuffle - result can be of different length.
139 */
140
141 LLVMValueRef shuffle;
142 shuffle = lp_build_broadcast(gallivm,
143 LLVMVectorType(i32t, dst_type.length),
144 index);
145 res = LLVMBuildShuffleVector(gallivm->builder, vector,
146 LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
147 shuffle, "");
148 }
149 else {
150 /*
151 * Trivial extract scalar from vector.
152 */
153 res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
154 }
155 }
156
157 return res;
158 }
159
160
161 /**
162 * Swizzle one channel into other channels.
163 */
164 LLVMValueRef
165 lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
166 LLVMValueRef a,
167 unsigned channel,
168 unsigned num_channels)
169 {
170 LLVMBuilderRef builder = bld->gallivm->builder;
171 const struct lp_type type = bld->type;
172 const unsigned n = type.length;
173 unsigned i, j;
174
175 if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
176 return a;
177
178 assert(num_channels == 2 || num_channels == 4);
179
180 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
181 * using shuffles here actually causes worst results. More investigation is
182 * needed. */
183 if (type.width >= 16) {
184 /*
185 * Shuffle.
186 */
187 LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
188 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
189
190 for(j = 0; j < n; j += num_channels)
191 for(i = 0; i < num_channels; ++i)
192 shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
193
194 return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
195 }
196 else if (num_channels == 2) {
197 /*
198 * Bit mask and shifts
199 *
200 * XY XY .... XY <= input
201 * 0Y 0Y .... 0Y
202 * YY YY .... YY
203 * YY YY .... YY <= output
204 */
205 struct lp_type type2;
206 LLVMValueRef tmp = NULL;
207 int shift;
208
209 a = LLVMBuildAnd(builder, a,
210 lp_build_const_mask_aos(bld->gallivm,
211 type, 1 << channel, num_channels), "");
212
213 type2 = type;
214 type2.floating = FALSE;
215 type2.width *= 2;
216 type2.length /= 2;
217
218 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
219
220 #ifdef PIPE_ARCH_LITTLE_ENDIAN
221 shift = channel == 0 ? 1 : -1;
222 #else
223 shift = channel == 0 ? -1 : 1;
224 #endif
225
226 if (shift > 0) {
227 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
228 } else if (shift < 0) {
229 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
230 }
231
232 assert(tmp);
233 if (tmp) {
234 a = LLVMBuildOr(builder, a, tmp, "");
235 }
236
237 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
238 }
239 else {
240 /*
241 * Bit mask and recursive shifts
242 *
243 * XYZW XYZW .... XYZW <= input
244 * 0Y00 0Y00 .... 0Y00
245 * YY00 YY00 .... YY00
246 * YYYY YYYY .... YYYY <= output
247 */
248 struct lp_type type4;
249 const int shifts[4][2] = {
250 { 1, 2},
251 {-1, 2},
252 { 1, -2},
253 {-1, -2}
254 };
255 unsigned i;
256
257 a = LLVMBuildAnd(builder, a,
258 lp_build_const_mask_aos(bld->gallivm,
259 type, 1 << channel, 4), "");
260
261 /*
262 * Build a type where each element is an integer that cover the four
263 * channels.
264 */
265
266 type4 = type;
267 type4.floating = FALSE;
268 type4.width *= 4;
269 type4.length /= 4;
270
271 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
272
273 for(i = 0; i < 2; ++i) {
274 LLVMValueRef tmp = NULL;
275 int shift = shifts[channel][i];
276
277 #ifdef PIPE_ARCH_LITTLE_ENDIAN
278 shift = -shift;
279 #endif
280
281 if(shift > 0)
282 tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
283 if(shift < 0)
284 tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
285
286 assert(tmp);
287 if(tmp)
288 a = LLVMBuildOr(builder, a, tmp, "");
289 }
290
291 return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
292 }
293 }
294
295
296 /**
297 * Swizzle a vector consisting of an array of XYZW structs.
298 *
299 * This fills a vector of dst_len length with the swizzled channels from src.
300 *
301 * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
302 * RGBA RGBA = BGR BGR BG
303 *
304 * @param swizzles the swizzle array
305 * @param num_swizzles the number of elements in swizzles
306 * @param dst_len the length of the result
307 */
308 LLVMValueRef
309 lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
310 LLVMValueRef src,
311 const unsigned char* swizzles,
312 unsigned num_swizzles,
313 unsigned dst_len)
314 {
315 LLVMBuilderRef builder = gallivm->builder;
316 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
317 unsigned i;
318
319 assert(dst_len < LP_MAX_VECTOR_WIDTH);
320
321 for (i = 0; i < dst_len; ++i) {
322 int swizzle = swizzles[i % num_swizzles];
323
324 if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
325 shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
326 } else {
327 shuffles[i] = lp_build_const_int32(gallivm, swizzle);
328 }
329 }
330
331 return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
332 }
333
334
335 LLVMValueRef
336 lp_build_swizzle_aos(struct lp_build_context *bld,
337 LLVMValueRef a,
338 const unsigned char swizzles[4])
339 {
340 LLVMBuilderRef builder = bld->gallivm->builder;
341 const struct lp_type type = bld->type;
342 const unsigned n = type.length;
343 unsigned i, j;
344
345 if (swizzles[0] == PIPE_SWIZZLE_RED &&
346 swizzles[1] == PIPE_SWIZZLE_GREEN &&
347 swizzles[2] == PIPE_SWIZZLE_BLUE &&
348 swizzles[3] == PIPE_SWIZZLE_ALPHA) {
349 return a;
350 }
351
352 if (swizzles[0] == swizzles[1] &&
353 swizzles[1] == swizzles[2] &&
354 swizzles[2] == swizzles[3]) {
355 switch (swizzles[0]) {
356 case PIPE_SWIZZLE_RED:
357 case PIPE_SWIZZLE_GREEN:
358 case PIPE_SWIZZLE_BLUE:
359 case PIPE_SWIZZLE_ALPHA:
360 return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
361 case PIPE_SWIZZLE_ZERO:
362 return bld->zero;
363 case PIPE_SWIZZLE_ONE:
364 return bld->one;
365 case LP_BLD_SWIZZLE_DONTCARE:
366 return bld->undef;
367 default:
368 assert(0);
369 return bld->undef;
370 }
371 }
372
373 if (type.width >= 16) {
374 /*
375 * Shuffle.
376 */
377 LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
378 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
379 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
380 LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
381
382 memset(aux, 0, sizeof aux);
383
384 for(j = 0; j < n; j += 4) {
385 for(i = 0; i < 4; ++i) {
386 unsigned shuffle;
387 switch (swizzles[i]) {
388 default:
389 assert(0);
390 /* fall through */
391 case PIPE_SWIZZLE_RED:
392 case PIPE_SWIZZLE_GREEN:
393 case PIPE_SWIZZLE_BLUE:
394 case PIPE_SWIZZLE_ALPHA:
395 shuffle = j + swizzles[i];
396 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
397 break;
398 case PIPE_SWIZZLE_ZERO:
399 shuffle = type.length + 0;
400 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
401 if (!aux[0]) {
402 aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
403 }
404 break;
405 case PIPE_SWIZZLE_ONE:
406 shuffle = type.length + 1;
407 shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
408 if (!aux[1]) {
409 aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
410 }
411 break;
412 case LP_BLD_SWIZZLE_DONTCARE:
413 shuffles[j + i] = LLVMGetUndef(i32t);
414 break;
415 }
416 }
417 }
418
419 for (i = 0; i < n; ++i) {
420 if (!aux[i]) {
421 aux[i] = undef;
422 }
423 }
424
425 return LLVMBuildShuffleVector(builder, a,
426 LLVMConstVector(aux, n),
427 LLVMConstVector(shuffles, n), "");
428 } else {
429 /*
430 * Bit mask and shifts.
431 *
432 * For example, this will convert BGRA to RGBA by doing
433 *
434 * rgba = (bgra & 0x00ff0000) >> 16
435 * | (bgra & 0xff00ff00)
436 * | (bgra & 0x000000ff) << 16
437 *
438 * This is necessary not only for faster cause, but because X86 backend
439 * will refuse shuffles of <4 x i8> vectors
440 */
441 LLVMValueRef res;
442 struct lp_type type4;
443 unsigned cond = 0;
444 unsigned chan;
445 int shift;
446
447 /*
448 * Start with a mixture of 1 and 0.
449 */
450 for (chan = 0; chan < 4; ++chan) {
451 if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
452 cond |= 1 << chan;
453 }
454 }
455 res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
456
457 /*
458 * Build a type where each element is an integer that cover the four
459 * channels.
460 */
461 type4 = type;
462 type4.floating = FALSE;
463 type4.width *= 4;
464 type4.length /= 4;
465
466 a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
467 res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
468
469 /*
470 * Mask and shift the channels, trying to group as many channels in the
471 * same shift as possible
472 */
473 for (shift = -3; shift <= 3; ++shift) {
474 uint64_t mask = 0;
475
476 assert(type4.width <= sizeof(mask)*8);
477
478 for (chan = 0; chan < 4; ++chan) {
479 /* FIXME: big endian */
480 if (swizzles[chan] < 4 &&
481 chan - swizzles[chan] == shift) {
482 mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
483 }
484 }
485
486 if (mask) {
487 LLVMValueRef masked;
488 LLVMValueRef shifted;
489 if (0)
490 debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
491
492 masked = LLVMBuildAnd(builder, a,
493 lp_build_const_int_vec(bld->gallivm, type4, mask), "");
494 if (shift > 0) {
495 shifted = LLVMBuildShl(builder, masked,
496 lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
497 } else if (shift < 0) {
498 shifted = LLVMBuildLShr(builder, masked,
499 lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
500 } else {
501 shifted = masked;
502 }
503
504 res = LLVMBuildOr(builder, res, shifted, "");
505 }
506 }
507
508 return LLVMBuildBitCast(builder, res,
509 lp_build_vec_type(bld->gallivm, type), "");
510 }
511 }
512
513
514 /**
515 * Extended swizzle of a single channel of a SoA vector.
516 *
517 * @param bld building context
518 * @param unswizzled array with the 4 unswizzled values
519 * @param swizzle one of the PIPE_SWIZZLE_*
520 *
521 * @return the swizzled value.
522 */
523 LLVMValueRef
524 lp_build_swizzle_soa_channel(struct lp_build_context *bld,
525 const LLVMValueRef *unswizzled,
526 unsigned swizzle)
527 {
528 switch (swizzle) {
529 case PIPE_SWIZZLE_RED:
530 case PIPE_SWIZZLE_GREEN:
531 case PIPE_SWIZZLE_BLUE:
532 case PIPE_SWIZZLE_ALPHA:
533 return unswizzled[swizzle];
534 case PIPE_SWIZZLE_ZERO:
535 return bld->zero;
536 case PIPE_SWIZZLE_ONE:
537 return bld->one;
538 default:
539 assert(0);
540 return bld->undef;
541 }
542 }
543
544
545 /**
546 * Extended swizzle of a SoA vector.
547 *
548 * @param bld building context
549 * @param unswizzled array with the 4 unswizzled values
550 * @param swizzles array of PIPE_SWIZZLE_*
551 * @param swizzled output swizzled values
552 */
553 void
554 lp_build_swizzle_soa(struct lp_build_context *bld,
555 const LLVMValueRef *unswizzled,
556 const unsigned char swizzles[4],
557 LLVMValueRef *swizzled)
558 {
559 unsigned chan;
560
561 for (chan = 0; chan < 4; ++chan) {
562 swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
563 swizzles[chan]);
564 }
565 }
566
567
568 /**
569 * Do an extended swizzle of a SoA vector inplace.
570 *
571 * @param bld building context
572 * @param values intput/output array with the 4 values
573 * @param swizzles array of PIPE_SWIZZLE_*
574 */
575 void
576 lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
577 LLVMValueRef *values,
578 const unsigned char swizzles[4])
579 {
580 LLVMValueRef unswizzled[4];
581 unsigned chan;
582
583 for (chan = 0; chan < 4; ++chan) {
584 unswizzled[chan] = values[chan];
585 }
586
587 lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
588 }
589
590
591 /**
592 * Transpose from AOS <-> SOA
593 *
594 * @param single_type_lp type of pixels
595 * @param src the 4 * n pixel input
596 * @param dst the 4 * n pixel output
597 */
598 void
599 lp_build_transpose_aos(struct gallivm_state *gallivm,
600 struct lp_type single_type_lp,
601 const LLVMValueRef src[4],
602 LLVMValueRef dst[4])
603 {
604 struct lp_type double_type_lp = single_type_lp;
605 LLVMTypeRef single_type;
606 LLVMTypeRef double_type;
607 LLVMValueRef t0, t1, t2, t3;
608
609 double_type_lp.length >>= 1;
610 double_type_lp.width <<= 1;
611
612 double_type = lp_build_vec_type(gallivm, double_type_lp);
613 single_type = lp_build_vec_type(gallivm, single_type_lp);
614
615 /* Interleave x, y, z, w -> xy and zw */
616 t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
617 t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
618 t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
619 t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
620
621 /* Cast to double width type for second interleave */
622 t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
623 t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
624 t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
625 t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
626
627 /* Interleave xy, zw -> xyzw */
628 dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
629 dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
630 dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
631 dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
632
633 /* Cast back to original single width type */
634 dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
635 dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
636 dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
637 dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
638 }
639
640
641 /**
642 * Transpose from AOS <-> SOA for num_srcs
643 */
644 void
645 lp_build_transpose_aos_n(struct gallivm_state *gallivm,
646 struct lp_type type,
647 const LLVMValueRef* src,
648 unsigned num_srcs,
649 LLVMValueRef* dst)
650 {
651 switch (num_srcs) {
652 case 1:
653 dst[0] = src[0];
654 break;
655
656 case 2:
657 {
658 /* Note: we must use a temporary incase src == dst */
659 LLVMValueRef lo, hi;
660
661 lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
662 hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
663
664 dst[0] = lo;
665 dst[1] = hi;
666 break;
667 }
668
669 case 4:
670 lp_build_transpose_aos(gallivm, type, src, dst);
671 break;
672
673 default:
674 assert(0);
675 };
676 }
677
678
679 /**
680 * Pack n-th element of aos values,
681 * pad out to destination size.
682 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
683 */
684 LLVMValueRef
685 lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
686 struct lp_type src_type,
687 struct lp_type dst_type,
688 const LLVMValueRef src,
689 unsigned channel)
690 {
691 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
692 LLVMValueRef undef = LLVMGetUndef(i32t);
693 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
694 unsigned num_src = src_type.length / 4;
695 unsigned num_dst = dst_type.length;
696 unsigned i;
697
698 assert(num_src <= num_dst);
699
700 for (i = 0; i < num_src; i++) {
701 shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
702 }
703 for (i = num_src; i < num_dst; i++) {
704 shuffles[i] = undef;
705 }
706
707 if (num_dst == 1) {
708 return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
709 }
710 else {
711 return LLVMBuildShuffleVector(gallivm->builder, src, src,
712 LLVMConstVector(shuffles, num_dst), "");
713 }
714 }
715
716
717 /**
718 * Unpack and broadcast packed aos values consisting of only the
719 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
720 */
721 LLVMValueRef
722 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
723 struct lp_type src_type,
724 struct lp_type dst_type,
725 const LLVMValueRef src)
726 {
727 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
728 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
729 unsigned num_dst = dst_type.length;
730 unsigned num_src = dst_type.length / 4;
731 unsigned i;
732
733 assert(num_dst / 4 <= src_type.length);
734
735 for (i = 0; i < num_src; i++) {
736 shuffles[i*4] = LLVMConstInt(i32t, i, 0);
737 shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
738 shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
739 shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
740 }
741
742 if (num_src == 1) {
743 return lp_build_extract_broadcast(gallivm, src_type, dst_type,
744 src, shuffles[0]);
745 }
746 else {
747 return LLVMBuildShuffleVector(gallivm->builder, src, src,
748 LLVMConstVector(shuffles, num_dst), "");
749 }
750 }
751