ae4033b60861012f09cfe27881b84c87657bfc83
1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Helper functions for swizzling/shuffling.
32 * @author Jose Fonseca <jfonseca@vmware.com>
35 #include <inttypes.h> /* for PRIx64 macro */
36 #include "util/u_debug.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_init.h"
41 #include "lp_bld_logic.h"
42 #include "lp_bld_swizzle.h"
43 #include "lp_bld_pack.h"
47 lp_build_broadcast(struct gallivm_state
*gallivm
,
53 if (LLVMGetTypeKind(vec_type
) != LLVMVectorTypeKind
) {
55 assert(vec_type
== LLVMTypeOf(scalar
));
58 LLVMBuilderRef builder
= gallivm
->builder
;
59 const unsigned length
= LLVMGetVectorSize(vec_type
);
60 LLVMValueRef undef
= LLVMGetUndef(vec_type
);
61 LLVMTypeRef i32_type
= LLVMInt32TypeInContext(gallivm
->context
);
63 assert(LLVMGetElementType(vec_type
) == LLVMTypeOf(scalar
));
65 if (HAVE_LLVM
>= 0x207) {
66 /* The shuffle vector is always made of int32 elements */
67 LLVMTypeRef i32_vec_type
= LLVMVectorType(i32_type
, length
);
68 res
= LLVMBuildInsertElement(builder
, undef
, scalar
, LLVMConstNull(i32_type
), "");
69 res
= LLVMBuildShuffleVector(builder
, res
, undef
, LLVMConstNull(i32_vec_type
), "");
71 /* XXX: The above path provokes a bug in LLVM 2.6 */
74 for(i
= 0; i
< length
; ++i
) {
75 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
76 res
= LLVMBuildInsertElement(builder
, res
, scalar
, index
, "");
89 lp_build_broadcast_scalar(struct lp_build_context
*bld
,
92 assert(lp_check_elem_type(bld
->type
, LLVMTypeOf(scalar
)));
94 return lp_build_broadcast(bld
->gallivm
, bld
->vec_type
, scalar
);
99 * Combined extract and broadcast (mere shuffle in most cases)
102 lp_build_extract_broadcast(struct gallivm_state
*gallivm
,
103 struct lp_type src_type
,
104 struct lp_type dst_type
,
108 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
111 assert(src_type
.floating
== dst_type
.floating
);
112 assert(src_type
.width
== dst_type
.width
);
114 assert(lp_check_value(src_type
, vector
));
115 assert(LLVMTypeOf(index
) == i32t
);
117 if (src_type
.length
== 1) {
118 if (dst_type
.length
== 1) {
120 * Trivial scalar -> scalar.
127 * Broadcast scalar -> vector.
130 res
= lp_build_broadcast(gallivm
,
131 lp_build_vec_type(gallivm
, dst_type
),
136 if (dst_type
.length
> 1) {
138 * shuffle - result can be of different length.
141 LLVMValueRef shuffle
;
142 shuffle
= lp_build_broadcast(gallivm
,
143 LLVMVectorType(i32t
, dst_type
.length
),
145 res
= LLVMBuildShuffleVector(gallivm
->builder
, vector
,
146 LLVMGetUndef(lp_build_vec_type(gallivm
, src_type
)),
151 * Trivial extract scalar from vector.
153 res
= LLVMBuildExtractElement(gallivm
->builder
, vector
, index
, "");
162 * Swizzle one channel into other channels.
165 lp_build_swizzle_scalar_aos(struct lp_build_context
*bld
,
168 unsigned num_channels
)
170 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
171 const struct lp_type type
= bld
->type
;
172 const unsigned n
= type
.length
;
175 if(a
== bld
->undef
|| a
== bld
->zero
|| a
== bld
->one
|| num_channels
== 1)
178 assert(num_channels
== 2 || num_channels
== 4);
180 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
181 * using shuffles here actually causes worse results. More investigation is
183 if (type
.width
>= 16) {
187 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
188 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
190 for(j
= 0; j
< n
; j
+= num_channels
)
191 for(i
= 0; i
< num_channels
; ++i
)
192 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ channel
, 0);
194 return LLVMBuildShuffleVector(builder
, a
, bld
->undef
, LLVMConstVector(shuffles
, n
), "");
196 else if (num_channels
== 2) {
198 * Bit mask and shifts
200 * XY XY .... XY <= input
203 * YY YY .... YY <= output
205 struct lp_type type2
;
206 LLVMValueRef tmp
= NULL
;
209 a
= LLVMBuildAnd(builder
, a
,
210 lp_build_const_mask_aos(bld
->gallivm
,
211 type
, 1 << channel
, num_channels
), "");
214 type2
.floating
= FALSE
;
218 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type2
), "");
220 #ifdef PIPE_ARCH_LITTLE_ENDIAN
221 shift
= channel
== 0 ? 1 : -1;
223 shift
= channel
== 0 ? -1 : 1;
227 tmp
= LLVMBuildShl(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type2
, shift
* type
.width
), "");
228 } else if (shift
< 0) {
229 tmp
= LLVMBuildLShr(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type2
, -shift
* type
.width
), "");
234 a
= LLVMBuildOr(builder
, a
, tmp
, "");
237 return LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type
), "");
241 * Bit mask and recursive shifts
243 * XYZW XYZW .... XYZW <= input
244 * 0Y00 0Y00 .... 0Y00
245 * YY00 YY00 .... YY00
246 * YYYY YYYY .... YYYY <= output
248 struct lp_type type4
;
249 const int shifts
[4][2] = {
257 a
= LLVMBuildAnd(builder
, a
,
258 lp_build_const_mask_aos(bld
->gallivm
,
259 type
, 1 << channel
, 4), "");
262 * Build a type where each element is an integer that covers the four
267 type4
.floating
= FALSE
;
271 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type4
), "");
273 for(i
= 0; i
< 2; ++i
) {
274 LLVMValueRef tmp
= NULL
;
275 int shift
= shifts
[channel
][i
];
277 #ifdef PIPE_ARCH_LITTLE_ENDIAN
282 tmp
= LLVMBuildLShr(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type4
, shift
*type
.width
), "");
284 tmp
= LLVMBuildShl(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type4
, -shift
*type
.width
), "");
288 a
= LLVMBuildOr(builder
, a
, tmp
, "");
291 return LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type
), "");
297 * Swizzle a vector consisting of an array of XYZW structs.
299 * This fills a vector of dst_len length with the swizzled channels from src.
301 * e.g. with swizzles = { 2, 1, 0 }, num_swizzles = 3 and dst_len = 8 results in
302 * RGBA RGBA = BGR BGR BG
304 * @param swizzles the swizzle array
305 * @param num_swizzles the number of elements in swizzles
306 * @param dst_len the length of the result
309 lp_build_swizzle_aos_n(struct gallivm_state
* gallivm
,
311 const unsigned char* swizzles
,
312 unsigned num_swizzles
,
315 LLVMBuilderRef builder
= gallivm
->builder
;
316 LLVMValueRef shuffles
[LP_MAX_VECTOR_WIDTH
];
319 assert(dst_len
< LP_MAX_VECTOR_WIDTH
);
321 for (i
= 0; i
< dst_len
; ++i
) {
322 int swizzle
= swizzles
[i
% num_swizzles
];
324 if (swizzle
== LP_BLD_SWIZZLE_DONTCARE
) {
325 shuffles
[i
] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm
->context
));
327 shuffles
[i
] = lp_build_const_int32(gallivm
, swizzle
);
331 return LLVMBuildShuffleVector(builder
, src
, LLVMGetUndef(LLVMTypeOf(src
)), LLVMConstVector(shuffles
, dst_len
), "");
336 lp_build_swizzle_aos(struct lp_build_context
*bld
,
338 const unsigned char swizzles
[4])
340 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
341 const struct lp_type type
= bld
->type
;
342 const unsigned n
= type
.length
;
345 if (swizzles
[0] == PIPE_SWIZZLE_RED
&&
346 swizzles
[1] == PIPE_SWIZZLE_GREEN
&&
347 swizzles
[2] == PIPE_SWIZZLE_BLUE
&&
348 swizzles
[3] == PIPE_SWIZZLE_ALPHA
) {
352 if (swizzles
[0] == swizzles
[1] &&
353 swizzles
[1] == swizzles
[2] &&
354 swizzles
[2] == swizzles
[3]) {
355 switch (swizzles
[0]) {
356 case PIPE_SWIZZLE_RED
:
357 case PIPE_SWIZZLE_GREEN
:
358 case PIPE_SWIZZLE_BLUE
:
359 case PIPE_SWIZZLE_ALPHA
:
360 return lp_build_swizzle_scalar_aos(bld
, a
, swizzles
[0], 4);
361 case PIPE_SWIZZLE_ZERO
:
363 case PIPE_SWIZZLE_ONE
:
365 case LP_BLD_SWIZZLE_DONTCARE
:
373 if (type
.width
>= 16) {
377 LLVMValueRef undef
= LLVMGetUndef(lp_build_elem_type(bld
->gallivm
, type
));
378 LLVMTypeRef i32t
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
379 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
380 LLVMValueRef aux
[LP_MAX_VECTOR_LENGTH
];
382 memset(aux
, 0, sizeof aux
);
384 for(j
= 0; j
< n
; j
+= 4) {
385 for(i
= 0; i
< 4; ++i
) {
387 switch (swizzles
[i
]) {
391 case PIPE_SWIZZLE_RED
:
392 case PIPE_SWIZZLE_GREEN
:
393 case PIPE_SWIZZLE_BLUE
:
394 case PIPE_SWIZZLE_ALPHA
:
395 shuffle
= j
+ swizzles
[i
];
396 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
398 case PIPE_SWIZZLE_ZERO
:
399 shuffle
= type
.length
+ 0;
400 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
402 aux
[0] = lp_build_const_elem(bld
->gallivm
, type
, 0.0);
405 case PIPE_SWIZZLE_ONE
:
406 shuffle
= type
.length
+ 1;
407 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
409 aux
[1] = lp_build_const_elem(bld
->gallivm
, type
, 1.0);
412 case LP_BLD_SWIZZLE_DONTCARE
:
413 shuffles
[j
+ i
] = LLVMGetUndef(i32t
);
419 for (i
= 0; i
< n
; ++i
) {
425 return LLVMBuildShuffleVector(builder
, a
,
426 LLVMConstVector(aux
, n
),
427 LLVMConstVector(shuffles
, n
), "");
430 * Bit mask and shifts.
432 * For example, this will convert BGRA to RGBA by doing
434 * rgba = (bgra & 0x00ff0000) >> 16
435 * | (bgra & 0xff00ff00)
436 * | (bgra & 0x000000ff) << 16
438 * This is necessary not only because it is faster, but because the X86 backend
439 * will refuse shuffles of <4 x i8> vectors
442 struct lp_type type4
;
448 * Start with a mixture of 1 and 0.
450 for (chan
= 0; chan
< 4; ++chan
) {
451 if (swizzles
[chan
] == PIPE_SWIZZLE_ONE
) {
455 res
= lp_build_select_aos(bld
, cond
, bld
->one
, bld
->zero
, 4);
458 * Build a type where each element is an integer that covers the four
462 type4
.floating
= FALSE
;
466 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type4
), "");
467 res
= LLVMBuildBitCast(builder
, res
, lp_build_vec_type(bld
->gallivm
, type4
), "");
470 * Mask and shift the channels, trying to group as many channels in the
471 * same shift as possible
473 for (shift
= -3; shift
<= 3; ++shift
) {
476 assert(type4
.width
<= sizeof(mask
)*8);
478 for (chan
= 0; chan
< 4; ++chan
) {
479 /* FIXME: big endian */
480 if (swizzles
[chan
] < 4 &&
481 chan
- swizzles
[chan
] == shift
) {
482 mask
|= ((1ULL << type
.width
) - 1) << (swizzles
[chan
] * type
.width
);
488 LLVMValueRef shifted
;
490 debug_printf("shift = %i, mask = %" PRIx64
"\n", shift
, mask
);
492 masked
= LLVMBuildAnd(builder
, a
,
493 lp_build_const_int_vec(bld
->gallivm
, type4
, mask
), "");
495 shifted
= LLVMBuildShl(builder
, masked
,
496 lp_build_const_int_vec(bld
->gallivm
, type4
, shift
*type
.width
), "");
497 } else if (shift
< 0) {
498 shifted
= LLVMBuildLShr(builder
, masked
,
499 lp_build_const_int_vec(bld
->gallivm
, type4
, -shift
*type
.width
), "");
504 res
= LLVMBuildOr(builder
, res
, shifted
, "");
508 return LLVMBuildBitCast(builder
, res
,
509 lp_build_vec_type(bld
->gallivm
, type
), "");
515 * Extended swizzle of a single channel of a SoA vector.
517 * @param bld building context
518 * @param unswizzled array with the 4 unswizzled values
519 * @param swizzle one of the PIPE_SWIZZLE_*
521 * @return the swizzled value.
524 lp_build_swizzle_soa_channel(struct lp_build_context
*bld
,
525 const LLVMValueRef
*unswizzled
,
529 case PIPE_SWIZZLE_RED
:
530 case PIPE_SWIZZLE_GREEN
:
531 case PIPE_SWIZZLE_BLUE
:
532 case PIPE_SWIZZLE_ALPHA
:
533 return unswizzled
[swizzle
];
534 case PIPE_SWIZZLE_ZERO
:
536 case PIPE_SWIZZLE_ONE
:
546 * Extended swizzle of a SoA vector.
548 * @param bld building context
549 * @param unswizzled array with the 4 unswizzled values
550 * @param swizzles array of PIPE_SWIZZLE_*
551 * @param swizzled output swizzled values
554 lp_build_swizzle_soa(struct lp_build_context
*bld
,
555 const LLVMValueRef
*unswizzled
,
556 const unsigned char swizzles
[4],
557 LLVMValueRef
*swizzled
)
561 for (chan
= 0; chan
< 4; ++chan
) {
562 swizzled
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
,
569 * Do an extended swizzle of a SoA vector inplace.
571 * @param bld building context
572 * @param values input/output array with the 4 values
573 * @param swizzles array of PIPE_SWIZZLE_*
576 lp_build_swizzle_soa_inplace(struct lp_build_context
*bld
,
577 LLVMValueRef
*values
,
578 const unsigned char swizzles
[4])
580 LLVMValueRef unswizzled
[4];
583 for (chan
= 0; chan
< 4; ++chan
) {
584 unswizzled
[chan
] = values
[chan
];
587 lp_build_swizzle_soa(bld
, unswizzled
, swizzles
, values
);
592 * Transpose from AOS <-> SOA
594 * @param single_type_lp type of pixels
595 * @param src the 4 * n pixel input
596 * @param dst the 4 * n pixel output
599 lp_build_transpose_aos(struct gallivm_state
*gallivm
,
600 struct lp_type single_type_lp
,
601 const LLVMValueRef src
[4],
604 struct lp_type double_type_lp
= single_type_lp
;
605 LLVMTypeRef single_type
;
606 LLVMTypeRef double_type
;
607 LLVMValueRef t0
, t1
, t2
, t3
;
609 double_type_lp
.length
>>= 1;
610 double_type_lp
.width
<<= 1;
612 double_type
= lp_build_vec_type(gallivm
, double_type_lp
);
613 single_type
= lp_build_vec_type(gallivm
, single_type_lp
);
615 /* Interleave x, y, z, w -> xy and zw */
616 t0
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[0], src
[1], 0);
617 t1
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[2], src
[3], 0);
618 t2
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[0], src
[1], 1);
619 t3
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[2], src
[3], 1);
621 /* Cast to double width type for second interleave */
622 t0
= LLVMBuildBitCast(gallivm
->builder
, t0
, double_type
, "t0");
623 t1
= LLVMBuildBitCast(gallivm
->builder
, t1
, double_type
, "t1");
624 t2
= LLVMBuildBitCast(gallivm
->builder
, t2
, double_type
, "t2");
625 t3
= LLVMBuildBitCast(gallivm
->builder
, t3
, double_type
, "t3");
627 /* Interleave xy, zw -> xyzw */
628 dst
[0] = lp_build_interleave2_half(gallivm
, double_type_lp
, t0
, t1
, 0);
629 dst
[1] = lp_build_interleave2_half(gallivm
, double_type_lp
, t0
, t1
, 1);
630 dst
[2] = lp_build_interleave2_half(gallivm
, double_type_lp
, t2
, t3
, 0);
631 dst
[3] = lp_build_interleave2_half(gallivm
, double_type_lp
, t2
, t3
, 1);
633 /* Cast back to original single width type */
634 dst
[0] = LLVMBuildBitCast(gallivm
->builder
, dst
[0], single_type
, "dst0");
635 dst
[1] = LLVMBuildBitCast(gallivm
->builder
, dst
[1], single_type
, "dst1");
636 dst
[2] = LLVMBuildBitCast(gallivm
->builder
, dst
[2], single_type
, "dst2");
637 dst
[3] = LLVMBuildBitCast(gallivm
->builder
, dst
[3], single_type
, "dst3");
642 * Transpose from AOS <-> SOA for num_srcs
645 lp_build_transpose_aos_n(struct gallivm_state
*gallivm
,
647 const LLVMValueRef
* src
,
658 /* Note: we must use a temporary in case src == dst */
661 lo
= lp_build_interleave2_half(gallivm
, type
, src
[0], src
[1], 0);
662 hi
= lp_build_interleave2_half(gallivm
, type
, src
[0], src
[1], 1);
670 lp_build_transpose_aos(gallivm
, type
, src
, dst
);
680 * Pack n-th element of aos values,
681 * pad out to destination size.
682 * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
685 lp_build_pack_aos_scalars(struct gallivm_state
*gallivm
,
686 struct lp_type src_type
,
687 struct lp_type dst_type
,
688 const LLVMValueRef src
,
691 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
692 LLVMValueRef undef
= LLVMGetUndef(i32t
);
693 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
694 unsigned num_src
= src_type
.length
/ 4;
695 unsigned num_dst
= dst_type
.length
;
698 assert(num_src
<= num_dst
);
700 for (i
= 0; i
< num_src
; i
++) {
701 shuffles
[i
] = LLVMConstInt(i32t
, i
* 4 + channel
, 0);
703 for (i
= num_src
; i
< num_dst
; i
++) {
708 return LLVMBuildExtractElement(gallivm
->builder
, src
, shuffles
[0], "");
711 return LLVMBuildShuffleVector(gallivm
->builder
, src
, src
,
712 LLVMConstVector(shuffles
, num_dst
), "");
718 * Unpack and broadcast packed aos values consisting of only the
719 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
722 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state
*gallivm
,
723 struct lp_type src_type
,
724 struct lp_type dst_type
,
725 const LLVMValueRef src
)
727 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
728 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
729 unsigned num_dst
= dst_type
.length
;
730 unsigned num_src
= dst_type
.length
/ 4;
733 assert(num_dst
/ 4 <= src_type
.length
);
735 for (i
= 0; i
< num_src
; i
++) {
736 shuffles
[i
*4] = LLVMConstInt(i32t
, i
, 0);
737 shuffles
[i
*4+1] = LLVMConstInt(i32t
, i
, 0);
738 shuffles
[i
*4+2] = LLVMConstInt(i32t
, i
, 0);
739 shuffles
[i
*4+3] = LLVMConstInt(i32t
, i
, 0);
743 return lp_build_extract_broadcast(gallivm
, src_type
, dst_type
,
747 return LLVMBuildShuffleVector(gallivm
->builder
, src
, src
,
748 LLVMConstVector(shuffles
, num_dst
), "");