1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Helper functions for swizzling/shuffling.
32 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "util/u_debug.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_init.h"
41 #include "lp_bld_logic.h"
42 #include "lp_bld_swizzle.h"
43 #include "lp_bld_pack.h"
47 lp_build_broadcast(struct gallivm_state
*gallivm
,
53 if (LLVMGetTypeKind(vec_type
) != LLVMVectorTypeKind
) {
55 assert(vec_type
== LLVMTypeOf(scalar
));
58 LLVMBuilderRef builder
= gallivm
->builder
;
59 const unsigned length
= LLVMGetVectorSize(vec_type
);
60 LLVMValueRef undef
= LLVMGetUndef(vec_type
);
61 LLVMTypeRef i32_type
= LLVMInt32TypeInContext(gallivm
->context
);
63 assert(LLVMGetElementType(vec_type
) == LLVMTypeOf(scalar
));
65 if (HAVE_LLVM
>= 0x207) {
66 /* The shuffle vector is always made of int32 elements */
67 LLVMTypeRef i32_vec_type
= LLVMVectorType(i32_type
, length
);
68 res
= LLVMBuildInsertElement(builder
, undef
, scalar
, LLVMConstNull(i32_type
), "");
69 res
= LLVMBuildShuffleVector(builder
, res
, undef
, LLVMConstNull(i32_vec_type
), "");
71 /* XXX: The above path provokes a bug in LLVM 2.6 */
74 for(i
= 0; i
< length
; ++i
) {
75 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
76 res
= LLVMBuildInsertElement(builder
, res
, scalar
, index
, "");
89 lp_build_broadcast_scalar(struct lp_build_context
*bld
,
92 assert(lp_check_elem_type(bld
->type
, LLVMTypeOf(scalar
)));
94 return lp_build_broadcast(bld
->gallivm
, bld
->vec_type
, scalar
);
99 * Combined extract and broadcast (mere shuffle in most cases)
102 lp_build_extract_broadcast(struct gallivm_state
*gallivm
,
103 struct lp_type src_type
,
104 struct lp_type dst_type
,
108 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
111 assert(src_type
.floating
== dst_type
.floating
);
112 assert(src_type
.width
== dst_type
.width
);
114 assert(lp_check_value(src_type
, vector
));
115 assert(LLVMTypeOf(index
) == i32t
);
117 if (src_type
.length
== 1) {
118 if (dst_type
.length
== 1) {
120 * Trivial scalar -> scalar.
127 * Broadcast scalar -> vector.
130 res
= lp_build_broadcast(gallivm
,
131 lp_build_vec_type(gallivm
, dst_type
),
136 if (dst_type
.length
> 1) {
138 * shuffle - result can be of different length.
141 LLVMValueRef shuffle
;
142 shuffle
= lp_build_broadcast(gallivm
,
143 LLVMVectorType(i32t
, dst_type
.length
),
145 res
= LLVMBuildShuffleVector(gallivm
->builder
, vector
,
146 LLVMGetUndef(lp_build_vec_type(gallivm
, src_type
)),
151 * Trivial extract scalar from vector.
153 res
= LLVMBuildExtractElement(gallivm
->builder
, vector
, index
, "");
162 * Swizzle one channel into all other three channels.
165 lp_build_swizzle_scalar_aos(struct lp_build_context
*bld
,
169 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
170 const struct lp_type type
= bld
->type
;
171 const unsigned n
= type
.length
;
174 if(a
== bld
->undef
|| a
== bld
->zero
|| a
== bld
->one
)
177 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
178 * using shuffles here actually causes worse results. More investigation is
180 if (type
.width
>= 16) {
184 LLVMTypeRef elem_type
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
185 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
187 for(j
= 0; j
< n
; j
+= 4)
188 for(i
= 0; i
< 4; ++i
)
189 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ channel
, 0);
191 return LLVMBuildShuffleVector(builder
, a
, bld
->undef
, LLVMConstVector(shuffles
, n
), "");
195 * Bit mask and recursive shifts
197 * XYZW XYZW .... XYZW <= input
198 * 0Y00 0Y00 .... 0Y00
199 * YY00 YY00 .... YY00
200 * YYYY YYYY .... YYYY <= output
202 struct lp_type type4
;
203 const char shifts
[4][2] = {
211 a
= LLVMBuildAnd(builder
, a
,
212 lp_build_const_mask_aos(bld
->gallivm
,
213 type
, 1 << channel
), "");
216 * Build a type where each element is an integer that covers the four
221 type4
.floating
= FALSE
;
225 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type4
), "");
227 for(i
= 0; i
< 2; ++i
) {
228 LLVMValueRef tmp
= NULL
;
229 int shift
= shifts
[channel
][i
];
231 #ifdef PIPE_ARCH_LITTLE_ENDIAN
236 tmp
= LLVMBuildLShr(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type4
, shift
*type
.width
), "");
238 tmp
= LLVMBuildShl(builder
, a
, lp_build_const_int_vec(bld
->gallivm
, type4
, -shift
*type
.width
), "");
242 a
= LLVMBuildOr(builder
, a
, tmp
, "");
245 return LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type
), "");
251 lp_build_swizzle_aos(struct lp_build_context
*bld
,
253 const unsigned char swizzles
[4])
255 LLVMBuilderRef builder
= bld
->gallivm
->builder
;
256 const struct lp_type type
= bld
->type
;
257 const unsigned n
= type
.length
;
260 if (swizzles
[0] == PIPE_SWIZZLE_RED
&&
261 swizzles
[1] == PIPE_SWIZZLE_GREEN
&&
262 swizzles
[2] == PIPE_SWIZZLE_BLUE
&&
263 swizzles
[3] == PIPE_SWIZZLE_ALPHA
) {
267 if (swizzles
[0] == swizzles
[1] &&
268 swizzles
[1] == swizzles
[2] &&
269 swizzles
[2] == swizzles
[3]) {
270 switch (swizzles
[0]) {
271 case PIPE_SWIZZLE_RED
:
272 case PIPE_SWIZZLE_GREEN
:
273 case PIPE_SWIZZLE_BLUE
:
274 case PIPE_SWIZZLE_ALPHA
:
275 return lp_build_swizzle_scalar_aos(bld
, a
, swizzles
[0]);
276 case PIPE_SWIZZLE_ZERO
:
278 case PIPE_SWIZZLE_ONE
:
280 case LP_BLD_SWIZZLE_DONTCARE
:
288 if (type
.width
>= 16) {
292 LLVMValueRef undef
= LLVMGetUndef(lp_build_elem_type(bld
->gallivm
, type
));
293 LLVMTypeRef i32t
= LLVMInt32TypeInContext(bld
->gallivm
->context
);
294 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
295 LLVMValueRef aux
[LP_MAX_VECTOR_LENGTH
];
297 memset(aux
, 0, sizeof aux
);
299 for(j
= 0; j
< n
; j
+= 4) {
300 for(i
= 0; i
< 4; ++i
) {
302 switch (swizzles
[i
]) {
306 case PIPE_SWIZZLE_RED
:
307 case PIPE_SWIZZLE_GREEN
:
308 case PIPE_SWIZZLE_BLUE
:
309 case PIPE_SWIZZLE_ALPHA
:
310 shuffle
= j
+ swizzles
[i
];
311 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
313 case PIPE_SWIZZLE_ZERO
:
314 shuffle
= type
.length
+ 0;
315 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
317 aux
[0] = lp_build_const_elem(bld
->gallivm
, type
, 0.0);
320 case PIPE_SWIZZLE_ONE
:
321 shuffle
= type
.length
+ 1;
322 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
324 aux
[1] = lp_build_const_elem(bld
->gallivm
, type
, 1.0);
327 case LP_BLD_SWIZZLE_DONTCARE
:
328 shuffles
[j
+ i
] = LLVMGetUndef(i32t
);
334 for (i
= 0; i
< n
; ++i
) {
340 return LLVMBuildShuffleVector(builder
, a
,
341 LLVMConstVector(aux
, n
),
342 LLVMConstVector(shuffles
, n
), "");
345 * Bit mask and shifts.
347 * For example, this will convert BGRA to RGBA by doing
349 * rgba = (bgra & 0x00ff0000) >> 16
350 * | (bgra & 0xff00ff00)
351 * | (bgra & 0x000000ff) << 16
353 * This is necessary not only because it is faster, but also because the X86 backend
354 * will refuse shuffles of <4 x i8> vectors
357 struct lp_type type4
;
363 * Start with a mixture of 1 and 0.
365 for (chan
= 0; chan
< 4; ++chan
) {
366 if (swizzles
[chan
] == PIPE_SWIZZLE_ONE
) {
370 res
= lp_build_select_aos(bld
, cond
, bld
->one
, bld
->zero
);
373 * Build a type where each element is an integer that covers the four
377 type4
.floating
= FALSE
;
381 a
= LLVMBuildBitCast(builder
, a
, lp_build_vec_type(bld
->gallivm
, type4
), "");
382 res
= LLVMBuildBitCast(builder
, res
, lp_build_vec_type(bld
->gallivm
, type4
), "");
385 * Mask and shift the channels, trying to group as many channels in the
386 * same shift as possible
388 for (shift
= -3; shift
<= 3; ++shift
) {
389 unsigned long long mask
= 0;
391 assert(type4
.width
<= sizeof(mask
)*8);
393 for (chan
= 0; chan
< 4; ++chan
) {
394 /* FIXME: big endian */
395 if (swizzles
[chan
] < 4 &&
396 chan
- swizzles
[chan
] == shift
) {
397 mask
|= ((1ULL << type
.width
) - 1) << (swizzles
[chan
] * type
.width
);
403 LLVMValueRef shifted
;
406 debug_printf("shift = %i, mask = 0x%08llx\n", shift
, mask
);
408 masked
= LLVMBuildAnd(builder
, a
,
409 lp_build_const_int_vec(bld
->gallivm
, type4
, mask
), "");
411 shifted
= LLVMBuildShl(builder
, masked
,
412 lp_build_const_int_vec(bld
->gallivm
, type4
, shift
*type
.width
), "");
413 } else if (shift
< 0) {
414 shifted
= LLVMBuildLShr(builder
, masked
,
415 lp_build_const_int_vec(bld
->gallivm
, type4
, -shift
*type
.width
), "");
420 res
= LLVMBuildOr(builder
, res
, shifted
, "");
424 return LLVMBuildBitCast(builder
, res
,
425 lp_build_vec_type(bld
->gallivm
, type
), "");
431 * Extended swizzle of a single channel of a SoA vector.
433 * @param bld building context
434 * @param unswizzled array with the 4 unswizzled values
435 * @param swizzle one of the PIPE_SWIZZLE_*
437 * @return the swizzled value.
440 lp_build_swizzle_soa_channel(struct lp_build_context
*bld
,
441 const LLVMValueRef
*unswizzled
,
445 case PIPE_SWIZZLE_RED
:
446 case PIPE_SWIZZLE_GREEN
:
447 case PIPE_SWIZZLE_BLUE
:
448 case PIPE_SWIZZLE_ALPHA
:
449 return unswizzled
[swizzle
];
450 case PIPE_SWIZZLE_ZERO
:
452 case PIPE_SWIZZLE_ONE
:
462 * Extended swizzle of a SoA vector.
464 * @param bld building context
465 * @param unswizzled array with the 4 unswizzled values
466 * @param swizzles array of PIPE_SWIZZLE_*
467 * @param swizzled output swizzled values
470 lp_build_swizzle_soa(struct lp_build_context
*bld
,
471 const LLVMValueRef
*unswizzled
,
472 const unsigned char swizzles
[4],
473 LLVMValueRef
*swizzled
)
477 for (chan
= 0; chan
< 4; ++chan
) {
478 swizzled
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
,
485 * Do an extended swizzle of a SoA vector inplace.
487 * @param bld building context
488 * @param values input/output array with the 4 values
489 * @param swizzles array of PIPE_SWIZZLE_*
492 lp_build_swizzle_soa_inplace(struct lp_build_context
*bld
,
493 LLVMValueRef
*values
,
494 const unsigned char swizzles
[4])
496 LLVMValueRef unswizzled
[4];
499 for (chan
= 0; chan
< 4; ++chan
) {
500 unswizzled
[chan
] = values
[chan
];
503 lp_build_swizzle_soa(bld
, unswizzled
, swizzles
, values
);
508 * Transpose from AOS <-> SOA
510 * @param single_type_lp type of pixels
511 * @param src the 4 * n pixel input
512 * @param dst the 4 * n pixel output
515 lp_build_transpose_aos(struct gallivm_state
*gallivm
,
516 struct lp_type single_type_lp
,
517 const LLVMValueRef src
[4],
520 struct lp_type double_type_lp
= single_type_lp
;
521 LLVMTypeRef single_type
;
522 LLVMTypeRef double_type
;
523 LLVMValueRef t0
, t1
, t2
, t3
;
525 double_type_lp
.length
>>= 1;
526 double_type_lp
.width
<<= 1;
528 double_type
= lp_build_vec_type(gallivm
, double_type_lp
);
529 single_type
= lp_build_vec_type(gallivm
, single_type_lp
);
531 /* Interleave x, y, z, w -> xy and zw */
532 t0
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[0], src
[1], 0);
533 t1
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[2], src
[3], 0);
534 t2
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[0], src
[1], 1);
535 t3
= lp_build_interleave2_half(gallivm
, single_type_lp
, src
[2], src
[3], 1);
537 /* Cast to double width type for second interleave */
538 t0
= LLVMBuildBitCast(gallivm
->builder
, t0
, double_type
, "t0");
539 t1
= LLVMBuildBitCast(gallivm
->builder
, t1
, double_type
, "t1");
540 t2
= LLVMBuildBitCast(gallivm
->builder
, t2
, double_type
, "t2");
541 t3
= LLVMBuildBitCast(gallivm
->builder
, t3
, double_type
, "t3");
543 /* Interleave xy, zw -> xyzw */
544 dst
[0] = lp_build_interleave2_half(gallivm
, double_type_lp
, t0
, t1
, 0);
545 dst
[1] = lp_build_interleave2_half(gallivm
, double_type_lp
, t0
, t1
, 1);
546 dst
[2] = lp_build_interleave2_half(gallivm
, double_type_lp
, t2
, t3
, 0);
547 dst
[3] = lp_build_interleave2_half(gallivm
, double_type_lp
, t2
, t3
, 1);
549 /* Cast back to original single width type */
550 dst
[0] = LLVMBuildBitCast(gallivm
->builder
, dst
[0], single_type
, "dst0");
551 dst
[1] = LLVMBuildBitCast(gallivm
->builder
, dst
[1], single_type
, "dst1");
552 dst
[2] = LLVMBuildBitCast(gallivm
->builder
, dst
[2], single_type
, "dst2");
553 dst
[3] = LLVMBuildBitCast(gallivm
->builder
, dst
[3], single_type
, "dst3");
558 * Pack first element of aos values,
559 * pad out to destination size.
560 * i.e. x1 _ _ _ x2 _ _ _ will become x1 x2 _ _
563 lp_build_pack_aos_scalars(struct gallivm_state
*gallivm
,
564 struct lp_type src_type
,
565 struct lp_type dst_type
,
566 const LLVMValueRef src
)
568 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
569 LLVMValueRef undef
= LLVMGetUndef(i32t
);
570 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
571 unsigned num_src
= src_type
.length
/ 4;
572 unsigned num_dst
= dst_type
.length
;
575 assert(num_src
<= num_dst
);
577 for (i
= 0; i
< num_src
; i
++) {
578 shuffles
[i
] = LLVMConstInt(i32t
, i
* 4, 0);
580 for (i
= num_src
; i
< num_dst
; i
++) {
585 return LLVMBuildExtractElement(gallivm
->builder
, src
, shuffles
[0], "");
588 return LLVMBuildShuffleVector(gallivm
->builder
, src
, src
,
589 LLVMConstVector(shuffles
, num_dst
), "");
595 * Unpack and broadcast packed aos values consisting of only the
596 * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
599 lp_build_unpack_broadcast_aos_scalars(struct gallivm_state
*gallivm
,
600 struct lp_type src_type
,
601 struct lp_type dst_type
,
602 const LLVMValueRef src
)
604 LLVMTypeRef i32t
= LLVMInt32TypeInContext(gallivm
->context
);
605 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
606 unsigned num_dst
= dst_type
.length
;
607 unsigned num_src
= dst_type
.length
/ 4;
610 assert(num_dst
/ 4 <= src_type
.length
);
612 for (i
= 0; i
< num_src
; i
++) {
613 shuffles
[i
*4] = LLVMConstInt(i32t
, i
, 0);
614 shuffles
[i
*4+1] = LLVMConstInt(i32t
, i
, 0);
615 shuffles
[i
*4+2] = LLVMConstInt(i32t
, i
, 0);
616 shuffles
[i
*4+3] = LLVMConstInt(i32t
, i
, 0);
620 return lp_build_extract_broadcast(gallivm
, src_type
, dst_type
,
624 return LLVMBuildShuffleVector(gallivm
->builder
, src
, src
,
625 LLVMConstVector(shuffles
, num_dst
), "");