20cf96ca66991ed28303aecf1fa47cb1dcd4da65
1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Helper functions for swizzling/shuffling.
32 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "util/u_debug.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_logic.h"
41 #include "lp_bld_swizzle.h"
45 lp_build_broadcast(LLVMBuilderRef builder
,
49 const unsigned n
= LLVMGetVectorSize(vec_type
);
53 res
= LLVMGetUndef(vec_type
);
54 for(i
= 0; i
< n
; ++i
) {
55 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), i
, 0);
56 res
= LLVMBuildInsertElement(builder
, res
, scalar
, index
, "");
67 lp_build_broadcast_scalar(struct lp_build_context
*bld
,
70 const struct lp_type type
= bld
->type
;
72 assert(lp_check_elem_type(type
, LLVMTypeOf(scalar
)));
74 if (type
.length
== 1) {
79 #if HAVE_LLVM >= 0x207
80 res
= LLVMBuildInsertElement(bld
->builder
, bld
->undef
, scalar
,
81 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
82 res
= LLVMBuildShuffleVector(bld
->builder
, res
, bld
->undef
,
83 lp_build_const_int_vec(type
, 0), "");
85 /* XXX: The above path provokes a bug in LLVM 2.6 */
88 for(i
= 0; i
< type
.length
; ++i
) {
89 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), i
, 0);
90 res
= LLVMBuildInsertElement(bld
->builder
, res
, scalar
, index
, "");
99 lp_build_broadcast_aos(struct lp_build_context
*bld
,
103 const struct lp_type type
= bld
->type
;
104 const unsigned n
= type
.length
;
107 if(a
== bld
->undef
|| a
== bld
->zero
|| a
== bld
->one
)
110 /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing
111 * using shuffles here actually causes worse results. More investigation is
113 if (type
.width
>= 16) {
117 LLVMTypeRef elem_type
= LLVMInt32Type();
118 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
120 for(j
= 0; j
< n
; j
+= 4)
121 for(i
= 0; i
< 4; ++i
)
122 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ channel
, 0);
124 return LLVMBuildShuffleVector(bld
->builder
, a
, bld
->undef
, LLVMConstVector(shuffles
, n
), "");
128 * Bit mask and recursive shifts
130 * XYZW XYZW .... XYZW <= input
131 * 0Y00 0Y00 .... 0Y00
132 * YY00 YY00 .... YY00
133 * YYYY YYYY .... YYYY <= output
135 struct lp_type type4
;
136 const char shifts
[4][2] = {
145 memset(cond
, 0, sizeof cond
);
148 a
= LLVMBuildAnd(bld
->builder
, a
, lp_build_const_mask_aos(type
, cond
), "");
151 * Build a type where each element is an integer that covers the four
156 type4
.floating
= FALSE
;
160 a
= LLVMBuildBitCast(bld
->builder
, a
, lp_build_vec_type(type4
), "");
162 for(i
= 0; i
< 2; ++i
) {
163 LLVMValueRef tmp
= NULL
;
164 int shift
= shifts
[channel
][i
];
166 #ifdef PIPE_ARCH_LITTLE_ENDIAN
171 tmp
= LLVMBuildLShr(bld
->builder
, a
, lp_build_const_int_vec(type4
, shift
*type
.width
), "");
173 tmp
= LLVMBuildShl(bld
->builder
, a
, lp_build_const_int_vec(type4
, -shift
*type
.width
), "");
177 a
= LLVMBuildOr(bld
->builder
, a
, tmp
, "");
180 return LLVMBuildBitCast(bld
->builder
, a
, lp_build_vec_type(type
), "");
186 lp_build_swizzle_aos(struct lp_build_context
*bld
,
188 const unsigned char swizzles
[4])
190 const struct lp_type type
= bld
->type
;
191 const unsigned n
= type
.length
;
194 if (swizzles
[0] == PIPE_SWIZZLE_RED
&&
195 swizzles
[1] == PIPE_SWIZZLE_GREEN
&&
196 swizzles
[2] == PIPE_SWIZZLE_BLUE
&&
197 swizzles
[3] == PIPE_SWIZZLE_ALPHA
) {
201 if (swizzles
[0] == swizzles
[1] &&
202 swizzles
[1] == swizzles
[2] &&
203 swizzles
[2] == swizzles
[3]) {
204 switch (swizzles
[0]) {
205 case PIPE_SWIZZLE_RED
:
206 case PIPE_SWIZZLE_GREEN
:
207 case PIPE_SWIZZLE_BLUE
:
208 case PIPE_SWIZZLE_ALPHA
:
209 return lp_build_broadcast_aos(bld
, a
, swizzles
[0]);
210 case PIPE_SWIZZLE_ZERO
:
212 case PIPE_SWIZZLE_ONE
:
220 if (type
.width
>= 16) {
224 LLVMValueRef undef
= LLVMGetUndef(lp_build_elem_type(type
));
225 LLVMTypeRef i32t
= LLVMInt32Type();
226 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
227 LLVMValueRef aux
[LP_MAX_VECTOR_LENGTH
];
229 memset(aux
, 0, sizeof aux
);
231 for(j
= 0; j
< n
; j
+= 4) {
232 for(i
= 0; i
< 4; ++i
) {
234 switch (swizzles
[i
]) {
238 case PIPE_SWIZZLE_RED
:
239 case PIPE_SWIZZLE_GREEN
:
240 case PIPE_SWIZZLE_BLUE
:
241 case PIPE_SWIZZLE_ALPHA
:
242 shuffle
= j
+ swizzles
[i
];
244 case PIPE_SWIZZLE_ZERO
:
245 shuffle
= type
.length
+ 0;
247 aux
[0] = lp_build_const_elem(type
, 0.0);
250 case PIPE_SWIZZLE_ONE
:
251 shuffle
= type
.length
+ 1;
253 aux
[1] = lp_build_const_elem(type
, 1.0);
257 shuffles
[j
+ i
] = LLVMConstInt(i32t
, shuffle
, 0);
261 for (i
= 0; i
< n
; ++i
) {
267 return LLVMBuildShuffleVector(bld
->builder
, a
,
268 LLVMConstVector(aux
, n
),
269 LLVMConstVector(shuffles
, n
), "");
272 * Bit mask and shifts.
274 * For example, this will convert BGRA to RGBA by doing
276 * rgba = (bgra & 0x00ff0000) >> 16
277 * | (bgra & 0xff00ff00)
278 * | (bgra & 0x000000ff) << 16
280 * This is necessary not only because it is faster, but also because the X86 backend
281 * will refuse shuffles of <4 x i8> vectors
284 struct lp_type type4
;
290 * Start with a mixture of 1 and 0.
292 for (chan
= 0; chan
< 4; ++chan
) {
293 cond
[chan
] = swizzles
[chan
] == PIPE_SWIZZLE_ONE
? TRUE
: FALSE
;
295 res
= lp_build_select_aos(bld
, bld
->one
, bld
->zero
, cond
);
298 * Build a type where each element is an integer that covers the four
302 type4
.floating
= FALSE
;
306 a
= LLVMBuildBitCast(bld
->builder
, a
, lp_build_vec_type(type4
), "");
307 res
= LLVMBuildBitCast(bld
->builder
, res
, lp_build_vec_type(type4
), "");
310 * Mask and shift the channels, trying to group as many channels in the
311 * same shift as possible
313 for (shift
= -3; shift
<= 3; ++shift
) {
314 unsigned long long mask
= 0;
316 assert(type4
.width
<= sizeof(mask
)*8);
318 for (chan
= 0; chan
< 4; ++chan
) {
319 /* FIXME: big endian */
320 if (swizzles
[chan
] < 4 &&
321 chan
- swizzles
[chan
] == shift
) {
322 mask
|= ((1ULL << type
.width
) - 1) << (swizzles
[chan
] * type
.width
);
328 LLVMValueRef shifted
;
331 debug_printf("shift = %i, mask = 0x%08llx\n", shift
, mask
);
333 masked
= LLVMBuildAnd(bld
->builder
, a
,
334 lp_build_const_int_vec(type4
, mask
), "");
336 shifted
= LLVMBuildShl(bld
->builder
, masked
,
337 lp_build_const_int_vec(type4
, shift
*type
.width
), "");
338 } else if (shift
< 0) {
339 shifted
= LLVMBuildLShr(bld
->builder
, masked
,
340 lp_build_const_int_vec(type4
, -shift
*type
.width
), "");
345 res
= LLVMBuildOr(bld
->builder
, res
, shifted
, "");
349 return LLVMBuildBitCast(bld
->builder
, res
, lp_build_vec_type(type
), "");
355 * Extended swizzle of a single channel of a SoA vector.
357 * @param bld building context
358 * @param unswizzled array with the 4 unswizzled values
359 * @param swizzle one of the PIPE_SWIZZLE_*
361 * @return the swizzled value.
364 lp_build_swizzle_soa_channel(struct lp_build_context
*bld
,
365 const LLVMValueRef
*unswizzled
,
369 case PIPE_SWIZZLE_RED
:
370 case PIPE_SWIZZLE_GREEN
:
371 case PIPE_SWIZZLE_BLUE
:
372 case PIPE_SWIZZLE_ALPHA
:
373 return unswizzled
[swizzle
];
374 case PIPE_SWIZZLE_ZERO
:
376 case PIPE_SWIZZLE_ONE
:
386 * Extended swizzle of a SoA vector.
388 * @param bld building context
389 * @param unswizzled array with the 4 unswizzled values
390 * @param swizzles array of PIPE_SWIZZLE_*
391 * @param swizzled output swizzled values
394 lp_build_swizzle_soa(struct lp_build_context
*bld
,
395 const LLVMValueRef
*unswizzled
,
396 const unsigned char swizzles
[4],
397 LLVMValueRef
*swizzled
)
401 for (chan
= 0; chan
< 4; ++chan
) {
402 swizzled
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
,
409 * Do an extended swizzle of a SoA vector inplace.
411 * @param bld building context
412 * @param values input/output array with the 4 values
413 * @param swizzles array of PIPE_SWIZZLE_*
416 lp_build_swizzle_soa_inplace(struct lp_build_context
*bld
,
417 LLVMValueRef
*values
,
418 const unsigned char swizzles
[4])
420 LLVMValueRef unswizzled
[4];
423 for (chan
= 0; chan
< 4; ++chan
) {
424 unswizzled
[chan
] = values
[chan
];
427 lp_build_swizzle_soa(bld
, unswizzled
, swizzles
, values
);