1 /**************************************************************************
3 * Copyright 2009 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
30 * Helper functions for swizzling/shuffling.
32 * @author Jose Fonseca <jfonseca@vmware.com>
36 #include "util/u_debug.h"
38 #include "lp_bld_type.h"
39 #include "lp_bld_const.h"
40 #include "lp_bld_logic.h"
41 #include "lp_bld_swizzle.h"
/*
 * Build a vector of type vec_type in which every element holds `scalar`.
 *
 * The visible code takes the element count n from vec_type, starts from an
 * undefined vector of that type and inserts `scalar` at each index 0..n-1,
 * emitting one insertelement per element through `builder`.
 *
 * NOTE(review): the return type, the remaining parameters, the local
 * declarations and the return statement are not visible in this view;
 * presumably the accumulated `res` is returned -- confirm.
 */
45 lp_build_broadcast(LLVMBuilderRef builder
,
49 const unsigned n
= LLVMGetVectorSize(vec_type
);
/* Start from an undefined vector; every element is overwritten below. */
53 res
= LLVMGetUndef(vec_type
);
/* One insertelement per index, using a 32-bit integer constant index. */
54 for(i
= 0; i
< n
; ++i
) {
55 LLVMValueRef index
= LLVMConstInt(LLVMInt32Type(), i
, 0);
56 res
= LLVMBuildInsertElement(builder
, res
, scalar
, index
, "");
/*
 * Broadcast `scalar` to the vector type described by bld->type.
 *
 * Asserts that the LLVM type of `scalar` passes lp_check_elem_type() for
 * bld->type. A type of length 1 is special-cased. Otherwise the visible
 * code inserts `scalar` into element 0 of bld->undef and then applies a
 * shufflevector whose mask is lp_build_const_int_vec(type, 0).
 *
 * NOTE(review): the body of the length == 1 branch and the return
 * statement are not visible in this view; presumably the scalar is
 * returned as-is in that case -- confirm.
 * NOTE(review): the mask is presumably all zeros, so that every lane
 * selects element 0 -- confirm against lp_build_const_int_vec().
 */
67 lp_build_broadcast_scalar(struct lp_build_context
*bld
,
70 const struct lp_type type
= bld
->type
;
72 assert(lp_check_elem_type(type
, LLVMTypeOf(scalar
)));
74 if (type
.length
== 1) {
/* Place the scalar in element 0 of an undefined vector. */
79 res
= LLVMBuildInsertElement(bld
->builder
, bld
->undef
, scalar
,
80 LLVMConstInt(LLVMInt32Type(), 0, 0), "");
/* Shuffle that vector with the constant mask built from (type, 0). */
81 res
= LLVMBuildShuffleVector(bld
->builder
, res
, bld
->undef
,
82 lp_build_const_int_vec(type
, 0), "");
/*
 * Replicate one channel of an AoS vector `a` across each group of four
 * elements (the XYZW -> YYYY pattern drawn in the diagram further down).
 *
 * Visible behaviour:
 *  - `a` is compared against bld->undef, bld->zero and bld->one before any
 *    instruction is emitted.
 *  - Shuffle path: for every group base j (step 4) all four mask entries are
 *    set to j + channel, and a shufflevector of `a` with bld->undef is
 *    returned.
 *  - Bit mask path: `a` is ANDed with lp_build_const_mask_aos(type, cond),
 *    bitcast to the vector type of `type4`, then in two iterations a copy
 *    shifted by a multiple of type.width (amount taken from
 *    shifts[channel][i]) is ORed back in; the result is bitcast back to the
 *    vector type of `type`.
 *
 * NOTE(review): the condition that selects between the two paths, the
 * contents of the shifts table, how `cond` and `type4` are adjusted, the
 * guards choosing LShr versus Shl, and the code under
 * PIPE_ARCH_LITTLE_ENDIAN are not visible in this view. Presumably the
 * constant check returns `a` unchanged -- confirm.
 */
89 lp_build_broadcast_aos(struct lp_build_context
*bld
,
93 const struct lp_type type
= bld
->type
;
94 const unsigned n
= type
.length
;
97 if(a
== bld
->undef
|| a
== bld
->zero
|| a
== bld
->one
)
100 /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
101 * using shuffles here actually causes worst results. More investigation is
107 LLVMTypeRef elem_type
= LLVMInt32Type();
108 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
110 for(j
= 0; j
< n
; j
+= 4)
111 for(i
= 0; i
< 4; ++i
)
112 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ channel
, 0);
114 return LLVMBuildShuffleVector(bld
->builder
, a
, bld
->undef
, LLVMConstVector(shuffles
, n
), "");
118 * Bit mask and recursive shifts
120 * XYZW XYZW .... XYZW <= input
121 * 0Y00 0Y00 .... 0Y00
122 * YY00 YY00 .... YY00
123 * YYYY YYYY .... YYYY <= output
125 struct lp_type type4
= type
;
126 const char shifts
[4][2] = {
135 memset(cond
, 0, sizeof cond
);
138 a
= LLVMBuildAnd(bld
->builder
, a
, lp_build_const_mask_aos(type
, cond
), "");
143 a
= LLVMBuildBitCast(bld
->builder
, a
, lp_build_vec_type(type4
), "");
145 for(i
= 0; i
< 2; ++i
) {
146 LLVMValueRef tmp
= NULL
;
147 int shift
= shifts
[channel
][i
];
149 #ifdef PIPE_ARCH_LITTLE_ENDIAN
154 tmp
= LLVMBuildLShr(bld
->builder
, a
, lp_build_const_int_vec(type4
, shift
*type
.width
), "");
156 tmp
= LLVMBuildShl(bld
->builder
, a
, lp_build_const_int_vec(type4
, -shift
*type
.width
), "");
160 a
= LLVMBuildOr(bld
->builder
, a
, tmp
, "");
163 return LLVMBuildBitCast(bld
->builder
, a
, lp_build_vec_type(type
), "");
/*
 * Swizzle the channels of a single AoS vector `a`.
 *
 * swizzle[i] names the source channel for output channel i within each
 * group of four elements; the same pattern is applied to every group of
 * the bld->type.length elements.
 *
 * Visible behaviour:
 *  - `a` is compared against bld->undef, bld->zero and bld->one first.
 *  - When all four swizzle entries are equal the work is delegated to
 *    lp_build_broadcast_aos() with that channel.
 *  - Otherwise a shufflevector of `a` with bld->undef is returned, whose
 *    mask entry for element j + i is j + swizzle[i].
 *
 * NOTE(review): the action taken for the undef/zero/one check is not
 * visible in this view; presumably `a` is returned unchanged -- confirm.
 */
169 lp_build_swizzle1_aos(struct lp_build_context
*bld
,
171 const unsigned char swizzle
[4])
173 const unsigned n
= bld
->type
.length
;
176 if(a
== bld
->undef
|| a
== bld
->zero
|| a
== bld
->one
)
/* A swizzle that selects the same channel four times is a broadcast. */
179 if(swizzle
[0] == swizzle
[1] && swizzle
[1] == swizzle
[2] && swizzle
[2] == swizzle
[3])
180 return lp_build_broadcast_aos(bld
, a
, swizzle
[0]);
186 LLVMTypeRef elem_type
= LLVMInt32Type();
187 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
/* Build the mask group by group: j is the group base, i the output channel. */
189 for(j
= 0; j
< n
; j
+= 4)
190 for(i
= 0; i
< 4; ++i
)
191 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ swizzle
[i
], 0);
193 return LLVMBuildShuffleVector(bld
->builder
, a
, bld
->undef
, LLVMConstVector(shuffles
, n
), "");
/*
 * Swizzle channels drawn from two AoS vectors, `a` and `b`.
 *
 * As used by the visible code, swizzle[i] % 4 is the source channel and
 * swizzle[i] / 4 selects the source vector (0 for `a`, non-zero for `b`).
 *
 * Visible behaviour, in order:
 *  - If every swizzle entry is below 4, only `a` is referenced and the call
 *    is forwarded to lp_build_swizzle1_aos(bld, a, swizzle).
 *  - A second path reduces each entry modulo 4 into a local swizzle1[] and
 *    forwards to lp_build_swizzle1_aos(bld, a, swizzle1).
 *  - If swizzle[i] % 4 == i for all i, no channel changes position, so
 *    cond[i] = swizzle[i] / 4 is passed to lp_build_select_aos(bld, a, b,
 *    cond).
 *  - Otherwise a shufflevector of `a` and `b` is returned; the mask entry
 *    for element j + i is j + swizzle[i] % 4 + (swizzle[i] / 4) * n, i.e.
 *    indices from n upward address `b`.
 *
 * NOTE(review): the condition guarding the modulo-4 path and the
 * declaration of `cond` are not visible in this view -- confirm against
 * the complete file before relying on this description.
 */
199 lp_build_swizzle2_aos(struct lp_build_context
*bld
,
202 const unsigned char swizzle
[4])
204 const unsigned n
= bld
->type
.length
;
207 if(swizzle
[0] < 4 && swizzle
[1] < 4 && swizzle
[2] < 4 && swizzle
[3] < 4)
208 return lp_build_swizzle1_aos(bld
, a
, swizzle
);
211 unsigned char swizzle1
[4];
212 swizzle1
[0] = swizzle
[0] % 4;
213 swizzle1
[1] = swizzle
[1] % 4;
214 swizzle1
[2] = swizzle
[2] % 4;
215 swizzle1
[3] = swizzle
[3] % 4;
216 return lp_build_swizzle1_aos(bld
, a
, swizzle1
);
/* Channels stay in place: only the source vector varies per channel. */
219 if(swizzle
[0] % 4 == 0 &&
220 swizzle
[1] % 4 == 1 &&
221 swizzle
[2] % 4 == 2 &&
222 swizzle
[3] % 4 == 3) {
224 cond
[0] = swizzle
[0] / 4;
225 cond
[1] = swizzle
[1] / 4;
226 cond
[2] = swizzle
[2] / 4;
227 cond
[3] = swizzle
[3] / 4;
228 return lp_build_select_aos(bld
, a
, b
, cond
);
235 LLVMTypeRef elem_type
= LLVMInt32Type();
236 LLVMValueRef shuffles
[LP_MAX_VECTOR_LENGTH
];
/* General case: a two-operand shuffle; adding n moves an index into b. */
238 for(j
= 0; j
< n
; j
+= 4)
239 for(i
= 0; i
< 4; ++i
)
240 shuffles
[j
+ i
] = LLVMConstInt(elem_type
, j
+ (swizzle
[i
] % 4) + (swizzle
[i
] / 4 * n
), 0);
242 return LLVMBuildShuffleVector(bld
->builder
, a
, b
, LLVMConstVector(shuffles
, n
), "");
248 * Extended swizzle of a single channel of a SoA vector.
250 * @param bld building context
251 * @param unswizzled array with the 4 unswizzled values
252 * @param swizzle one of the PIPE_SWIZZLE_*
254 * @return the swizzled value.
/*
 * Resolve one PIPE_SWIZZLE_* selector against the four unswizzled SoA
 * channel values.
 *
 * For PIPE_SWIZZLE_RED, GREEN, BLUE and ALPHA the selector is used directly
 * as an index into `unswizzled`. PIPE_SWIZZLE_ZERO and PIPE_SWIZZLE_ONE
 * are handled as separate cases.
 *
 * NOTE(review): the `swizzle` parameter declaration, the switch header and
 * the bodies of the ZERO and ONE cases are not visible in this view;
 * presumably they yield the zero and one constants of the build
 * context -- confirm.
 */
257 lp_build_swizzle_soa_channel(struct lp_build_context
*bld
,
258 const LLVMValueRef
*unswizzled
,
262 case PIPE_SWIZZLE_RED
:
263 case PIPE_SWIZZLE_GREEN
:
264 case PIPE_SWIZZLE_BLUE
:
265 case PIPE_SWIZZLE_ALPHA
:
266 return unswizzled
[swizzle
];
267 case PIPE_SWIZZLE_ZERO
:
269 case PIPE_SWIZZLE_ONE
:
279 * Extended swizzle of a SoA vector.
281 * @param bld building context
282 * @param unswizzled array with the 4 unswizzled values
283 * @param swizzles array of PIPE_SWIZZLE_*
284 * @param swizzled output swizzled values
/*
 * Apply a four-entry extended swizzle to a SoA vector.
 *
 * Each of the four output slots swizzled[chan] is filled with the result of
 * lp_build_swizzle_soa_channel() on the same `unswizzled` array.
 *
 * NOTE(review): the last argument of that call is not visible in this
 * view; presumably it is swizzles[chan] -- confirm.
 */
287 lp_build_swizzle_soa(struct lp_build_context
*bld
,
288 const LLVMValueRef
*unswizzled
,
289 const unsigned char swizzles
[4],
290 LLVMValueRef
*swizzled
)
294 for (chan
= 0; chan
< 4; ++chan
) {
295 swizzled
[chan
] = lp_build_swizzle_soa_channel(bld
, unswizzled
,
302 * Do an extended swizzle of a SoA vector inplace.
304 * @param bld building context
305 * @param values input/output array with the 4 values
306 * @param swizzles array of PIPE_SWIZZLE_*
/*
 * Swizzle the four SoA channel values stored in `values`, writing the
 * results back into the same array.
 *
 * The inputs are first copied into a local array; lp_build_swizzle_soa()
 * then reads from that copy and writes to `values`, so its input and
 * output arrays are distinct.
 */
309 lp_build_swizzle_soa_inplace(struct lp_build_context
*bld
,
310 LLVMValueRef
*values
,
311 const unsigned char swizzles
[4])
313 LLVMValueRef unswizzled
[4];
/* Snapshot the inputs before `values` is overwritten. */
316 for (chan
= 0; chan
< 4; ++chan
) {
317 unswizzled
[chan
] = values
[chan
];
320 lp_build_swizzle_soa(bld
, unswizzled
, swizzles
, values
);