1 /**************************************************************************
3 * Copyright 2010 VMware, Inc.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
26 **************************************************************************/
29 #include "util/u_debug.h"
30 #include "util/u_cpu_detect.h"
31 #include "lp_bld_debug.h"
32 #include "lp_bld_const.h"
33 #include "lp_bld_format.h"
34 #include "lp_bld_gather.h"
35 #include "lp_bld_swizzle.h"
36 #include "lp_bld_init.h"
37 #include "lp_bld_intr.h"
41 * Get the pointer to one element from scatter positions in memory.
43 * @sa lp_build_gather()
/*
 * lp_build_gather_elem_ptr: compute the address of one element for a gather.
 *
 * NOTE(review): this copy of the function is extraction-damaged.  The number
 * at the start of each line is residue, tokens are split across lines, and
 * the return type, part of the parameter list, the local declarations, the
 * braces, the branch condition and the return statement are not visible.
 * The comments below describe only the statements that are visible.
 */
46 lp_build_gather_elem_ptr(struct gallivm_state
*gallivm
,
48 LLVMValueRef base_ptr
,
/* Debug check: base_ptr must have the type "pointer to i8, address space 0". */
55 assert(LLVMTypeOf(base_ptr
) == LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0));
/*
 * Pull element i out of the offsets vector.
 * NOTE(review): the condition guarding this path is not visible; presumably
 * a scalar-offset case is handled separately -- confirm against the full file.
 */
61 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
62 offset
= LLVMBuildExtractElement(gallivm
->builder
, offsets
, index
, "");
/* Index base_ptr by the single offset value to form the element address. */
65 ptr
= LLVMBuildGEP(gallivm
->builder
, base_ptr
, &offset
, 1, "");
72 * Gather one element from scatter positions in memory.
74 * @sa lp_build_gather()
/*
 * lp_build_gather_elem: load one src_width-bit integer from
 * base_ptr + offset and convert it to a dst_width-bit integer.
 *
 * NOTE(review): this copy of the function is extraction-damaged.  The number
 * at the start of each line is residue, and the return type, several
 * parameters, the local declarations, braces, some conditions and the return
 * statement are not visible.  Comments describe the visible statements only.
 */
77 lp_build_gather_elem(struct gallivm_state
*gallivm
,
82 LLVMValueRef base_ptr
,
85 boolean vector_justify
)
/* Integer types for the value as stored (src) and as returned (dst). */
87 LLVMTypeRef src_type
= LLVMIntTypeInContext(gallivm
->context
, src_width
);
88 LLVMTypeRef src_ptr_type
= LLVMPointerType(src_type
, 0);
89 LLVMTypeRef dst_elem_type
= LLVMIntTypeInContext(gallivm
->context
, dst_width
);
/* Debug check: base_ptr must have the type "pointer to i8, address space 0". */
93 assert(LLVMTypeOf(base_ptr
) == LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0));
/* Address of element i, reinterpreted as a pointer to src_type, then loaded. */
95 ptr
= lp_build_gather_elem_ptr(gallivm
, length
, base_ptr
, offsets
, i
);
96 ptr
= LLVMBuildBitCast(gallivm
->builder
, ptr
, src_ptr_type
, "");
97 res
= LLVMBuildLoad(gallivm
->builder
, ptr
, "");
/*
 * NOTE(review): the next lines appear to be the body of a block comment
 * about alignment whose opening and closing delimiters were lost.
 */
100 * On some archs we probably really want to avoid having to deal
101 * with alignments lower than 4 bytes (if fetch size is a power of
102 * two >= 32). On x86 it doesn't matter, however.
103 * We should be able to guarantee full alignment for any kind of texture
104 * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
105 * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
106 * but I don't think that's quite what we wanted).
107 * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
108 * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
109 * enforcing what we want (which is what d3d10 does, the offset needs to
110 * be aligned to element size, but GL has bytes regardless of element
111 * size which would only leave us with minimum alignment restriction of 16
112 * which doesn't make much sense if the type isn't 4x32bit). Due to
113 * translation of offsets to first_elem in sampler_views it actually seems
114 * gallium could not do anything else except 16 no matter what...
/*
 * Mark the load as 1-byte aligned.
 * NOTE(review): the guard for this call is not visible; presumably it
 * depends on the `aligned` argument -- confirm against the full file.
 */
117 LLVMSetAlignment(res
, 1);
/*
 * Widen to the destination width.  The assert requires
 * src_width <= dst_width, so when asserts are enabled the Trunc branch
 * below cannot be taken.
 */
120 assert(src_width
<= dst_width
);
121 if (src_width
> dst_width
) {
122 res
= LLVMBuildTrunc(gallivm
->builder
, res
, dst_elem_type
, "");
123 } else if (src_width
< dst_width
) {
124 res
= LLVMBuildZExt(gallivm
->builder
, res
, dst_elem_type
, "");
/*
 * With vector justification on a big-endian build, shift the zero-extended
 * value left by (dst_width - src_width) bits.
 */
125 if (vector_justify
) {
126 #ifdef PIPE_ARCH_BIG_ENDIAN
127 res
= LLVMBuildShl(gallivm
->builder
, res
,
128 LLVMConstInt(dst_elem_type
, dst_width
- src_width
, 0), "");
/*
 * lp_build_gather_avx2: gather `length` elements of src_width bits with a
 * single intrinsic call and convert the result to dst_width-bit elements.
 *
 * NOTE(review): this copy of the function is extraction-damaged.  The number
 * at the start of each line is residue, and the return type, several
 * parameters, braces, the condition that chooses between the two intrinsic
 * paths, the selector for the intrinsic name and the return statement are
 * not visible.  Comments describe the visible statements only.
 */
138 lp_build_gather_avx2(struct gallivm_state
*gallivm
,
142 LLVMValueRef base_ptr
,
143 LLVMValueRef offsets
)
/* Scalar and vector integer types for the destination and source widths. */
145 LLVMBuilderRef builder
= gallivm
->builder
;
146 LLVMTypeRef dst_type
= LLVMIntTypeInContext(gallivm
->context
, dst_width
);
147 LLVMTypeRef dst_vec_type
= LLVMVectorType(dst_type
, length
);
148 LLVMTypeRef src_type
= LLVMIntTypeInContext(gallivm
->context
, src_width
);
149 LLVMTypeRef src_vec_type
= LLVMVectorType(src_type
, length
);
/* Debug check: base_ptr must have the type "pointer to i8, address space 0". */
152 assert(LLVMTypeOf(base_ptr
) == LLVMPointerType(LLVMInt8TypeInContext(gallivm
->context
), 0));
/*
 * First path: generic "llvm.masked.gather" intrinsic.
 * NOTE(review): the condition selecting this path is not visible.  The
 * three lines below appear to be comment text whose delimiters were lost.
 */
156 * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
157 * will not use the AVX2 gather instrinsics. See
158 * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
160 LLVMTypeRef i32_type
= LLVMIntTypeInContext(gallivm
->context
, 32);
161 LLVMTypeRef i32_vec_type
= LLVMVectorType(i32_type
, length
);
162 LLVMTypeRef i1_type
= LLVMIntTypeInContext(gallivm
->context
, 1);
163 LLVMTypeRef i1_vec_type
= LLVMVectorType(i1_type
, length
);
164 LLVMTypeRef src_ptr_type
= LLVMPointerType(src_type
, 0);
165 LLVMValueRef src_ptr
;
/* Treat the base pointer as a pointer to src_type elements. */
167 base_ptr
= LLVMBuildBitCast(builder
, base_ptr
, src_ptr_type
, "");
/*
 * The signed division by src_width/8 below converts offsets to element
 * units for the typed GEP; offsets must already be a vector of i32.
 */
169 /* Rescale offsets from bytes to elements */
170 LLVMValueRef scale
= LLVMConstInt(i32_type
, src_width
/8, 0);
171 scale
= lp_build_broadcast(gallivm
, i32_vec_type
, scale
);
172 assert(LLVMTypeOf(offsets
) == i32_vec_type
);
173 offsets
= LLVMBuildSDiv(builder
, offsets
, scale
, "");
/* GEP with a vector index. */
175 src_ptr
= LLVMBuildGEP(builder
, base_ptr
, &offsets
, 1, "vector-gep");
/*
 * The intrinsic name encodes the vector length and element width.  The call
 * passes an alignment of src_width/8, an all-ones i1 mask and an undef
 * pass-through vector.
 */
178 util_snprintf(intrinsic
, sizeof intrinsic
, "llvm.masked.gather.v%ui%u", length
, src_width
);
179 LLVMValueRef alignment
= LLVMConstInt(i32_type
, src_width
/8, 0);
180 LLVMValueRef mask
= LLVMConstAllOnes(i1_vec_type
);
181 LLVMValueRef passthru
= LLVMGetUndef(src_vec_type
);
183 LLVMValueRef args
[] = { src_ptr
, alignment
, mask
, passthru
};
185 res
= lp_build_intrinsic(builder
, intrinsic
, src_vec_type
, args
, 4, 0);
/* Second path: x86 AVX2 gather intrinsics; only 32-bit sources are accepted. */
187 assert(src_width
== 32);
189 LLVMTypeRef i8_type
= LLVMIntTypeInContext(gallivm
->context
, 8);
192 * We should get the caller to give more type information so we can use
193 * the intrinsics for the right int/float domain. Int should be the most
/*
 * NOTE(review): the selector between the two names below is not visible;
 * presumably it is `length` (4 vs 8) -- confirm against the full file.
 */
196 const char *intrinsic
= NULL
;
199 intrinsic
= "llvm.x86.avx2.gather.d.d";
202 intrinsic
= "llvm.x86.avx2.gather.d.d.256";
/*
 * Operands: undef pass-through, an all-ones mask of src_vec_type and an i8
 * scale of 1.  The LLVMConstBitCast casts the mask to the type it already
 * has.
 */
208 LLVMValueRef passthru
= LLVMGetUndef(src_vec_type
);
209 LLVMValueRef mask
= LLVMConstAllOnes(src_vec_type
);
210 mask
= LLVMConstBitCast(mask
, src_vec_type
);
211 LLVMValueRef scale
= LLVMConstInt(i8_type
, 1, 0);
213 LLVMValueRef args
[] = { passthru
, base_ptr
, offsets
, mask
, scale
};
215 res
= lp_build_intrinsic(builder
, intrinsic
, src_vec_type
, args
, 5, 0);
/* Convert the gathered vector to the destination element width. */
218 if (src_width
> dst_width
) {
219 res
= LLVMBuildTrunc(builder
, res
, dst_vec_type
, "");
220 } else if (src_width
< dst_width
) {
221 res
= LLVMBuildZExt(builder
, res
, dst_vec_type
, "");
229 * Gather elements from scatter positions in memory into a single vector.
230 * Use for fetching texels from a texture.
231 * For SSE, typical values are length=4, src_width=32, dst_width=32.
233 * When src_width < dst_width, the return value can be justified in
234 * one of two ways:
235 * "integer justification" is used when the caller treats the destination
236 * as a packed integer bitmask, as described by the channels' "shift" and
237 * "width" fields;
238 * "vector justification" is used when the caller casts the destination
239 * to a vector and needs channel X to be in vector element 0.
241 * @param length length of the offsets
242 * @param src_width src element width in bits
243 * @param dst_width result element width in bits (src will be expanded to fit)
244 * @param aligned whether the data is guaranteed to be aligned (to src_width)
245 * @param base_ptr base pointer, should be a i8 pointer type.
246 * @param offsets vector with offsets
247 * @param vector_justify select vector rather than integer justification
/*
 * lp_build_gather: entry point that picks a gather strategy and returns the
 * gathered value(s).
 *
 * NOTE(review): this copy of the function is extraction-damaged.  The number
 * at the start of each line is residue, and the return type, several
 * parameters, the local declarations, the first branch condition, braces and
 * the final return are not visible.  Comments describe the visible statements
 * only.
 */
250 lp_build_gather(struct gallivm_state
*gallivm
,
255 LLVMValueRef base_ptr
,
256 LLVMValueRef offsets
,
257 boolean vector_justify
)
/*
 * Single-element path: delegate directly to lp_build_gather_elem with
 * element index 0.
 * NOTE(review): the guarding condition is not visible; presumably it is the
 * scalar (length == 1) case -- confirm against the full file.
 */
263 return lp_build_gather_elem(gallivm
, length
,
264 src_width
, dst_width
, aligned
,
265 base_ptr
, offsets
, 0, vector_justify
);
/*
 * AVX2 path: taken only when the CPU reports AVX2, the source elements are
 * 32 bits wide and the vector has 4 or 8 lanes.  `aligned` and
 * `vector_justify` are not passed to this path.
 */
266 } else if (util_cpu_caps
.has_avx2
&& src_width
== 32 && (length
== 4 || length
== 8)) {
267 return lp_build_gather_avx2(gallivm
, length
, src_width
, dst_width
, base_ptr
, offsets
);
/*
 * Generic path: start from an undef vector of `length` dst_width-bit
 * integers and fill it lane by lane with individually gathered elements.
 */
271 LLVMTypeRef dst_elem_type
= LLVMIntTypeInContext(gallivm
->context
, dst_width
);
272 LLVMTypeRef dst_vec_type
= LLVMVectorType(dst_elem_type
, length
);
275 res
= LLVMGetUndef(dst_vec_type
);
276 for (i
= 0; i
< length
; ++i
) {
277 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
279 elem
= lp_build_gather_elem(gallivm
, length
,
280 src_width
, dst_width
, aligned
,
281 base_ptr
, offsets
, i
, vector_justify
);
282 res
= LLVMBuildInsertElement(gallivm
->builder
, res
, elem
, index
, "");
/*
 * lp_build_gather_values: pack an array of scalar LLVM values into one
 * vector value.
 *
 * NOTE(review): this copy of the function is extraction-damaged.  The number
 * at the start of each line is residue, and the return type, the declaration
 * of `i`, braces and the return statement are not visible.  Comments describe
 * the visible statements only.
 */
290 lp_build_gather_values(struct gallivm_state
* gallivm
,
291 LLVMValueRef
* values
,
292 unsigned value_count
)
/*
 * The element type is taken from values[0], which is read unconditionally,
 * so the visible code needs at least one value in the array.
 */
294 LLVMTypeRef vec_type
= LLVMVectorType(LLVMTypeOf(values
[0]), value_count
);
295 LLVMBuilderRef builder
= gallivm
->builder
;
296 LLVMValueRef vec
= LLVMGetUndef(vec_type
);
/* Insert values[i] into lane i, starting from an undef vector. */
299 for (i
= 0; i
< value_count
; i
++) {
300 LLVMValueRef index
= lp_build_const_int32(gallivm
, i
);
301 vec
= LLVMBuildInsertElement(builder
, vec
, values
[i
], index
, "");