radeonsi: emit sample locations also when nr_samples == 1
[mesa.git] / src / gallium / auxiliary / gallivm / lp_bld_gather.c
1 /**************************************************************************
2 *
3 * Copyright 2010 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20 * USE OR OTHER DEALINGS IN THE SOFTWARE.
21 *
22 * The above copyright notice and this permission notice (including the
23 * next paragraph) shall be included in all copies or substantial portions
24 * of the Software.
25 *
26 **************************************************************************/
27
28
29 #include "util/u_debug.h"
30 #include "util/u_cpu_detect.h"
31 #include "lp_bld_debug.h"
32 #include "lp_bld_const.h"
33 #include "lp_bld_format.h"
34 #include "lp_bld_gather.h"
35 #include "lp_bld_swizzle.h"
36 #include "lp_bld_init.h"
37 #include "lp_bld_intr.h"
38
39
40 /**
41 * Get the pointer to one element from scatter positions in memory.
42 *
43 * @sa lp_build_gather()
44 */
45 LLVMValueRef
46 lp_build_gather_elem_ptr(struct gallivm_state *gallivm,
47 unsigned length,
48 LLVMValueRef base_ptr,
49 LLVMValueRef offsets,
50 unsigned i)
51 {
52 LLVMValueRef offset;
53 LLVMValueRef ptr;
54
55 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));
56
57 if (length == 1) {
58 assert(i == 0);
59 offset = offsets;
60 } else {
61 LLVMValueRef index = lp_build_const_int32(gallivm, i);
62 offset = LLVMBuildExtractElement(gallivm->builder, offsets, index, "");
63 }
64
65 ptr = LLVMBuildGEP(gallivm->builder, base_ptr, &offset, 1, "");
66
67 return ptr;
68 }
69
70
71 /**
72 * Gather one element from scatter positions in memory.
73 *
74 * @sa lp_build_gather()
75 */
76 LLVMValueRef
77 lp_build_gather_elem(struct gallivm_state *gallivm,
78 unsigned length,
79 unsigned src_width,
80 unsigned dst_width,
81 boolean aligned,
82 LLVMValueRef base_ptr,
83 LLVMValueRef offsets,
84 unsigned i,
85 boolean vector_justify)
86 {
87 LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
88 LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
89 LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
90 LLVMValueRef ptr;
91 LLVMValueRef res;
92
93 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));
94
95 ptr = lp_build_gather_elem_ptr(gallivm, length, base_ptr, offsets, i);
96 ptr = LLVMBuildBitCast(gallivm->builder, ptr, src_ptr_type, "");
97 res = LLVMBuildLoad(gallivm->builder, ptr, "");
98
99 /* XXX
100 * On some archs we probably really want to avoid having to deal
101 * with alignments lower than 4 bytes (if fetch size is a power of
102 * two >= 32). On x86 it doesn't matter, however.
103 * We should be able to guarantee full alignment for any kind of texture
104 * fetch (except ARB_texture_buffer_range, oops), but not vertex fetch
105 * (there's PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY and friends
106 * but I don't think that's quite what we wanted).
107 * For ARB_texture_buffer_range, PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT
108 * looks like a good fit, but it seems this cap bit (and OpenGL) aren't
109 * enforcing what we want (which is what d3d10 does, the offset needs to
110 * be aligned to element size, but GL has bytes regardless of element
111 * size which would only leave us with minimum alignment restriction of 16
112 * which doesn't make much sense if the type isn't 4x32bit). Due to
113 * translation of offsets to first_elem in sampler_views it actually seems
114 * gallium could not do anything else except 16 no matter what...
115 */
116 if (!aligned) {
117 LLVMSetAlignment(res, 1);
118 }
119
120 assert(src_width <= dst_width);
121 if (src_width > dst_width) {
122 res = LLVMBuildTrunc(gallivm->builder, res, dst_elem_type, "");
123 } else if (src_width < dst_width) {
124 res = LLVMBuildZExt(gallivm->builder, res, dst_elem_type, "");
125 if (vector_justify) {
126 #ifdef PIPE_ARCH_BIG_ENDIAN
127 res = LLVMBuildShl(gallivm->builder, res,
128 LLVMConstInt(dst_elem_type, dst_width - src_width, 0), "");
129 #endif
130 }
131 }
132
133 return res;
134 }
135
136
137 static LLVMValueRef
138 lp_build_gather_avx2(struct gallivm_state *gallivm,
139 unsigned length,
140 unsigned src_width,
141 unsigned dst_width,
142 LLVMValueRef base_ptr,
143 LLVMValueRef offsets)
144 {
145 LLVMBuilderRef builder = gallivm->builder;
146 LLVMTypeRef dst_type = LLVMIntTypeInContext(gallivm->context, dst_width);
147 LLVMTypeRef dst_vec_type = LLVMVectorType(dst_type, length);
148 LLVMTypeRef src_type = LLVMIntTypeInContext(gallivm->context, src_width);
149 LLVMTypeRef src_vec_type = LLVMVectorType(src_type, length);
150 LLVMValueRef res;
151
152 assert(LLVMTypeOf(base_ptr) == LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0));
153
154 if (0) {
155 /*
156 * XXX: This will cause LLVM pre 3.7 to hang; it works on LLVM 3.8 but
157 * will not use the AVX2 gather instrinsics. See
158 * http://lists.llvm.org/pipermail/llvm-dev/2016-January/094448.html
159 */
160 LLVMTypeRef i32_type = LLVMIntTypeInContext(gallivm->context, 32);
161 LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
162 LLVMTypeRef i1_type = LLVMIntTypeInContext(gallivm->context, 1);
163 LLVMTypeRef i1_vec_type = LLVMVectorType(i1_type, length);
164 LLVMTypeRef src_ptr_type = LLVMPointerType(src_type, 0);
165 LLVMValueRef src_ptr;
166
167 base_ptr = LLVMBuildBitCast(builder, base_ptr, src_ptr_type, "");
168
169 /* Rescale offsets from bytes to elements */
170 LLVMValueRef scale = LLVMConstInt(i32_type, src_width/8, 0);
171 scale = lp_build_broadcast(gallivm, i32_vec_type, scale);
172 assert(LLVMTypeOf(offsets) == i32_vec_type);
173 offsets = LLVMBuildSDiv(builder, offsets, scale, "");
174
175 src_ptr = LLVMBuildGEP(builder, base_ptr, &offsets, 1, "vector-gep");
176
177 char intrinsic[64];
178 util_snprintf(intrinsic, sizeof intrinsic, "llvm.masked.gather.v%ui%u", length, src_width);
179 LLVMValueRef alignment = LLVMConstInt(i32_type, src_width/8, 0);
180 LLVMValueRef mask = LLVMConstAllOnes(i1_vec_type);
181 LLVMValueRef passthru = LLVMGetUndef(src_vec_type);
182
183 LLVMValueRef args[] = { src_ptr, alignment, mask, passthru };
184
185 res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 4, 0);
186 } else {
187 assert(src_width == 32);
188
189 LLVMTypeRef i8_type = LLVMIntTypeInContext(gallivm->context, 8);
190
191 /*
192 * We should get the caller to give more type information so we can use
193 * the intrinsics for the right int/float domain. Int should be the most
194 * common.
195 */
196 const char *intrinsic = NULL;
197 switch (length) {
198 case 4:
199 intrinsic = "llvm.x86.avx2.gather.d.d";
200 break;
201 case 8:
202 intrinsic = "llvm.x86.avx2.gather.d.d.256";
203 break;
204 default:
205 assert(0);
206 }
207
208 LLVMValueRef passthru = LLVMGetUndef(src_vec_type);
209 LLVMValueRef mask = LLVMConstAllOnes(src_vec_type);
210 mask = LLVMConstBitCast(mask, src_vec_type);
211 LLVMValueRef scale = LLVMConstInt(i8_type, 1, 0);
212
213 LLVMValueRef args[] = { passthru, base_ptr, offsets, mask, scale };
214
215 res = lp_build_intrinsic(builder, intrinsic, src_vec_type, args, 5, 0);
216 }
217
218 if (src_width > dst_width) {
219 res = LLVMBuildTrunc(builder, res, dst_vec_type, "");
220 } else if (src_width < dst_width) {
221 res = LLVMBuildZExt(builder, res, dst_vec_type, "");
222 }
223
224 return res;
225 }
226
227
228 /**
229 * Gather elements from scatter positions in memory into a single vector.
230 * Use for fetching texels from a texture.
231 * For SSE, typical values are length=4, src_width=32, dst_width=32.
232 *
233 * When src_width < dst_width, the return value can be justified in
234 * one of two ways:
235 * "integer justification" is used when the caller treats the destination
236 * as a packed integer bitmask, as described by the channels' "shift" and
237 * "width" fields;
238 * "vector justification" is used when the caller casts the destination
239 * to a vector and needs channel X to be in vector element 0.
240 *
241 * @param length length of the offsets
242 * @param src_width src element width in bits
243 * @param dst_width result element width in bits (src will be expanded to fit)
244 * @param aligned whether the data is guaranteed to be aligned (to src_width)
245 * @param base_ptr base pointer, should be a i8 pointer type.
246 * @param offsets vector with offsets
247 * @param vector_justify select vector rather than integer justification
248 */
249 LLVMValueRef
250 lp_build_gather(struct gallivm_state *gallivm,
251 unsigned length,
252 unsigned src_width,
253 unsigned dst_width,
254 boolean aligned,
255 LLVMValueRef base_ptr,
256 LLVMValueRef offsets,
257 boolean vector_justify)
258 {
259 LLVMValueRef res;
260
261 if (length == 1) {
262 /* Scalar */
263 return lp_build_gather_elem(gallivm, length,
264 src_width, dst_width, aligned,
265 base_ptr, offsets, 0, vector_justify);
266 } else if (util_cpu_caps.has_avx2 && src_width == 32 && (length == 4 || length == 8)) {
267 return lp_build_gather_avx2(gallivm, length, src_width, dst_width, base_ptr, offsets);
268 } else {
269 /* Vector */
270
271 LLVMTypeRef dst_elem_type = LLVMIntTypeInContext(gallivm->context, dst_width);
272 LLVMTypeRef dst_vec_type = LLVMVectorType(dst_elem_type, length);
273 unsigned i;
274
275 res = LLVMGetUndef(dst_vec_type);
276 for (i = 0; i < length; ++i) {
277 LLVMValueRef index = lp_build_const_int32(gallivm, i);
278 LLVMValueRef elem;
279 elem = lp_build_gather_elem(gallivm, length,
280 src_width, dst_width, aligned,
281 base_ptr, offsets, i, vector_justify);
282 res = LLVMBuildInsertElement(gallivm->builder, res, elem, index, "");
283 }
284 }
285
286 return res;
287 }
288
289 LLVMValueRef
290 lp_build_gather_values(struct gallivm_state * gallivm,
291 LLVMValueRef * values,
292 unsigned value_count)
293 {
294 LLVMTypeRef vec_type = LLVMVectorType(LLVMTypeOf(values[0]), value_count);
295 LLVMBuilderRef builder = gallivm->builder;
296 LLVMValueRef vec = LLVMGetUndef(vec_type);
297 unsigned i;
298
299 for (i = 0; i < value_count; i++) {
300 LLVMValueRef index = lp_build_const_int32(gallivm, i);
301 vec = LLVMBuildInsertElement(builder, vec, values[i], index, "");
302 }
303 return vec;
304 }