radeonsi: move code for shader resources into si_shader_llvm_resources.c
[mesa.git] / src/gallium/drivers/radeonsi/si_shader_llvm_resources.c
/*
 * Copyright 2020 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_shader_internal.h"
#include "si_pipe.h"
#include "sid.h"

/**
 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 * or an undefined value in the same interval otherwise.
 */
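/* Roughly, for a power-of-two bound such as num = 8 the clamp below lowers
 * to a single AND:
 *
 *    %clamped = and i32 %index, 7
 *
 * while a non-power-of-two bound such as num = 6 becomes a compare+select:
 *
 *    %cc = icmp ule i32 %index, 5
 *    %clamped = select i1 %cc, i32 %index, i32 5
 */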
static LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
                                        LLVMValueRef index,
                                        unsigned num)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
   LLVMValueRef cc;

   if (util_is_power_of_two_or_zero(num)) {
      index = LLVMBuildAnd(builder, index, c_max, "");
   } else {
      /* In theory, this min(index, c_max) pattern should result in code
       * that is as good as the bit-wise AND above.
       *
       * In practice, LLVM generates worse code (at the time of
       * writing), because its value tracking is not strong enough.
       */
      cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
      index = LLVMBuildSelect(builder, cc, index, c_max, "");
   }

   return index;
}

static LLVMValueRef load_const_buffer_desc_fast_path(struct si_shader_context *ctx)
{
   LLVMValueRef ptr =
      ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);
   struct si_shader_selector *sel = ctx->shader->selector;

   /* Do the bounds checking with a descriptor, because
    * doing computation and manual bounds checking of 64-bit
    * addresses generates horrible VALU code with very high
    * VGPR usage and very low SIMD occupancy.
    */
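   /* Build the 128-bit buffer descriptor (V#) by hand: dword 0 holds the
    * low 32 bits of the base address, dword 1 the high bits, dword 2 the
    * size in bytes, and dword 3 the swizzle/format word composed below.
    */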
   ptr = LLVMBuildPtrToInt(ctx->ac.builder, ptr, ctx->ac.intptr, "");

   LLVMValueRef desc0, desc1;
   desc0 = ptr;
   desc1 = LLVMConstInt(ctx->i32,
                        S_008F04_BASE_ADDRESS_HI(ctx->screen->info.address32_hi), 0);

   uint32_t rsrc3 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
                    S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                    S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
                    S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

   if (ctx->screen->info.chip_class >= GFX10)
      rsrc3 |= S_008F0C_FORMAT(V_008F0C_IMG_FORMAT_32_FLOAT) |
               S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) |
               S_008F0C_RESOURCE_LEVEL(1);
   else
      rsrc3 |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
               S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

   LLVMValueRef desc_elems[] = {
      desc0,
      desc1,
      LLVMConstInt(ctx->i32, sel->info.constbuf0_num_slots * 16, 0),
      LLVMConstInt(ctx->i32, rsrc3, false)
   };

   return ac_build_gather_values(&ctx->ac, desc_elems, 4);
}

static LLVMValueRef load_ubo(struct ac_shader_abi *abi, LLVMValueRef index)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   struct si_shader_selector *sel = ctx->shader->selector;

   LLVMValueRef ptr = ac_get_arg(&ctx->ac, ctx->const_and_shader_buffers);

   if (sel->info.const_buffers_declared == 1 &&
       sel->info.shader_buffers_declared == 0) {
      return load_const_buffer_desc_fast_path(ctx);
   }

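   /* Constant buffers follow the shader buffers in the combined
    * descriptor list, hence the SI_NUM_SHADER_BUFFERS offset after
    * clamping the index.
    */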
   index = si_llvm_bound_index(ctx, index, ctx->num_const_buffers);
   index = LLVMBuildAdd(ctx->ac.builder, index,
                        LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS, 0), "");

   return ac_build_load_to_sgpr(&ctx->ac, ptr, index);
}

static LLVMValueRef
load_ssbo(struct ac_shader_abi *abi, LLVMValueRef index, bool write)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMValueRef rsrc_ptr = ac_get_arg(&ctx->ac,
                                      ctx->const_and_shader_buffers);

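   /* Shader buffers are stored in reverse order at the start of the
    * combined list, so slot i lives at list entry
    * SI_NUM_SHADER_BUFFERS - 1 - i.
    */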
   index = si_llvm_bound_index(ctx, index, ctx->num_shader_buffers);
   index = LLVMBuildSub(ctx->ac.builder,
                        LLVMConstInt(ctx->i32, SI_NUM_SHADER_BUFFERS - 1, 0),
                        index, "");

   return ac_build_load_to_sgpr(&ctx->ac, rsrc_ptr, index);
}

/**
 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
 *
 * At least on Tonga, executing image stores on images with DCC enabled
 * can, in non-trivial cases, eventually lead to lockups. This can occur
 * when an application binds an image as read-only but then uses a shader
 * that writes to it. The OpenGL spec allows almost arbitrarily bad
 * behavior (including program termination) in this case, but it doesn't
 * cost much to be a bit nicer: disabling DCC in the shader still leads
 * to undefined results but avoids the lockup.
 */
static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
                                  LLVMValueRef rsrc)
{
   if (ctx->screen->info.chip_class <= GFX7) {
      return rsrc;
   } else {
      LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
      LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
      LLVMValueRef tmp;

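      /* Clear COMPRESSION_EN in dword 6 of the image descriptor. */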
      tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
      tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
      return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
   }
}

/* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
 * adjust "index" to point to FMASK. */
static LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
                                       LLVMValueRef list, LLVMValueRef index,
                                       enum ac_descriptor_type desc_type,
                                       bool uses_store, bool bindless)
{
   LLVMBuilderRef builder = ctx->ac.builder;
   LLVMValueRef rsrc;

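   /* A buffer view occupies dwords [4:7] of its 8-dword image slot, so
    * re-index the list as v4i32 elements: element 2 * index + 1.
    */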
   if (desc_type == AC_DESC_BUFFER) {
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
                            ctx->i32_1);
      list = LLVMBuildPointerCast(builder, list,
                                  ac_array_in_const32_addr_space(ctx->v4i32), "");
   } else {
      assert(desc_type == AC_DESC_IMAGE ||
             desc_type == AC_DESC_FMASK);
   }

   if (bindless)
      rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
   else
      rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);

   if (desc_type == AC_DESC_IMAGE && uses_store)
      rsrc = force_dcc_off(ctx, rsrc);
   return rsrc;
}

/**
 * Load an image view, fmask view, or sampler state descriptor.
 */
static LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
                                         LLVMValueRef list, LLVMValueRef index,
                                         enum ac_descriptor_type type)
{
   LLVMBuilderRef builder = ctx->ac.builder;

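   /* Each combined slot in the list is 16 dwords: the image descriptor
    * sits in dwords [0:7], the FMASK in [8:15], buffer views alias
    * dwords [4:7], and the sampler state aliases dwords [12:15].
    */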
   switch (type) {
   case AC_DESC_IMAGE:
      /* The image is at [0:7]. */
      index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
      break;
   case AC_DESC_BUFFER:
      /* The buffer is in [4:7]. */
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
                            ctx->i32_1);
      list = LLVMBuildPointerCast(builder, list,
                                  ac_array_in_const32_addr_space(ctx->v4i32), "");
      break;
   case AC_DESC_FMASK:
      /* The FMASK is at [8:15]. */
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
                            ctx->i32_1);
      break;
   case AC_DESC_SAMPLER:
      /* The sampler state is at [12:15]. */
      index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
                            LLVMConstInt(ctx->i32, 3, 0));
      list = LLVMBuildPointerCast(builder, list,
                                  ac_array_in_const32_addr_space(ctx->v4i32), "");
      break;
   case AC_DESC_PLANE_0:
   case AC_DESC_PLANE_1:
   case AC_DESC_PLANE_2:
      /* Only used for Vulkan's multiplane image support. Should never be
       * reached in radeonsi.
       */
      unreachable("Plane descriptor requested in radeonsi.");
   }

   return ac_build_load_to_sgpr(&ctx->ac, list, index);
}

static LLVMValueRef
si_nir_load_sampler_desc(struct ac_shader_abi *abi,
                         unsigned descriptor_set, unsigned base_index,
                         unsigned constant_index, LLVMValueRef dynamic_index,
                         enum ac_descriptor_type desc_type, bool image,
                         bool write, bool bindless)
{
   struct si_shader_context *ctx = si_shader_context_from_abi(abi);
   LLVMBuilderRef builder = ctx->ac.builder;
   unsigned const_index = base_index + constant_index;

   assert(!descriptor_set);
   assert(desc_type <= AC_DESC_BUFFER);

   if (bindless) {
      LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->bindless_samplers_and_images);

      /* dynamic_index is the bindless handle */
      if (image) {
         /* Bindless image descriptors use 16-dword slots. */
         dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
                                      LLVMConstInt(ctx->i64, 2, 0), "");
         /* FMASK is right after the image. */
         if (desc_type == AC_DESC_FMASK) {
            dynamic_index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
                                         ctx->i32_1, "");
         }

         return si_load_image_desc(ctx, list, dynamic_index, desc_type,
                                   write, true);
      }

      /* Since bindless handle arithmetic can contain an unsigned integer
       * wraparound and si_load_sampler_desc assumes there isn't any,
       * use GEP without "inbounds" (inside ac_build_pointer_add)
       * to prevent incorrect code generation and hangs.
       */
      dynamic_index = LLVMBuildMul(ctx->ac.builder, dynamic_index,
                                   LLVMConstInt(ctx->i64, 2, 0), "");
      list = ac_build_pointer_add(&ctx->ac, list, dynamic_index);
      return si_load_sampler_desc(ctx, list, ctx->i32_0, desc_type);
   }

   unsigned num_slots = image ? ctx->num_images : ctx->num_samplers;
   assert(const_index < num_slots || dynamic_index);

   LLVMValueRef list = ac_get_arg(&ctx->ac, ctx->samplers_and_images);
   LLVMValueRef index = LLVMConstInt(ctx->ac.i32, const_index, false);

   if (dynamic_index) {
      index = LLVMBuildAdd(builder, index, dynamic_index, "");

      /* From the GL_ARB_shader_image_load_store extension spec:
       *
       *    If a shader performs an image load, store, or atomic
       *    operation using an image variable declared as an array,
       *    and if the index used to select an individual element is
       *    negative or greater than or equal to the size of the
       *    array, the results of the operation are undefined but may
       *    not lead to termination.
       */
      index = si_llvm_bound_index(ctx, index, num_slots);
   }

   if (image) {
      /* FMASKs are separate from images. */
      if (desc_type == AC_DESC_FMASK) {
         index = LLVMBuildAdd(ctx->ac.builder, index,
                              LLVMConstInt(ctx->i32, SI_NUM_IMAGES, 0), "");
      }
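      /* Images are stored in reverse order in the descriptor list. */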
      index = LLVMBuildSub(ctx->ac.builder,
                           LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS - 1, 0),
                           index, "");
      return si_load_image_desc(ctx, list, index, desc_type, write, false);
   }

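   /* Samplers follow the images in the combined list. Image slots are
    * half the size of sampler+view slots, so SI_NUM_IMAGE_SLOTS image
    * slots take up SI_NUM_IMAGE_SLOTS / 2 sampler-sized entries.
    */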
   index = LLVMBuildAdd(ctx->ac.builder, index,
                        LLVMConstInt(ctx->i32, SI_NUM_IMAGE_SLOTS / 2, 0), "");
   return si_load_sampler_desc(ctx, list, index, desc_type);
}

void si_llvm_init_resource_callbacks(struct si_shader_context *ctx)
{
   ctx->abi.load_ubo = load_ubo;
   ctx->abi.load_ssbo = load_ssbo;
   ctx->abi.load_sampler_desc = si_nir_load_sampler_desc;
}