radeonsi: check ctx->sdma_cs before using it
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_llvm_build.c
1 /*
2 * Copyright 2017 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include <llvm/Config/llvm-config.h>
26
27 #include "si_shader_internal.h"
28 #include "si_pipe.h"
29 #include "sid.h"
30 #include "ac_llvm_util.h"
31
32 /**
33 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
34 * or an undefined value in the same interval otherwise.
35 */
36 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
37 LLVMValueRef index,
38 unsigned num)
39 {
40 LLVMBuilderRef builder = ctx->ac.builder;
41 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
42 LLVMValueRef cc;
43
44 if (util_is_power_of_two_or_zero(num)) {
45 index = LLVMBuildAnd(builder, index, c_max, "");
46 } else {
47 /* In theory, this MAX pattern should result in code that is
48 * as good as the bit-wise AND above.
49 *
50 * In practice, LLVM generates worse code (at the time of
51 * writing), because its value tracking is not strong enough.
52 */
53 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
54 index = LLVMBuildSelect(builder, cc, index, c_max, "");
55 }
56
57 return index;
58 }
59
60 /**
61 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
62 *
63 * At least on Tonga, executing image stores on images with DCC enabled and
64 * non-trivial can eventually lead to lockups. This can occur when an
65 * application binds an image as read-only but then uses a shader that writes
66 * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
67 * program termination) in this case, but it doesn't cost much to be a bit
68 * nicer: disabling DCC in the shader still leads to undefined results but
69 * avoids the lockup.
70 */
71 static LLVMValueRef force_dcc_off(struct si_shader_context *ctx,
72 LLVMValueRef rsrc)
73 {
74 if (ctx->screen->info.chip_class <= GFX7) {
75 return rsrc;
76 } else {
77 LLVMValueRef i32_6 = LLVMConstInt(ctx->i32, 6, 0);
78 LLVMValueRef i32_C = LLVMConstInt(ctx->i32, C_008F28_COMPRESSION_EN, 0);
79 LLVMValueRef tmp;
80
81 tmp = LLVMBuildExtractElement(ctx->ac.builder, rsrc, i32_6, "");
82 tmp = LLVMBuildAnd(ctx->ac.builder, tmp, i32_C, "");
83 return LLVMBuildInsertElement(ctx->ac.builder, rsrc, tmp, i32_6, "");
84 }
85 }
86
87 /* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
88 * adjust "index" to point to FMASK. */
89 LLVMValueRef si_load_image_desc(struct si_shader_context *ctx,
90 LLVMValueRef list, LLVMValueRef index,
91 enum ac_descriptor_type desc_type,
92 bool uses_store, bool bindless)
93 {
94 LLVMBuilderRef builder = ctx->ac.builder;
95 LLVMValueRef rsrc;
96
97 if (desc_type == AC_DESC_BUFFER) {
98 index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
99 ctx->i32_1);
100 list = LLVMBuildPointerCast(builder, list,
101 ac_array_in_const32_addr_space(ctx->v4i32), "");
102 } else {
103 assert(desc_type == AC_DESC_IMAGE ||
104 desc_type == AC_DESC_FMASK);
105 }
106
107 if (bindless)
108 rsrc = ac_build_load_to_sgpr_uint_wraparound(&ctx->ac, list, index);
109 else
110 rsrc = ac_build_load_to_sgpr(&ctx->ac, list, index);
111
112 if (desc_type == AC_DESC_IMAGE && uses_store)
113 rsrc = force_dcc_off(ctx, rsrc);
114 return rsrc;
115 }
116
117 /**
118 * Load an image view, fmask view. or sampler state descriptor.
119 */
120 LLVMValueRef si_load_sampler_desc(struct si_shader_context *ctx,
121 LLVMValueRef list, LLVMValueRef index,
122 enum ac_descriptor_type type)
123 {
124 LLVMBuilderRef builder = ctx->ac.builder;
125
126 switch (type) {
127 case AC_DESC_IMAGE:
128 /* The image is at [0:7]. */
129 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, 2, 0), "");
130 break;
131 case AC_DESC_BUFFER:
132 /* The buffer is in [4:7]. */
133 index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
134 ctx->i32_1);
135 list = LLVMBuildPointerCast(builder, list,
136 ac_array_in_const32_addr_space(ctx->v4i32), "");
137 break;
138 case AC_DESC_FMASK:
139 /* The FMASK is at [8:15]. */
140 index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 2, 0),
141 ctx->i32_1);
142 break;
143 case AC_DESC_SAMPLER:
144 /* The sampler state is at [12:15]. */
145 index = ac_build_imad(&ctx->ac, index, LLVMConstInt(ctx->i32, 4, 0),
146 LLVMConstInt(ctx->i32, 3, 0));
147 list = LLVMBuildPointerCast(builder, list,
148 ac_array_in_const32_addr_space(ctx->v4i32), "");
149 break;
150 case AC_DESC_PLANE_0:
151 case AC_DESC_PLANE_1:
152 case AC_DESC_PLANE_2:
153 /* Only used for the multiplane image support for Vulkan. Should
154 * never be reached in radeonsi.
155 */
156 unreachable("Plane descriptor requested in radeonsi.");
157 }
158
159 return ac_build_load_to_sgpr(&ctx->ac, list, index);
160 }
161
162 LLVMValueRef si_nir_emit_fbfetch(struct ac_shader_abi *abi)
163 {
164 struct si_shader_context *ctx = si_shader_context_from_abi(abi);
165 struct ac_image_args args = {};
166 LLVMValueRef ptr, image, fmask;
167
168 /* Ignore src0, because KHR_blend_func_extended disallows multiple render
169 * targets.
170 */
171
172 /* Load the image descriptor. */
173 STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0 % 2 == 0);
174 ptr = ac_get_arg(&ctx->ac, ctx->rw_buffers);
175 ptr = LLVMBuildPointerCast(ctx->ac.builder, ptr,
176 ac_array_in_const32_addr_space(ctx->v8i32), "");
177 image = ac_build_load_to_sgpr(&ctx->ac, ptr,
178 LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0 / 2, 0));
179
180 unsigned chan = 0;
181
182 args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 0, 16);
183
184 if (!ctx->shader->key.mono.u.ps.fbfetch_is_1D)
185 args.coords[chan++] = si_unpack_param(ctx, ctx->pos_fixed_pt, 16, 16);
186
187 /* Get the current render target layer index. */
188 if (ctx->shader->key.mono.u.ps.fbfetch_layered)
189 args.coords[chan++] = si_unpack_param(ctx, ctx->args.ancillary, 16, 11);
190
191 if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
192 args.coords[chan++] = si_get_sample_id(ctx);
193
194 if (ctx->shader->key.mono.u.ps.fbfetch_msaa &&
195 !(ctx->screen->debug_flags & DBG(NO_FMASK))) {
196 fmask = ac_build_load_to_sgpr(&ctx->ac, ptr,
197 LLVMConstInt(ctx->i32, SI_PS_IMAGE_COLORBUF0_FMASK / 2, 0));
198
199 ac_apply_fmask_to_sample(&ctx->ac, fmask, args.coords,
200 ctx->shader->key.mono.u.ps.fbfetch_layered);
201 }
202
203 args.opcode = ac_image_load;
204 args.resource = image;
205 args.dmask = 0xf;
206 args.attributes = AC_FUNC_ATTR_READNONE;
207
208 if (ctx->shader->key.mono.u.ps.fbfetch_msaa)
209 args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
210 ac_image_2darraymsaa : ac_image_2dmsaa;
211 else if (ctx->shader->key.mono.u.ps.fbfetch_is_1D)
212 args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
213 ac_image_1darray : ac_image_1d;
214 else
215 args.dim = ctx->shader->key.mono.u.ps.fbfetch_layered ?
216 ac_image_2darray : ac_image_2d;
217
218 return ac_build_image_opcode(&ctx->ac, &args);
219 }