/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include <llvm/Config/llvm-config.h>

#include "si_shader_internal.h"
#include "si_pipe.h"
#include "sid.h"
#include "ac_llvm_util.h"

33 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
34 * or an undefined value in the same interval otherwise.
36 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
40 LLVMBuilderRef builder
= ctx
->ac
.builder
;
41 LLVMValueRef c_max
= LLVMConstInt(ctx
->i32
, num
- 1, 0);
44 if (util_is_power_of_two_or_zero(num
)) {
45 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
47 /* In theory, this MAX pattern should result in code that is
48 * as good as the bit-wise AND above.
50 * In practice, LLVM generates worse code (at the time of
51 * writing), because its value tracking is not strong enough.
53 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
54 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
61 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
63 * At least on Tonga, executing image stores on images with DCC enabled and
64 * non-trivial can eventually lead to lockups. This can occur when an
65 * application binds an image as read-only but then uses a shader that writes
66 * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
67 * program termination) in this case, but it doesn't cost much to be a bit
68 * nicer: disabling DCC in the shader still leads to undefined results but
71 static LLVMValueRef
force_dcc_off(struct si_shader_context
*ctx
,
74 if (ctx
->screen
->info
.chip_class
<= GFX7
) {
77 LLVMValueRef i32_6
= LLVMConstInt(ctx
->i32
, 6, 0);
78 LLVMValueRef i32_C
= LLVMConstInt(ctx
->i32
, C_008F28_COMPRESSION_EN
, 0);
81 tmp
= LLVMBuildExtractElement(ctx
->ac
.builder
, rsrc
, i32_6
, "");
82 tmp
= LLVMBuildAnd(ctx
->ac
.builder
, tmp
, i32_C
, "");
83 return LLVMBuildInsertElement(ctx
->ac
.builder
, rsrc
, tmp
, i32_6
, "");
87 /* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
88 * adjust "index" to point to FMASK. */
89 LLVMValueRef
si_load_image_desc(struct si_shader_context
*ctx
,
90 LLVMValueRef list
, LLVMValueRef index
,
91 enum ac_descriptor_type desc_type
,
92 bool uses_store
, bool bindless
)
94 LLVMBuilderRef builder
= ctx
->ac
.builder
;
97 if (desc_type
== AC_DESC_BUFFER
) {
98 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 2, 0),
100 list
= LLVMBuildPointerCast(builder
, list
,
101 ac_array_in_const32_addr_space(ctx
->v4i32
), "");
103 assert(desc_type
== AC_DESC_IMAGE
||
104 desc_type
== AC_DESC_FMASK
);
108 rsrc
= ac_build_load_to_sgpr_uint_wraparound(&ctx
->ac
, list
, index
);
110 rsrc
= ac_build_load_to_sgpr(&ctx
->ac
, list
, index
);
112 if (desc_type
== AC_DESC_IMAGE
&& uses_store
)
113 rsrc
= force_dcc_off(ctx
, rsrc
);
118 * Load an image view, fmask view. or sampler state descriptor.
120 LLVMValueRef
si_load_sampler_desc(struct si_shader_context
*ctx
,
121 LLVMValueRef list
, LLVMValueRef index
,
122 enum ac_descriptor_type type
)
124 LLVMBuilderRef builder
= ctx
->ac
.builder
;
128 /* The image is at [0:7]. */
129 index
= LLVMBuildMul(builder
, index
, LLVMConstInt(ctx
->i32
, 2, 0), "");
132 /* The buffer is in [4:7]. */
133 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 4, 0),
135 list
= LLVMBuildPointerCast(builder
, list
,
136 ac_array_in_const32_addr_space(ctx
->v4i32
), "");
139 /* The FMASK is at [8:15]. */
140 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 2, 0),
143 case AC_DESC_SAMPLER
:
144 /* The sampler state is at [12:15]. */
145 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 4, 0),
146 LLVMConstInt(ctx
->i32
, 3, 0));
147 list
= LLVMBuildPointerCast(builder
, list
,
148 ac_array_in_const32_addr_space(ctx
->v4i32
), "");
150 case AC_DESC_PLANE_0
:
151 case AC_DESC_PLANE_1
:
152 case AC_DESC_PLANE_2
:
153 /* Only used for the multiplane image support for Vulkan. Should
154 * never be reached in radeonsi.
156 unreachable("Plane descriptor requested in radeonsi.");
159 return ac_build_load_to_sgpr(&ctx
->ac
, list
, index
);
162 LLVMValueRef
si_nir_emit_fbfetch(struct ac_shader_abi
*abi
)
164 struct si_shader_context
*ctx
= si_shader_context_from_abi(abi
);
165 struct ac_image_args args
= {};
166 LLVMValueRef ptr
, image
, fmask
;
168 /* Ignore src0, because KHR_blend_func_extended disallows multiple render
172 /* Load the image descriptor. */
173 STATIC_ASSERT(SI_PS_IMAGE_COLORBUF0
% 2 == 0);
174 ptr
= ac_get_arg(&ctx
->ac
, ctx
->rw_buffers
);
175 ptr
= LLVMBuildPointerCast(ctx
->ac
.builder
, ptr
,
176 ac_array_in_const32_addr_space(ctx
->v8i32
), "");
177 image
= ac_build_load_to_sgpr(&ctx
->ac
, ptr
,
178 LLVMConstInt(ctx
->i32
, SI_PS_IMAGE_COLORBUF0
/ 2, 0));
182 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->pos_fixed_pt
, 0, 16);
184 if (!ctx
->shader
->key
.mono
.u
.ps
.fbfetch_is_1D
)
185 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->pos_fixed_pt
, 16, 16);
187 /* Get the current render target layer index. */
188 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
)
189 args
.coords
[chan
++] = si_unpack_param(ctx
, ctx
->args
.ancillary
, 16, 11);
191 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
)
192 args
.coords
[chan
++] = si_get_sample_id(ctx
);
194 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
&&
195 !(ctx
->screen
->debug_flags
& DBG(NO_FMASK
))) {
196 fmask
= ac_build_load_to_sgpr(&ctx
->ac
, ptr
,
197 LLVMConstInt(ctx
->i32
, SI_PS_IMAGE_COLORBUF0_FMASK
/ 2, 0));
199 ac_apply_fmask_to_sample(&ctx
->ac
, fmask
, args
.coords
,
200 ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
);
203 args
.opcode
= ac_image_load
;
204 args
.resource
= image
;
206 args
.attributes
= AC_FUNC_ATTR_READNONE
;
208 if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_msaa
)
209 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
?
210 ac_image_2darraymsaa
: ac_image_2dmsaa
;
211 else if (ctx
->shader
->key
.mono
.u
.ps
.fbfetch_is_1D
)
212 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
?
213 ac_image_1darray
: ac_image_1d
;
215 args
.dim
= ctx
->shader
->key
.mono
.u
.ps
.fbfetch_layered
?
216 ac_image_2darray
: ac_image_2d
;
218 return ac_build_image_opcode(&ctx
->ac
, &args
);