/*
 * Copyright 2017 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
25 #include <llvm/Config/llvm-config.h>
27 #include "si_shader_internal.h"
30 #include "ac_llvm_util.h"
33 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
34 * or an undefined value in the same interval otherwise.
36 LLVMValueRef
si_llvm_bound_index(struct si_shader_context
*ctx
,
40 LLVMBuilderRef builder
= ctx
->ac
.builder
;
41 LLVMValueRef c_max
= LLVMConstInt(ctx
->i32
, num
- 1, 0);
44 if (util_is_power_of_two_or_zero(num
)) {
45 index
= LLVMBuildAnd(builder
, index
, c_max
, "");
47 /* In theory, this MAX pattern should result in code that is
48 * as good as the bit-wise AND above.
50 * In practice, LLVM generates worse code (at the time of
51 * writing), because its value tracking is not strong enough.
53 cc
= LLVMBuildICmp(builder
, LLVMIntULE
, index
, c_max
, "");
54 index
= LLVMBuildSelect(builder
, cc
, index
, c_max
, "");
61 * Given a 256-bit resource descriptor, force the DCC enable bit to off.
63 * At least on Tonga, executing image stores on images with DCC enabled and
64 * non-trivial can eventually lead to lockups. This can occur when an
65 * application binds an image as read-only but then uses a shader that writes
66 * to it. The OpenGL spec allows almost arbitrarily bad behavior (including
67 * program termination) in this case, but it doesn't cost much to be a bit
68 * nicer: disabling DCC in the shader still leads to undefined results but
71 static LLVMValueRef
force_dcc_off(struct si_shader_context
*ctx
,
74 if (ctx
->screen
->info
.chip_class
<= GFX7
) {
77 LLVMValueRef i32_6
= LLVMConstInt(ctx
->i32
, 6, 0);
78 LLVMValueRef i32_C
= LLVMConstInt(ctx
->i32
, C_008F28_COMPRESSION_EN
, 0);
81 tmp
= LLVMBuildExtractElement(ctx
->ac
.builder
, rsrc
, i32_6
, "");
82 tmp
= LLVMBuildAnd(ctx
->ac
.builder
, tmp
, i32_C
, "");
83 return LLVMBuildInsertElement(ctx
->ac
.builder
, rsrc
, tmp
, i32_6
, "");
87 /* AC_DESC_FMASK is handled exactly like AC_DESC_IMAGE. The caller should
88 * adjust "index" to point to FMASK. */
89 LLVMValueRef
si_load_image_desc(struct si_shader_context
*ctx
,
90 LLVMValueRef list
, LLVMValueRef index
,
91 enum ac_descriptor_type desc_type
,
92 bool uses_store
, bool bindless
)
94 LLVMBuilderRef builder
= ctx
->ac
.builder
;
97 if (desc_type
== AC_DESC_BUFFER
) {
98 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 2, 0),
100 list
= LLVMBuildPointerCast(builder
, list
,
101 ac_array_in_const32_addr_space(ctx
->v4i32
), "");
103 assert(desc_type
== AC_DESC_IMAGE
||
104 desc_type
== AC_DESC_FMASK
);
108 rsrc
= ac_build_load_to_sgpr_uint_wraparound(&ctx
->ac
, list
, index
);
110 rsrc
= ac_build_load_to_sgpr(&ctx
->ac
, list
, index
);
112 if (desc_type
== AC_DESC_IMAGE
&& uses_store
)
113 rsrc
= force_dcc_off(ctx
, rsrc
);
118 * Load an image view, fmask view. or sampler state descriptor.
120 LLVMValueRef
si_load_sampler_desc(struct si_shader_context
*ctx
,
121 LLVMValueRef list
, LLVMValueRef index
,
122 enum ac_descriptor_type type
)
124 LLVMBuilderRef builder
= ctx
->ac
.builder
;
128 /* The image is at [0:7]. */
129 index
= LLVMBuildMul(builder
, index
, LLVMConstInt(ctx
->i32
, 2, 0), "");
132 /* The buffer is in [4:7]. */
133 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 4, 0),
135 list
= LLVMBuildPointerCast(builder
, list
,
136 ac_array_in_const32_addr_space(ctx
->v4i32
), "");
139 /* The FMASK is at [8:15]. */
140 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 2, 0),
143 case AC_DESC_SAMPLER
:
144 /* The sampler state is at [12:15]. */
145 index
= ac_build_imad(&ctx
->ac
, index
, LLVMConstInt(ctx
->i32
, 4, 0),
146 LLVMConstInt(ctx
->i32
, 3, 0));
147 list
= LLVMBuildPointerCast(builder
, list
,
148 ac_array_in_const32_addr_space(ctx
->v4i32
), "");
150 case AC_DESC_PLANE_0
:
151 case AC_DESC_PLANE_1
:
152 case AC_DESC_PLANE_2
:
153 /* Only used for the multiplane image support for Vulkan. Should
154 * never be reached in radeonsi.
156 unreachable("Plane descriptor requested in radeonsi.");
159 return ac_build_load_to_sgpr(&ctx
->ac
, list
, index
);
163 * Load a dword from a constant buffer.
165 LLVMValueRef
si_buffer_load_const(struct si_shader_context
*ctx
,
166 LLVMValueRef resource
, LLVMValueRef offset
)
168 return ac_build_buffer_load(&ctx
->ac
, resource
, 1, NULL
, offset
, NULL
,
172 void si_llvm_build_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
)
174 if (LLVMGetTypeKind(LLVMTypeOf(ret
)) == LLVMVoidTypeKind
)
175 LLVMBuildRetVoid(ctx
->ac
.builder
);
177 LLVMBuildRet(ctx
->ac
.builder
, ret
);
180 LLVMValueRef
si_insert_input_ret(struct si_shader_context
*ctx
, LLVMValueRef ret
,
181 struct ac_arg param
, unsigned return_index
)
183 return LLVMBuildInsertValue(ctx
->ac
.builder
, ret
,
184 ac_get_arg(&ctx
->ac
, param
),
188 LLVMValueRef
si_insert_input_ret_float(struct si_shader_context
*ctx
, LLVMValueRef ret
,
189 struct ac_arg param
, unsigned return_index
)
191 LLVMBuilderRef builder
= ctx
->ac
.builder
;
192 LLVMValueRef p
= ac_get_arg(&ctx
->ac
, param
);
194 return LLVMBuildInsertValue(builder
, ret
,
195 ac_to_float(&ctx
->ac
, p
),
199 LLVMValueRef
si_insert_input_ptr(struct si_shader_context
*ctx
, LLVMValueRef ret
,
200 struct ac_arg param
, unsigned return_index
)
202 LLVMBuilderRef builder
= ctx
->ac
.builder
;
203 LLVMValueRef ptr
= ac_get_arg(&ctx
->ac
, param
);
204 ptr
= LLVMBuildPtrToInt(builder
, ptr
, ctx
->i32
, "");
205 return LLVMBuildInsertValue(builder
, ret
, ptr
, return_index
, "");
208 LLVMValueRef
si_prolog_get_rw_buffers(struct si_shader_context
*ctx
)
210 LLVMValueRef ptr
[2], list
;
211 bool merged_shader
= si_is_merged_shader(ctx
);
213 ptr
[0] = LLVMGetParam(ctx
->main_fn
, (merged_shader
? 8 : 0) + SI_SGPR_RW_BUFFERS
);
214 list
= LLVMBuildIntToPtr(ctx
->ac
.builder
, ptr
[0],
215 ac_array_in_const32_addr_space(ctx
->v4i32
), "");