/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_build.h"
26 #include "ac_llvm_util.h"
27 #include "ac_binary.h"
30 #include "util/bitscan.h"
31 #include "ac_shader_abi.h"
32 #include "ac_shader_util.h"
34 struct ac_nir_context
{
35 struct ac_llvm_context ac
;
36 struct ac_shader_abi
*abi
;
38 gl_shader_stage stage
;
40 struct hash_table
*defs
;
41 struct hash_table
*phis
;
42 struct hash_table
*vars
;
44 LLVMValueRef main_function
;
45 LLVMBasicBlockRef continue_block
;
46 LLVMBasicBlockRef break_block
;
52 static LLVMValueRef
get_sampler_desc(struct ac_nir_context
*ctx
,
53 const nir_deref_var
*deref
,
54 enum ac_descriptor_type desc_type
,
55 const nir_tex_instr
*instr
,
56 bool image
, bool write
);
59 build_store_values_extended(struct ac_llvm_context
*ac
,
62 unsigned value_stride
,
65 LLVMBuilderRef builder
= ac
->builder
;
68 for (i
= 0; i
< value_count
; i
++) {
69 LLVMValueRef ptr
= values
[i
* value_stride
];
70 LLVMValueRef index
= LLVMConstInt(ac
->i32
, i
, false);
71 LLVMValueRef value
= LLVMBuildExtractElement(builder
, vec
, index
, "");
72 LLVMBuildStore(builder
, value
, ptr
);
76 static LLVMTypeRef
get_def_type(struct ac_nir_context
*ctx
,
77 const nir_ssa_def
*def
)
79 LLVMTypeRef type
= LLVMIntTypeInContext(ctx
->ac
.context
, def
->bit_size
);
80 if (def
->num_components
> 1) {
81 type
= LLVMVectorType(type
, def
->num_components
);
86 static LLVMValueRef
get_src(struct ac_nir_context
*nir
, nir_src src
)
89 struct hash_entry
*entry
= _mesa_hash_table_search(nir
->defs
, src
.ssa
);
90 return (LLVMValueRef
)entry
->data
;
94 get_memory_ptr(struct ac_nir_context
*ctx
, nir_src src
)
96 LLVMValueRef ptr
= get_src(ctx
, src
);
97 ptr
= LLVMBuildGEP(ctx
->ac
.builder
, ctx
->ac
.lds
, &ptr
, 1, "");
98 int addr_space
= LLVMGetPointerAddressSpace(LLVMTypeOf(ptr
));
100 return LLVMBuildBitCast(ctx
->ac
.builder
, ptr
,
101 LLVMPointerType(ctx
->ac
.i32
, addr_space
), "");
104 static LLVMBasicBlockRef
get_block(struct ac_nir_context
*nir
,
105 const struct nir_block
*b
)
107 struct hash_entry
*entry
= _mesa_hash_table_search(nir
->defs
, b
);
108 return (LLVMBasicBlockRef
)entry
->data
;
111 static LLVMValueRef
get_alu_src(struct ac_nir_context
*ctx
,
113 unsigned num_components
)
115 LLVMValueRef value
= get_src(ctx
, src
.src
);
116 bool need_swizzle
= false;
119 unsigned src_components
= ac_get_llvm_num_components(value
);
120 for (unsigned i
= 0; i
< num_components
; ++i
) {
121 assert(src
.swizzle
[i
] < src_components
);
122 if (src
.swizzle
[i
] != i
)
126 if (need_swizzle
|| num_components
!= src_components
) {
127 LLVMValueRef masks
[] = {
128 LLVMConstInt(ctx
->ac
.i32
, src
.swizzle
[0], false),
129 LLVMConstInt(ctx
->ac
.i32
, src
.swizzle
[1], false),
130 LLVMConstInt(ctx
->ac
.i32
, src
.swizzle
[2], false),
131 LLVMConstInt(ctx
->ac
.i32
, src
.swizzle
[3], false)};
133 if (src_components
> 1 && num_components
== 1) {
134 value
= LLVMBuildExtractElement(ctx
->ac
.builder
, value
,
136 } else if (src_components
== 1 && num_components
> 1) {
137 LLVMValueRef values
[] = {value
, value
, value
, value
};
138 value
= ac_build_gather_values(&ctx
->ac
, values
, num_components
);
140 LLVMValueRef swizzle
= LLVMConstVector(masks
, num_components
);
141 value
= LLVMBuildShuffleVector(ctx
->ac
.builder
, value
, value
,
150 static LLVMValueRef
emit_int_cmp(struct ac_llvm_context
*ctx
,
151 LLVMIntPredicate pred
, LLVMValueRef src0
,
154 LLVMValueRef result
= LLVMBuildICmp(ctx
->builder
, pred
, src0
, src1
, "");
155 return LLVMBuildSelect(ctx
->builder
, result
,
156 LLVMConstInt(ctx
->i32
, 0xFFFFFFFF, false),
160 static LLVMValueRef
emit_float_cmp(struct ac_llvm_context
*ctx
,
161 LLVMRealPredicate pred
, LLVMValueRef src0
,
165 src0
= ac_to_float(ctx
, src0
);
166 src1
= ac_to_float(ctx
, src1
);
167 result
= LLVMBuildFCmp(ctx
->builder
, pred
, src0
, src1
, "");
168 return LLVMBuildSelect(ctx
->builder
, result
,
169 LLVMConstInt(ctx
->i32
, 0xFFFFFFFF, false),
173 static LLVMValueRef
emit_intrin_1f_param(struct ac_llvm_context
*ctx
,
175 LLVMTypeRef result_type
,
179 LLVMValueRef params
[] = {
180 ac_to_float(ctx
, src0
),
183 MAYBE_UNUSED
const int length
= snprintf(name
, sizeof(name
), "%s.f%d", intrin
,
184 ac_get_elem_bits(ctx
, result_type
));
185 assert(length
< sizeof(name
));
186 return ac_build_intrinsic(ctx
, name
, result_type
, params
, 1, AC_FUNC_ATTR_READNONE
);
189 static LLVMValueRef
emit_intrin_2f_param(struct ac_llvm_context
*ctx
,
191 LLVMTypeRef result_type
,
192 LLVMValueRef src0
, LLVMValueRef src1
)
195 LLVMValueRef params
[] = {
196 ac_to_float(ctx
, src0
),
197 ac_to_float(ctx
, src1
),
200 MAYBE_UNUSED
const int length
= snprintf(name
, sizeof(name
), "%s.f%d", intrin
,
201 ac_get_elem_bits(ctx
, result_type
));
202 assert(length
< sizeof(name
));
203 return ac_build_intrinsic(ctx
, name
, result_type
, params
, 2, AC_FUNC_ATTR_READNONE
);
206 static LLVMValueRef
emit_intrin_3f_param(struct ac_llvm_context
*ctx
,
208 LLVMTypeRef result_type
,
209 LLVMValueRef src0
, LLVMValueRef src1
, LLVMValueRef src2
)
212 LLVMValueRef params
[] = {
213 ac_to_float(ctx
, src0
),
214 ac_to_float(ctx
, src1
),
215 ac_to_float(ctx
, src2
),
218 MAYBE_UNUSED
const int length
= snprintf(name
, sizeof(name
), "%s.f%d", intrin
,
219 ac_get_elem_bits(ctx
, result_type
));
220 assert(length
< sizeof(name
));
221 return ac_build_intrinsic(ctx
, name
, result_type
, params
, 3, AC_FUNC_ATTR_READNONE
);
224 static LLVMValueRef
emit_bcsel(struct ac_llvm_context
*ctx
,
225 LLVMValueRef src0
, LLVMValueRef src1
, LLVMValueRef src2
)
227 LLVMValueRef v
= LLVMBuildICmp(ctx
->builder
, LLVMIntNE
, src0
,
229 return LLVMBuildSelect(ctx
->builder
, v
, ac_to_integer(ctx
, src1
),
230 ac_to_integer(ctx
, src2
), "");
233 static LLVMValueRef
emit_minmax_int(struct ac_llvm_context
*ctx
,
234 LLVMIntPredicate pred
,
235 LLVMValueRef src0
, LLVMValueRef src1
)
237 return LLVMBuildSelect(ctx
->builder
,
238 LLVMBuildICmp(ctx
->builder
, pred
, src0
, src1
, ""),
243 static LLVMValueRef
emit_iabs(struct ac_llvm_context
*ctx
,
246 return emit_minmax_int(ctx
, LLVMIntSGT
, src0
,
247 LLVMBuildNeg(ctx
->builder
, src0
, ""));
250 static LLVMValueRef
emit_uint_carry(struct ac_llvm_context
*ctx
,
252 LLVMValueRef src0
, LLVMValueRef src1
)
254 LLVMTypeRef ret_type
;
255 LLVMTypeRef types
[] = { ctx
->i32
, ctx
->i1
};
257 LLVMValueRef params
[] = { src0
, src1
};
258 ret_type
= LLVMStructTypeInContext(ctx
->context
, types
,
261 res
= ac_build_intrinsic(ctx
, intrin
, ret_type
,
262 params
, 2, AC_FUNC_ATTR_READNONE
);
264 res
= LLVMBuildExtractValue(ctx
->builder
, res
, 1, "");
265 res
= LLVMBuildZExt(ctx
->builder
, res
, ctx
->i32
, "");
269 static LLVMValueRef
emit_b2f(struct ac_llvm_context
*ctx
,
272 return LLVMBuildAnd(ctx
->builder
, src0
, LLVMBuildBitCast(ctx
->builder
, LLVMConstReal(ctx
->f32
, 1.0), ctx
->i32
, ""), "");
275 static LLVMValueRef
emit_f2b(struct ac_llvm_context
*ctx
,
278 src0
= ac_to_float(ctx
, src0
);
279 LLVMValueRef zero
= LLVMConstNull(LLVMTypeOf(src0
));
280 return LLVMBuildSExt(ctx
->builder
,
281 LLVMBuildFCmp(ctx
->builder
, LLVMRealUNE
, src0
, zero
, ""),
285 static LLVMValueRef
emit_b2i(struct ac_llvm_context
*ctx
,
289 LLVMValueRef result
= LLVMBuildAnd(ctx
->builder
, src0
, ctx
->i32_1
, "");
294 return LLVMBuildZExt(ctx
->builder
, result
, ctx
->i64
, "");
297 static LLVMValueRef
emit_i2b(struct ac_llvm_context
*ctx
,
300 LLVMValueRef zero
= LLVMConstNull(LLVMTypeOf(src0
));
301 return LLVMBuildSExt(ctx
->builder
,
302 LLVMBuildICmp(ctx
->builder
, LLVMIntNE
, src0
, zero
, ""),
306 static LLVMValueRef
emit_f2f16(struct ac_llvm_context
*ctx
,
310 LLVMValueRef cond
= NULL
;
312 src0
= ac_to_float(ctx
, src0
);
313 result
= LLVMBuildFPTrunc(ctx
->builder
, src0
, ctx
->f16
, "");
315 if (ctx
->chip_class
>= VI
) {
316 LLVMValueRef args
[2];
317 /* Check if the result is a denormal - and flush to 0 if so. */
319 args
[1] = LLVMConstInt(ctx
->i32
, N_SUBNORMAL
| P_SUBNORMAL
, false);
320 cond
= ac_build_intrinsic(ctx
, "llvm.amdgcn.class.f16", ctx
->i1
, args
, 2, AC_FUNC_ATTR_READNONE
);
323 /* need to convert back up to f32 */
324 result
= LLVMBuildFPExt(ctx
->builder
, result
, ctx
->f32
, "");
326 if (ctx
->chip_class
>= VI
)
327 result
= LLVMBuildSelect(ctx
->builder
, cond
, ctx
->f32_0
, result
, "");
330 /* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
331 * so compare the result and flush to 0 if it's smaller.
333 LLVMValueRef temp
, cond2
;
334 temp
= emit_intrin_1f_param(ctx
, "llvm.fabs", ctx
->f32
, result
);
335 cond
= LLVMBuildFCmp(ctx
->builder
, LLVMRealUGT
,
336 LLVMBuildBitCast(ctx
->builder
, LLVMConstInt(ctx
->i32
, 0x38800000, false), ctx
->f32
, ""),
338 cond2
= LLVMBuildFCmp(ctx
->builder
, LLVMRealUNE
,
339 temp
, ctx
->f32_0
, "");
340 cond
= LLVMBuildAnd(ctx
->builder
, cond
, cond2
, "");
341 result
= LLVMBuildSelect(ctx
->builder
, cond
, ctx
->f32_0
, result
, "");
346 static LLVMValueRef
emit_umul_high(struct ac_llvm_context
*ctx
,
347 LLVMValueRef src0
, LLVMValueRef src1
)
349 LLVMValueRef dst64
, result
;
350 src0
= LLVMBuildZExt(ctx
->builder
, src0
, ctx
->i64
, "");
351 src1
= LLVMBuildZExt(ctx
->builder
, src1
, ctx
->i64
, "");
353 dst64
= LLVMBuildMul(ctx
->builder
, src0
, src1
, "");
354 dst64
= LLVMBuildLShr(ctx
->builder
, dst64
, LLVMConstInt(ctx
->i64
, 32, false), "");
355 result
= LLVMBuildTrunc(ctx
->builder
, dst64
, ctx
->i32
, "");
359 static LLVMValueRef
emit_imul_high(struct ac_llvm_context
*ctx
,
360 LLVMValueRef src0
, LLVMValueRef src1
)
362 LLVMValueRef dst64
, result
;
363 src0
= LLVMBuildSExt(ctx
->builder
, src0
, ctx
->i64
, "");
364 src1
= LLVMBuildSExt(ctx
->builder
, src1
, ctx
->i64
, "");
366 dst64
= LLVMBuildMul(ctx
->builder
, src0
, src1
, "");
367 dst64
= LLVMBuildAShr(ctx
->builder
, dst64
, LLVMConstInt(ctx
->i64
, 32, false), "");
368 result
= LLVMBuildTrunc(ctx
->builder
, dst64
, ctx
->i32
, "");
372 static LLVMValueRef
emit_bitfield_extract(struct ac_llvm_context
*ctx
,
374 const LLVMValueRef srcs
[3])
377 LLVMValueRef icond
= LLVMBuildICmp(ctx
->builder
, LLVMIntEQ
, srcs
[2], LLVMConstInt(ctx
->i32
, 32, false), "");
379 result
= ac_build_bfe(ctx
, srcs
[0], srcs
[1], srcs
[2], is_signed
);
380 result
= LLVMBuildSelect(ctx
->builder
, icond
, srcs
[0], result
, "");
384 static LLVMValueRef
emit_bitfield_insert(struct ac_llvm_context
*ctx
,
385 LLVMValueRef src0
, LLVMValueRef src1
,
386 LLVMValueRef src2
, LLVMValueRef src3
)
388 LLVMValueRef bfi_args
[3], result
;
390 bfi_args
[0] = LLVMBuildShl(ctx
->builder
,
391 LLVMBuildSub(ctx
->builder
,
392 LLVMBuildShl(ctx
->builder
,
397 bfi_args
[1] = LLVMBuildShl(ctx
->builder
, src1
, src2
, "");
400 LLVMValueRef icond
= LLVMBuildICmp(ctx
->builder
, LLVMIntEQ
, src3
, LLVMConstInt(ctx
->i32
, 32, false), "");
403 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
404 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
406 result
= LLVMBuildXor(ctx
->builder
, bfi_args
[2],
407 LLVMBuildAnd(ctx
->builder
, bfi_args
[0],
408 LLVMBuildXor(ctx
->builder
, bfi_args
[1], bfi_args
[2], ""), ""), "");
410 result
= LLVMBuildSelect(ctx
->builder
, icond
, src1
, result
, "");
414 static LLVMValueRef
emit_pack_half_2x16(struct ac_llvm_context
*ctx
,
417 LLVMValueRef comp
[2];
419 src0
= ac_to_float(ctx
, src0
);
420 comp
[0] = LLVMBuildExtractElement(ctx
->builder
, src0
, ctx
->i32_0
, "");
421 comp
[1] = LLVMBuildExtractElement(ctx
->builder
, src0
, ctx
->i32_1
, "");
423 return ac_build_cvt_pkrtz_f16(ctx
, comp
);
426 static LLVMValueRef
emit_unpack_half_2x16(struct ac_llvm_context
*ctx
,
429 LLVMValueRef const16
= LLVMConstInt(ctx
->i32
, 16, false);
430 LLVMValueRef temps
[2], result
, val
;
433 for (i
= 0; i
< 2; i
++) {
434 val
= i
== 1 ? LLVMBuildLShr(ctx
->builder
, src0
, const16
, "") : src0
;
435 val
= LLVMBuildTrunc(ctx
->builder
, val
, ctx
->i16
, "");
436 val
= LLVMBuildBitCast(ctx
->builder
, val
, ctx
->f16
, "");
437 temps
[i
] = LLVMBuildFPExt(ctx
->builder
, val
, ctx
->f32
, "");
440 result
= LLVMBuildInsertElement(ctx
->builder
, LLVMGetUndef(ctx
->v2f32
), temps
[0],
442 result
= LLVMBuildInsertElement(ctx
->builder
, result
, temps
[1],
447 static LLVMValueRef
emit_ddxy(struct ac_nir_context
*ctx
,
455 if (op
== nir_op_fddx_fine
)
456 mask
= AC_TID_MASK_LEFT
;
457 else if (op
== nir_op_fddy_fine
)
458 mask
= AC_TID_MASK_TOP
;
460 mask
= AC_TID_MASK_TOP_LEFT
;
462 /* for DDX we want to next X pixel, DDY next Y pixel. */
463 if (op
== nir_op_fddx_fine
||
464 op
== nir_op_fddx_coarse
||
470 result
= ac_build_ddxy(&ctx
->ac
, mask
, idx
, src0
);
475 * this takes an I,J coordinate pair,
476 * and works out the X and Y derivatives.
477 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
479 static LLVMValueRef
emit_ddxy_interp(
480 struct ac_nir_context
*ctx
,
481 LLVMValueRef interp_ij
)
483 LLVMValueRef result
[4], a
;
486 for (i
= 0; i
< 2; i
++) {
487 a
= LLVMBuildExtractElement(ctx
->ac
.builder
, interp_ij
,
488 LLVMConstInt(ctx
->ac
.i32
, i
, false), "");
489 result
[i
] = emit_ddxy(ctx
, nir_op_fddx
, a
);
490 result
[2+i
] = emit_ddxy(ctx
, nir_op_fddy
, a
);
492 return ac_build_gather_values(&ctx
->ac
, result
, 4);
495 static void visit_alu(struct ac_nir_context
*ctx
, const nir_alu_instr
*instr
)
497 LLVMValueRef src
[4], result
= NULL
;
498 unsigned num_components
= instr
->dest
.dest
.ssa
.num_components
;
499 unsigned src_components
;
500 LLVMTypeRef def_type
= get_def_type(ctx
, &instr
->dest
.dest
.ssa
);
502 assert(nir_op_infos
[instr
->op
].num_inputs
<= ARRAY_SIZE(src
));
509 case nir_op_pack_half_2x16
:
512 case nir_op_unpack_half_2x16
:
515 case nir_op_cube_face_coord
:
516 case nir_op_cube_face_index
:
520 src_components
= num_components
;
523 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++)
524 src
[i
] = get_alu_src(ctx
, instr
->src
[i
], src_components
);
532 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
533 result
= LLVMBuildFNeg(ctx
->ac
.builder
, src
[0], "");
536 result
= LLVMBuildNeg(ctx
->ac
.builder
, src
[0], "");
539 result
= LLVMBuildNot(ctx
->ac
.builder
, src
[0], "");
542 result
= LLVMBuildAdd(ctx
->ac
.builder
, src
[0], src
[1], "");
545 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
546 src
[1] = ac_to_float(&ctx
->ac
, src
[1]);
547 result
= LLVMBuildFAdd(ctx
->ac
.builder
, src
[0], src
[1], "");
550 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
551 src
[1] = ac_to_float(&ctx
->ac
, src
[1]);
552 result
= LLVMBuildFSub(ctx
->ac
.builder
, src
[0], src
[1], "");
555 result
= LLVMBuildSub(ctx
->ac
.builder
, src
[0], src
[1], "");
558 result
= LLVMBuildMul(ctx
->ac
.builder
, src
[0], src
[1], "");
561 result
= LLVMBuildSRem(ctx
->ac
.builder
, src
[0], src
[1], "");
564 result
= LLVMBuildURem(ctx
->ac
.builder
, src
[0], src
[1], "");
567 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
568 src
[1] = ac_to_float(&ctx
->ac
, src
[1]);
569 result
= ac_build_fdiv(&ctx
->ac
, src
[0], src
[1]);
570 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.floor",
571 ac_to_float_type(&ctx
->ac
, def_type
), result
);
572 result
= LLVMBuildFMul(ctx
->ac
.builder
, src
[1] , result
, "");
573 result
= LLVMBuildFSub(ctx
->ac
.builder
, src
[0], result
, "");
576 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
577 src
[1] = ac_to_float(&ctx
->ac
, src
[1]);
578 result
= LLVMBuildFRem(ctx
->ac
.builder
, src
[0], src
[1], "");
581 result
= LLVMBuildSRem(ctx
->ac
.builder
, src
[0], src
[1], "");
584 result
= LLVMBuildSDiv(ctx
->ac
.builder
, src
[0], src
[1], "");
587 result
= LLVMBuildUDiv(ctx
->ac
.builder
, src
[0], src
[1], "");
590 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
591 src
[1] = ac_to_float(&ctx
->ac
, src
[1]);
592 result
= LLVMBuildFMul(ctx
->ac
.builder
, src
[0], src
[1], "");
595 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
596 result
= ac_build_fdiv(&ctx
->ac
, instr
->dest
.dest
.ssa
.bit_size
== 32 ? ctx
->ac
.f32_1
: ctx
->ac
.f64_1
,
600 result
= LLVMBuildAnd(ctx
->ac
.builder
, src
[0], src
[1], "");
603 result
= LLVMBuildOr(ctx
->ac
.builder
, src
[0], src
[1], "");
606 result
= LLVMBuildXor(ctx
->ac
.builder
, src
[0], src
[1], "");
609 result
= LLVMBuildShl(ctx
->ac
.builder
, src
[0],
610 LLVMBuildZExt(ctx
->ac
.builder
, src
[1],
611 LLVMTypeOf(src
[0]), ""),
615 result
= LLVMBuildAShr(ctx
->ac
.builder
, src
[0],
616 LLVMBuildZExt(ctx
->ac
.builder
, src
[1],
617 LLVMTypeOf(src
[0]), ""),
621 result
= LLVMBuildLShr(ctx
->ac
.builder
, src
[0],
622 LLVMBuildZExt(ctx
->ac
.builder
, src
[1],
623 LLVMTypeOf(src
[0]), ""),
627 result
= emit_int_cmp(&ctx
->ac
, LLVMIntSLT
, src
[0], src
[1]);
630 result
= emit_int_cmp(&ctx
->ac
, LLVMIntNE
, src
[0], src
[1]);
633 result
= emit_int_cmp(&ctx
->ac
, LLVMIntEQ
, src
[0], src
[1]);
636 result
= emit_int_cmp(&ctx
->ac
, LLVMIntSGE
, src
[0], src
[1]);
639 result
= emit_int_cmp(&ctx
->ac
, LLVMIntULT
, src
[0], src
[1]);
642 result
= emit_int_cmp(&ctx
->ac
, LLVMIntUGE
, src
[0], src
[1]);
645 result
= emit_float_cmp(&ctx
->ac
, LLVMRealOEQ
, src
[0], src
[1]);
648 result
= emit_float_cmp(&ctx
->ac
, LLVMRealUNE
, src
[0], src
[1]);
651 result
= emit_float_cmp(&ctx
->ac
, LLVMRealOLT
, src
[0], src
[1]);
654 result
= emit_float_cmp(&ctx
->ac
, LLVMRealOGE
, src
[0], src
[1]);
657 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.fabs",
658 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
661 result
= emit_iabs(&ctx
->ac
, src
[0]);
664 result
= emit_minmax_int(&ctx
->ac
, LLVMIntSGT
, src
[0], src
[1]);
667 result
= emit_minmax_int(&ctx
->ac
, LLVMIntSLT
, src
[0], src
[1]);
670 result
= emit_minmax_int(&ctx
->ac
, LLVMIntUGT
, src
[0], src
[1]);
673 result
= emit_minmax_int(&ctx
->ac
, LLVMIntULT
, src
[0], src
[1]);
676 result
= ac_build_isign(&ctx
->ac
, src
[0],
677 instr
->dest
.dest
.ssa
.bit_size
);
680 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
681 result
= ac_build_fsign(&ctx
->ac
, src
[0],
682 instr
->dest
.dest
.ssa
.bit_size
);
685 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.floor",
686 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
689 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.trunc",
690 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
693 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.ceil",
694 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
696 case nir_op_fround_even
:
697 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.rint",
698 ac_to_float_type(&ctx
->ac
, def_type
),src
[0]);
701 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
702 result
= ac_build_fract(&ctx
->ac
, src
[0],
703 instr
->dest
.dest
.ssa
.bit_size
);
706 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.sin",
707 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
710 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.cos",
711 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
714 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.sqrt",
715 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
718 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.exp2",
719 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
722 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.log2",
723 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
726 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.sqrt",
727 ac_to_float_type(&ctx
->ac
, def_type
), src
[0]);
728 result
= ac_build_fdiv(&ctx
->ac
, instr
->dest
.dest
.ssa
.bit_size
== 32 ? ctx
->ac
.f32_1
: ctx
->ac
.f64_1
,
731 case nir_op_frexp_exp
:
732 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
733 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.frexp.exp.i32.f64",
734 ctx
->ac
.i32
, src
, 1, AC_FUNC_ATTR_READNONE
);
737 case nir_op_frexp_sig
:
738 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
739 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.frexp.mant.f64",
740 ctx
->ac
.f64
, src
, 1, AC_FUNC_ATTR_READNONE
);
743 result
= emit_intrin_2f_param(&ctx
->ac
, "llvm.maxnum",
744 ac_to_float_type(&ctx
->ac
, def_type
), src
[0], src
[1]);
745 if (ctx
->ac
.chip_class
< GFX9
&&
746 instr
->dest
.dest
.ssa
.bit_size
== 32) {
747 /* Only pre-GFX9 chips do not flush denorms. */
748 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.canonicalize",
749 ac_to_float_type(&ctx
->ac
, def_type
),
754 result
= emit_intrin_2f_param(&ctx
->ac
, "llvm.minnum",
755 ac_to_float_type(&ctx
->ac
, def_type
), src
[0], src
[1]);
756 if (ctx
->ac
.chip_class
< GFX9
&&
757 instr
->dest
.dest
.ssa
.bit_size
== 32) {
758 /* Only pre-GFX9 chips do not flush denorms. */
759 result
= emit_intrin_1f_param(&ctx
->ac
, "llvm.canonicalize",
760 ac_to_float_type(&ctx
->ac
, def_type
),
765 result
= emit_intrin_3f_param(&ctx
->ac
, "llvm.fmuladd",
766 ac_to_float_type(&ctx
->ac
, def_type
), src
[0], src
[1], src
[2]);
769 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
770 if (ac_get_elem_bits(&ctx
->ac
, LLVMTypeOf(src
[0])) == 32)
771 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.ldexp.f32", ctx
->ac
.f32
, src
, 2, AC_FUNC_ATTR_READNONE
);
773 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.ldexp.f64", ctx
->ac
.f64
, src
, 2, AC_FUNC_ATTR_READNONE
);
775 case nir_op_ibitfield_extract
:
776 result
= emit_bitfield_extract(&ctx
->ac
, true, src
);
778 case nir_op_ubitfield_extract
:
779 result
= emit_bitfield_extract(&ctx
->ac
, false, src
);
781 case nir_op_bitfield_insert
:
782 result
= emit_bitfield_insert(&ctx
->ac
, src
[0], src
[1], src
[2], src
[3]);
784 case nir_op_bitfield_reverse
:
785 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.bitreverse.i32", ctx
->ac
.i32
, src
, 1, AC_FUNC_ATTR_READNONE
);
787 case nir_op_bit_count
:
788 if (ac_get_elem_bits(&ctx
->ac
, LLVMTypeOf(src
[0])) == 32)
789 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.ctpop.i32", ctx
->ac
.i32
, src
, 1, AC_FUNC_ATTR_READNONE
);
791 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.ctpop.i64", ctx
->ac
.i64
, src
, 1, AC_FUNC_ATTR_READNONE
);
792 result
= LLVMBuildTrunc(ctx
->ac
.builder
, result
, ctx
->ac
.i32
, "");
798 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++)
799 src
[i
] = ac_to_integer(&ctx
->ac
, src
[i
]);
800 result
= ac_build_gather_values(&ctx
->ac
, src
, num_components
);
804 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
805 result
= LLVMBuildFPToSI(ctx
->ac
.builder
, src
[0], def_type
, "");
809 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
810 result
= LLVMBuildFPToUI(ctx
->ac
.builder
, src
[0], def_type
, "");
814 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
815 result
= LLVMBuildSIToFP(ctx
->ac
.builder
, src
[0], ac_to_float_type(&ctx
->ac
, def_type
), "");
819 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
820 result
= LLVMBuildUIToFP(ctx
->ac
.builder
, src
[0], ac_to_float_type(&ctx
->ac
, def_type
), "");
823 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
824 result
= LLVMBuildFPExt(ctx
->ac
.builder
, src
[0], ac_to_float_type(&ctx
->ac
, def_type
), "");
827 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
828 result
= LLVMBuildFPTrunc(ctx
->ac
.builder
, src
[0], ac_to_float_type(&ctx
->ac
, def_type
), "");
832 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
833 if (ac_get_elem_bits(&ctx
->ac
, LLVMTypeOf(src
[0])) < ac_get_elem_bits(&ctx
->ac
, def_type
))
834 result
= LLVMBuildZExt(ctx
->ac
.builder
, src
[0], def_type
, "");
836 result
= LLVMBuildTrunc(ctx
->ac
.builder
, src
[0], def_type
, "");
840 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
841 if (ac_get_elem_bits(&ctx
->ac
, LLVMTypeOf(src
[0])) < ac_get_elem_bits(&ctx
->ac
, def_type
))
842 result
= LLVMBuildSExt(ctx
->ac
.builder
, src
[0], def_type
, "");
844 result
= LLVMBuildTrunc(ctx
->ac
.builder
, src
[0], def_type
, "");
847 result
= emit_bcsel(&ctx
->ac
, src
[0], src
[1], src
[2]);
849 case nir_op_find_lsb
:
850 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
851 result
= ac_find_lsb(&ctx
->ac
, ctx
->ac
.i32
, src
[0]);
853 case nir_op_ufind_msb
:
854 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
855 result
= ac_build_umsb(&ctx
->ac
, src
[0], ctx
->ac
.i32
);
857 case nir_op_ifind_msb
:
858 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
859 result
= ac_build_imsb(&ctx
->ac
, src
[0], ctx
->ac
.i32
);
861 case nir_op_uadd_carry
:
862 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
863 src
[1] = ac_to_integer(&ctx
->ac
, src
[1]);
864 result
= emit_uint_carry(&ctx
->ac
, "llvm.uadd.with.overflow.i32", src
[0], src
[1]);
866 case nir_op_usub_borrow
:
867 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
868 src
[1] = ac_to_integer(&ctx
->ac
, src
[1]);
869 result
= emit_uint_carry(&ctx
->ac
, "llvm.usub.with.overflow.i32", src
[0], src
[1]);
872 result
= emit_b2f(&ctx
->ac
, src
[0]);
875 result
= emit_f2b(&ctx
->ac
, src
[0]);
878 result
= emit_b2i(&ctx
->ac
, src
[0], instr
->dest
.dest
.ssa
.bit_size
);
881 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
882 result
= emit_i2b(&ctx
->ac
, src
[0]);
884 case nir_op_fquantize2f16
:
885 result
= emit_f2f16(&ctx
->ac
, src
[0]);
887 case nir_op_umul_high
:
888 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
889 src
[1] = ac_to_integer(&ctx
->ac
, src
[1]);
890 result
= emit_umul_high(&ctx
->ac
, src
[0], src
[1]);
892 case nir_op_imul_high
:
893 src
[0] = ac_to_integer(&ctx
->ac
, src
[0]);
894 src
[1] = ac_to_integer(&ctx
->ac
, src
[1]);
895 result
= emit_imul_high(&ctx
->ac
, src
[0], src
[1]);
897 case nir_op_pack_half_2x16
:
898 result
= emit_pack_half_2x16(&ctx
->ac
, src
[0]);
900 case nir_op_unpack_half_2x16
:
901 result
= emit_unpack_half_2x16(&ctx
->ac
, src
[0]);
905 case nir_op_fddx_fine
:
906 case nir_op_fddy_fine
:
907 case nir_op_fddx_coarse
:
908 case nir_op_fddy_coarse
:
909 result
= emit_ddxy(ctx
, instr
->op
, src
[0]);
912 case nir_op_unpack_64_2x32_split_x
: {
913 assert(ac_get_llvm_num_components(src
[0]) == 1);
914 LLVMValueRef tmp
= LLVMBuildBitCast(ctx
->ac
.builder
, src
[0],
917 result
= LLVMBuildExtractElement(ctx
->ac
.builder
, tmp
,
922 case nir_op_unpack_64_2x32_split_y
: {
923 assert(ac_get_llvm_num_components(src
[0]) == 1);
924 LLVMValueRef tmp
= LLVMBuildBitCast(ctx
->ac
.builder
, src
[0],
927 result
= LLVMBuildExtractElement(ctx
->ac
.builder
, tmp
,
932 case nir_op_pack_64_2x32_split
: {
933 LLVMValueRef tmp
= LLVMGetUndef(ctx
->ac
.v2i32
);
934 tmp
= LLVMBuildInsertElement(ctx
->ac
.builder
, tmp
,
935 src
[0], ctx
->ac
.i32_0
, "");
936 tmp
= LLVMBuildInsertElement(ctx
->ac
.builder
, tmp
,
937 src
[1], ctx
->ac
.i32_1
, "");
938 result
= LLVMBuildBitCast(ctx
->ac
.builder
, tmp
, ctx
->ac
.i64
, "");
942 case nir_op_cube_face_coord
: {
943 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
944 LLVMValueRef results
[2];
946 for (unsigned chan
= 0; chan
< 3; chan
++)
947 in
[chan
] = ac_llvm_extract_elem(&ctx
->ac
, src
[0], chan
);
948 results
[0] = ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.cubetc",
949 ctx
->ac
.f32
, in
, 3, AC_FUNC_ATTR_READNONE
);
950 results
[1] = ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.cubesc",
951 ctx
->ac
.f32
, in
, 3, AC_FUNC_ATTR_READNONE
);
952 result
= ac_build_gather_values(&ctx
->ac
, results
, 2);
956 case nir_op_cube_face_index
: {
957 src
[0] = ac_to_float(&ctx
->ac
, src
[0]);
959 for (unsigned chan
= 0; chan
< 3; chan
++)
960 in
[chan
] = ac_llvm_extract_elem(&ctx
->ac
, src
[0], chan
);
961 result
= ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.cubeid",
962 ctx
->ac
.f32
, in
, 3, AC_FUNC_ATTR_READNONE
);
967 fprintf(stderr
, "Unknown NIR alu instr: ");
968 nir_print_instr(&instr
->instr
, stderr
);
969 fprintf(stderr
, "\n");
974 assert(instr
->dest
.dest
.is_ssa
);
975 result
= ac_to_integer(&ctx
->ac
, result
);
976 _mesa_hash_table_insert(ctx
->defs
, &instr
->dest
.dest
.ssa
,
981 static void visit_load_const(struct ac_nir_context
*ctx
,
982 const nir_load_const_instr
*instr
)
984 LLVMValueRef values
[4], value
= NULL
;
985 LLVMTypeRef element_type
=
986 LLVMIntTypeInContext(ctx
->ac
.context
, instr
->def
.bit_size
);
988 for (unsigned i
= 0; i
< instr
->def
.num_components
; ++i
) {
989 switch (instr
->def
.bit_size
) {
991 values
[i
] = LLVMConstInt(element_type
,
992 instr
->value
.u32
[i
], false);
995 values
[i
] = LLVMConstInt(element_type
,
996 instr
->value
.u64
[i
], false);
1000 "unsupported nir load_const bit_size: %d\n",
1001 instr
->def
.bit_size
);
1005 if (instr
->def
.num_components
> 1) {
1006 value
= LLVMConstVector(values
, instr
->def
.num_components
);
1010 _mesa_hash_table_insert(ctx
->defs
, &instr
->def
, value
);
1014 get_buffer_size(struct ac_nir_context
*ctx
, LLVMValueRef descriptor
, bool in_elements
)
1017 LLVMBuildExtractElement(ctx
->ac
.builder
, descriptor
,
1018 LLVMConstInt(ctx
->ac
.i32
, 2, false), "");
1021 if (ctx
->ac
.chip_class
== VI
&& in_elements
) {
1022 /* On VI, the descriptor contains the size in bytes,
1023 * but TXQ must return the size in elements.
1024 * The stride is always non-zero for resources using TXQ.
1026 LLVMValueRef stride
=
1027 LLVMBuildExtractElement(ctx
->ac
.builder
, descriptor
,
1029 stride
= LLVMBuildLShr(ctx
->ac
.builder
, stride
,
1030 LLVMConstInt(ctx
->ac
.i32
, 16, false), "");
1031 stride
= LLVMBuildAnd(ctx
->ac
.builder
, stride
,
1032 LLVMConstInt(ctx
->ac
.i32
, 0x3fff, false), "");
1034 size
= LLVMBuildUDiv(ctx
->ac
.builder
, size
, stride
, "");
1040 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1043 static void build_int_type_name(
1045 char *buf
, unsigned bufsize
)
1047 assert(bufsize
>= 6);
1049 if (LLVMGetTypeKind(type
) == LLVMVectorTypeKind
)
1050 snprintf(buf
, bufsize
, "v%ui32",
1051 LLVMGetVectorSize(type
));
1056 static LLVMValueRef
lower_gather4_integer(struct ac_llvm_context
*ctx
,
1057 struct ac_image_args
*args
,
1058 const nir_tex_instr
*instr
)
1060 enum glsl_base_type stype
= glsl_get_sampler_result_type(instr
->texture
->var
->type
);
1061 LLVMValueRef coord
= args
->addr
;
1062 LLVMValueRef half_texel
[2];
1063 LLVMValueRef compare_cube_wa
= NULL
;
1064 LLVMValueRef result
;
1066 unsigned coord_vgpr_index
= (unsigned)args
->offset
+ (unsigned)args
->compare
;
1070 struct ac_image_args txq_args
= { 0 };
1072 txq_args
.da
= instr
->is_array
|| instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
;
1073 txq_args
.opcode
= ac_image_get_resinfo
;
1074 txq_args
.dmask
= 0xf;
1075 txq_args
.addr
= ctx
->i32_0
;
1076 txq_args
.resource
= args
->resource
;
1077 LLVMValueRef size
= ac_build_image_opcode(ctx
, &txq_args
);
1079 for (c
= 0; c
< 2; c
++) {
1080 half_texel
[c
] = LLVMBuildExtractElement(ctx
->builder
, size
,
1081 LLVMConstInt(ctx
->i32
, c
, false), "");
1082 half_texel
[c
] = LLVMBuildUIToFP(ctx
->builder
, half_texel
[c
], ctx
->f32
, "");
1083 half_texel
[c
] = ac_build_fdiv(ctx
, ctx
->f32_1
, half_texel
[c
]);
1084 half_texel
[c
] = LLVMBuildFMul(ctx
->builder
, half_texel
[c
],
1085 LLVMConstReal(ctx
->f32
, -0.5), "");
1089 LLVMValueRef orig_coords
= args
->addr
;
1091 for (c
= 0; c
< 2; c
++) {
1093 LLVMValueRef index
= LLVMConstInt(ctx
->i32
, coord_vgpr_index
+ c
, 0);
1094 tmp
= LLVMBuildExtractElement(ctx
->builder
, coord
, index
, "");
1095 tmp
= LLVMBuildBitCast(ctx
->builder
, tmp
, ctx
->f32
, "");
1096 tmp
= LLVMBuildFAdd(ctx
->builder
, tmp
, half_texel
[c
], "");
1097 tmp
= LLVMBuildBitCast(ctx
->builder
, tmp
, ctx
->i32
, "");
1098 coord
= LLVMBuildInsertElement(ctx
->builder
, coord
, tmp
, index
, "");
1103 * Apparantly cube has issue with integer types that the workaround doesn't solve,
1104 * so this tests if the format is 8_8_8_8 and an integer type do an alternate
1105 * workaround by sampling using a scaled type and converting.
1106 * This is taken from amdgpu-pro shaders.
1108 /* NOTE this produces some ugly code compared to amdgpu-pro,
1109 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
1110 * and then reads them back. -pro generates two selects,
1111 * one s_cmp for the descriptor rewriting
1112 * one v_cmp for the coordinate and result changes.
1114 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
) {
1115 LLVMValueRef tmp
, tmp2
;
1117 /* workaround 8/8/8/8 uint/sint cube gather bug */
1118 /* first detect it then change to a scaled read and f2i */
1119 tmp
= LLVMBuildExtractElement(ctx
->builder
, args
->resource
, ctx
->i32_1
, "");
1122 /* extract the DATA_FORMAT */
1123 tmp
= ac_build_bfe(ctx
, tmp
, LLVMConstInt(ctx
->i32
, 20, false),
1124 LLVMConstInt(ctx
->i32
, 6, false), false);
1126 /* is the DATA_FORMAT == 8_8_8_8 */
1127 compare_cube_wa
= LLVMBuildICmp(ctx
->builder
, LLVMIntEQ
, tmp
, LLVMConstInt(ctx
->i32
, V_008F14_IMG_DATA_FORMAT_8_8_8_8
, false), "");
1129 if (stype
== GLSL_TYPE_UINT
)
1130 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
1131 tmp
= LLVMBuildSelect(ctx
->builder
, compare_cube_wa
, LLVMConstInt(ctx
->i32
, 0x8000000, false),
1132 LLVMConstInt(ctx
->i32
, 0x10000000, false), "");
1134 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
1135 tmp
= LLVMBuildSelect(ctx
->builder
, compare_cube_wa
, LLVMConstInt(ctx
->i32
, 0xc000000, false),
1136 LLVMConstInt(ctx
->i32
, 0x14000000, false), "");
1138 /* replace the NUM FORMAT in the descriptor */
1139 tmp2
= LLVMBuildAnd(ctx
->builder
, tmp2
, LLVMConstInt(ctx
->i32
, C_008F14_NUM_FORMAT_GFX6
, false), "");
1140 tmp2
= LLVMBuildOr(ctx
->builder
, tmp2
, tmp
, "");
1142 args
->resource
= LLVMBuildInsertElement(ctx
->builder
, args
->resource
, tmp2
, ctx
->i32_1
, "");
1144 /* don't modify the coordinates for this case */
1145 coord
= LLVMBuildSelect(ctx
->builder
, compare_cube_wa
, orig_coords
, coord
, "");
1148 result
= ac_build_image_opcode(ctx
, args
);
1150 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
) {
1151 LLVMValueRef tmp
, tmp2
;
1153 /* if the cube workaround is in place, f2i the result. */
1154 for (c
= 0; c
< 4; c
++) {
1155 tmp
= LLVMBuildExtractElement(ctx
->builder
, result
, LLVMConstInt(ctx
->i32
, c
, false), "");
1156 if (stype
== GLSL_TYPE_UINT
)
1157 tmp2
= LLVMBuildFPToUI(ctx
->builder
, tmp
, ctx
->i32
, "");
1159 tmp2
= LLVMBuildFPToSI(ctx
->builder
, tmp
, ctx
->i32
, "");
1160 tmp
= LLVMBuildBitCast(ctx
->builder
, tmp
, ctx
->i32
, "");
1161 tmp2
= LLVMBuildBitCast(ctx
->builder
, tmp2
, ctx
->i32
, "");
1162 tmp
= LLVMBuildSelect(ctx
->builder
, compare_cube_wa
, tmp2
, tmp
, "");
1163 tmp
= LLVMBuildBitCast(ctx
->builder
, tmp
, ctx
->f32
, "");
1164 result
= LLVMBuildInsertElement(ctx
->builder
, result
, tmp
, LLVMConstInt(ctx
->i32
, c
, false), "");
1170 static LLVMValueRef
build_tex_intrinsic(struct ac_nir_context
*ctx
,
1171 const nir_tex_instr
*instr
,
1173 struct ac_image_args
*args
)
1175 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_BUF
) {
1176 unsigned mask
= nir_ssa_def_components_read(&instr
->dest
.ssa
);
1178 return ac_build_buffer_load_format(&ctx
->ac
,
1182 util_last_bit(mask
),
1186 args
->opcode
= ac_image_sample
;
1187 args
->compare
= instr
->is_shadow
;
1189 switch (instr
->op
) {
1191 case nir_texop_txf_ms
:
1192 case nir_texop_samples_identical
:
1193 args
->opcode
= lod_is_zero
||
1194 instr
->sampler_dim
== GLSL_SAMPLER_DIM_MS
?
1195 ac_image_load
: ac_image_load_mip
;
1196 args
->compare
= false;
1197 args
->offset
= false;
1204 args
->level_zero
= true;
1209 case nir_texop_query_levels
:
1210 args
->opcode
= ac_image_get_resinfo
;
1213 if (ctx
->stage
!= MESA_SHADER_FRAGMENT
)
1214 args
->level_zero
= true;
1220 args
->opcode
= ac_image_gather4
;
1221 args
->level_zero
= true;
1224 args
->opcode
= ac_image_get_lod
;
1225 args
->compare
= false;
1226 args
->offset
= false;
1232 if (instr
->op
== nir_texop_tg4
&& ctx
->ac
.chip_class
<= VI
) {
1233 enum glsl_base_type stype
= glsl_get_sampler_result_type(instr
->texture
->var
->type
);
1234 if (stype
== GLSL_TYPE_UINT
|| stype
== GLSL_TYPE_INT
) {
1235 return lower_gather4_integer(&ctx
->ac
, args
, instr
);
1238 return ac_build_image_opcode(&ctx
->ac
, args
);
1241 static LLVMValueRef
visit_vulkan_resource_reindex(struct ac_nir_context
*ctx
,
1242 nir_intrinsic_instr
*instr
)
1244 LLVMValueRef ptr
= get_src(ctx
, instr
->src
[0]);
1245 LLVMValueRef index
= get_src(ctx
, instr
->src
[1]);
1247 LLVMValueRef result
= LLVMBuildGEP(ctx
->ac
.builder
, ptr
, &index
, 1, "");
1248 LLVMSetMetadata(result
, ctx
->ac
.uniform_md_kind
, ctx
->ac
.empty_md
);
1252 static LLVMValueRef
visit_load_push_constant(struct ac_nir_context
*ctx
,
1253 nir_intrinsic_instr
*instr
)
1255 LLVMValueRef ptr
, addr
;
1257 addr
= LLVMConstInt(ctx
->ac
.i32
, nir_intrinsic_base(instr
), 0);
1258 addr
= LLVMBuildAdd(ctx
->ac
.builder
, addr
,
1259 get_src(ctx
, instr
->src
[0]), "");
1261 ptr
= ac_build_gep0(&ctx
->ac
, ctx
->abi
->push_constants
, addr
);
1262 ptr
= ac_cast_ptr(&ctx
->ac
, ptr
, get_def_type(ctx
, &instr
->dest
.ssa
));
1264 return LLVMBuildLoad(ctx
->ac
.builder
, ptr
, "");
1267 static LLVMValueRef
visit_get_buffer_size(struct ac_nir_context
*ctx
,
1268 const nir_intrinsic_instr
*instr
)
1270 LLVMValueRef index
= get_src(ctx
, instr
->src
[0]);
1272 return get_buffer_size(ctx
, ctx
->abi
->load_ssbo(ctx
->abi
, index
, false), false);
/* Expand each set bit of \p mask into \p multiplier consecutive set bits.
 * E.g. widen_mask(0b101, 2) == 0b110011. Used to turn a per-component
 * writemask into a mask over 32-bit slots when components are wider.
 */
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
	uint32_t new_mask = 0;

	for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i) {
		if (mask & (1u << i))
			new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
	}
	return new_mask;
}
1284 static LLVMValueRef
extract_vector_range(struct ac_llvm_context
*ctx
, LLVMValueRef src
,
1285 unsigned start
, unsigned count
)
1287 LLVMTypeRef type
= LLVMTypeOf(src
);
1289 if (LLVMGetTypeKind(type
) != LLVMVectorTypeKind
) {
1295 unsigned src_elements
= LLVMGetVectorSize(type
);
1296 assert(start
< src_elements
);
1297 assert(start
+ count
<= src_elements
);
1299 if (start
== 0 && count
== src_elements
)
1303 return LLVMBuildExtractElement(ctx
->builder
, src
, LLVMConstInt(ctx
->i32
, start
, false), "");
1306 LLVMValueRef indices
[8];
1307 for (unsigned i
= 0; i
< count
; ++i
)
1308 indices
[i
] = LLVMConstInt(ctx
->i32
, start
+ i
, false);
1310 LLVMValueRef swizzle
= LLVMConstVector(indices
, count
);
1311 return LLVMBuildShuffleVector(ctx
->builder
, src
, src
, swizzle
, "");
1314 static void visit_store_ssbo(struct ac_nir_context
*ctx
,
1315 nir_intrinsic_instr
*instr
)
1317 const char *store_name
;
1318 LLVMValueRef src_data
= get_src(ctx
, instr
->src
[0]);
1319 LLVMTypeRef data_type
= ctx
->ac
.f32
;
1320 int elem_size_mult
= ac_get_elem_bits(&ctx
->ac
, LLVMTypeOf(src_data
)) / 32;
1321 int components_32bit
= elem_size_mult
* instr
->num_components
;
1322 unsigned writemask
= nir_intrinsic_write_mask(instr
);
1323 LLVMValueRef base_data
, base_offset
;
1324 LLVMValueRef params
[6];
1326 params
[1] = ctx
->abi
->load_ssbo(ctx
->abi
,
1327 get_src(ctx
, instr
->src
[1]), true);
1328 params
[2] = ctx
->ac
.i32_0
; /* vindex */
1329 params
[4] = ctx
->ac
.i1false
; /* glc */
1330 params
[5] = ctx
->ac
.i1false
; /* slc */
1332 if (components_32bit
> 1)
1333 data_type
= LLVMVectorType(ctx
->ac
.f32
, components_32bit
);
1335 writemask
= widen_mask(writemask
, elem_size_mult
);
1337 base_data
= ac_to_float(&ctx
->ac
, src_data
);
1338 base_data
= ac_trim_vector(&ctx
->ac
, base_data
, instr
->num_components
);
1339 base_data
= LLVMBuildBitCast(ctx
->ac
.builder
, base_data
,
1341 base_offset
= get_src(ctx
, instr
->src
[2]); /* voffset */
1345 LLVMValueRef offset
;
1347 u_bit_scan_consecutive_range(&writemask
, &start
, &count
);
1349 /* Due to an LLVM limitation, split 3-element writes
1350 * into a 2-element and a 1-element write. */
1352 writemask
|= 1 << (start
+ 2);
1357 writemask
|= ((1u << (count
- 4)) - 1u) << (start
+ 4);
1362 store_name
= "llvm.amdgcn.buffer.store.v4f32";
1363 } else if (count
== 2) {
1364 store_name
= "llvm.amdgcn.buffer.store.v2f32";
1368 store_name
= "llvm.amdgcn.buffer.store.f32";
1370 data
= extract_vector_range(&ctx
->ac
, base_data
, start
, count
);
1372 offset
= base_offset
;
1374 offset
= LLVMBuildAdd(ctx
->ac
.builder
, offset
, LLVMConstInt(ctx
->ac
.i32
, start
* 4, false), "");
1378 ac_build_intrinsic(&ctx
->ac
, store_name
,
1379 ctx
->ac
.voidt
, params
, 6, 0);
1383 static LLVMValueRef
visit_atomic_ssbo(struct ac_nir_context
*ctx
,
1384 const nir_intrinsic_instr
*instr
)
1387 LLVMValueRef params
[6];
1390 if (instr
->intrinsic
== nir_intrinsic_ssbo_atomic_comp_swap
) {
1391 params
[arg_count
++] = ac_llvm_extract_elem(&ctx
->ac
, get_src(ctx
, instr
->src
[3]), 0);
1393 params
[arg_count
++] = ac_llvm_extract_elem(&ctx
->ac
, get_src(ctx
, instr
->src
[2]), 0);
1394 params
[arg_count
++] = ctx
->abi
->load_ssbo(ctx
->abi
,
1395 get_src(ctx
, instr
->src
[0]),
1397 params
[arg_count
++] = ctx
->ac
.i32_0
; /* vindex */
1398 params
[arg_count
++] = get_src(ctx
, instr
->src
[1]); /* voffset */
1399 params
[arg_count
++] = LLVMConstInt(ctx
->ac
.i1
, 0, false); /* slc */
1401 switch (instr
->intrinsic
) {
1402 case nir_intrinsic_ssbo_atomic_add
:
1403 name
= "llvm.amdgcn.buffer.atomic.add";
1405 case nir_intrinsic_ssbo_atomic_imin
:
1406 name
= "llvm.amdgcn.buffer.atomic.smin";
1408 case nir_intrinsic_ssbo_atomic_umin
:
1409 name
= "llvm.amdgcn.buffer.atomic.umin";
1411 case nir_intrinsic_ssbo_atomic_imax
:
1412 name
= "llvm.amdgcn.buffer.atomic.smax";
1414 case nir_intrinsic_ssbo_atomic_umax
:
1415 name
= "llvm.amdgcn.buffer.atomic.umax";
1417 case nir_intrinsic_ssbo_atomic_and
:
1418 name
= "llvm.amdgcn.buffer.atomic.and";
1420 case nir_intrinsic_ssbo_atomic_or
:
1421 name
= "llvm.amdgcn.buffer.atomic.or";
1423 case nir_intrinsic_ssbo_atomic_xor
:
1424 name
= "llvm.amdgcn.buffer.atomic.xor";
1426 case nir_intrinsic_ssbo_atomic_exchange
:
1427 name
= "llvm.amdgcn.buffer.atomic.swap";
1429 case nir_intrinsic_ssbo_atomic_comp_swap
:
1430 name
= "llvm.amdgcn.buffer.atomic.cmpswap";
1436 return ac_build_intrinsic(&ctx
->ac
, name
, ctx
->ac
.i32
, params
, arg_count
, 0);
1439 static LLVMValueRef
visit_load_buffer(struct ac_nir_context
*ctx
,
1440 const nir_intrinsic_instr
*instr
)
1442 LLVMValueRef results
[2];
1443 int load_components
;
1444 int num_components
= instr
->num_components
;
1445 if (instr
->dest
.ssa
.bit_size
== 64)
1446 num_components
*= 2;
1448 for (int i
= 0; i
< num_components
; i
+= load_components
) {
1449 load_components
= MIN2(num_components
- i
, 4);
1450 const char *load_name
;
1451 LLVMTypeRef data_type
= ctx
->ac
.f32
;
1452 LLVMValueRef offset
= LLVMConstInt(ctx
->ac
.i32
, i
* 4, false);
1453 offset
= LLVMBuildAdd(ctx
->ac
.builder
, get_src(ctx
, instr
->src
[1]), offset
, "");
1455 if (load_components
== 3)
1456 data_type
= LLVMVectorType(ctx
->ac
.f32
, 4);
1457 else if (load_components
> 1)
1458 data_type
= LLVMVectorType(ctx
->ac
.f32
, load_components
);
1460 if (load_components
>= 3)
1461 load_name
= "llvm.amdgcn.buffer.load.v4f32";
1462 else if (load_components
== 2)
1463 load_name
= "llvm.amdgcn.buffer.load.v2f32";
1464 else if (load_components
== 1)
1465 load_name
= "llvm.amdgcn.buffer.load.f32";
1467 unreachable("unhandled number of components");
1469 LLVMValueRef params
[] = {
1470 ctx
->abi
->load_ssbo(ctx
->abi
,
1471 get_src(ctx
, instr
->src
[0]),
1479 results
[i
> 0 ? 1 : 0] = ac_build_intrinsic(&ctx
->ac
, load_name
, data_type
, params
, 5, 0);
1483 LLVMValueRef ret
= results
[0];
1484 if (num_components
> 4 || num_components
== 3) {
1485 LLVMValueRef masks
[] = {
1486 LLVMConstInt(ctx
->ac
.i32
, 0, false), LLVMConstInt(ctx
->ac
.i32
, 1, false),
1487 LLVMConstInt(ctx
->ac
.i32
, 2, false), LLVMConstInt(ctx
->ac
.i32
, 3, false),
1488 LLVMConstInt(ctx
->ac
.i32
, 4, false), LLVMConstInt(ctx
->ac
.i32
, 5, false),
1489 LLVMConstInt(ctx
->ac
.i32
, 6, false), LLVMConstInt(ctx
->ac
.i32
, 7, false)
1492 LLVMValueRef swizzle
= LLVMConstVector(masks
, num_components
);
1493 ret
= LLVMBuildShuffleVector(ctx
->ac
.builder
, results
[0],
1494 results
[num_components
> 4 ? 1 : 0], swizzle
, "");
1497 return LLVMBuildBitCast(ctx
->ac
.builder
, ret
,
1498 get_def_type(ctx
, &instr
->dest
.ssa
), "");
1501 static LLVMValueRef
visit_load_ubo_buffer(struct ac_nir_context
*ctx
,
1502 const nir_intrinsic_instr
*instr
)
1505 LLVMValueRef rsrc
= get_src(ctx
, instr
->src
[0]);
1506 LLVMValueRef offset
= get_src(ctx
, instr
->src
[1]);
1507 int num_components
= instr
->num_components
;
1509 if (ctx
->abi
->load_ubo
)
1510 rsrc
= ctx
->abi
->load_ubo(ctx
->abi
, rsrc
);
1512 if (instr
->dest
.ssa
.bit_size
== 64)
1513 num_components
*= 2;
1515 ret
= ac_build_buffer_load(&ctx
->ac
, rsrc
, num_components
, NULL
, offset
,
1516 NULL
, 0, false, false, true, true);
1517 ret
= ac_trim_vector(&ctx
->ac
, ret
, num_components
);
1518 return LLVMBuildBitCast(ctx
->ac
.builder
, ret
,
1519 get_def_type(ctx
, &instr
->dest
.ssa
), "");
1523 get_deref_offset(struct ac_nir_context
*ctx
, nir_deref_var
*deref
,
1524 bool vs_in
, unsigned *vertex_index_out
,
1525 LLVMValueRef
*vertex_index_ref
,
1526 unsigned *const_out
, LLVMValueRef
*indir_out
)
1528 unsigned const_offset
= 0;
1529 nir_deref
*tail
= &deref
->deref
;
1530 LLVMValueRef offset
= NULL
;
1532 if (vertex_index_out
!= NULL
|| vertex_index_ref
!= NULL
) {
1534 nir_deref_array
*deref_array
= nir_deref_as_array(tail
);
1535 if (vertex_index_out
)
1536 *vertex_index_out
= deref_array
->base_offset
;
1538 if (vertex_index_ref
) {
1539 LLVMValueRef vtx
= LLVMConstInt(ctx
->ac
.i32
, deref_array
->base_offset
, false);
1540 if (deref_array
->deref_array_type
== nir_deref_array_type_indirect
) {
1541 vtx
= LLVMBuildAdd(ctx
->ac
.builder
, vtx
, get_src(ctx
, deref_array
->indirect
), "");
1543 *vertex_index_ref
= vtx
;
1547 if (deref
->var
->data
.compact
) {
1548 assert(tail
->child
->deref_type
== nir_deref_type_array
);
1549 assert(glsl_type_is_scalar(glsl_without_array(deref
->var
->type
)));
1550 nir_deref_array
*deref_array
= nir_deref_as_array(tail
->child
);
1551 /* We always lower indirect dereferences for "compact" array vars. */
1552 assert(deref_array
->deref_array_type
== nir_deref_array_type_direct
);
1554 const_offset
= deref_array
->base_offset
;
1558 while (tail
->child
!= NULL
) {
1559 const struct glsl_type
*parent_type
= tail
->type
;
1562 if (tail
->deref_type
== nir_deref_type_array
) {
1563 nir_deref_array
*deref_array
= nir_deref_as_array(tail
);
1564 LLVMValueRef index
, stride
, local_offset
;
1565 unsigned size
= glsl_count_attribute_slots(tail
->type
, vs_in
);
1567 const_offset
+= size
* deref_array
->base_offset
;
1568 if (deref_array
->deref_array_type
== nir_deref_array_type_direct
)
1571 assert(deref_array
->deref_array_type
== nir_deref_array_type_indirect
);
1572 index
= get_src(ctx
, deref_array
->indirect
);
1573 stride
= LLVMConstInt(ctx
->ac
.i32
, size
, 0);
1574 local_offset
= LLVMBuildMul(ctx
->ac
.builder
, stride
, index
, "");
1577 offset
= LLVMBuildAdd(ctx
->ac
.builder
, offset
, local_offset
, "");
1579 offset
= local_offset
;
1580 } else if (tail
->deref_type
== nir_deref_type_struct
) {
1581 nir_deref_struct
*deref_struct
= nir_deref_as_struct(tail
);
1583 for (unsigned i
= 0; i
< deref_struct
->index
; i
++) {
1584 const struct glsl_type
*ft
= glsl_get_struct_field(parent_type
, i
);
1585 const_offset
+= glsl_count_attribute_slots(ft
, vs_in
);
1588 unreachable("unsupported deref type");
1592 if (const_offset
&& offset
)
1593 offset
= LLVMBuildAdd(ctx
->ac
.builder
, offset
,
1594 LLVMConstInt(ctx
->ac
.i32
, const_offset
, 0),
1597 *const_out
= const_offset
;
1598 *indir_out
= offset
;
1602 build_gep_for_deref(struct ac_nir_context
*ctx
,
1603 nir_deref_var
*deref
)
1605 struct hash_entry
*entry
= _mesa_hash_table_search(ctx
->vars
, deref
->var
);
1606 assert(entry
->data
);
1607 LLVMValueRef val
= entry
->data
;
1608 nir_deref
*tail
= deref
->deref
.child
;
1609 while (tail
!= NULL
) {
1610 LLVMValueRef offset
;
1611 switch (tail
->deref_type
) {
1612 case nir_deref_type_array
: {
1613 nir_deref_array
*array
= nir_deref_as_array(tail
);
1614 offset
= LLVMConstInt(ctx
->ac
.i32
, array
->base_offset
, 0);
1615 if (array
->deref_array_type
==
1616 nir_deref_array_type_indirect
) {
1617 offset
= LLVMBuildAdd(ctx
->ac
.builder
, offset
,
1624 case nir_deref_type_struct
: {
1625 nir_deref_struct
*deref_struct
=
1626 nir_deref_as_struct(tail
);
1627 offset
= LLVMConstInt(ctx
->ac
.i32
,
1628 deref_struct
->index
, 0);
1632 unreachable("bad deref type");
1634 val
= ac_build_gep0(&ctx
->ac
, val
, offset
);
1640 static LLVMValueRef
load_tess_varyings(struct ac_nir_context
*ctx
,
1641 nir_intrinsic_instr
*instr
,
1644 LLVMValueRef result
;
1645 LLVMValueRef vertex_index
= NULL
;
1646 LLVMValueRef indir_index
= NULL
;
1647 unsigned const_index
= 0;
1648 unsigned location
= instr
->variables
[0]->var
->data
.location
;
1649 unsigned driver_location
= instr
->variables
[0]->var
->data
.driver_location
;
1650 const bool is_patch
= instr
->variables
[0]->var
->data
.patch
;
1651 const bool is_compact
= instr
->variables
[0]->var
->data
.compact
;
1653 get_deref_offset(ctx
, instr
->variables
[0],
1654 false, NULL
, is_patch
? NULL
: &vertex_index
,
1655 &const_index
, &indir_index
);
1657 LLVMTypeRef dest_type
= get_def_type(ctx
, &instr
->dest
.ssa
);
1659 LLVMTypeRef src_component_type
;
1660 if (LLVMGetTypeKind(dest_type
) == LLVMVectorTypeKind
)
1661 src_component_type
= LLVMGetElementType(dest_type
);
1663 src_component_type
= dest_type
;
1665 result
= ctx
->abi
->load_tess_varyings(ctx
->abi
, src_component_type
,
1666 vertex_index
, indir_index
,
1667 const_index
, location
, driver_location
,
1668 instr
->variables
[0]->var
->data
.location_frac
,
1669 instr
->num_components
,
1670 is_patch
, is_compact
, load_inputs
);
1671 return LLVMBuildBitCast(ctx
->ac
.builder
, result
, dest_type
, "");
1674 static LLVMValueRef
visit_load_var(struct ac_nir_context
*ctx
,
1675 nir_intrinsic_instr
*instr
)
1677 LLVMValueRef values
[8];
1678 int idx
= instr
->variables
[0]->var
->data
.driver_location
;
1679 int ve
= instr
->dest
.ssa
.num_components
;
1680 unsigned comp
= instr
->variables
[0]->var
->data
.location_frac
;
1681 LLVMValueRef indir_index
;
1683 unsigned const_index
;
1684 unsigned stride
= instr
->variables
[0]->var
->data
.compact
? 1 : 4;
1685 bool vs_in
= ctx
->stage
== MESA_SHADER_VERTEX
&&
1686 instr
->variables
[0]->var
->data
.mode
== nir_var_shader_in
;
1687 get_deref_offset(ctx
, instr
->variables
[0], vs_in
, NULL
, NULL
,
1688 &const_index
, &indir_index
);
1690 if (instr
->dest
.ssa
.bit_size
== 64)
1693 switch (instr
->variables
[0]->var
->data
.mode
) {
1694 case nir_var_shader_in
:
1695 if (ctx
->stage
== MESA_SHADER_TESS_CTRL
||
1696 ctx
->stage
== MESA_SHADER_TESS_EVAL
) {
1697 return load_tess_varyings(ctx
, instr
, true);
1700 if (ctx
->stage
== MESA_SHADER_GEOMETRY
) {
1701 LLVMTypeRef type
= LLVMIntTypeInContext(ctx
->ac
.context
, instr
->dest
.ssa
.bit_size
);
1702 LLVMValueRef indir_index
;
1703 unsigned const_index
, vertex_index
;
1704 get_deref_offset(ctx
, instr
->variables
[0],
1705 false, &vertex_index
, NULL
,
1706 &const_index
, &indir_index
);
1708 return ctx
->abi
->load_inputs(ctx
->abi
, instr
->variables
[0]->var
->data
.location
,
1709 instr
->variables
[0]->var
->data
.driver_location
,
1710 instr
->variables
[0]->var
->data
.location_frac
,
1711 instr
->num_components
, vertex_index
, const_index
, type
);
1714 for (unsigned chan
= comp
; chan
< ve
+ comp
; chan
++) {
1716 unsigned count
= glsl_count_attribute_slots(
1717 instr
->variables
[0]->var
->type
,
1718 ctx
->stage
== MESA_SHADER_VERTEX
);
1720 LLVMValueRef tmp_vec
= ac_build_gather_values_extended(
1721 &ctx
->ac
, ctx
->abi
->inputs
+ idx
+ chan
, count
,
1722 stride
, false, true);
1724 values
[chan
] = LLVMBuildExtractElement(ctx
->ac
.builder
,
1728 values
[chan
] = ctx
->abi
->inputs
[idx
+ chan
+ const_index
* stride
];
1732 for (unsigned chan
= 0; chan
< ve
; chan
++) {
1734 unsigned count
= glsl_count_attribute_slots(
1735 instr
->variables
[0]->var
->type
, false);
1737 LLVMValueRef tmp_vec
= ac_build_gather_values_extended(
1738 &ctx
->ac
, ctx
->locals
+ idx
+ chan
, count
,
1739 stride
, true, true);
1741 values
[chan
] = LLVMBuildExtractElement(ctx
->ac
.builder
,
1745 values
[chan
] = LLVMBuildLoad(ctx
->ac
.builder
, ctx
->locals
[idx
+ chan
+ const_index
* stride
], "");
1749 case nir_var_shared
: {
1750 LLVMValueRef address
= build_gep_for_deref(ctx
,
1751 instr
->variables
[0]);
1752 LLVMValueRef val
= LLVMBuildLoad(ctx
->ac
.builder
, address
, "");
1753 return LLVMBuildBitCast(ctx
->ac
.builder
, val
,
1754 get_def_type(ctx
, &instr
->dest
.ssa
),
1757 case nir_var_shader_out
:
1758 if (ctx
->stage
== MESA_SHADER_TESS_CTRL
) {
1759 return load_tess_varyings(ctx
, instr
, false);
1762 for (unsigned chan
= comp
; chan
< ve
+ comp
; chan
++) {
1764 unsigned count
= glsl_count_attribute_slots(
1765 instr
->variables
[0]->var
->type
, false);
1767 LLVMValueRef tmp_vec
= ac_build_gather_values_extended(
1768 &ctx
->ac
, ctx
->abi
->outputs
+ idx
+ chan
, count
,
1769 stride
, true, true);
1771 values
[chan
] = LLVMBuildExtractElement(ctx
->ac
.builder
,
1775 values
[chan
] = LLVMBuildLoad(ctx
->ac
.builder
,
1776 ctx
->abi
->outputs
[idx
+ chan
+ const_index
* stride
],
1782 unreachable("unhandle variable mode");
1784 ret
= ac_build_varying_gather_values(&ctx
->ac
, values
, ve
, comp
);
1785 return LLVMBuildBitCast(ctx
->ac
.builder
, ret
, get_def_type(ctx
, &instr
->dest
.ssa
), "");
1789 visit_store_var(struct ac_nir_context
*ctx
,
1790 nir_intrinsic_instr
*instr
)
1792 LLVMValueRef temp_ptr
, value
;
1793 int idx
= instr
->variables
[0]->var
->data
.driver_location
;
1794 unsigned comp
= instr
->variables
[0]->var
->data
.location_frac
;
1795 LLVMValueRef src
= ac_to_float(&ctx
->ac
, get_src(ctx
, instr
->src
[0]));
1796 int writemask
= instr
->const_index
[0];
1797 LLVMValueRef indir_index
;
1798 unsigned const_index
;
1799 get_deref_offset(ctx
, instr
->variables
[0], false,
1800 NULL
, NULL
, &const_index
, &indir_index
);
1802 if (ac_get_elem_bits(&ctx
->ac
, LLVMTypeOf(src
)) == 64) {
1804 src
= LLVMBuildBitCast(ctx
->ac
.builder
, src
,
1805 LLVMVectorType(ctx
->ac
.f32
, ac_get_llvm_num_components(src
) * 2),
1808 writemask
= widen_mask(writemask
, 2);
1811 writemask
= writemask
<< comp
;
1813 switch (instr
->variables
[0]->var
->data
.mode
) {
1814 case nir_var_shader_out
:
1816 if (ctx
->stage
== MESA_SHADER_TESS_CTRL
) {
1817 LLVMValueRef vertex_index
= NULL
;
1818 LLVMValueRef indir_index
= NULL
;
1819 unsigned const_index
= 0;
1820 const bool is_patch
= instr
->variables
[0]->var
->data
.patch
;
1822 get_deref_offset(ctx
, instr
->variables
[0],
1823 false, NULL
, is_patch
? NULL
: &vertex_index
,
1824 &const_index
, &indir_index
);
1826 ctx
->abi
->store_tcs_outputs(ctx
->abi
, instr
->variables
[0]->var
,
1827 vertex_index
, indir_index
,
1828 const_index
, src
, writemask
);
1832 for (unsigned chan
= 0; chan
< 8; chan
++) {
1834 if (!(writemask
& (1 << chan
)))
1837 value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
- comp
);
1839 if (instr
->variables
[0]->var
->data
.compact
)
1842 unsigned count
= glsl_count_attribute_slots(
1843 instr
->variables
[0]->var
->type
, false);
1845 LLVMValueRef tmp_vec
= ac_build_gather_values_extended(
1846 &ctx
->ac
, ctx
->abi
->outputs
+ idx
+ chan
, count
,
1847 stride
, true, true);
1849 tmp_vec
= LLVMBuildInsertElement(ctx
->ac
.builder
, tmp_vec
,
1850 value
, indir_index
, "");
1851 build_store_values_extended(&ctx
->ac
, ctx
->abi
->outputs
+ idx
+ chan
,
1852 count
, stride
, tmp_vec
);
1855 temp_ptr
= ctx
->abi
->outputs
[idx
+ chan
+ const_index
* stride
];
1857 LLVMBuildStore(ctx
->ac
.builder
, value
, temp_ptr
);
1862 for (unsigned chan
= 0; chan
< 8; chan
++) {
1863 if (!(writemask
& (1 << chan
)))
1866 value
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
);
1868 unsigned count
= glsl_count_attribute_slots(
1869 instr
->variables
[0]->var
->type
, false);
1871 LLVMValueRef tmp_vec
= ac_build_gather_values_extended(
1872 &ctx
->ac
, ctx
->locals
+ idx
+ chan
, count
,
1875 tmp_vec
= LLVMBuildInsertElement(ctx
->ac
.builder
, tmp_vec
,
1876 value
, indir_index
, "");
1877 build_store_values_extended(&ctx
->ac
, ctx
->locals
+ idx
+ chan
,
1880 temp_ptr
= ctx
->locals
[idx
+ chan
+ const_index
* 4];
1882 LLVMBuildStore(ctx
->ac
.builder
, value
, temp_ptr
);
1886 case nir_var_shared
: {
1887 int writemask
= instr
->const_index
[0];
1888 LLVMValueRef address
= build_gep_for_deref(ctx
,
1889 instr
->variables
[0]);
1890 LLVMValueRef val
= get_src(ctx
, instr
->src
[0]);
1891 unsigned components
=
1892 glsl_get_vector_elements(
1893 nir_deref_tail(&instr
->variables
[0]->deref
)->type
);
1894 if (writemask
== (1 << components
) - 1) {
1895 val
= LLVMBuildBitCast(
1896 ctx
->ac
.builder
, val
,
1897 LLVMGetElementType(LLVMTypeOf(address
)), "");
1898 LLVMBuildStore(ctx
->ac
.builder
, val
, address
);
1900 for (unsigned chan
= 0; chan
< 4; chan
++) {
1901 if (!(writemask
& (1 << chan
)))
1904 LLVMBuildStructGEP(ctx
->ac
.builder
,
1906 LLVMValueRef src
= ac_llvm_extract_elem(&ctx
->ac
, val
,
1908 src
= LLVMBuildBitCast(
1909 ctx
->ac
.builder
, src
,
1910 LLVMGetElementType(LLVMTypeOf(ptr
)), "");
1911 LLVMBuildStore(ctx
->ac
.builder
, src
, ptr
);
1921 static int image_type_to_components_count(enum glsl_sampler_dim dim
, bool array
)
1924 case GLSL_SAMPLER_DIM_BUF
:
1926 case GLSL_SAMPLER_DIM_1D
:
1927 return array
? 2 : 1;
1928 case GLSL_SAMPLER_DIM_2D
:
1929 return array
? 3 : 2;
1930 case GLSL_SAMPLER_DIM_MS
:
1931 return array
? 4 : 3;
1932 case GLSL_SAMPLER_DIM_3D
:
1933 case GLSL_SAMPLER_DIM_CUBE
:
1935 case GLSL_SAMPLER_DIM_RECT
:
1936 case GLSL_SAMPLER_DIM_SUBPASS
:
1938 case GLSL_SAMPLER_DIM_SUBPASS_MS
:
1947 glsl_is_array_image(const struct glsl_type
*type
)
1949 const enum glsl_sampler_dim dim
= glsl_get_sampler_dim(type
);
1951 if (glsl_sampler_type_is_array(type
))
1954 return dim
== GLSL_SAMPLER_DIM_CUBE
||
1955 dim
== GLSL_SAMPLER_DIM_3D
||
1956 dim
== GLSL_SAMPLER_DIM_SUBPASS
||
1957 dim
== GLSL_SAMPLER_DIM_SUBPASS_MS
;
1961 /* Adjust the sample index according to FMASK.
1963 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
1964 * which is the identity mapping. Each nibble says which physical sample
1965 * should be fetched to get that sample.
1967 * For example, 0x11111100 means there are only 2 samples stored and
1968 * the second sample covers 3/4 of the pixel. When reading samples 0
1969 * and 1, return physical sample 0 (determined by the first two 0s
1970 * in FMASK), otherwise return physical sample 1.
1972 * The sample index should be adjusted as follows:
1973 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
1975 static LLVMValueRef
adjust_sample_index_using_fmask(struct ac_llvm_context
*ctx
,
1976 LLVMValueRef coord_x
, LLVMValueRef coord_y
,
1977 LLVMValueRef coord_z
,
1978 LLVMValueRef sample_index
,
1979 LLVMValueRef fmask_desc_ptr
)
1981 LLVMValueRef fmask_load_address
[4];
1984 fmask_load_address
[0] = coord_x
;
1985 fmask_load_address
[1] = coord_y
;
1987 fmask_load_address
[2] = coord_z
;
1988 fmask_load_address
[3] = LLVMGetUndef(ctx
->i32
);
1991 struct ac_image_args args
= {0};
1993 args
.opcode
= ac_image_load
;
1994 args
.da
= coord_z
? true : false;
1995 args
.resource
= fmask_desc_ptr
;
1997 args
.addr
= ac_build_gather_values(ctx
, fmask_load_address
, coord_z
? 4 : 2);
1999 res
= ac_build_image_opcode(ctx
, &args
);
2001 res
= ac_to_integer(ctx
, res
);
2002 LLVMValueRef four
= LLVMConstInt(ctx
->i32
, 4, false);
2003 LLVMValueRef F
= LLVMConstInt(ctx
->i32
, 0xf, false);
2005 LLVMValueRef fmask
= LLVMBuildExtractElement(ctx
->builder
,
2009 LLVMValueRef sample_index4
=
2010 LLVMBuildMul(ctx
->builder
, sample_index
, four
, "");
2011 LLVMValueRef shifted_fmask
=
2012 LLVMBuildLShr(ctx
->builder
, fmask
, sample_index4
, "");
2013 LLVMValueRef final_sample
=
2014 LLVMBuildAnd(ctx
->builder
, shifted_fmask
, F
, "");
2016 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
2017 * resource descriptor is 0 (invalid),
2019 LLVMValueRef fmask_desc
=
2020 LLVMBuildBitCast(ctx
->builder
, fmask_desc_ptr
,
2023 LLVMValueRef fmask_word1
=
2024 LLVMBuildExtractElement(ctx
->builder
, fmask_desc
,
2027 LLVMValueRef word1_is_nonzero
=
2028 LLVMBuildICmp(ctx
->builder
, LLVMIntNE
,
2029 fmask_word1
, ctx
->i32_0
, "");
2031 /* Replace the MSAA sample index. */
2033 LLVMBuildSelect(ctx
->builder
, word1_is_nonzero
,
2034 final_sample
, sample_index
, "");
2035 return sample_index
;
2038 static LLVMValueRef
get_image_coords(struct ac_nir_context
*ctx
,
2039 const nir_intrinsic_instr
*instr
)
2041 const struct glsl_type
*type
= glsl_without_array(instr
->variables
[0]->var
->type
);
2043 LLVMValueRef src0
= get_src(ctx
, instr
->src
[0]);
2044 LLVMValueRef coords
[4];
2045 LLVMValueRef masks
[] = {
2046 LLVMConstInt(ctx
->ac
.i32
, 0, false), LLVMConstInt(ctx
->ac
.i32
, 1, false),
2047 LLVMConstInt(ctx
->ac
.i32
, 2, false), LLVMConstInt(ctx
->ac
.i32
, 3, false),
2050 LLVMValueRef sample_index
= ac_llvm_extract_elem(&ctx
->ac
, get_src(ctx
, instr
->src
[1]), 0);
2053 enum glsl_sampler_dim dim
= glsl_get_sampler_dim(type
);
2054 bool is_array
= glsl_sampler_type_is_array(type
);
2055 bool add_frag_pos
= (dim
== GLSL_SAMPLER_DIM_SUBPASS
||
2056 dim
== GLSL_SAMPLER_DIM_SUBPASS_MS
);
2057 bool is_ms
= (dim
== GLSL_SAMPLER_DIM_MS
||
2058 dim
== GLSL_SAMPLER_DIM_SUBPASS_MS
);
2059 bool gfx9_1d
= ctx
->ac
.chip_class
>= GFX9
&& dim
== GLSL_SAMPLER_DIM_1D
;
2060 count
= image_type_to_components_count(dim
, is_array
);
2063 LLVMValueRef fmask_load_address
[3];
2066 fmask_load_address
[0] = LLVMBuildExtractElement(ctx
->ac
.builder
, src0
, masks
[0], "");
2067 fmask_load_address
[1] = LLVMBuildExtractElement(ctx
->ac
.builder
, src0
, masks
[1], "");
2069 fmask_load_address
[2] = LLVMBuildExtractElement(ctx
->ac
.builder
, src0
, masks
[2], "");
2071 fmask_load_address
[2] = NULL
;
2073 for (chan
= 0; chan
< 2; ++chan
)
2074 fmask_load_address
[chan
] =
2075 LLVMBuildAdd(ctx
->ac
.builder
, fmask_load_address
[chan
],
2076 LLVMBuildFPToUI(ctx
->ac
.builder
, ctx
->abi
->frag_pos
[chan
],
2077 ctx
->ac
.i32
, ""), "");
2078 fmask_load_address
[2] = ac_to_integer(&ctx
->ac
, ctx
->abi
->inputs
[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER
, 0)]);
2080 sample_index
= adjust_sample_index_using_fmask(&ctx
->ac
,
2081 fmask_load_address
[0],
2082 fmask_load_address
[1],
2083 fmask_load_address
[2],
2085 get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_FMASK
, NULL
, true, false));
2087 if (count
== 1 && !gfx9_1d
) {
2088 if (instr
->src
[0].ssa
->num_components
)
2089 res
= LLVMBuildExtractElement(ctx
->ac
.builder
, src0
, masks
[0], "");
2096 for (chan
= 0; chan
< count
; ++chan
) {
2097 coords
[chan
] = ac_llvm_extract_elem(&ctx
->ac
, src0
, chan
);
2100 for (chan
= 0; chan
< 2; ++chan
)
2101 coords
[chan
] = LLVMBuildAdd(ctx
->ac
.builder
, coords
[chan
], LLVMBuildFPToUI(ctx
->ac
.builder
, ctx
->abi
->frag_pos
[chan
],
2102 ctx
->ac
.i32
, ""), "");
2103 coords
[2] = ac_to_integer(&ctx
->ac
, ctx
->abi
->inputs
[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER
, 0)]);
2109 coords
[2] = coords
[1];
2110 coords
[1] = ctx
->ac
.i32_0
;
2112 coords
[1] = ctx
->ac
.i32_0
;
2117 coords
[count
] = sample_index
;
2122 coords
[3] = LLVMGetUndef(ctx
->ac
.i32
);
2125 res
= ac_build_gather_values(&ctx
->ac
, coords
, count
);
2130 static LLVMValueRef
visit_image_load(struct ac_nir_context
*ctx
,
2131 const nir_intrinsic_instr
*instr
)
2133 LLVMValueRef params
[7];
2135 char intrinsic_name
[64];
2136 const nir_variable
*var
= instr
->variables
[0]->var
;
2137 const struct glsl_type
*type
= var
->type
;
2139 if(instr
->variables
[0]->deref
.child
)
2140 type
= instr
->variables
[0]->deref
.child
->type
;
2142 type
= glsl_without_array(type
);
2144 const enum glsl_sampler_dim dim
= glsl_get_sampler_dim(type
);
2145 if (dim
== GLSL_SAMPLER_DIM_BUF
) {
2146 unsigned mask
= nir_ssa_def_components_read(&instr
->dest
.ssa
);
2147 unsigned num_channels
= util_last_bit(mask
);
2148 LLVMValueRef rsrc
, vindex
;
2150 rsrc
= get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_BUFFER
, NULL
, true, false);
2151 vindex
= LLVMBuildExtractElement(ctx
->ac
.builder
, get_src(ctx
, instr
->src
[0]),
2154 /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */
2155 res
= ac_build_buffer_load_format(&ctx
->ac
, rsrc
, vindex
,
2156 ctx
->ac
.i32_0
, num_channels
,
2158 res
= ac_build_expand_to_vec4(&ctx
->ac
, res
, num_channels
);
2160 res
= ac_trim_vector(&ctx
->ac
, res
, instr
->dest
.ssa
.num_components
);
2161 res
= ac_to_integer(&ctx
->ac
, res
);
2163 LLVMValueRef da
= glsl_is_array_image(type
) ? ctx
->ac
.i1true
: ctx
->ac
.i1false
;
2164 LLVMValueRef slc
= ctx
->ac
.i1false
;
2166 params
[0] = get_image_coords(ctx
, instr
);
2167 params
[1] = get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_IMAGE
, NULL
, true, false);
2168 params
[2] = LLVMConstInt(ctx
->ac
.i32
, 15, false); /* dmask */
2169 params
[3] = (var
->data
.image
._volatile
|| var
->data
.image
.coherent
) ?
2170 ctx
->ac
.i1true
: ctx
->ac
.i1false
;
2172 params
[5] = ctx
->ac
.i1false
;
2175 ac_get_image_intr_name("llvm.amdgcn.image.load",
2176 ctx
->ac
.v4f32
, /* vdata */
2177 LLVMTypeOf(params
[0]), /* coords */
2178 LLVMTypeOf(params
[1]), /* rsrc */
2179 intrinsic_name
, sizeof(intrinsic_name
));
2181 res
= ac_build_intrinsic(&ctx
->ac
, intrinsic_name
, ctx
->ac
.v4f32
,
2182 params
, 7, AC_FUNC_ATTR_READONLY
);
2184 return ac_to_integer(&ctx
->ac
, res
);
2187 static void visit_image_store(struct ac_nir_context
*ctx
,
2188 nir_intrinsic_instr
*instr
)
2190 LLVMValueRef params
[8];
2191 char intrinsic_name
[64];
2192 const nir_variable
*var
= instr
->variables
[0]->var
;
2193 const struct glsl_type
*type
= glsl_without_array(var
->type
);
2194 const enum glsl_sampler_dim dim
= glsl_get_sampler_dim(type
);
2195 LLVMValueRef glc
= ctx
->ac
.i1false
;
2196 bool force_glc
= ctx
->ac
.chip_class
== SI
;
2198 glc
= ctx
->ac
.i1true
;
2200 if (dim
== GLSL_SAMPLER_DIM_BUF
) {
2201 params
[0] = ac_to_float(&ctx
->ac
, get_src(ctx
, instr
->src
[2])); /* data */
2202 params
[1] = get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_BUFFER
, NULL
, true, true);
2203 params
[2] = LLVMBuildExtractElement(ctx
->ac
.builder
, get_src(ctx
, instr
->src
[0]),
2204 ctx
->ac
.i32_0
, ""); /* vindex */
2205 params
[3] = ctx
->ac
.i32_0
; /* voffset */
2206 params
[4] = glc
; /* glc */
2207 params
[5] = ctx
->ac
.i1false
; /* slc */
2208 ac_build_intrinsic(&ctx
->ac
, "llvm.amdgcn.buffer.store.format.v4f32", ctx
->ac
.voidt
,
2211 LLVMValueRef da
= glsl_is_array_image(type
) ? ctx
->ac
.i1true
: ctx
->ac
.i1false
;
2212 LLVMValueRef slc
= ctx
->ac
.i1false
;
2214 params
[0] = ac_to_float(&ctx
->ac
, get_src(ctx
, instr
->src
[2]));
2215 params
[1] = get_image_coords(ctx
, instr
); /* coords */
2216 params
[2] = get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_IMAGE
, NULL
, true, true);
2217 params
[3] = LLVMConstInt(ctx
->ac
.i32
, 15, false); /* dmask */
2218 params
[4] = (force_glc
|| var
->data
.image
._volatile
|| var
->data
.image
.coherent
) ?
2219 ctx
->ac
.i1true
: ctx
->ac
.i1false
;
2221 params
[6] = ctx
->ac
.i1false
;
2224 ac_get_image_intr_name("llvm.amdgcn.image.store",
2225 LLVMTypeOf(params
[0]), /* vdata */
2226 LLVMTypeOf(params
[1]), /* coords */
2227 LLVMTypeOf(params
[2]), /* rsrc */
2228 intrinsic_name
, sizeof(intrinsic_name
));
2230 ac_build_intrinsic(&ctx
->ac
, intrinsic_name
, ctx
->ac
.voidt
,
2236 static LLVMValueRef
visit_image_atomic(struct ac_nir_context
*ctx
,
2237 const nir_intrinsic_instr
*instr
)
2239 LLVMValueRef params
[7];
2240 int param_count
= 0;
2241 const nir_variable
*var
= instr
->variables
[0]->var
;
2243 const char *atomic_name
;
2244 char intrinsic_name
[41];
2245 const struct glsl_type
*type
= glsl_without_array(var
->type
);
2246 MAYBE_UNUSED
int length
;
2248 bool is_unsigned
= glsl_get_sampler_result_type(type
) == GLSL_TYPE_UINT
;
2250 switch (instr
->intrinsic
) {
2251 case nir_intrinsic_image_var_atomic_add
:
2252 atomic_name
= "add";
2254 case nir_intrinsic_image_var_atomic_min
:
2255 atomic_name
= is_unsigned
? "umin" : "smin";
2257 case nir_intrinsic_image_var_atomic_max
:
2258 atomic_name
= is_unsigned
? "umax" : "smax";
2260 case nir_intrinsic_image_var_atomic_and
:
2261 atomic_name
= "and";
2263 case nir_intrinsic_image_var_atomic_or
:
2266 case nir_intrinsic_image_var_atomic_xor
:
2267 atomic_name
= "xor";
2269 case nir_intrinsic_image_var_atomic_exchange
:
2270 atomic_name
= "swap";
2272 case nir_intrinsic_image_var_atomic_comp_swap
:
2273 atomic_name
= "cmpswap";
2279 if (instr
->intrinsic
== nir_intrinsic_image_var_atomic_comp_swap
)
2280 params
[param_count
++] = get_src(ctx
, instr
->src
[3]);
2281 params
[param_count
++] = get_src(ctx
, instr
->src
[2]);
2283 if (glsl_get_sampler_dim(type
) == GLSL_SAMPLER_DIM_BUF
) {
2284 params
[param_count
++] = get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_BUFFER
,
2286 params
[param_count
++] = LLVMBuildExtractElement(ctx
->ac
.builder
, get_src(ctx
, instr
->src
[0]),
2287 ctx
->ac
.i32_0
, ""); /* vindex */
2288 params
[param_count
++] = ctx
->ac
.i32_0
; /* voffset */
2289 params
[param_count
++] = ctx
->ac
.i1false
; /* slc */
2291 length
= snprintf(intrinsic_name
, sizeof(intrinsic_name
),
2292 "llvm.amdgcn.buffer.atomic.%s", atomic_name
);
2294 char coords_type
[8];
2296 LLVMValueRef coords
= params
[param_count
++] = get_image_coords(ctx
, instr
);
2297 params
[param_count
++] = get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_IMAGE
,
2299 params
[param_count
++] = ctx
->ac
.i1false
; /* r128 */
2300 params
[param_count
++] = glsl_is_array_image(type
) ? ctx
->ac
.i1true
: ctx
->ac
.i1false
; /* da */
2301 params
[param_count
++] = ctx
->ac
.i1false
; /* slc */
2303 build_int_type_name(LLVMTypeOf(coords
),
2304 coords_type
, sizeof(coords_type
));
2306 length
= snprintf(intrinsic_name
, sizeof(intrinsic_name
),
2307 "llvm.amdgcn.image.atomic.%s.%s", atomic_name
, coords_type
);
2310 assert(length
< sizeof(intrinsic_name
));
2311 return ac_build_intrinsic(&ctx
->ac
, intrinsic_name
, ctx
->ac
.i32
, params
, param_count
, 0);
2314 static LLVMValueRef
visit_image_samples(struct ac_nir_context
*ctx
,
2315 const nir_intrinsic_instr
*instr
)
2317 const nir_variable
*var
= instr
->variables
[0]->var
;
2318 const struct glsl_type
*type
= glsl_without_array(var
->type
);
2320 struct ac_image_args args
= { 0 };
2321 args
.da
= glsl_is_array_image(type
);
2323 args
.resource
= get_sampler_desc(ctx
, instr
->variables
[0],
2324 AC_DESC_IMAGE
, NULL
, true, false);
2325 args
.opcode
= ac_image_get_resinfo
;
2326 args
.addr
= ctx
->ac
.i32_0
;
2328 return ac_build_image_opcode(&ctx
->ac
, &args
);
2331 static LLVMValueRef
visit_image_size(struct ac_nir_context
*ctx
,
2332 const nir_intrinsic_instr
*instr
)
2335 const nir_variable
*var
= instr
->variables
[0]->var
;
2336 const struct glsl_type
*type
= glsl_without_array(var
->type
);
2338 if (glsl_get_sampler_dim(type
) == GLSL_SAMPLER_DIM_BUF
)
2339 return get_buffer_size(ctx
,
2340 get_sampler_desc(ctx
, instr
->variables
[0],
2341 AC_DESC_BUFFER
, NULL
, true, false), true);
2343 struct ac_image_args args
= { 0 };
2345 args
.da
= glsl_is_array_image(type
);
2347 args
.resource
= get_sampler_desc(ctx
, instr
->variables
[0], AC_DESC_IMAGE
, NULL
, true, false);
2348 args
.opcode
= ac_image_get_resinfo
;
2349 args
.addr
= ctx
->ac
.i32_0
;
2351 res
= ac_build_image_opcode(&ctx
->ac
, &args
);
2353 LLVMValueRef two
= LLVMConstInt(ctx
->ac
.i32
, 2, false);
2355 if (glsl_get_sampler_dim(type
) == GLSL_SAMPLER_DIM_CUBE
&&
2356 glsl_sampler_type_is_array(type
)) {
2357 LLVMValueRef six
= LLVMConstInt(ctx
->ac
.i32
, 6, false);
2358 LLVMValueRef z
= LLVMBuildExtractElement(ctx
->ac
.builder
, res
, two
, "");
2359 z
= LLVMBuildSDiv(ctx
->ac
.builder
, z
, six
, "");
2360 res
= LLVMBuildInsertElement(ctx
->ac
.builder
, res
, z
, two
, "");
2362 if (ctx
->ac
.chip_class
>= GFX9
&&
2363 glsl_get_sampler_dim(type
) == GLSL_SAMPLER_DIM_1D
&&
2364 glsl_sampler_type_is_array(type
)) {
2365 LLVMValueRef layers
= LLVMBuildExtractElement(ctx
->ac
.builder
, res
, two
, "");
2366 res
= LLVMBuildInsertElement(ctx
->ac
.builder
, res
, layers
,
2373 #define NOOP_WAITCNT 0xf7f
2374 #define LGKM_CNT 0x07f
2375 #define VM_CNT 0xf70
2377 static void emit_membar(struct ac_llvm_context
*ac
,
2378 const nir_intrinsic_instr
*instr
)
2380 unsigned waitcnt
= NOOP_WAITCNT
;
2382 switch (instr
->intrinsic
) {
2383 case nir_intrinsic_memory_barrier
:
2384 case nir_intrinsic_group_memory_barrier
:
2385 waitcnt
&= VM_CNT
& LGKM_CNT
;
2387 case nir_intrinsic_memory_barrier_atomic_counter
:
2388 case nir_intrinsic_memory_barrier_buffer
:
2389 case nir_intrinsic_memory_barrier_image
:
2392 case nir_intrinsic_memory_barrier_shared
:
2393 waitcnt
&= LGKM_CNT
;
2398 if (waitcnt
!= NOOP_WAITCNT
)
2399 ac_build_waitcnt(ac
, waitcnt
);
2402 void ac_emit_barrier(struct ac_llvm_context
*ac
, gl_shader_stage stage
)
2404 /* SI only (thanks to a hw bug workaround):
2405 * The real barrier instruction isn’t needed, because an entire patch
2406 * always fits into a single wave.
2408 if (ac
->chip_class
== SI
&& stage
== MESA_SHADER_TESS_CTRL
) {
2409 ac_build_waitcnt(ac
, LGKM_CNT
& VM_CNT
);
2412 ac_build_intrinsic(ac
, "llvm.amdgcn.s.barrier",
2413 ac
->voidt
, NULL
, 0, AC_FUNC_ATTR_CONVERGENT
);
2416 static void emit_discard(struct ac_nir_context
*ctx
,
2417 const nir_intrinsic_instr
*instr
)
2421 if (instr
->intrinsic
== nir_intrinsic_discard_if
) {
2422 cond
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
,
2423 get_src(ctx
, instr
->src
[0]),
2426 assert(instr
->intrinsic
== nir_intrinsic_discard
);
2427 cond
= LLVMConstInt(ctx
->ac
.i1
, false, 0);
2430 ctx
->abi
->emit_kill(ctx
->abi
, cond
);
2434 visit_load_helper_invocation(struct ac_nir_context
*ctx
)
2436 LLVMValueRef result
= ac_build_intrinsic(&ctx
->ac
,
2437 "llvm.amdgcn.ps.live",
2438 ctx
->ac
.i1
, NULL
, 0,
2439 AC_FUNC_ATTR_READNONE
);
2440 result
= LLVMBuildNot(ctx
->ac
.builder
, result
, "");
2441 return LLVMBuildSExt(ctx
->ac
.builder
, result
, ctx
->ac
.i32
, "");
2445 visit_load_local_invocation_index(struct ac_nir_context
*ctx
)
2447 LLVMValueRef result
;
2448 LLVMValueRef thread_id
= ac_get_thread_id(&ctx
->ac
);
2449 result
= LLVMBuildAnd(ctx
->ac
.builder
, ctx
->abi
->tg_size
,
2450 LLVMConstInt(ctx
->ac
.i32
, 0xfc0, false), "");
2452 return LLVMBuildAdd(ctx
->ac
.builder
, result
, thread_id
, "");
2456 visit_load_subgroup_id(struct ac_nir_context
*ctx
)
2458 if (ctx
->stage
== MESA_SHADER_COMPUTE
) {
2459 LLVMValueRef result
;
2460 result
= LLVMBuildAnd(ctx
->ac
.builder
, ctx
->abi
->tg_size
,
2461 LLVMConstInt(ctx
->ac
.i32
, 0xfc0, false), "");
2462 return LLVMBuildLShr(ctx
->ac
.builder
, result
, LLVMConstInt(ctx
->ac
.i32
, 6, false), "");
2464 return LLVMConstInt(ctx
->ac
.i32
, 0, false);
2469 visit_load_num_subgroups(struct ac_nir_context
*ctx
)
2471 if (ctx
->stage
== MESA_SHADER_COMPUTE
) {
2472 return LLVMBuildAnd(ctx
->ac
.builder
, ctx
->abi
->tg_size
,
2473 LLVMConstInt(ctx
->ac
.i32
, 0x3f, false), "");
2475 return LLVMConstInt(ctx
->ac
.i32
, 1, false);
2480 visit_first_invocation(struct ac_nir_context
*ctx
)
2482 LLVMValueRef active_set
= ac_build_ballot(&ctx
->ac
, ctx
->ac
.i32_1
);
2484 /* The second argument is whether cttz(0) should be defined, but we do not care. */
2485 LLVMValueRef args
[] = {active_set
, LLVMConstInt(ctx
->ac
.i1
, 0, false)};
2486 LLVMValueRef result
= ac_build_intrinsic(&ctx
->ac
,
2488 ctx
->ac
.i64
, args
, 2,
2489 AC_FUNC_ATTR_NOUNWIND
|
2490 AC_FUNC_ATTR_READNONE
);
2492 return LLVMBuildTrunc(ctx
->ac
.builder
, result
, ctx
->ac
.i32
, "");
2496 visit_load_shared(struct ac_nir_context
*ctx
,
2497 const nir_intrinsic_instr
*instr
)
2499 LLVMValueRef values
[4], derived_ptr
, index
, ret
;
2501 LLVMValueRef ptr
= get_memory_ptr(ctx
, instr
->src
[0]);
2503 for (int chan
= 0; chan
< instr
->num_components
; chan
++) {
2504 index
= LLVMConstInt(ctx
->ac
.i32
, chan
, 0);
2505 derived_ptr
= LLVMBuildGEP(ctx
->ac
.builder
, ptr
, &index
, 1, "");
2506 values
[chan
] = LLVMBuildLoad(ctx
->ac
.builder
, derived_ptr
, "");
2509 ret
= ac_build_gather_values(&ctx
->ac
, values
, instr
->num_components
);
2510 return LLVMBuildBitCast(ctx
->ac
.builder
, ret
, get_def_type(ctx
, &instr
->dest
.ssa
), "");
2514 visit_store_shared(struct ac_nir_context
*ctx
,
2515 const nir_intrinsic_instr
*instr
)
2517 LLVMValueRef derived_ptr
, data
,index
;
2518 LLVMBuilderRef builder
= ctx
->ac
.builder
;
2520 LLVMValueRef ptr
= get_memory_ptr(ctx
, instr
->src
[1]);
2521 LLVMValueRef src
= get_src(ctx
, instr
->src
[0]);
2523 int writemask
= nir_intrinsic_write_mask(instr
);
2524 for (int chan
= 0; chan
< 4; chan
++) {
2525 if (!(writemask
& (1 << chan
))) {
2528 data
= ac_llvm_extract_elem(&ctx
->ac
, src
, chan
);
2529 index
= LLVMConstInt(ctx
->ac
.i32
, chan
, 0);
2530 derived_ptr
= LLVMBuildGEP(builder
, ptr
, &index
, 1, "");
2531 LLVMBuildStore(builder
, data
, derived_ptr
);
2535 static LLVMValueRef
visit_var_atomic(struct ac_nir_context
*ctx
,
2536 const nir_intrinsic_instr
*instr
,
2537 LLVMValueRef ptr
, int src_idx
)
2539 LLVMValueRef result
;
2540 LLVMValueRef src
= get_src(ctx
, instr
->src
[src_idx
]);
2542 if (instr
->intrinsic
== nir_intrinsic_var_atomic_comp_swap
||
2543 instr
->intrinsic
== nir_intrinsic_shared_atomic_comp_swap
) {
2544 LLVMValueRef src1
= get_src(ctx
, instr
->src
[src_idx
+ 1]);
2545 result
= LLVMBuildAtomicCmpXchg(ctx
->ac
.builder
,
2547 LLVMAtomicOrderingSequentiallyConsistent
,
2548 LLVMAtomicOrderingSequentiallyConsistent
,
2551 LLVMAtomicRMWBinOp op
;
2552 switch (instr
->intrinsic
) {
2553 case nir_intrinsic_var_atomic_add
:
2554 case nir_intrinsic_shared_atomic_add
:
2555 op
= LLVMAtomicRMWBinOpAdd
;
2557 case nir_intrinsic_var_atomic_umin
:
2558 case nir_intrinsic_shared_atomic_umin
:
2559 op
= LLVMAtomicRMWBinOpUMin
;
2561 case nir_intrinsic_var_atomic_umax
:
2562 case nir_intrinsic_shared_atomic_umax
:
2563 op
= LLVMAtomicRMWBinOpUMax
;
2565 case nir_intrinsic_var_atomic_imin
:
2566 case nir_intrinsic_shared_atomic_imin
:
2567 op
= LLVMAtomicRMWBinOpMin
;
2569 case nir_intrinsic_var_atomic_imax
:
2570 case nir_intrinsic_shared_atomic_imax
:
2571 op
= LLVMAtomicRMWBinOpMax
;
2573 case nir_intrinsic_var_atomic_and
:
2574 case nir_intrinsic_shared_atomic_and
:
2575 op
= LLVMAtomicRMWBinOpAnd
;
2577 case nir_intrinsic_var_atomic_or
:
2578 case nir_intrinsic_shared_atomic_or
:
2579 op
= LLVMAtomicRMWBinOpOr
;
2581 case nir_intrinsic_var_atomic_xor
:
2582 case nir_intrinsic_shared_atomic_xor
:
2583 op
= LLVMAtomicRMWBinOpXor
;
2585 case nir_intrinsic_var_atomic_exchange
:
2586 case nir_intrinsic_shared_atomic_exchange
:
2587 op
= LLVMAtomicRMWBinOpXchg
;
2593 result
= LLVMBuildAtomicRMW(ctx
->ac
.builder
, op
, ptr
, ac_to_integer(&ctx
->ac
, src
),
2594 LLVMAtomicOrderingSequentiallyConsistent
,
2600 static LLVMValueRef
load_sample_pos(struct ac_nir_context
*ctx
)
2602 LLVMValueRef values
[2];
2603 LLVMValueRef pos
[2];
2605 pos
[0] = ac_to_float(&ctx
->ac
, ctx
->abi
->frag_pos
[0]);
2606 pos
[1] = ac_to_float(&ctx
->ac
, ctx
->abi
->frag_pos
[1]);
2608 values
[0] = ac_build_fract(&ctx
->ac
, pos
[0], 32);
2609 values
[1] = ac_build_fract(&ctx
->ac
, pos
[1], 32);
2610 return ac_build_gather_values(&ctx
->ac
, values
, 2);
2613 static LLVMValueRef
visit_interp(struct ac_nir_context
*ctx
,
2614 const nir_intrinsic_instr
*instr
)
2616 LLVMValueRef result
[4];
2617 LLVMValueRef interp_param
, attr_number
;
2620 LLVMValueRef src_c0
= NULL
;
2621 LLVMValueRef src_c1
= NULL
;
2622 LLVMValueRef src0
= NULL
;
2623 int input_index
= instr
->variables
[0]->var
->data
.location
- VARYING_SLOT_VAR0
;
2624 switch (instr
->intrinsic
) {
2625 case nir_intrinsic_interp_var_at_centroid
:
2626 location
= INTERP_CENTROID
;
2628 case nir_intrinsic_interp_var_at_sample
:
2629 case nir_intrinsic_interp_var_at_offset
:
2630 location
= INTERP_CENTER
;
2631 src0
= get_src(ctx
, instr
->src
[0]);
2637 if (instr
->intrinsic
== nir_intrinsic_interp_var_at_offset
) {
2638 src_c0
= ac_to_float(&ctx
->ac
, LLVMBuildExtractElement(ctx
->ac
.builder
, src0
, ctx
->ac
.i32_0
, ""));
2639 src_c1
= ac_to_float(&ctx
->ac
, LLVMBuildExtractElement(ctx
->ac
.builder
, src0
, ctx
->ac
.i32_1
, ""));
2640 } else if (instr
->intrinsic
== nir_intrinsic_interp_var_at_sample
) {
2641 LLVMValueRef sample_position
;
2642 LLVMValueRef halfval
= LLVMConstReal(ctx
->ac
.f32
, 0.5f
);
2644 /* fetch sample ID */
2645 sample_position
= ctx
->abi
->load_sample_position(ctx
->abi
, src0
);
2647 src_c0
= LLVMBuildExtractElement(ctx
->ac
.builder
, sample_position
, ctx
->ac
.i32_0
, "");
2648 src_c0
= LLVMBuildFSub(ctx
->ac
.builder
, src_c0
, halfval
, "");
2649 src_c1
= LLVMBuildExtractElement(ctx
->ac
.builder
, sample_position
, ctx
->ac
.i32_1
, "");
2650 src_c1
= LLVMBuildFSub(ctx
->ac
.builder
, src_c1
, halfval
, "");
2652 interp_param
= ctx
->abi
->lookup_interp_param(ctx
->abi
, instr
->variables
[0]->var
->data
.interpolation
, location
);
2653 attr_number
= LLVMConstInt(ctx
->ac
.i32
, input_index
, false);
2655 if (location
== INTERP_CENTER
) {
2656 LLVMValueRef ij_out
[2];
2657 LLVMValueRef ddxy_out
= emit_ddxy_interp(ctx
, interp_param
);
2660 * take the I then J parameters, and the DDX/Y for it, and
2661 * calculate the IJ inputs for the interpolator.
2662 * temp1 = ddx * offset/sample.x + I;
2663 * interp_param.I = ddy * offset/sample.y + temp1;
2664 * temp1 = ddx * offset/sample.x + J;
2665 * interp_param.J = ddy * offset/sample.y + temp1;
2667 for (unsigned i
= 0; i
< 2; i
++) {
2668 LLVMValueRef ix_ll
= LLVMConstInt(ctx
->ac
.i32
, i
, false);
2669 LLVMValueRef iy_ll
= LLVMConstInt(ctx
->ac
.i32
, i
+ 2, false);
2670 LLVMValueRef ddx_el
= LLVMBuildExtractElement(ctx
->ac
.builder
,
2671 ddxy_out
, ix_ll
, "");
2672 LLVMValueRef ddy_el
= LLVMBuildExtractElement(ctx
->ac
.builder
,
2673 ddxy_out
, iy_ll
, "");
2674 LLVMValueRef interp_el
= LLVMBuildExtractElement(ctx
->ac
.builder
,
2675 interp_param
, ix_ll
, "");
2676 LLVMValueRef temp1
, temp2
;
2678 interp_el
= LLVMBuildBitCast(ctx
->ac
.builder
, interp_el
,
2681 temp1
= LLVMBuildFMul(ctx
->ac
.builder
, ddx_el
, src_c0
, "");
2682 temp1
= LLVMBuildFAdd(ctx
->ac
.builder
, temp1
, interp_el
, "");
2684 temp2
= LLVMBuildFMul(ctx
->ac
.builder
, ddy_el
, src_c1
, "");
2685 temp2
= LLVMBuildFAdd(ctx
->ac
.builder
, temp2
, temp1
, "");
2687 ij_out
[i
] = LLVMBuildBitCast(ctx
->ac
.builder
,
2688 temp2
, ctx
->ac
.i32
, "");
2690 interp_param
= ac_build_gather_values(&ctx
->ac
, ij_out
, 2);
2694 for (chan
= 0; chan
< 4; chan
++) {
2695 LLVMValueRef llvm_chan
= LLVMConstInt(ctx
->ac
.i32
, chan
, false);
2698 interp_param
= LLVMBuildBitCast(ctx
->ac
.builder
,
2699 interp_param
, ctx
->ac
.v2f32
, "");
2700 LLVMValueRef i
= LLVMBuildExtractElement(
2701 ctx
->ac
.builder
, interp_param
, ctx
->ac
.i32_0
, "");
2702 LLVMValueRef j
= LLVMBuildExtractElement(
2703 ctx
->ac
.builder
, interp_param
, ctx
->ac
.i32_1
, "");
2705 result
[chan
] = ac_build_fs_interp(&ctx
->ac
,
2706 llvm_chan
, attr_number
,
2707 ctx
->abi
->prim_mask
, i
, j
);
2709 result
[chan
] = ac_build_fs_interp_mov(&ctx
->ac
,
2710 LLVMConstInt(ctx
->ac
.i32
, 2, false),
2711 llvm_chan
, attr_number
,
2712 ctx
->abi
->prim_mask
);
2715 return ac_build_varying_gather_values(&ctx
->ac
, result
, instr
->num_components
,
2716 instr
->variables
[0]->var
->data
.location_frac
);
2719 static void visit_intrinsic(struct ac_nir_context
*ctx
,
2720 nir_intrinsic_instr
*instr
)
2722 LLVMValueRef result
= NULL
;
2724 switch (instr
->intrinsic
) {
2725 case nir_intrinsic_ballot
:
2726 result
= ac_build_ballot(&ctx
->ac
, get_src(ctx
, instr
->src
[0]));
2728 case nir_intrinsic_read_invocation
:
2729 case nir_intrinsic_read_first_invocation
: {
2730 LLVMValueRef args
[2];
2733 args
[0] = get_src(ctx
, instr
->src
[0]);
2736 const char *intr_name
;
2737 if (instr
->intrinsic
== nir_intrinsic_read_invocation
) {
2739 intr_name
= "llvm.amdgcn.readlane";
2742 args
[1] = get_src(ctx
, instr
->src
[1]);
2745 intr_name
= "llvm.amdgcn.readfirstlane";
2748 /* We currently have no other way to prevent LLVM from lifting the icmp
2749 * calls to a dominating basic block.
2751 ac_build_optimization_barrier(&ctx
->ac
, &args
[0]);
2753 result
= ac_build_intrinsic(&ctx
->ac
, intr_name
,
2754 ctx
->ac
.i32
, args
, num_args
,
2755 AC_FUNC_ATTR_READNONE
|
2756 AC_FUNC_ATTR_CONVERGENT
);
2759 case nir_intrinsic_load_subgroup_invocation
:
2760 result
= ac_get_thread_id(&ctx
->ac
);
2762 case nir_intrinsic_load_work_group_id
: {
2763 LLVMValueRef values
[3];
2765 for (int i
= 0; i
< 3; i
++) {
2766 values
[i
] = ctx
->abi
->workgroup_ids
[i
] ?
2767 ctx
->abi
->workgroup_ids
[i
] : ctx
->ac
.i32_0
;
2770 result
= ac_build_gather_values(&ctx
->ac
, values
, 3);
2773 case nir_intrinsic_load_base_vertex
: {
2774 result
= ctx
->abi
->load_base_vertex(ctx
->abi
);
2777 case nir_intrinsic_load_local_group_size
:
2778 result
= ctx
->abi
->load_local_group_size(ctx
->abi
);
2780 case nir_intrinsic_load_vertex_id
:
2781 result
= LLVMBuildAdd(ctx
->ac
.builder
, ctx
->abi
->vertex_id
,
2782 ctx
->abi
->base_vertex
, "");
2784 case nir_intrinsic_load_vertex_id_zero_base
: {
2785 result
= ctx
->abi
->vertex_id
;
2788 case nir_intrinsic_load_local_invocation_id
: {
2789 result
= ctx
->abi
->local_invocation_ids
;
2792 case nir_intrinsic_load_base_instance
:
2793 result
= ctx
->abi
->start_instance
;
2795 case nir_intrinsic_load_draw_id
:
2796 result
= ctx
->abi
->draw_id
;
2798 case nir_intrinsic_load_view_index
:
2799 result
= ctx
->abi
->view_index
;
2801 case nir_intrinsic_load_invocation_id
:
2802 if (ctx
->stage
== MESA_SHADER_TESS_CTRL
)
2803 result
= ac_unpack_param(&ctx
->ac
, ctx
->abi
->tcs_rel_ids
, 8, 5);
2805 result
= ctx
->abi
->gs_invocation_id
;
2807 case nir_intrinsic_load_primitive_id
:
2808 if (ctx
->stage
== MESA_SHADER_GEOMETRY
) {
2809 result
= ctx
->abi
->gs_prim_id
;
2810 } else if (ctx
->stage
== MESA_SHADER_TESS_CTRL
) {
2811 result
= ctx
->abi
->tcs_patch_id
;
2812 } else if (ctx
->stage
== MESA_SHADER_TESS_EVAL
) {
2813 result
= ctx
->abi
->tes_patch_id
;
2815 fprintf(stderr
, "Unknown primitive id intrinsic: %d", ctx
->stage
);
2817 case nir_intrinsic_load_sample_id
:
2818 result
= ac_unpack_param(&ctx
->ac
, ctx
->abi
->ancillary
, 8, 4);
2820 case nir_intrinsic_load_sample_pos
:
2821 result
= load_sample_pos(ctx
);
2823 case nir_intrinsic_load_sample_mask_in
:
2824 result
= ctx
->abi
->load_sample_mask_in(ctx
->abi
);
2826 case nir_intrinsic_load_frag_coord
: {
2827 LLVMValueRef values
[4] = {
2828 ctx
->abi
->frag_pos
[0],
2829 ctx
->abi
->frag_pos
[1],
2830 ctx
->abi
->frag_pos
[2],
2831 ac_build_fdiv(&ctx
->ac
, ctx
->ac
.f32_1
, ctx
->abi
->frag_pos
[3])
2833 result
= ac_build_gather_values(&ctx
->ac
, values
, 4);
2836 case nir_intrinsic_load_front_face
:
2837 result
= ctx
->abi
->front_face
;
2839 case nir_intrinsic_load_helper_invocation
:
2840 result
= visit_load_helper_invocation(ctx
);
2842 case nir_intrinsic_load_instance_id
:
2843 result
= ctx
->abi
->instance_id
;
2845 case nir_intrinsic_load_num_work_groups
:
2846 result
= ctx
->abi
->num_work_groups
;
2848 case nir_intrinsic_load_local_invocation_index
:
2849 result
= visit_load_local_invocation_index(ctx
);
2851 case nir_intrinsic_load_subgroup_id
:
2852 result
= visit_load_subgroup_id(ctx
);
2854 case nir_intrinsic_load_num_subgroups
:
2855 result
= visit_load_num_subgroups(ctx
);
2857 case nir_intrinsic_first_invocation
:
2858 result
= visit_first_invocation(ctx
);
2860 case nir_intrinsic_load_push_constant
:
2861 result
= visit_load_push_constant(ctx
, instr
);
2863 case nir_intrinsic_vulkan_resource_index
: {
2864 LLVMValueRef index
= get_src(ctx
, instr
->src
[0]);
2865 unsigned desc_set
= nir_intrinsic_desc_set(instr
);
2866 unsigned binding
= nir_intrinsic_binding(instr
);
2868 result
= ctx
->abi
->load_resource(ctx
->abi
, index
, desc_set
,
2872 case nir_intrinsic_vulkan_resource_reindex
:
2873 result
= visit_vulkan_resource_reindex(ctx
, instr
);
2875 case nir_intrinsic_store_ssbo
:
2876 visit_store_ssbo(ctx
, instr
);
2878 case nir_intrinsic_load_ssbo
:
2879 result
= visit_load_buffer(ctx
, instr
);
2881 case nir_intrinsic_ssbo_atomic_add
:
2882 case nir_intrinsic_ssbo_atomic_imin
:
2883 case nir_intrinsic_ssbo_atomic_umin
:
2884 case nir_intrinsic_ssbo_atomic_imax
:
2885 case nir_intrinsic_ssbo_atomic_umax
:
2886 case nir_intrinsic_ssbo_atomic_and
:
2887 case nir_intrinsic_ssbo_atomic_or
:
2888 case nir_intrinsic_ssbo_atomic_xor
:
2889 case nir_intrinsic_ssbo_atomic_exchange
:
2890 case nir_intrinsic_ssbo_atomic_comp_swap
:
2891 result
= visit_atomic_ssbo(ctx
, instr
);
2893 case nir_intrinsic_load_ubo
:
2894 result
= visit_load_ubo_buffer(ctx
, instr
);
2896 case nir_intrinsic_get_buffer_size
:
2897 result
= visit_get_buffer_size(ctx
, instr
);
2899 case nir_intrinsic_load_var
:
2900 result
= visit_load_var(ctx
, instr
);
2902 case nir_intrinsic_store_var
:
2903 visit_store_var(ctx
, instr
);
2905 case nir_intrinsic_load_shared
:
2906 result
= visit_load_shared(ctx
, instr
);
2908 case nir_intrinsic_store_shared
:
2909 visit_store_shared(ctx
, instr
);
2911 case nir_intrinsic_image_var_samples
:
2912 result
= visit_image_samples(ctx
, instr
);
2914 case nir_intrinsic_image_var_load
:
2915 result
= visit_image_load(ctx
, instr
);
2917 case nir_intrinsic_image_var_store
:
2918 visit_image_store(ctx
, instr
);
2920 case nir_intrinsic_image_var_atomic_add
:
2921 case nir_intrinsic_image_var_atomic_min
:
2922 case nir_intrinsic_image_var_atomic_max
:
2923 case nir_intrinsic_image_var_atomic_and
:
2924 case nir_intrinsic_image_var_atomic_or
:
2925 case nir_intrinsic_image_var_atomic_xor
:
2926 case nir_intrinsic_image_var_atomic_exchange
:
2927 case nir_intrinsic_image_var_atomic_comp_swap
:
2928 result
= visit_image_atomic(ctx
, instr
);
2930 case nir_intrinsic_image_var_size
:
2931 result
= visit_image_size(ctx
, instr
);
2933 case nir_intrinsic_shader_clock
:
2934 result
= ac_build_shader_clock(&ctx
->ac
);
2936 case nir_intrinsic_discard
:
2937 case nir_intrinsic_discard_if
:
2938 emit_discard(ctx
, instr
);
2940 case nir_intrinsic_memory_barrier
:
2941 case nir_intrinsic_group_memory_barrier
:
2942 case nir_intrinsic_memory_barrier_atomic_counter
:
2943 case nir_intrinsic_memory_barrier_buffer
:
2944 case nir_intrinsic_memory_barrier_image
:
2945 case nir_intrinsic_memory_barrier_shared
:
2946 emit_membar(&ctx
->ac
, instr
);
2948 case nir_intrinsic_barrier
:
2949 ac_emit_barrier(&ctx
->ac
, ctx
->stage
);
2951 case nir_intrinsic_shared_atomic_add
:
2952 case nir_intrinsic_shared_atomic_imin
:
2953 case nir_intrinsic_shared_atomic_umin
:
2954 case nir_intrinsic_shared_atomic_imax
:
2955 case nir_intrinsic_shared_atomic_umax
:
2956 case nir_intrinsic_shared_atomic_and
:
2957 case nir_intrinsic_shared_atomic_or
:
2958 case nir_intrinsic_shared_atomic_xor
:
2959 case nir_intrinsic_shared_atomic_exchange
:
2960 case nir_intrinsic_shared_atomic_comp_swap
: {
2961 LLVMValueRef ptr
= get_memory_ptr(ctx
, instr
->src
[0]);
2962 result
= visit_var_atomic(ctx
, instr
, ptr
, 1);
2965 case nir_intrinsic_var_atomic_add
:
2966 case nir_intrinsic_var_atomic_imin
:
2967 case nir_intrinsic_var_atomic_umin
:
2968 case nir_intrinsic_var_atomic_imax
:
2969 case nir_intrinsic_var_atomic_umax
:
2970 case nir_intrinsic_var_atomic_and
:
2971 case nir_intrinsic_var_atomic_or
:
2972 case nir_intrinsic_var_atomic_xor
:
2973 case nir_intrinsic_var_atomic_exchange
:
2974 case nir_intrinsic_var_atomic_comp_swap
: {
2975 LLVMValueRef ptr
= build_gep_for_deref(ctx
, instr
->variables
[0]);
2976 result
= visit_var_atomic(ctx
, instr
, ptr
, 0);
2979 case nir_intrinsic_interp_var_at_centroid
:
2980 case nir_intrinsic_interp_var_at_sample
:
2981 case nir_intrinsic_interp_var_at_offset
:
2982 result
= visit_interp(ctx
, instr
);
2984 case nir_intrinsic_emit_vertex
:
2985 ctx
->abi
->emit_vertex(ctx
->abi
, nir_intrinsic_stream_id(instr
), ctx
->abi
->outputs
);
2987 case nir_intrinsic_end_primitive
:
2988 ctx
->abi
->emit_primitive(ctx
->abi
, nir_intrinsic_stream_id(instr
));
2990 case nir_intrinsic_load_tess_coord
:
2991 result
= ctx
->abi
->load_tess_coord(ctx
->abi
);
2993 case nir_intrinsic_load_tess_level_outer
:
2994 result
= ctx
->abi
->load_tess_level(ctx
->abi
, VARYING_SLOT_TESS_LEVEL_OUTER
);
2996 case nir_intrinsic_load_tess_level_inner
:
2997 result
= ctx
->abi
->load_tess_level(ctx
->abi
, VARYING_SLOT_TESS_LEVEL_INNER
);
2999 case nir_intrinsic_load_patch_vertices_in
:
3000 result
= ctx
->abi
->load_patch_vertices_in(ctx
->abi
);
3002 case nir_intrinsic_vote_all
: {
3003 LLVMValueRef tmp
= ac_build_vote_all(&ctx
->ac
, get_src(ctx
, instr
->src
[0]));
3004 result
= LLVMBuildSExt(ctx
->ac
.builder
, tmp
, ctx
->ac
.i32
, "");
3007 case nir_intrinsic_vote_any
: {
3008 LLVMValueRef tmp
= ac_build_vote_any(&ctx
->ac
, get_src(ctx
, instr
->src
[0]));
3009 result
= LLVMBuildSExt(ctx
->ac
.builder
, tmp
, ctx
->ac
.i32
, "");
3013 fprintf(stderr
, "Unknown intrinsic: ");
3014 nir_print_instr(&instr
->instr
, stderr
);
3015 fprintf(stderr
, "\n");
3019 _mesa_hash_table_insert(ctx
->defs
, &instr
->dest
.ssa
, result
);
3023 static LLVMValueRef
get_sampler_desc(struct ac_nir_context
*ctx
,
3024 const nir_deref_var
*deref
,
3025 enum ac_descriptor_type desc_type
,
3026 const nir_tex_instr
*tex_instr
,
3027 bool image
, bool write
)
3029 LLVMValueRef index
= NULL
;
3030 unsigned constant_index
= 0;
3031 unsigned descriptor_set
;
3032 unsigned base_index
;
3033 bool bindless
= false;
3036 assert(tex_instr
&& !image
);
3038 base_index
= tex_instr
->sampler_index
;
3040 const nir_deref
*tail
= &deref
->deref
;
3041 while (tail
->child
) {
3042 const nir_deref_array
*child
= nir_deref_as_array(tail
->child
);
3043 unsigned array_size
= glsl_get_aoa_size(tail
->child
->type
);
3048 assert(child
->deref_array_type
!= nir_deref_array_type_wildcard
);
3050 if (child
->deref_array_type
== nir_deref_array_type_indirect
) {
3051 LLVMValueRef indirect
= get_src(ctx
, child
->indirect
);
3053 indirect
= LLVMBuildMul(ctx
->ac
.builder
, indirect
,
3054 LLVMConstInt(ctx
->ac
.i32
, array_size
, false), "");
3059 index
= LLVMBuildAdd(ctx
->ac
.builder
, index
, indirect
, "");
3062 constant_index
+= child
->base_offset
* array_size
;
3064 tail
= &child
->deref
;
3066 descriptor_set
= deref
->var
->data
.descriptor_set
;
3068 if (deref
->var
->data
.bindless
) {
3069 bindless
= deref
->var
->data
.bindless
;
3070 base_index
= deref
->var
->data
.driver_location
;
3072 base_index
= deref
->var
->data
.binding
;
3076 return ctx
->abi
->load_sampler_desc(ctx
->abi
,
3079 constant_index
, index
,
3080 desc_type
, image
, write
, bindless
);
3083 static void set_tex_fetch_args(struct ac_llvm_context
*ctx
,
3084 struct ac_image_args
*args
,
3085 const nir_tex_instr
*instr
,
3087 LLVMValueRef res_ptr
, LLVMValueRef samp_ptr
,
3088 LLVMValueRef
*param
, unsigned count
,
3091 unsigned is_rect
= 0;
3092 bool da
= instr
->is_array
|| instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
;
3094 if (op
== nir_texop_lod
)
3096 /* Pad to power of two vector */
3097 while (count
< util_next_power_of_two(count
))
3098 param
[count
++] = LLVMGetUndef(ctx
->i32
);
3101 args
->addr
= ac_build_gather_values(ctx
, param
, count
);
3103 args
->addr
= param
[0];
3105 args
->resource
= res_ptr
;
3106 args
->sampler
= samp_ptr
;
3108 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_BUF
&& op
== nir_texop_txf
) {
3109 args
->addr
= param
[0];
3113 args
->dmask
= dmask
;
3114 args
->unorm
= is_rect
;
3118 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3121 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3122 * filtering manually. The driver sets img7 to a mask clearing
3123 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3124 * s_and_b32 samp0, samp0, img7
3127 * The ANISO_OVERRIDE sampler field enables this fix in TA.
3129 static LLVMValueRef
sici_fix_sampler_aniso(struct ac_nir_context
*ctx
,
3130 LLVMValueRef res
, LLVMValueRef samp
)
3132 LLVMBuilderRef builder
= ctx
->ac
.builder
;
3133 LLVMValueRef img7
, samp0
;
3135 if (ctx
->ac
.chip_class
>= VI
)
3138 img7
= LLVMBuildExtractElement(builder
, res
,
3139 LLVMConstInt(ctx
->ac
.i32
, 7, 0), "");
3140 samp0
= LLVMBuildExtractElement(builder
, samp
,
3141 LLVMConstInt(ctx
->ac
.i32
, 0, 0), "");
3142 samp0
= LLVMBuildAnd(builder
, samp0
, img7
, "");
3143 return LLVMBuildInsertElement(builder
, samp
, samp0
,
3144 LLVMConstInt(ctx
->ac
.i32
, 0, 0), "");
3147 static void tex_fetch_ptrs(struct ac_nir_context
*ctx
,
3148 nir_tex_instr
*instr
,
3149 LLVMValueRef
*res_ptr
, LLVMValueRef
*samp_ptr
,
3150 LLVMValueRef
*fmask_ptr
)
3152 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_BUF
)
3153 *res_ptr
= get_sampler_desc(ctx
, instr
->texture
, AC_DESC_BUFFER
, instr
, false, false);
3155 *res_ptr
= get_sampler_desc(ctx
, instr
->texture
, AC_DESC_IMAGE
, instr
, false, false);
3158 *samp_ptr
= get_sampler_desc(ctx
, instr
->sampler
, AC_DESC_SAMPLER
, instr
, false, false);
3160 *samp_ptr
= get_sampler_desc(ctx
, instr
->texture
, AC_DESC_SAMPLER
, instr
, false, false);
3161 if (instr
->sampler_dim
< GLSL_SAMPLER_DIM_RECT
)
3162 *samp_ptr
= sici_fix_sampler_aniso(ctx
, *res_ptr
, *samp_ptr
);
3164 if (fmask_ptr
&& !instr
->sampler
&& (instr
->op
== nir_texop_txf_ms
||
3165 instr
->op
== nir_texop_samples_identical
))
3166 *fmask_ptr
= get_sampler_desc(ctx
, instr
->texture
, AC_DESC_FMASK
, instr
, false, false);
3169 static LLVMValueRef
apply_round_slice(struct ac_llvm_context
*ctx
,
3172 coord
= ac_to_float(ctx
, coord
);
3173 coord
= ac_build_intrinsic(ctx
, "llvm.rint.f32", ctx
->f32
, &coord
, 1, 0);
3174 coord
= ac_to_integer(ctx
, coord
);
3178 static void visit_tex(struct ac_nir_context
*ctx
, nir_tex_instr
*instr
)
3180 LLVMValueRef result
= NULL
;
3181 struct ac_image_args args
= { 0 };
3182 unsigned dmask
= 0xf;
3183 LLVMValueRef address
[16];
3184 LLVMValueRef coords
[5];
3185 LLVMValueRef coord
= NULL
, lod
= NULL
, comparator
= NULL
;
3186 LLVMValueRef bias
= NULL
, offsets
= NULL
;
3187 LLVMValueRef res_ptr
, samp_ptr
, fmask_ptr
= NULL
, sample_index
= NULL
;
3188 LLVMValueRef ddx
= NULL
, ddy
= NULL
;
3189 LLVMValueRef derivs
[6];
3190 unsigned chan
, count
= 0;
3191 unsigned const_src
= 0, num_deriv_comp
= 0;
3192 bool lod_is_zero
= false;
3194 tex_fetch_ptrs(ctx
, instr
, &res_ptr
, &samp_ptr
, &fmask_ptr
);
3196 for (unsigned i
= 0; i
< instr
->num_srcs
; i
++) {
3197 switch (instr
->src
[i
].src_type
) {
3198 case nir_tex_src_coord
:
3199 coord
= get_src(ctx
, instr
->src
[i
].src
);
3201 case nir_tex_src_projector
:
3203 case nir_tex_src_comparator
:
3204 comparator
= get_src(ctx
, instr
->src
[i
].src
);
3206 case nir_tex_src_offset
:
3207 offsets
= get_src(ctx
, instr
->src
[i
].src
);
3210 case nir_tex_src_bias
:
3211 bias
= get_src(ctx
, instr
->src
[i
].src
);
3213 case nir_tex_src_lod
: {
3214 nir_const_value
*val
= nir_src_as_const_value(instr
->src
[i
].src
);
3216 if (val
&& val
->i32
[0] == 0)
3218 lod
= get_src(ctx
, instr
->src
[i
].src
);
3221 case nir_tex_src_ms_index
:
3222 sample_index
= get_src(ctx
, instr
->src
[i
].src
);
3224 case nir_tex_src_ms_mcs
:
3226 case nir_tex_src_ddx
:
3227 ddx
= get_src(ctx
, instr
->src
[i
].src
);
3228 num_deriv_comp
= instr
->src
[i
].src
.ssa
->num_components
;
3230 case nir_tex_src_ddy
:
3231 ddy
= get_src(ctx
, instr
->src
[i
].src
);
3233 case nir_tex_src_texture_offset
:
3234 case nir_tex_src_sampler_offset
:
3235 case nir_tex_src_plane
:
3241 if (instr
->op
== nir_texop_txs
&& instr
->sampler_dim
== GLSL_SAMPLER_DIM_BUF
) {
3242 result
= get_buffer_size(ctx
, res_ptr
, true);
3246 if (instr
->op
== nir_texop_texture_samples
) {
3247 LLVMValueRef res
, samples
, is_msaa
;
3248 res
= LLVMBuildBitCast(ctx
->ac
.builder
, res_ptr
, ctx
->ac
.v8i32
, "");
3249 samples
= LLVMBuildExtractElement(ctx
->ac
.builder
, res
,
3250 LLVMConstInt(ctx
->ac
.i32
, 3, false), "");
3251 is_msaa
= LLVMBuildLShr(ctx
->ac
.builder
, samples
,
3252 LLVMConstInt(ctx
->ac
.i32
, 28, false), "");
3253 is_msaa
= LLVMBuildAnd(ctx
->ac
.builder
, is_msaa
,
3254 LLVMConstInt(ctx
->ac
.i32
, 0xe, false), "");
3255 is_msaa
= LLVMBuildICmp(ctx
->ac
.builder
, LLVMIntEQ
, is_msaa
,
3256 LLVMConstInt(ctx
->ac
.i32
, 0xe, false), "");
3258 samples
= LLVMBuildLShr(ctx
->ac
.builder
, samples
,
3259 LLVMConstInt(ctx
->ac
.i32
, 16, false), "");
3260 samples
= LLVMBuildAnd(ctx
->ac
.builder
, samples
,
3261 LLVMConstInt(ctx
->ac
.i32
, 0xf, false), "");
3262 samples
= LLVMBuildShl(ctx
->ac
.builder
, ctx
->ac
.i32_1
,
3264 samples
= LLVMBuildSelect(ctx
->ac
.builder
, is_msaa
, samples
,
3271 for (chan
= 0; chan
< instr
->coord_components
; chan
++)
3272 coords
[chan
] = ac_llvm_extract_elem(&ctx
->ac
, coord
, chan
);
3274 if (offsets
&& instr
->op
!= nir_texop_txf
) {
3275 LLVMValueRef offset
[3], pack
;
3276 for (chan
= 0; chan
< 3; ++chan
)
3277 offset
[chan
] = ctx
->ac
.i32_0
;
3280 for (chan
= 0; chan
< ac_get_llvm_num_components(offsets
); chan
++) {
3281 offset
[chan
] = ac_llvm_extract_elem(&ctx
->ac
, offsets
, chan
);
3282 offset
[chan
] = LLVMBuildAnd(ctx
->ac
.builder
, offset
[chan
],
3283 LLVMConstInt(ctx
->ac
.i32
, 0x3f, false), "");
3285 offset
[chan
] = LLVMBuildShl(ctx
->ac
.builder
, offset
[chan
],
3286 LLVMConstInt(ctx
->ac
.i32
, chan
* 8, false), "");
3288 pack
= LLVMBuildOr(ctx
->ac
.builder
, offset
[0], offset
[1], "");
3289 pack
= LLVMBuildOr(ctx
->ac
.builder
, pack
, offset
[2], "");
3290 address
[count
++] = pack
;
3293 /* pack LOD bias value */
3294 if (instr
->op
== nir_texop_txb
&& bias
) {
3295 address
[count
++] = bias
;
3298 /* Pack depth comparison value */
3299 if (instr
->is_shadow
&& comparator
) {
3300 LLVMValueRef z
= ac_to_float(&ctx
->ac
,
3301 ac_llvm_extract_elem(&ctx
->ac
, comparator
, 0));
3303 /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
3304 * so the depth comparison value isn't clamped for Z16 and
3305 * Z24 anymore. Do it manually here.
3307 * It's unnecessary if the original texture format was
3308 * Z32_FLOAT, but we don't know that here.
3310 if (ctx
->ac
.chip_class
== VI
&& ctx
->abi
->clamp_shadow_reference
)
3311 z
= ac_build_clamp(&ctx
->ac
, z
);
3313 address
[count
++] = z
;
3316 /* pack derivatives */
3318 int num_src_deriv_channels
, num_dest_deriv_channels
;
3319 switch (instr
->sampler_dim
) {
3320 case GLSL_SAMPLER_DIM_3D
:
3321 case GLSL_SAMPLER_DIM_CUBE
:
3323 num_src_deriv_channels
= 3;
3324 num_dest_deriv_channels
= 3;
3326 case GLSL_SAMPLER_DIM_2D
:
3328 num_src_deriv_channels
= 2;
3329 num_dest_deriv_channels
= 2;
3332 case GLSL_SAMPLER_DIM_1D
:
3333 num_src_deriv_channels
= 1;
3334 if (ctx
->ac
.chip_class
>= GFX9
) {
3335 num_dest_deriv_channels
= 2;
3338 num_dest_deriv_channels
= 1;
3344 for (unsigned i
= 0; i
< num_src_deriv_channels
; i
++) {
3345 derivs
[i
] = ac_to_float(&ctx
->ac
, ac_llvm_extract_elem(&ctx
->ac
, ddx
, i
));
3346 derivs
[num_dest_deriv_channels
+ i
] = ac_to_float(&ctx
->ac
, ac_llvm_extract_elem(&ctx
->ac
, ddy
, i
));
3348 for (unsigned i
= num_src_deriv_channels
; i
< num_dest_deriv_channels
; i
++) {
3349 derivs
[i
] = ctx
->ac
.f32_0
;
3350 derivs
[num_dest_deriv_channels
+ i
] = ctx
->ac
.f32_0
;
3354 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
&& coord
) {
3355 for (chan
= 0; chan
< instr
->coord_components
; chan
++)
3356 coords
[chan
] = ac_to_float(&ctx
->ac
, coords
[chan
]);
3357 if (instr
->coord_components
== 3)
3358 coords
[3] = LLVMGetUndef(ctx
->ac
.f32
);
3359 ac_prepare_cube_coords(&ctx
->ac
,
3360 instr
->op
== nir_texop_txd
, instr
->is_array
,
3361 instr
->op
== nir_texop_lod
, coords
, derivs
);
3367 for (unsigned i
= 0; i
< num_deriv_comp
* 2; i
++)
3368 address
[count
++] = derivs
[i
];
3371 /* Pack texture coordinates */
3373 address
[count
++] = coords
[0];
3374 if (instr
->coord_components
> 1) {
3375 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_1D
&& instr
->is_array
&& instr
->op
!= nir_texop_txf
) {
3376 coords
[1] = apply_round_slice(&ctx
->ac
, coords
[1]);
3378 address
[count
++] = coords
[1];
3380 if (instr
->coord_components
> 2) {
3381 if ((instr
->sampler_dim
== GLSL_SAMPLER_DIM_2D
||
3382 instr
->sampler_dim
== GLSL_SAMPLER_DIM_MS
||
3383 instr
->sampler_dim
== GLSL_SAMPLER_DIM_SUBPASS
||
3384 instr
->sampler_dim
== GLSL_SAMPLER_DIM_SUBPASS_MS
) &&
3386 instr
->op
!= nir_texop_txf
&& instr
->op
!= nir_texop_txf_ms
) {
3387 coords
[2] = apply_round_slice(&ctx
->ac
, coords
[2]);
3389 address
[count
++] = coords
[2];
3392 if (ctx
->ac
.chip_class
>= GFX9
) {
3393 LLVMValueRef filler
;
3394 if (instr
->op
== nir_texop_txf
)
3395 filler
= ctx
->ac
.i32_0
;
3397 filler
= LLVMConstReal(ctx
->ac
.f32
, 0.5);
3399 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_1D
) {
3400 /* No nir_texop_lod, because it does not take a slice
3401 * even with array textures. */
3402 if (instr
->is_array
&& instr
->op
!= nir_texop_lod
) {
3403 address
[count
] = address
[count
- 1];
3404 address
[count
- 1] = filler
;
3407 address
[count
++] = filler
;
3413 if (lod
&& ((instr
->op
== nir_texop_txl
|| instr
->op
== nir_texop_txf
) && !lod_is_zero
)) {
3414 address
[count
++] = lod
;
3415 } else if (instr
->op
== nir_texop_txf_ms
&& sample_index
) {
3416 address
[count
++] = sample_index
;
3417 } else if(instr
->op
== nir_texop_txs
) {
3420 address
[count
++] = lod
;
3422 address
[count
++] = ctx
->ac
.i32_0
;
3425 for (chan
= 0; chan
< count
; chan
++) {
3426 address
[chan
] = LLVMBuildBitCast(ctx
->ac
.builder
,
3427 address
[chan
], ctx
->ac
.i32
, "");
3430 if (instr
->op
== nir_texop_samples_identical
) {
3431 LLVMValueRef txf_address
[4];
3432 struct ac_image_args txf_args
= { 0 };
3433 unsigned txf_count
= count
;
3434 memcpy(txf_address
, address
, sizeof(txf_address
));
3436 if (!instr
->is_array
)
3437 txf_address
[2] = ctx
->ac
.i32_0
;
3438 txf_address
[3] = ctx
->ac
.i32_0
;
3440 set_tex_fetch_args(&ctx
->ac
, &txf_args
, instr
, nir_texop_txf
,
3442 txf_address
, txf_count
, 0xf);
3444 result
= build_tex_intrinsic(ctx
, instr
, false, &txf_args
);
3446 result
= LLVMBuildExtractElement(ctx
->ac
.builder
, result
, ctx
->ac
.i32_0
, "");
3447 result
= emit_int_cmp(&ctx
->ac
, LLVMIntEQ
, result
, ctx
->ac
.i32_0
);
3451 if (instr
->sampler_dim
== GLSL_SAMPLER_DIM_MS
&&
3452 instr
->op
!= nir_texop_txs
) {
3453 unsigned sample_chan
= instr
->is_array
? 3 : 2;
3454 address
[sample_chan
] = adjust_sample_index_using_fmask(&ctx
->ac
,
3457 instr
->is_array
? address
[2] : NULL
,
3458 address
[sample_chan
],
3462 if (offsets
&& instr
->op
== nir_texop_txf
) {
3463 nir_const_value
*const_offset
=
3464 nir_src_as_const_value(instr
->src
[const_src
].src
);
3465 int num_offsets
= instr
->src
[const_src
].src
.ssa
->num_components
;
3466 assert(const_offset
);
3467 num_offsets
= MIN2(num_offsets
, instr
->coord_components
);
3468 if (num_offsets
> 2)
3469 address
[2] = LLVMBuildAdd(ctx
->ac
.builder
,
3470 address
[2], LLVMConstInt(ctx
->ac
.i32
, const_offset
->i32
[2], false), "");
3471 if (num_offsets
> 1)
3472 address
[1] = LLVMBuildAdd(ctx
->ac
.builder
,
3473 address
[1], LLVMConstInt(ctx
->ac
.i32
, const_offset
->i32
[1], false), "");
3474 address
[0] = LLVMBuildAdd(ctx
->ac
.builder
,
3475 address
[0], LLVMConstInt(ctx
->ac
.i32
, const_offset
->i32
[0], false), "");
3479 /* TODO TG4 support */
3480 if (instr
->op
== nir_texop_tg4
) {
3481 if (instr
->is_shadow
)
3484 dmask
= 1 << instr
->component
;
3486 set_tex_fetch_args(&ctx
->ac
, &args
, instr
, instr
->op
,
3487 res_ptr
, samp_ptr
, address
, count
, dmask
);
3489 result
= build_tex_intrinsic(ctx
, instr
, lod_is_zero
, &args
);
3491 if (instr
->op
== nir_texop_query_levels
)
3492 result
= LLVMBuildExtractElement(ctx
->ac
.builder
, result
, LLVMConstInt(ctx
->ac
.i32
, 3, false), "");
3493 else if (instr
->is_shadow
&& instr
->is_new_style_shadow
&&
3494 instr
->op
!= nir_texop_txs
&& instr
->op
!= nir_texop_lod
&&
3495 instr
->op
!= nir_texop_tg4
)
3496 result
= LLVMBuildExtractElement(ctx
->ac
.builder
, result
, ctx
->ac
.i32_0
, "");
3497 else if (instr
->op
== nir_texop_txs
&&
3498 instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
&&
3500 LLVMValueRef two
= LLVMConstInt(ctx
->ac
.i32
, 2, false);
3501 LLVMValueRef six
= LLVMConstInt(ctx
->ac
.i32
, 6, false);
3502 LLVMValueRef z
= LLVMBuildExtractElement(ctx
->ac
.builder
, result
, two
, "");
3503 z
= LLVMBuildSDiv(ctx
->ac
.builder
, z
, six
, "");
3504 result
= LLVMBuildInsertElement(ctx
->ac
.builder
, result
, z
, two
, "");
3505 } else if (ctx
->ac
.chip_class
>= GFX9
&&
3506 instr
->op
== nir_texop_txs
&&
3507 instr
->sampler_dim
== GLSL_SAMPLER_DIM_1D
&&
3509 LLVMValueRef two
= LLVMConstInt(ctx
->ac
.i32
, 2, false);
3510 LLVMValueRef layers
= LLVMBuildExtractElement(ctx
->ac
.builder
, result
, two
, "");
3511 result
= LLVMBuildInsertElement(ctx
->ac
.builder
, result
, layers
,
3513 } else if (instr
->dest
.ssa
.num_components
!= 4)
3514 result
= ac_trim_vector(&ctx
->ac
, result
, instr
->dest
.ssa
.num_components
);
3518 assert(instr
->dest
.is_ssa
);
3519 result
= ac_to_integer(&ctx
->ac
, result
);
3520 _mesa_hash_table_insert(ctx
->defs
, &instr
->dest
.ssa
, result
);
3525 static void visit_phi(struct ac_nir_context
*ctx
, nir_phi_instr
*instr
)
3527 LLVMTypeRef type
= get_def_type(ctx
, &instr
->dest
.ssa
);
3528 LLVMValueRef result
= LLVMBuildPhi(ctx
->ac
.builder
, type
, "");
3530 _mesa_hash_table_insert(ctx
->defs
, &instr
->dest
.ssa
, result
);
3531 _mesa_hash_table_insert(ctx
->phis
, instr
, result
);
3534 static void visit_post_phi(struct ac_nir_context
*ctx
,
3535 nir_phi_instr
*instr
,
3536 LLVMValueRef llvm_phi
)
3538 nir_foreach_phi_src(src
, instr
) {
3539 LLVMBasicBlockRef block
= get_block(ctx
, src
->pred
);
3540 LLVMValueRef llvm_src
= get_src(ctx
, src
->src
);
3542 LLVMAddIncoming(llvm_phi
, &llvm_src
, &block
, 1);
3546 static void phi_post_pass(struct ac_nir_context
*ctx
)
3548 struct hash_entry
*entry
;
3549 hash_table_foreach(ctx
->phis
, entry
) {
3550 visit_post_phi(ctx
, (nir_phi_instr
*)entry
->key
,
3551 (LLVMValueRef
)entry
->data
);
3556 static void visit_ssa_undef(struct ac_nir_context
*ctx
,
3557 const nir_ssa_undef_instr
*instr
)
3559 unsigned num_components
= instr
->def
.num_components
;
3560 LLVMTypeRef type
= LLVMIntTypeInContext(ctx
->ac
.context
, instr
->def
.bit_size
);
3563 if (num_components
== 1)
3564 undef
= LLVMGetUndef(type
);
3566 undef
= LLVMGetUndef(LLVMVectorType(type
, num_components
));
3568 _mesa_hash_table_insert(ctx
->defs
, &instr
->def
, undef
);
3571 static void visit_jump(struct ac_llvm_context
*ctx
,
3572 const nir_jump_instr
*instr
)
3574 switch (instr
->type
) {
3575 case nir_jump_break
:
3576 ac_build_break(ctx
);
3578 case nir_jump_continue
:
3579 ac_build_continue(ctx
);
3582 fprintf(stderr
, "Unknown NIR jump instr: ");
3583 nir_print_instr(&instr
->instr
, stderr
);
3584 fprintf(stderr
, "\n");
3589 static void visit_cf_list(struct ac_nir_context
*ctx
,
3590 struct exec_list
*list
);
3592 static void visit_block(struct ac_nir_context
*ctx
, nir_block
*block
)
3594 LLVMBasicBlockRef llvm_block
= LLVMGetInsertBlock(ctx
->ac
.builder
);
3595 nir_foreach_instr(instr
, block
)
3597 switch (instr
->type
) {
3598 case nir_instr_type_alu
:
3599 visit_alu(ctx
, nir_instr_as_alu(instr
));
3601 case nir_instr_type_load_const
:
3602 visit_load_const(ctx
, nir_instr_as_load_const(instr
));
3604 case nir_instr_type_intrinsic
:
3605 visit_intrinsic(ctx
, nir_instr_as_intrinsic(instr
));
3607 case nir_instr_type_tex
:
3608 visit_tex(ctx
, nir_instr_as_tex(instr
));
3610 case nir_instr_type_phi
:
3611 visit_phi(ctx
, nir_instr_as_phi(instr
));
3613 case nir_instr_type_ssa_undef
:
3614 visit_ssa_undef(ctx
, nir_instr_as_ssa_undef(instr
));
3616 case nir_instr_type_jump
:
3617 visit_jump(&ctx
->ac
, nir_instr_as_jump(instr
));
3620 fprintf(stderr
, "Unknown NIR instr type: ");
3621 nir_print_instr(instr
, stderr
);
3622 fprintf(stderr
, "\n");
3627 _mesa_hash_table_insert(ctx
->defs
, block
, llvm_block
);
3630 static void visit_if(struct ac_nir_context
*ctx
, nir_if
*if_stmt
)
3632 LLVMValueRef value
= get_src(ctx
, if_stmt
->condition
);
3634 nir_block
*then_block
=
3635 (nir_block
*) exec_list_get_head(&if_stmt
->then_list
);
3637 ac_build_uif(&ctx
->ac
, value
, then_block
->index
);
3639 visit_cf_list(ctx
, &if_stmt
->then_list
);
3641 if (!exec_list_is_empty(&if_stmt
->else_list
)) {
3642 nir_block
*else_block
=
3643 (nir_block
*) exec_list_get_head(&if_stmt
->else_list
);
3645 ac_build_else(&ctx
->ac
, else_block
->index
);
3646 visit_cf_list(ctx
, &if_stmt
->else_list
);
3649 ac_build_endif(&ctx
->ac
, then_block
->index
);
3652 static void visit_loop(struct ac_nir_context
*ctx
, nir_loop
*loop
)
3654 nir_block
*first_loop_block
=
3655 (nir_block
*) exec_list_get_head(&loop
->body
);
3657 ac_build_bgnloop(&ctx
->ac
, first_loop_block
->index
);
3659 visit_cf_list(ctx
, &loop
->body
);
3661 ac_build_endloop(&ctx
->ac
, first_loop_block
->index
);
3664 static void visit_cf_list(struct ac_nir_context
*ctx
,
3665 struct exec_list
*list
)
3667 foreach_list_typed(nir_cf_node
, node
, node
, list
)
3669 switch (node
->type
) {
3670 case nir_cf_node_block
:
3671 visit_block(ctx
, nir_cf_node_as_block(node
));
3674 case nir_cf_node_if
:
3675 visit_if(ctx
, nir_cf_node_as_if(node
));
3678 case nir_cf_node_loop
:
3679 visit_loop(ctx
, nir_cf_node_as_loop(node
));
3689 ac_handle_shader_output_decl(struct ac_llvm_context
*ctx
,
3690 struct ac_shader_abi
*abi
,
3691 struct nir_shader
*nir
,
3692 struct nir_variable
*variable
,
3693 gl_shader_stage stage
)
3695 unsigned output_loc
= variable
->data
.driver_location
/ 4;
3696 unsigned attrib_count
= glsl_count_attribute_slots(variable
->type
, false);
3698 /* tess ctrl has it's own load/store paths for outputs */
3699 if (stage
== MESA_SHADER_TESS_CTRL
)
3702 if (stage
== MESA_SHADER_VERTEX
||
3703 stage
== MESA_SHADER_TESS_EVAL
||
3704 stage
== MESA_SHADER_GEOMETRY
) {
3705 int idx
= variable
->data
.location
+ variable
->data
.index
;
3706 if (idx
== VARYING_SLOT_CLIP_DIST0
) {
3707 int length
= nir
->info
.clip_distance_array_size
+
3708 nir
->info
.cull_distance_array_size
;
3717 for (unsigned i
= 0; i
< attrib_count
; ++i
) {
3718 for (unsigned chan
= 0; chan
< 4; chan
++) {
3719 abi
->outputs
[ac_llvm_reg_index_soa(output_loc
+ i
, chan
)] =
3720 ac_build_alloca_undef(ctx
, ctx
->f32
, "");
3726 glsl_base_to_llvm_type(struct ac_llvm_context
*ac
,
3727 enum glsl_base_type type
)
3731 case GLSL_TYPE_UINT
:
3732 case GLSL_TYPE_BOOL
:
3733 case GLSL_TYPE_SUBROUTINE
:
3735 case GLSL_TYPE_FLOAT
: /* TODO handle mediump */
3737 case GLSL_TYPE_INT64
:
3738 case GLSL_TYPE_UINT64
:
3740 case GLSL_TYPE_DOUBLE
:
3743 unreachable("unknown GLSL type");
3748 glsl_to_llvm_type(struct ac_llvm_context
*ac
,
3749 const struct glsl_type
*type
)
3751 if (glsl_type_is_scalar(type
)) {
3752 return glsl_base_to_llvm_type(ac
, glsl_get_base_type(type
));
3755 if (glsl_type_is_vector(type
)) {
3756 return LLVMVectorType(
3757 glsl_base_to_llvm_type(ac
, glsl_get_base_type(type
)),
3758 glsl_get_vector_elements(type
));
3761 if (glsl_type_is_matrix(type
)) {
3762 return LLVMArrayType(
3763 glsl_to_llvm_type(ac
, glsl_get_column_type(type
)),
3764 glsl_get_matrix_columns(type
));
3767 if (glsl_type_is_array(type
)) {
3768 return LLVMArrayType(
3769 glsl_to_llvm_type(ac
, glsl_get_array_element(type
)),
3770 glsl_get_length(type
));
3773 assert(glsl_type_is_struct(type
));
3775 LLVMTypeRef member_types
[glsl_get_length(type
)];
3777 for (unsigned i
= 0; i
< glsl_get_length(type
); i
++) {
3779 glsl_to_llvm_type(ac
,
3780 glsl_get_struct_field(type
, i
));
3783 return LLVMStructTypeInContext(ac
->context
, member_types
,
3784 glsl_get_length(type
), false);
3788 setup_locals(struct ac_nir_context
*ctx
,
3789 struct nir_function
*func
)
3792 ctx
->num_locals
= 0;
3793 nir_foreach_variable(variable
, &func
->impl
->locals
) {
3794 unsigned attrib_count
= glsl_count_attribute_slots(variable
->type
, false);
3795 variable
->data
.driver_location
= ctx
->num_locals
* 4;
3796 variable
->data
.location_frac
= 0;
3797 ctx
->num_locals
+= attrib_count
;
3799 ctx
->locals
= malloc(4 * ctx
->num_locals
* sizeof(LLVMValueRef
));
3803 for (i
= 0; i
< ctx
->num_locals
; i
++) {
3804 for (j
= 0; j
< 4; j
++) {
3805 ctx
->locals
[i
* 4 + j
] =
3806 ac_build_alloca_undef(&ctx
->ac
, ctx
->ac
.f32
, "temp");
3812 setup_shared(struct ac_nir_context
*ctx
,
3813 struct nir_shader
*nir
)
3815 nir_foreach_variable(variable
, &nir
->shared
) {
3816 LLVMValueRef shared
=
3817 LLVMAddGlobalInAddressSpace(
3818 ctx
->ac
.module
, glsl_to_llvm_type(&ctx
->ac
, variable
->type
),
3819 variable
->name
? variable
->name
: "",
3820 AC_LOCAL_ADDR_SPACE
);
3821 _mesa_hash_table_insert(ctx
->vars
, variable
, shared
);
3825 void ac_nir_translate(struct ac_llvm_context
*ac
, struct ac_shader_abi
*abi
,
3826 struct nir_shader
*nir
)
3828 struct ac_nir_context ctx
= {};
3829 struct nir_function
*func
;
3834 ctx
.stage
= nir
->info
.stage
;
3836 ctx
.main_function
= LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx
.ac
.builder
));
3838 nir_foreach_variable(variable
, &nir
->outputs
)
3839 ac_handle_shader_output_decl(&ctx
.ac
, ctx
.abi
, nir
, variable
,
3842 ctx
.defs
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
3843 _mesa_key_pointer_equal
);
3844 ctx
.phis
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
3845 _mesa_key_pointer_equal
);
3846 ctx
.vars
= _mesa_hash_table_create(NULL
, _mesa_hash_pointer
,
3847 _mesa_key_pointer_equal
);
3849 func
= (struct nir_function
*)exec_list_get_head(&nir
->functions
);
3851 setup_locals(&ctx
, func
);
3853 if (nir
->info
.stage
== MESA_SHADER_COMPUTE
)
3854 setup_shared(&ctx
, nir
);
3856 visit_cf_list(&ctx
, &func
->impl
->body
);
3857 phi_post_pass(&ctx
);
3859 if (nir
->info
.stage
!= MESA_SHADER_COMPUTE
)
3860 ctx
.abi
->emit_outputs(ctx
.abi
, AC_LLVM_MAX_OUTPUTS
,
3864 ralloc_free(ctx
.defs
);
3865 ralloc_free(ctx
.phis
);
3866 ralloc_free(ctx
.vars
);
3870 ac_lower_indirect_derefs(struct nir_shader
*nir
, enum chip_class chip_class
)
3872 /* While it would be nice not to have this flag, we are constrained
3873 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
3876 bool llvm_has_working_vgpr_indexing
= chip_class
<= VI
;
3878 /* TODO: Indirect indexing of GS inputs is unimplemented.
3880 * TCS and TES load inputs directly from LDS or offchip memory, so
3881 * indirect indexing is trivial.
3883 nir_variable_mode indirect_mask
= 0;
3884 if (nir
->info
.stage
== MESA_SHADER_GEOMETRY
||
3885 (nir
->info
.stage
!= MESA_SHADER_TESS_CTRL
&&
3886 nir
->info
.stage
!= MESA_SHADER_TESS_EVAL
&&
3887 !llvm_has_working_vgpr_indexing
)) {
3888 indirect_mask
|= nir_var_shader_in
;
3890 if (!llvm_has_working_vgpr_indexing
&&
3891 nir
->info
.stage
!= MESA_SHADER_TESS_CTRL
)
3892 indirect_mask
|= nir_var_shader_out
;
3894 /* TODO: We shouldn't need to do this, however LLVM isn't currently
3895 * smart enough to handle indirects without causing excess spilling
3896 * causing the gpu to hang.
3898 * See the following thread for more details of the problem:
3899 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
3901 indirect_mask
|= nir_var_local
;
3903 nir_lower_indirect_derefs(nir
, indirect_mask
);