1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <llvm/Config/llvm-config.h>
25
26 #include "ac_nir_to_llvm.h"
27 #include "ac_llvm_build.h"
28 #include "ac_llvm_util.h"
29 #include "ac_binary.h"
30 #include "sid.h"
31 #include "nir/nir.h"
32 #include "nir/nir_deref.h"
33 #include "util/bitscan.h"
34 #include "util/u_math.h"
35 #include "ac_shader_abi.h"
36 #include "ac_shader_util.h"
37
38 struct ac_nir_context {
39 struct ac_llvm_context ac;
40 struct ac_shader_abi *abi;
41 const struct ac_shader_args *args;
42
43 gl_shader_stage stage;
44 shader_info *info;
45
46 LLVMValueRef *ssa_defs;
47
48 LLVMValueRef scratch;
49 LLVMValueRef constant_data;
50
51 struct hash_table *defs;
52 struct hash_table *phis;
53 struct hash_table *vars;
54 struct hash_table *verified_interp;
55
56 LLVMValueRef main_function;
57 LLVMBasicBlockRef continue_block;
58 LLVMBasicBlockRef break_block;
59
60 int num_locals;
61 LLVMValueRef *locals;
62 };
63
64 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
65 nir_deref_instr *deref_instr,
66 const nir_instr *instr,
67 bool image);
68
69 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
70 nir_deref_instr *deref_instr,
71 enum ac_descriptor_type desc_type,
72 const nir_instr *instr,
73 LLVMValueRef index,
74 bool image, bool write);
75
76 static void
77 build_store_values_extended(struct ac_llvm_context *ac,
78 LLVMValueRef *values,
79 unsigned value_count,
80 unsigned value_stride,
81 LLVMValueRef vec)
82 {
83 LLVMBuilderRef builder = ac->builder;
84 unsigned i;
85
86 for (i = 0; i < value_count; i++) {
87 LLVMValueRef ptr = values[i * value_stride];
88 LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
89 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
90 LLVMBuildStore(builder, value, ptr);
91 }
92 }
93
94 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
95 const nir_ssa_def *def)
96 {
97 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
98 if (def->num_components > 1) {
99 type = LLVMVectorType(type, def->num_components);
100 }
101 return type;
102 }
103
104 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
105 {
106 assert(src.is_ssa);
107 return nir->ssa_defs[src.ssa->index];
108 }
109
110 static LLVMValueRef
111 get_memory_ptr(struct ac_nir_context *ctx, nir_src src, unsigned bit_size)
112 {
113 LLVMValueRef ptr = get_src(ctx, src);
114 ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
115 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
116
117 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, bit_size);
118
119 return LLVMBuildBitCast(ctx->ac.builder, ptr,
120 LLVMPointerType(type, addr_space), "");
121 }
122
123 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
124 const struct nir_block *b)
125 {
126 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
127 return (LLVMBasicBlockRef)entry->data;
128 }
129
130 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
131 nir_alu_src src,
132 unsigned num_components)
133 {
134 LLVMValueRef value = get_src(ctx, src.src);
135 bool need_swizzle = false;
136
137 assert(value);
138 unsigned src_components = ac_get_llvm_num_components(value);
139 for (unsigned i = 0; i < num_components; ++i) {
140 assert(src.swizzle[i] < src_components);
141 if (src.swizzle[i] != i)
142 need_swizzle = true;
143 }
144
145 if (need_swizzle || num_components != src_components) {
146 LLVMValueRef masks[] = {
147 LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
148 LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
149 LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
150 LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
151
152 if (src_components > 1 && num_components == 1) {
153 value = LLVMBuildExtractElement(ctx->ac.builder, value,
154 masks[0], "");
155 } else if (src_components == 1 && num_components > 1) {
156 LLVMValueRef values[] = {value, value, value, value};
157 value = ac_build_gather_values(&ctx->ac, values, num_components);
158 } else {
159 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
160 value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
161 swizzle, "");
162 }
163 }
164 assert(!src.negate);
165 assert(!src.abs);
166 return value;
167 }
168
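/* Comparison helpers: NIR 32-bit booleans are represented here as i32 values
 * that are all ones (0xffffffff) for true and 0 for false, so the i1 result of
 * the LLVM compare below is widened into that convention with a select.
 */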
169 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
170 LLVMIntPredicate pred, LLVMValueRef src0,
171 LLVMValueRef src1)
172 {
173 LLVMTypeRef src0_type = LLVMTypeOf(src0);
174 LLVMTypeRef src1_type = LLVMTypeOf(src1);
175
176 if (LLVMGetTypeKind(src0_type) == LLVMPointerTypeKind &&
177 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
178 src1 = LLVMBuildIntToPtr(ctx->builder, src1, src0_type, "");
179 } else if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
180 LLVMGetTypeKind(src0_type) != LLVMPointerTypeKind) {
181 src0 = LLVMBuildIntToPtr(ctx->builder, src0, src1_type, "");
182 }
183
184 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
185 return LLVMBuildSelect(ctx->builder, result,
186 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
187 ctx->i32_0, "");
188 }
189
190 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
191 LLVMRealPredicate pred, LLVMValueRef src0,
192 LLVMValueRef src1)
193 {
194 LLVMValueRef result;
195 src0 = ac_to_float(ctx, src0);
196 src1 = ac_to_float(ctx, src1);
197 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
198 return LLVMBuildSelect(ctx->builder, result,
199 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
200 ctx->i32_0, "");
201 }
202
203 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
204 const char *intrin,
205 LLVMTypeRef result_type,
206 LLVMValueRef src0)
207 {
208 char name[64], type[64];
209 LLVMValueRef params[] = {
210 ac_to_float(ctx, src0),
211 };
212
213 ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
214 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
215 assert(length < sizeof(name));
216 return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
217 }
218
219 static LLVMValueRef emit_intrin_1f_param_scalar(struct ac_llvm_context *ctx,
220 const char *intrin,
221 LLVMTypeRef result_type,
222 LLVMValueRef src0)
223 {
224 if (LLVMGetTypeKind(result_type) != LLVMVectorTypeKind)
225 return emit_intrin_1f_param(ctx, intrin, result_type, src0);
226
227 LLVMTypeRef elem_type = LLVMGetElementType(result_type);
228 LLVMValueRef ret = LLVMGetUndef(result_type);
229
230 /* Scalarize the intrinsic, because vectors are not supported. */
231 for (unsigned i = 0; i < LLVMGetVectorSize(result_type); i++) {
232 char name[64], type[64];
233 LLVMValueRef params[] = {
234 ac_to_float(ctx, ac_llvm_extract_elem(ctx, src0, i)),
235 };
236
237 ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
238 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
239 assert(length < sizeof(name));
240 ret = LLVMBuildInsertElement(ctx->builder, ret,
241 ac_build_intrinsic(ctx, name, elem_type, params,
242 1, AC_FUNC_ATTR_READNONE),
243 LLVMConstInt(ctx->i32, i, 0), "");
244 }
245 return ret;
246 }
247
248 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
249 const char *intrin,
250 LLVMTypeRef result_type,
251 LLVMValueRef src0, LLVMValueRef src1)
252 {
253 char name[64], type[64];
254 LLVMValueRef params[] = {
255 ac_to_float(ctx, src0),
256 ac_to_float(ctx, src1),
257 };
258
259 ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
260 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
261 assert(length < sizeof(name));
262 return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
263 }
264
265 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
266 const char *intrin,
267 LLVMTypeRef result_type,
268 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
269 {
270 char name[64], type[64];
271 LLVMValueRef params[] = {
272 ac_to_float(ctx, src0),
273 ac_to_float(ctx, src1),
274 ac_to_float(ctx, src2),
275 };
276
277 ac_build_type_name_for_intr(LLVMTypeOf(params[0]), type, sizeof(type));
278 ASSERTED const int length = snprintf(name, sizeof(name), "%s.%s", intrin, type);
279 assert(length < sizeof(name));
280 return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
281 }
282
283 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
284 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
285 {
286 LLVMTypeRef src1_type = LLVMTypeOf(src1);
287 LLVMTypeRef src2_type = LLVMTypeOf(src2);
288
289 if (LLVMGetTypeKind(src1_type) == LLVMPointerTypeKind &&
290 LLVMGetTypeKind(src2_type) != LLVMPointerTypeKind) {
291 src2 = LLVMBuildIntToPtr(ctx->builder, src2, src1_type, "");
292 } else if (LLVMGetTypeKind(src2_type) == LLVMPointerTypeKind &&
293 LLVMGetTypeKind(src1_type) != LLVMPointerTypeKind) {
294 src1 = LLVMBuildIntToPtr(ctx->builder, src1, src2_type, "");
295 }
296
297 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
298 LLVMConstNull(LLVMTypeOf(src0)), "");
299 return LLVMBuildSelect(ctx->builder, v,
300 ac_to_integer_or_pointer(ctx, src1),
301 ac_to_integer_or_pointer(ctx, src2), "");
302 }
303
304 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
305 LLVMValueRef src0)
306 {
307 return ac_build_imax(ctx, src0, LLVMBuildNeg(ctx->builder, src0, ""));
308 }
309
310 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
311 const char *intrin,
312 LLVMValueRef src0, LLVMValueRef src1)
313 {
314 LLVMTypeRef ret_type;
315 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
316 LLVMValueRef res;
317 LLVMValueRef params[] = { src0, src1 };
318 ret_type = LLVMStructTypeInContext(ctx->context, types,
319 2, true);
320
321 res = ac_build_intrinsic(ctx, intrin, ret_type,
322 params, 2, AC_FUNC_ATTR_READNONE);
323
324 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
325 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
326 return res;
327 }
328
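/* Boolean-to-float: the 32-bit boolean (0 or 0xffffffff) is ANDed with the
 * integer bit pattern of 1.0f, yielding either 0x00000000 or 0x3f800000,
 * i.e. 0.0f or 1.0f after the bitcast back to float.
 */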
329 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
330 LLVMValueRef src0,
331 unsigned bitsize)
332 {
333 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
334 LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
335 "");
336 result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");
337
338 switch (bitsize) {
339 case 16:
340 return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, "");
341 case 32:
342 return result;
343 case 64:
344 return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
345 default:
346 unreachable("Unsupported bit size.");
347 }
348 }
349
350 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
351 LLVMValueRef src0)
352 {
353 src0 = ac_to_float(ctx, src0);
354 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
355 return LLVMBuildSExt(ctx->builder,
356 LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
357 ctx->i32, "");
358 }
359
360 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
361 LLVMValueRef src0,
362 unsigned bitsize)
363 {
364 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
365
366 switch (bitsize) {
367 case 8:
368 return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
369 case 16:
370 return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
371 case 32:
372 return result;
373 case 64:
374 return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
375 default:
376 unreachable("Unsupported bit size.");
377 }
378 }
379
380 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
381 LLVMValueRef src0)
382 {
383 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
384 return LLVMBuildSExt(ctx->builder,
385 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
386 ctx->i32, "");
387 }
388
389 static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
390 LLVMValueRef src0)
391 {
392 LLVMValueRef result;
393 LLVMValueRef cond = NULL;
394
395 src0 = ac_to_float(ctx, src0);
396 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
397
398 if (ctx->chip_class >= GFX8) {
399 LLVMValueRef args[2];
400 /* Check if the result is a denormal - and flush to 0 if so. */
401 args[0] = result;
402 args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
403 cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
404 }
405
406 /* need to convert back up to f32 */
407 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
408
409 if (ctx->chip_class >= GFX8)
410 result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
411 else {
412 /* for GFX6-GFX7 */
 413 		/* 0x38800000 is the smallest normal half-float value (2^-14) represented
 414 		 * as a 32-bit float, so compare the result and flush to 0 if it's smaller.
415 */
416 LLVMValueRef temp, cond2;
417 temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
418 cond = LLVMBuildFCmp(ctx->builder, LLVMRealOGT,
419 LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
420 temp, "");
421 cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealONE,
422 temp, ctx->f32_0, "");
423 cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
424 result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
425 }
426 return result;
427 }
428
429 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
430 LLVMValueRef src0, LLVMValueRef src1)
431 {
432 LLVMValueRef dst64, result;
433 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
434 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
435
436 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
437 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
438 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
439 return result;
440 }
441
442 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
443 LLVMValueRef src0, LLVMValueRef src1)
444 {
445 LLVMValueRef dst64, result;
446 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
447 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
448
449 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
450 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
451 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
452 return result;
453 }
454
455 static LLVMValueRef emit_bfm(struct ac_llvm_context *ctx,
456 LLVMValueRef bits, LLVMValueRef offset)
457 {
458 /* mask = ((1 << bits) - 1) << offset */
459 return LLVMBuildShl(ctx->builder,
460 LLVMBuildSub(ctx->builder,
461 LLVMBuildShl(ctx->builder,
462 ctx->i32_1,
463 bits, ""),
464 ctx->i32_1, ""),
465 offset, "");
466 }
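/* Worked example of the mask computed above (values chosen for illustration):
 * bits = 5, offset = 3  ->  ((1 << 5) - 1) << 3 = 0x1f << 3 = 0x000000f8,
 * i.e. a run of "bits" ones starting at bit "offset".
 */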
467
468 static LLVMValueRef emit_bitfield_select(struct ac_llvm_context *ctx,
469 LLVMValueRef mask, LLVMValueRef insert,
470 LLVMValueRef base)
471 {
472 /* Calculate:
473 * (mask & insert) | (~mask & base) = base ^ (mask & (insert ^ base))
474 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
475 */
476 return LLVMBuildXor(ctx->builder, base,
477 LLVMBuildAnd(ctx->builder, mask,
478 LLVMBuildXor(ctx->builder, insert, base, ""), ""), "");
479 }
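/* Per-bit sanity check of the identity used above, for illustration:
 * where mask is 1: base ^ (1 & (insert ^ base)) = base ^ insert ^ base = insert
 * where mask is 0: base ^ (0 & (insert ^ base)) = base ^ 0              = base
 * which matches (mask & insert) | (~mask & base).
 */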
480
481 static LLVMValueRef emit_pack_2x16(struct ac_llvm_context *ctx,
482 LLVMValueRef src0,
483 LLVMValueRef (*pack)(struct ac_llvm_context *ctx,
484 LLVMValueRef args[2]))
485 {
486 LLVMValueRef comp[2];
487
488 src0 = ac_to_float(ctx, src0);
489 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
490 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
491
492 return LLVMBuildBitCast(ctx->builder, pack(ctx, comp), ctx->i32, "");
493 }
494
495 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
496 LLVMValueRef src0)
497 {
498 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
499 LLVMValueRef temps[2], val;
500 int i;
501
502 for (i = 0; i < 2; i++) {
503 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
504 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
505 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
506 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
507 }
508 return ac_build_gather_values(ctx, temps, 2);
509 }
510
511 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
512 nir_op op,
513 LLVMValueRef src0)
514 {
515 unsigned mask;
516 int idx;
517 LLVMValueRef result;
518
519 if (op == nir_op_fddx_fine)
520 mask = AC_TID_MASK_LEFT;
521 else if (op == nir_op_fddy_fine)
522 mask = AC_TID_MASK_TOP;
523 else
524 mask = AC_TID_MASK_TOP_LEFT;
525
 526 	/* for DDX we want the next X pixel, for DDY the next Y pixel. */
527 if (op == nir_op_fddx_fine ||
528 op == nir_op_fddx_coarse ||
529 op == nir_op_fddx)
530 idx = 1;
531 else
532 idx = 2;
533
534 result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
535 return result;
536 }
537
538 struct waterfall_context {
539 LLVMBasicBlockRef phi_bb[2];
540 bool use_waterfall;
541 };
542
543 /* To deal with divergent descriptors we can create a loop that handles all
544 * lanes with the same descriptor on a given iteration (henceforth a
545 * waterfall loop).
546 *
 547  * These helpers create the begin and end of the loop, leaving the caller
 548  * to implement the body (see the usage sketch after this comment).
549 *
550 * params:
 551  *  - ctx is the usual nir context
552 * - wctx is a temporary struct containing some loop info. Can be left uninitialized.
553 * - value is the possibly divergent value for which we built the loop
554 * - divergent is whether value is actually divergent. If false we just pass
555 * things through.
556 */
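/* Usage sketch (illustrative only; the surrounding variable names are
 * hypothetical, not part of this file):
 *
 *    struct waterfall_context wctx;
 *    LLVMValueRef idx = enter_waterfall(ctx, &wctx, index, divergent);
 *    ...emit the loop body using the now-uniform idx...
 *    result = exit_waterfall(ctx, &wctx, result);
 */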
557 static LLVMValueRef enter_waterfall(struct ac_nir_context *ctx,
558 struct waterfall_context *wctx,
559 LLVMValueRef value, bool divergent)
560 {
 561 	/* If the app claims the value is divergent but it is constant, we can
 562 	 * end up with a dynamic index of NULL. */
563 if (!value)
564 divergent = false;
565
566 wctx->use_waterfall = divergent;
567 if (!divergent)
568 return value;
569
570 ac_build_bgnloop(&ctx->ac, 6000);
571
572 LLVMValueRef scalar_value = ac_build_readlane(&ctx->ac, value, NULL);
573
574 LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, value,
575 scalar_value, "uniform_active");
576
577 wctx->phi_bb[0] = LLVMGetInsertBlock(ctx->ac.builder);
578 ac_build_ifcc(&ctx->ac, active, 6001);
579
580 return scalar_value;
581 }
582
583 static LLVMValueRef exit_waterfall(struct ac_nir_context *ctx,
584 struct waterfall_context *wctx,
585 LLVMValueRef value)
586 {
587 LLVMValueRef ret = NULL;
588 LLVMValueRef phi_src[2];
589 LLVMValueRef cc_phi_src[2] = {
590 LLVMConstInt(ctx->ac.i32, 0, false),
591 LLVMConstInt(ctx->ac.i32, 0xffffffff, false),
592 };
593
594 if (!wctx->use_waterfall)
595 return value;
596
597 wctx->phi_bb[1] = LLVMGetInsertBlock(ctx->ac.builder);
598
599 ac_build_endif(&ctx->ac, 6001);
600
601 if (value) {
602 phi_src[0] = LLVMGetUndef(LLVMTypeOf(value));
603 phi_src[1] = value;
604
605 ret = ac_build_phi(&ctx->ac, LLVMTypeOf(value), 2, phi_src, wctx->phi_bb);
606 }
607
608 /*
609 * By using the optimization barrier on the exit decision, we decouple
610 * the operations from the break, and hence avoid LLVM hoisting the
 611 	 * operation into the break block.
612 */
613 LLVMValueRef cc = ac_build_phi(&ctx->ac, ctx->ac.i32, 2, cc_phi_src, wctx->phi_bb);
614 ac_build_optimization_barrier(&ctx->ac, &cc);
615
616 LLVMValueRef active = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, cc, ctx->ac.i32_0, "uniform_active2");
617 ac_build_ifcc(&ctx->ac, active, 6002);
618 ac_build_break(&ctx->ac);
619 ac_build_endif(&ctx->ac, 6002);
620
621 ac_build_endloop(&ctx->ac, 6000);
622 return ret;
623 }
624
625 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
626 {
627 LLVMValueRef src[4], result = NULL;
628 unsigned num_components = instr->dest.dest.ssa.num_components;
629 unsigned src_components;
630 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
631
632 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
633 switch (instr->op) {
634 case nir_op_vec2:
635 case nir_op_vec3:
636 case nir_op_vec4:
637 src_components = 1;
638 break;
639 case nir_op_pack_half_2x16:
640 case nir_op_pack_snorm_2x16:
641 case nir_op_pack_unorm_2x16:
642 src_components = 2;
643 break;
644 case nir_op_unpack_half_2x16:
645 src_components = 1;
646 break;
647 case nir_op_cube_face_coord:
648 case nir_op_cube_face_index:
649 src_components = 3;
650 break;
651 default:
652 src_components = num_components;
653 break;
654 }
655 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
656 src[i] = get_alu_src(ctx, instr->src[i], src_components);
657
658 switch (instr->op) {
659 case nir_op_mov:
660 result = src[0];
661 break;
662 case nir_op_fneg:
663 src[0] = ac_to_float(&ctx->ac, src[0]);
664 result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
665 if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
 666 			/* fneg will be optimized by the backend compiler with the sign
 667 			 * bit removed via XOR. This is probably an LLVM bug.
668 */
669 result = ac_build_canonicalize(&ctx->ac, result,
670 instr->dest.dest.ssa.bit_size);
671 }
672 break;
673 case nir_op_ineg:
674 result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
675 break;
676 case nir_op_inot:
677 result = LLVMBuildNot(ctx->ac.builder, src[0], "");
678 break;
679 case nir_op_iadd:
680 result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
681 break;
682 case nir_op_fadd:
683 src[0] = ac_to_float(&ctx->ac, src[0]);
684 src[1] = ac_to_float(&ctx->ac, src[1]);
685 result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
686 break;
687 case nir_op_fsub:
688 src[0] = ac_to_float(&ctx->ac, src[0]);
689 src[1] = ac_to_float(&ctx->ac, src[1]);
690 result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
691 break;
692 case nir_op_isub:
693 result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
694 break;
695 case nir_op_imul:
696 result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
697 break;
698 case nir_op_imod:
699 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
700 break;
701 case nir_op_umod:
702 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
703 break;
704 case nir_op_irem:
705 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
706 break;
707 case nir_op_idiv:
708 result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
709 break;
710 case nir_op_udiv:
711 result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
712 break;
713 case nir_op_fmul:
714 src[0] = ac_to_float(&ctx->ac, src[0]);
715 src[1] = ac_to_float(&ctx->ac, src[1]);
716 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
717 break;
718 case nir_op_frcp:
719 /* For doubles, we need precise division to pass GLCTS. */
720 if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL &&
721 ac_get_type_size(def_type) == 8) {
722 result = LLVMBuildFDiv(ctx->ac.builder, ctx->ac.f64_1,
723 ac_to_float(&ctx->ac, src[0]), "");
724 } else {
725 result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rcp",
726 ac_to_float_type(&ctx->ac, def_type), src[0]);
727 }
728 if (ctx->abi->clamp_div_by_zero)
729 result = ac_build_fmin(&ctx->ac, result,
730 LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
731 break;
732 case nir_op_iand:
733 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
734 break;
735 case nir_op_ior:
736 result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
737 break;
738 case nir_op_ixor:
739 result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
740 break;
741 case nir_op_ishl:
742 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
743 src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
744 LLVMTypeOf(src[0]), "");
745 else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
746 src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
747 LLVMTypeOf(src[0]), "");
748 result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
749 break;
750 case nir_op_ishr:
751 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
752 src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
753 LLVMTypeOf(src[0]), "");
754 else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
755 src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
756 LLVMTypeOf(src[0]), "");
757 result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
758 break;
759 case nir_op_ushr:
760 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
761 src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
762 LLVMTypeOf(src[0]), "");
763 else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
764 src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
765 LLVMTypeOf(src[0]), "");
766 result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
767 break;
768 case nir_op_ilt32:
769 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
770 break;
771 case nir_op_ine32:
772 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
773 break;
774 case nir_op_ieq32:
775 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
776 break;
777 case nir_op_ige32:
778 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
779 break;
780 case nir_op_ult32:
781 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
782 break;
783 case nir_op_uge32:
784 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
785 break;
786 case nir_op_feq32:
787 result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
788 break;
789 case nir_op_fneu32:
790 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
791 break;
792 case nir_op_flt32:
793 result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
794 break;
795 case nir_op_fge32:
796 result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
797 break;
798 case nir_op_fabs:
799 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
800 ac_to_float_type(&ctx->ac, def_type), src[0]);
801 if (ctx->ac.float_mode == AC_FLOAT_MODE_DENORM_FLUSH_TO_ZERO) {
 802 			/* fabs will be optimized by the backend compiler with the sign
803 * bit removed via AND.
804 */
805 result = ac_build_canonicalize(&ctx->ac, result,
806 instr->dest.dest.ssa.bit_size);
807 }
808 break;
809 case nir_op_iabs:
810 result = emit_iabs(&ctx->ac, src[0]);
811 break;
812 case nir_op_imax:
813 result = ac_build_imax(&ctx->ac, src[0], src[1]);
814 break;
815 case nir_op_imin:
816 result = ac_build_imin(&ctx->ac, src[0], src[1]);
817 break;
818 case nir_op_umax:
819 result = ac_build_umax(&ctx->ac, src[0], src[1]);
820 break;
821 case nir_op_umin:
822 result = ac_build_umin(&ctx->ac, src[0], src[1]);
823 break;
824 case nir_op_isign:
825 result = ac_build_isign(&ctx->ac, src[0]);
826 break;
827 case nir_op_fsign:
828 src[0] = ac_to_float(&ctx->ac, src[0]);
829 result = ac_build_fsign(&ctx->ac, src[0]);
830 break;
831 case nir_op_ffloor:
832 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
833 ac_to_float_type(&ctx->ac, def_type), src[0]);
834 break;
835 case nir_op_ftrunc:
836 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
837 ac_to_float_type(&ctx->ac, def_type), src[0]);
838 break;
839 case nir_op_fceil:
840 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
841 ac_to_float_type(&ctx->ac, def_type), src[0]);
842 break;
843 case nir_op_fround_even:
844 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
845 ac_to_float_type(&ctx->ac, def_type),src[0]);
846 break;
847 case nir_op_ffract:
848 result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.fract",
849 ac_to_float_type(&ctx->ac, def_type), src[0]);
850 break;
851 case nir_op_fsin:
852 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
853 ac_to_float_type(&ctx->ac, def_type), src[0]);
854 break;
855 case nir_op_fcos:
856 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
857 ac_to_float_type(&ctx->ac, def_type), src[0]);
858 break;
859 case nir_op_fsqrt:
860 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
861 ac_to_float_type(&ctx->ac, def_type), src[0]);
862 break;
863 case nir_op_fexp2:
864 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
865 ac_to_float_type(&ctx->ac, def_type), src[0]);
866 break;
867 case nir_op_flog2:
868 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
869 ac_to_float_type(&ctx->ac, def_type), src[0]);
870 break;
871 case nir_op_frsq:
872 result = emit_intrin_1f_param_scalar(&ctx->ac, "llvm.amdgcn.rsq",
873 ac_to_float_type(&ctx->ac, def_type), src[0]);
874 if (ctx->abi->clamp_div_by_zero)
875 result = ac_build_fmin(&ctx->ac, result,
876 LLVMConstReal(ac_to_float_type(&ctx->ac, def_type), FLT_MAX));
877 break;
878 case nir_op_frexp_exp:
879 src[0] = ac_to_float(&ctx->ac, src[0]);
880 result = ac_build_frexp_exp(&ctx->ac, src[0],
881 ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
882 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
883 result = LLVMBuildSExt(ctx->ac.builder, result,
884 ctx->ac.i32, "");
885 break;
886 case nir_op_frexp_sig:
887 src[0] = ac_to_float(&ctx->ac, src[0]);
888 result = ac_build_frexp_mant(&ctx->ac, src[0],
889 instr->dest.dest.ssa.bit_size);
890 break;
891 case nir_op_fpow:
892 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
893 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
894 break;
895 case nir_op_fmax:
896 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
897 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
898 if (ctx->ac.chip_class < GFX9 &&
899 instr->dest.dest.ssa.bit_size == 32) {
900 /* Only pre-GFX9 chips do not flush denorms. */
901 result = ac_build_canonicalize(&ctx->ac, result,
902 instr->dest.dest.ssa.bit_size);
903 }
904 break;
905 case nir_op_fmin:
906 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
907 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
908 if (ctx->ac.chip_class < GFX9 &&
909 instr->dest.dest.ssa.bit_size == 32) {
910 /* Only pre-GFX9 chips do not flush denorms. */
911 result = ac_build_canonicalize(&ctx->ac, result,
912 instr->dest.dest.ssa.bit_size);
913 }
914 break;
915 case nir_op_ffma:
916 /* FMA is better on GFX10, because it has FMA units instead of MUL-ADD units. */
917 result = emit_intrin_3f_param(&ctx->ac, ctx->ac.chip_class >= GFX10 ? "llvm.fma" : "llvm.fmuladd",
918 ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
919 break;
920 case nir_op_ldexp:
921 src[0] = ac_to_float(&ctx->ac, src[0]);
922 if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
923 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
924 else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
925 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
926 else
927 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
928 break;
929 case nir_op_bfm:
930 result = emit_bfm(&ctx->ac, src[0], src[1]);
931 break;
932 case nir_op_bitfield_select:
933 result = emit_bitfield_select(&ctx->ac, src[0], src[1], src[2]);
934 break;
935 case nir_op_ubfe:
936 result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], false);
937 break;
938 case nir_op_ibfe:
939 result = ac_build_bfe(&ctx->ac, src[0], src[1], src[2], true);
940 break;
941 case nir_op_bitfield_reverse:
942 result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
943 break;
944 case nir_op_bit_count:
945 result = ac_build_bit_count(&ctx->ac, src[0]);
946 break;
947 case nir_op_vec2:
948 case nir_op_vec3:
949 case nir_op_vec4:
950 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
951 src[i] = ac_to_integer(&ctx->ac, src[i]);
952 result = ac_build_gather_values(&ctx->ac, src, num_components);
953 break;
954 case nir_op_f2i8:
955 case nir_op_f2i16:
956 case nir_op_f2i32:
957 case nir_op_f2i64:
958 src[0] = ac_to_float(&ctx->ac, src[0]);
959 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
960 break;
961 case nir_op_f2u8:
962 case nir_op_f2u16:
963 case nir_op_f2u32:
964 case nir_op_f2u64:
965 src[0] = ac_to_float(&ctx->ac, src[0]);
966 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
967 break;
968 case nir_op_i2f16:
969 case nir_op_i2f32:
970 case nir_op_i2f64:
971 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
972 break;
973 case nir_op_u2f16:
974 case nir_op_u2f32:
975 case nir_op_u2f64:
976 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
977 break;
978 case nir_op_f2f16_rtz:
979 case nir_op_f2f16:
980 case nir_op_f2fmp:
981 src[0] = ac_to_float(&ctx->ac, src[0]);
982
983 /* For OpenGL, we want fast packing with v_cvt_pkrtz_f16, but if we use it,
984 * all f32->f16 conversions have to round towards zero, because both scalar
985 * and vec2 down-conversions have to round equally.
986 */
987 if (ctx->ac.float_mode == AC_FLOAT_MODE_DEFAULT_OPENGL ||
988 instr->op == nir_op_f2f16_rtz) {
989 src[0] = ac_to_float(&ctx->ac, src[0]);
990
991 if (LLVMTypeOf(src[0]) == ctx->ac.f64)
992 src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
993
994 /* Fast path conversion. This only works if NIR is vectorized
 995 			 * to vec2 with 16-bit results.
996 */
997 if (LLVMTypeOf(src[0]) == ctx->ac.v2f32) {
998 LLVMValueRef args[] = {
999 ac_llvm_extract_elem(&ctx->ac, src[0], 0),
1000 ac_llvm_extract_elem(&ctx->ac, src[0], 1),
1001 };
1002 result = ac_build_cvt_pkrtz_f16(&ctx->ac, args);
1003 break;
1004 }
1005
1006 assert(ac_get_llvm_num_components(src[0]) == 1);
1007 LLVMValueRef param[2] = { src[0], LLVMGetUndef(ctx->ac.f32) };
1008 result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
1009 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
1010 } else {
1011 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1012 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1013 else
1014 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1015 }
1016 break;
1017 case nir_op_f2f16_rtne:
1018 case nir_op_f2f32:
1019 case nir_op_f2f64:
1020 src[0] = ac_to_float(&ctx->ac, src[0]);
1021 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1022 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1023 else
1024 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1025 break;
1026 case nir_op_u2u8:
1027 case nir_op_u2u16:
1028 case nir_op_u2ump:
1029 case nir_op_u2u32:
1030 case nir_op_u2u64:
1031 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1032 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1033 else
1034 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1035 break;
1036 case nir_op_i2i8:
1037 case nir_op_i2i16:
1038 case nir_op_i2imp:
1039 case nir_op_i2i32:
1040 case nir_op_i2i64:
1041 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
1042 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
1043 else
1044 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1045 break;
1046 case nir_op_b32csel:
1047 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
1048 break;
1049 case nir_op_find_lsb:
1050 result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
1051 break;
1052 case nir_op_ufind_msb:
1053 result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
1054 break;
1055 case nir_op_ifind_msb:
1056 result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
1057 break;
1058 case nir_op_uadd_carry:
1059 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1060 break;
1061 case nir_op_usub_borrow:
1062 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1063 break;
1064 case nir_op_b2f16:
1065 case nir_op_b2f32:
1066 case nir_op_b2f64:
1067 result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1068 break;
1069 case nir_op_f2b32:
1070 result = emit_f2b(&ctx->ac, src[0]);
1071 break;
1072 case nir_op_b2i8:
1073 case nir_op_b2i16:
1074 case nir_op_b2i32:
1075 case nir_op_b2i64:
1076 result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1077 break;
1078 case nir_op_i2b32:
1079 result = emit_i2b(&ctx->ac, src[0]);
1080 break;
1081 case nir_op_fquantize2f16:
1082 result = emit_f2f16(&ctx->ac, src[0]);
1083 break;
1084 case nir_op_umul_high:
1085 result = emit_umul_high(&ctx->ac, src[0], src[1]);
1086 break;
1087 case nir_op_imul_high:
1088 result = emit_imul_high(&ctx->ac, src[0], src[1]);
1089 break;
1090 case nir_op_pack_half_2x16:
1091 result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pkrtz_f16);
1092 break;
1093 case nir_op_pack_snorm_2x16:
1094 result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_i16);
1095 break;
1096 case nir_op_pack_unorm_2x16:
1097 result = emit_pack_2x16(&ctx->ac, src[0], ac_build_cvt_pknorm_u16);
1098 break;
1099 case nir_op_unpack_half_2x16:
1100 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1101 break;
1102 case nir_op_fddx:
1103 case nir_op_fddy:
1104 case nir_op_fddx_fine:
1105 case nir_op_fddy_fine:
1106 case nir_op_fddx_coarse:
1107 case nir_op_fddy_coarse:
1108 result = emit_ddxy(ctx, instr->op, src[0]);
1109 break;
1110
1111 case nir_op_unpack_64_2x32_split_x: {
1112 assert(ac_get_llvm_num_components(src[0]) == 1);
1113 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1114 ctx->ac.v2i32,
1115 "");
1116 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1117 ctx->ac.i32_0, "");
1118 break;
1119 }
1120
1121 case nir_op_unpack_64_2x32_split_y: {
1122 assert(ac_get_llvm_num_components(src[0]) == 1);
1123 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1124 ctx->ac.v2i32,
1125 "");
1126 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1127 ctx->ac.i32_1, "");
1128 break;
1129 }
1130
1131 case nir_op_pack_64_2x32_split: {
1132 LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1133 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
1134 break;
1135 }
1136
1137 case nir_op_pack_32_2x16_split: {
1138 LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1139 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
1140 break;
1141 }
1142
1143 case nir_op_unpack_32_2x16_split_x: {
1144 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1145 ctx->ac.v2i16,
1146 "");
1147 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1148 ctx->ac.i32_0, "");
1149 break;
1150 }
1151
1152 case nir_op_unpack_32_2x16_split_y: {
1153 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1154 ctx->ac.v2i16,
1155 "");
1156 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1157 ctx->ac.i32_1, "");
1158 break;
1159 }
1160
1161 case nir_op_cube_face_coord: {
1162 src[0] = ac_to_float(&ctx->ac, src[0]);
1163 LLVMValueRef results[2];
1164 LLVMValueRef in[3];
1165 for (unsigned chan = 0; chan < 3; chan++)
1166 in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1167 results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
1168 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1169 results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
1170 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1171 LLVMValueRef ma = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubema",
1172 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1173 results[0] = ac_build_fdiv(&ctx->ac, results[0], ma);
1174 results[1] = ac_build_fdiv(&ctx->ac, results[1], ma);
1175 LLVMValueRef offset = LLVMConstReal(ctx->ac.f32, 0.5);
1176 results[0] = LLVMBuildFAdd(ctx->ac.builder, results[0], offset, "");
1177 results[1] = LLVMBuildFAdd(ctx->ac.builder, results[1], offset, "");
1178 result = ac_build_gather_values(&ctx->ac, results, 2);
1179 break;
1180 }
1181
1182 case nir_op_cube_face_index: {
1183 src[0] = ac_to_float(&ctx->ac, src[0]);
1184 LLVMValueRef in[3];
1185 for (unsigned chan = 0; chan < 3; chan++)
1186 in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1187 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid",
1188 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1189 break;
1190 }
1191
1192 default:
1193 fprintf(stderr, "Unknown NIR alu instr: ");
1194 nir_print_instr(&instr->instr, stderr);
1195 fprintf(stderr, "\n");
1196 abort();
1197 }
1198
1199 if (result) {
1200 assert(instr->dest.dest.is_ssa);
1201 result = ac_to_integer_or_pointer(&ctx->ac, result);
1202 ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
1203 }
1204 }
1205
1206 static void visit_load_const(struct ac_nir_context *ctx,
1207 const nir_load_const_instr *instr)
1208 {
1209 LLVMValueRef values[4], value = NULL;
1210 LLVMTypeRef element_type =
1211 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
1212
1213 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1214 switch (instr->def.bit_size) {
1215 case 8:
1216 values[i] = LLVMConstInt(element_type,
1217 instr->value[i].u8, false);
1218 break;
1219 case 16:
1220 values[i] = LLVMConstInt(element_type,
1221 instr->value[i].u16, false);
1222 break;
1223 case 32:
1224 values[i] = LLVMConstInt(element_type,
1225 instr->value[i].u32, false);
1226 break;
1227 case 64:
1228 values[i] = LLVMConstInt(element_type,
1229 instr->value[i].u64, false);
1230 break;
1231 default:
1232 fprintf(stderr,
1233 "unsupported nir load_const bit_size: %d\n",
1234 instr->def.bit_size);
1235 abort();
1236 }
1237 }
1238 if (instr->def.num_components > 1) {
1239 value = LLVMConstVector(values, instr->def.num_components);
1240 } else
1241 value = values[0];
1242
1243 ctx->ssa_defs[instr->def.index] = value;
1244 }
1245
1246 static LLVMValueRef
1247 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
1248 {
1249 LLVMValueRef size =
1250 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
1251 LLVMConstInt(ctx->ac.i32, 2, false), "");
1252
1253 /* GFX8 only */
1254 if (ctx->ac.chip_class == GFX8 && in_elements) {
1255 /* On GFX8, the descriptor contains the size in bytes,
1256 * but TXQ must return the size in elements.
1257 * The stride is always non-zero for resources using TXQ.
1258 */
1259 LLVMValueRef stride =
1260 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
1261 ctx->ac.i32_1, "");
1262 stride = LLVMBuildLShr(ctx->ac.builder, stride,
1263 LLVMConstInt(ctx->ac.i32, 16, false), "");
1264 stride = LLVMBuildAnd(ctx->ac.builder, stride,
1265 LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
1266
1267 size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
1268 }
1269 return size;
1270 }
1271
1272 /* Gather4 should follow the same rules as bilinear filtering, but the hardware
1273 * incorrectly forces nearest filtering if the texture format is integer.
1274 * The only effect it has on Gather4, which always returns 4 texels for
1275 * bilinear filtering, is that the final coordinates are off by 0.5 of
1276 * the texel size.
1277 *
1278 * The workaround is to subtract 0.5 from the unnormalized coordinates,
1279 * or (0.5 / size) from the normalized coordinates.
1280 *
1281 * However, cube textures with 8_8_8_8 data formats require a different
1282 * workaround of overriding the num format to USCALED/SSCALED. This would lose
1283 * precision in 32-bit data formats, so it needs to be applied dynamically at
1284 * runtime. In this case, return an i1 value that indicates whether the
1285 * descriptor was overridden (and hence a fixup of the sampler result is needed).
1286 */
1287 static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
1288 nir_variable *var,
1289 struct ac_image_args *args,
1290 const nir_tex_instr *instr)
1291 {
1292 const struct glsl_type *type = glsl_without_array(var->type);
1293 enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1294 LLVMValueRef wa_8888 = NULL;
1295 LLVMValueRef half_texel[2];
1296 LLVMValueRef result;
1297
1298 assert(stype == GLSL_TYPE_INT || stype == GLSL_TYPE_UINT);
1299
1300 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1301 LLVMValueRef formats;
1302 LLVMValueRef data_format;
1303 LLVMValueRef wa_formats;
1304
1305 formats = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
1306
1307 data_format = LLVMBuildLShr(ctx->builder, formats,
1308 LLVMConstInt(ctx->i32, 20, false), "");
1309 data_format = LLVMBuildAnd(ctx->builder, data_format,
1310 LLVMConstInt(ctx->i32, (1u << 6) - 1, false), "");
1311 wa_8888 = LLVMBuildICmp(
1312 ctx->builder, LLVMIntEQ, data_format,
1313 LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false),
1314 "");
1315
1316 uint32_t wa_num_format =
1317 stype == GLSL_TYPE_UINT ?
1318 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_USCALED) :
1319 S_008F14_NUM_FORMAT(V_008F14_IMG_NUM_FORMAT_SSCALED);
1320 wa_formats = LLVMBuildAnd(ctx->builder, formats,
1321 LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT, false),
1322 "");
1323 wa_formats = LLVMBuildOr(ctx->builder, wa_formats,
1324 LLVMConstInt(ctx->i32, wa_num_format, false), "");
1325
1326 formats = LLVMBuildSelect(ctx->builder, wa_8888, wa_formats, formats, "");
1327 args->resource = LLVMBuildInsertElement(
1328 ctx->builder, args->resource, formats, ctx->i32_1, "");
1329 }
1330
1331 if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
1332 assert(!wa_8888);
1333 half_texel[0] = half_texel[1] = LLVMConstReal(ctx->f32, -0.5);
1334 } else {
1335 struct ac_image_args resinfo = {};
1336 LLVMBasicBlockRef bbs[2];
1337
1338 LLVMValueRef unnorm = NULL;
1339 LLVMValueRef default_offset = ctx->f32_0;
1340 if (instr->sampler_dim == GLSL_SAMPLER_DIM_2D &&
1341 !instr->is_array) {
 1342 			/* In Vulkan, whether the sampler uses unnormalized
1343 * coordinates or not is a dynamic property of the
1344 * sampler. Hence, to figure out whether or not we
1345 * need to divide by the texture size, we need to test
1346 * the sampler at runtime. This tests the bit set by
1347 * radv_init_sampler().
1348 */
1349 LLVMValueRef sampler0 =
1350 LLVMBuildExtractElement(ctx->builder, args->sampler, ctx->i32_0, "");
1351 sampler0 = LLVMBuildLShr(ctx->builder, sampler0,
1352 LLVMConstInt(ctx->i32, 15, false), "");
1353 sampler0 = LLVMBuildAnd(ctx->builder, sampler0, ctx->i32_1, "");
1354 unnorm = LLVMBuildICmp(ctx->builder, LLVMIntEQ, sampler0, ctx->i32_1, "");
1355 default_offset = LLVMConstReal(ctx->f32, -0.5);
1356 }
1357
1358 bbs[0] = LLVMGetInsertBlock(ctx->builder);
1359 if (wa_8888 || unnorm) {
1360 assert(!(wa_8888 && unnorm));
1361 LLVMValueRef not_needed = wa_8888 ? wa_8888 : unnorm;
1362 /* Skip the texture size query entirely if we don't need it. */
1363 ac_build_ifcc(ctx, LLVMBuildNot(ctx->builder, not_needed, ""), 2000);
1364 bbs[1] = LLVMGetInsertBlock(ctx->builder);
1365 }
1366
1367 /* Query the texture size. */
1368 resinfo.dim = ac_get_sampler_dim(ctx->chip_class, instr->sampler_dim, instr->is_array);
1369 resinfo.opcode = ac_image_get_resinfo;
1370 resinfo.dmask = 0xf;
1371 resinfo.lod = ctx->i32_0;
1372 resinfo.resource = args->resource;
1373 resinfo.attributes = AC_FUNC_ATTR_READNONE;
1374 LLVMValueRef size = ac_build_image_opcode(ctx, &resinfo);
1375
1376 /* Compute -0.5 / size. */
1377 for (unsigned c = 0; c < 2; c++) {
1378 half_texel[c] =
1379 LLVMBuildExtractElement(ctx->builder, size,
1380 LLVMConstInt(ctx->i32, c, 0), "");
1381 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1382 half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
1383 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1384 LLVMConstReal(ctx->f32, -0.5), "");
1385 }
1386
1387 if (wa_8888 || unnorm) {
1388 ac_build_endif(ctx, 2000);
1389
1390 for (unsigned c = 0; c < 2; c++) {
1391 LLVMValueRef values[2] = { default_offset, half_texel[c] };
1392 half_texel[c] = ac_build_phi(ctx, ctx->f32, 2,
1393 values, bbs);
1394 }
1395 }
1396 }
1397
1398 for (unsigned c = 0; c < 2; c++) {
1399 LLVMValueRef tmp;
1400 tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
1401 args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1402 }
1403
1404 args->attributes = AC_FUNC_ATTR_READNONE;
1405 result = ac_build_image_opcode(ctx, args);
1406
1407 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1408 LLVMValueRef tmp, tmp2;
1409
1410 /* if the cube workaround is in place, f2i the result. */
1411 for (unsigned c = 0; c < 4; c++) {
1412 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
1413 if (stype == GLSL_TYPE_UINT)
1414 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
1415 else
1416 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
1417 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1418 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
1419 tmp = LLVMBuildSelect(ctx->builder, wa_8888, tmp2, tmp, "");
1420 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1421 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
1422 }
1423 }
1424 return result;
1425 }
1426
1427 static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
1428 {
1429 nir_deref_instr *texture_deref_instr = NULL;
1430
1431 for (unsigned i = 0; i < instr->num_srcs; i++) {
1432 switch (instr->src[i].src_type) {
1433 case nir_tex_src_texture_deref:
1434 texture_deref_instr = nir_src_as_deref(instr->src[i].src);
1435 break;
1436 default:
1437 break;
1438 }
1439 }
1440 return texture_deref_instr;
1441 }
1442
1443 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
1444 const nir_tex_instr *instr,
1445 struct ac_image_args *args)
1446 {
1447 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
1448 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
1449
1450 assert(instr->dest.is_ssa);
1451 return ac_build_buffer_load_format(&ctx->ac,
1452 args->resource,
1453 args->coords[0],
1454 ctx->ac.i32_0,
1455 util_last_bit(mask),
1456 0, true,
1457 instr->dest.ssa.bit_size == 16);
1458 }
1459
1460 args->opcode = ac_image_sample;
1461
1462 switch (instr->op) {
1463 case nir_texop_txf:
1464 case nir_texop_txf_ms:
1465 case nir_texop_samples_identical:
1466 args->opcode = args->level_zero ||
1467 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
1468 ac_image_load : ac_image_load_mip;
1469 args->level_zero = false;
1470 break;
1471 case nir_texop_txs:
1472 case nir_texop_query_levels:
1473 args->opcode = ac_image_get_resinfo;
1474 if (!args->lod)
1475 args->lod = ctx->ac.i32_0;
1476 args->level_zero = false;
1477 break;
1478 case nir_texop_tex:
1479 if (ctx->stage != MESA_SHADER_FRAGMENT) {
1480 assert(!args->lod);
1481 args->level_zero = true;
1482 }
1483 break;
1484 case nir_texop_tg4:
1485 args->opcode = ac_image_gather4;
1486 if (!args->lod && !args->bias)
1487 args->level_zero = true;
1488 break;
1489 case nir_texop_lod:
1490 args->opcode = ac_image_get_lod;
1491 break;
1492 case nir_texop_fragment_fetch:
1493 case nir_texop_fragment_mask_fetch:
1494 args->opcode = ac_image_load;
1495 args->level_zero = false;
1496 break;
1497 default:
1498 break;
1499 }
1500
1501 if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= GFX8) {
1502 nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
1503 nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
1504 const struct glsl_type *type = glsl_without_array(var->type);
1505 enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1506 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1507 return lower_gather4_integer(&ctx->ac, var, args, instr);
1508 }
1509 }
1510
1511 /* Fixup for GFX9 which allocates 1D textures as 2D. */
1512 if (instr->op == nir_texop_lod && ctx->ac.chip_class == GFX9) {
1513 if ((args->dim == ac_image_2darray ||
1514 args->dim == ac_image_2d) && !args->coords[1]) {
1515 args->coords[1] = ctx->ac.i32_0;
1516 }
1517 }
1518
1519 args->attributes = AC_FUNC_ATTR_READNONE;
1520 bool cs_derivs = ctx->stage == MESA_SHADER_COMPUTE &&
1521 ctx->info->cs.derivative_group != DERIVATIVE_GROUP_NONE;
1522 if (ctx->stage == MESA_SHADER_FRAGMENT || cs_derivs) {
1523 /* Prevent texture instructions with implicit derivatives from being
 1524 		 * sunk into branches. */
1525 switch (instr->op) {
1526 case nir_texop_tex:
1527 case nir_texop_txb:
1528 case nir_texop_lod:
1529 args->attributes |= AC_FUNC_ATTR_CONVERGENT;
1530 break;
1531 default:
1532 break;
1533 }
1534 }
1535
1536 return ac_build_image_opcode(&ctx->ac, args);
1537 }
1538
1539 static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
1540 nir_intrinsic_instr *instr)
1541 {
1542 LLVMValueRef ptr = get_src(ctx, instr->src[0]);
1543 LLVMValueRef index = get_src(ctx, instr->src[1]);
1544
1545 LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
1546 LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
1547 return result;
1548 }
1549
1550 static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
1551 nir_intrinsic_instr *instr)
1552 {
1553 LLVMValueRef ptr, addr;
1554 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
1555 unsigned index = nir_intrinsic_base(instr);
1556
1557 addr = LLVMConstInt(ctx->ac.i32, index, 0);
1558 addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");
1559
 1560 	/* Load constant values from user SGPRs when possible, otherwise
 1561 	 * fall back to the default path that loads directly from memory.
1562 */
1563 if (LLVMIsConstant(src0) &&
1564 instr->dest.ssa.bit_size == 32) {
1565 unsigned count = instr->dest.ssa.num_components;
1566 unsigned offset = index;
1567
1568 offset += LLVMConstIntGetZExtValue(src0);
1569 offset /= 4;
1570
1571 offset -= ctx->args->base_inline_push_consts;
1572
1573 unsigned num_inline_push_consts = ctx->args->num_inline_push_consts;
1574 if (offset + count <= num_inline_push_consts) {
1575 LLVMValueRef push_constants[num_inline_push_consts];
1576 for (unsigned i = 0; i < num_inline_push_consts; i++)
1577 push_constants[i] = ac_get_arg(&ctx->ac,
1578 ctx->args->inline_push_consts[i]);
1579 return ac_build_gather_values(&ctx->ac,
1580 push_constants + offset,
1581 count);
1582 }
1583 }
1584
1585 ptr = LLVMBuildGEP(ctx->ac.builder,
1586 ac_get_arg(&ctx->ac, ctx->args->push_constants), &addr, 1, "");
1587
1588 if (instr->dest.ssa.bit_size == 8) {
1589 unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
1590 LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i8, 4 * load_dwords);
1591 ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
1592 LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
1593
1594 LLVMValueRef params[3];
1595 if (load_dwords > 1) {
1596 LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.v2i32, "");
1597 params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
1598 params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
1599 } else {
1600 res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
1601 params[0] = ctx->ac.i32_0;
1602 params[1] = res;
1603 }
1604 params[2] = addr;
1605 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);
1606
1607 res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
1608 if (instr->dest.ssa.num_components > 1)
1609 res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i8, instr->dest.ssa.num_components), "");
1610 return res;
1611 } else if (instr->dest.ssa.bit_size == 16) {
1612 unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
1613 LLVMTypeRef vec_type = LLVMVectorType(ctx->ac.i16, 2 * load_dwords);
1614 ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
1615 LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
1616 res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
1617 LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
1618 cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
1619 LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
1620 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
1621 LLVMConstInt(ctx->ac.i32, 4, false)};
1622 LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
1623 LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
1624 LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
1625 LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
1626 res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
1627 return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
1628 }
1629
1630 ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));
1631
1632 return LLVMBuildLoad(ctx->ac.builder, ptr, "");
1633 }
1634
1635 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
1636 const nir_intrinsic_instr *instr)
1637 {
1638 LLVMValueRef index = get_src(ctx, instr->src[0]);
1639
1640 return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
1641 }
1642
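/* Expand each set bit of a writemask to `multiplier` consecutive bits,
 * e.g. widen_mask(0b0101, 2) == 0b00110011. Used when a 64-bit store is
 * rewritten as a store of twice as many 32-bit channels.
 */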
1643 static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
1644 {
1645 uint32_t new_mask = 0;
1646 for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
1647 if (mask & (1u << i))
1648 new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
1649 return new_mask;
1650 }
1651
1652 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
1653 unsigned start, unsigned count)
1654 {
1655 LLVMValueRef mask[] = {
1656 ctx->i32_0, ctx->i32_1,
1657 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };
1658
1659 unsigned src_elements = ac_get_llvm_num_components(src);
1660
1661 if (count == src_elements) {
1662 assert(start == 0);
1663 return src;
1664 } else if (count == 1) {
1665 assert(start < src_elements);
1666 return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
1667 } else {
1668 assert(start + count <= src_elements);
1669 assert(count <= 4);
1670 LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
1671 return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
1672 }
1673 }
1674
1675 static unsigned get_cache_policy(struct ac_nir_context *ctx,
1676 enum gl_access_qualifier access,
1677 bool may_store_unaligned,
1678 bool writeonly_memory)
1679 {
1680 unsigned cache_policy = 0;
1681
1682 /* GFX6 has a TC L1 bug causing corruption of 8bit/16bit stores. All
1683 * store opcodes not aligned to a dword are affected. The only way to
1684 * get unaligned stores is through shader images.
1685 */
1686 if (((may_store_unaligned && ctx->ac.chip_class == GFX6) ||
1687 /* If this is write-only, don't keep data in L1 to prevent
1688 * evicting L1 cache lines that may be needed by other
1689 * instructions.
1690 */
1691 writeonly_memory ||
1692 access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
1693 cache_policy |= ac_glc;
1694 }
1695
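	/* ACCESS_STREAM_CACHE_POLICY requests streaming behaviour: mark the
	 * access SLC (and GLC), which is meant to keep the data from
	 * lingering in the caches.
	 */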
1696 if (access & ACCESS_STREAM_CACHE_POLICY)
1697 cache_policy |= ac_slc | ac_glc;
1698
1699 return cache_policy;
1700 }
1701
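/* Wrap divergent (ACCESS_NON_UNIFORM) SSBO indices in a waterfall loop so
 * that every iteration works on a uniform descriptor index.
 */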
1702 static LLVMValueRef enter_waterfall_ssbo(struct ac_nir_context *ctx,
1703 struct waterfall_context *wctx,
1704 const nir_intrinsic_instr *instr,
1705 nir_src src)
1706 {
1707 return enter_waterfall(ctx, wctx, get_src(ctx, src),
1708 nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
1709 }
1710
1711 static void visit_store_ssbo(struct ac_nir_context *ctx,
1712 nir_intrinsic_instr *instr)
1713 {
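	/* If kills are postponed, guard the store with the live-lane mask so
	 * lanes that have already been killed/demoted do not execute it.
	 */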
1714 if (ctx->ac.postponed_kill) {
1715 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
1716 ctx->ac.postponed_kill, "");
1717 ac_build_ifcc(&ctx->ac, cond, 7000);
1718 }
1719
1720 LLVMValueRef src_data = get_src(ctx, instr->src[0]);
1721 int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
1722 unsigned writemask = nir_intrinsic_write_mask(instr);
1723 enum gl_access_qualifier access = nir_intrinsic_access(instr);
1724 bool writeonly_memory = access & ACCESS_NON_READABLE;
1725 unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);
1726
1727 struct waterfall_context wctx;
1728 LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[1]);
1729
1730 LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, true);
1731 LLVMValueRef base_data = src_data;
1732 base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
1733 LLVMValueRef base_offset = get_src(ctx, instr->src[2]);
1734
1735 while (writemask) {
1736 int start, count;
1737 LLVMValueRef data, offset;
1738 LLVMTypeRef data_type;
1739
1740 u_bit_scan_consecutive_range(&writemask, &start, &count);
1741
1742 /* Due to an LLVM limitation with LLVM < 9, split 3-element
1743 * writes into a 2-element and a 1-element write. */
1744 if (count == 3 &&
1745 (elem_size_bytes != 4 || !ac_has_vec3_support(ctx->ac.chip_class, false))) {
1746 writemask |= 1 << (start + 2);
1747 count = 2;
1748 }
1749 int num_bytes = count * elem_size_bytes; /* count in bytes */
1750
1751 		/* We can only store 4 dwords at a time.
1752 		 * This can only happen for 64-bit vectors. */
1753 if (num_bytes > 16) {
1754 writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
1755 count = 2;
1756 num_bytes = 16;
1757 }
1758
1759 		/* Check the alignment of 16-bit stores. */
1760 if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
1761 writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
1762 count = 1;
1763 num_bytes = 2;
1764 }
1765
1766 /* Due to alignment issues, split stores of 8-bit/16-bit
1767 * vectors.
1768 */
1769 if (ctx->ac.chip_class == GFX6 && count > 1 && elem_size_bytes < 4) {
1770 writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
1771 count = 1;
1772 num_bytes = elem_size_bytes;
1773 }
1774
1775 data = extract_vector_range(&ctx->ac, base_data, start, count);
1776
1777 offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
1778 LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");
1779
1780 if (num_bytes == 1) {
1781 ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
1782 offset, ctx->ac.i32_0,
1783 cache_policy);
1784 } else if (num_bytes == 2) {
1785 ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
1786 offset, ctx->ac.i32_0,
1787 cache_policy);
1788 } else {
1789 int num_channels = num_bytes / 4;
1790
1791 switch (num_bytes) {
1792 case 16: /* v4f32 */
1793 data_type = ctx->ac.v4f32;
1794 break;
1795 case 12: /* v3f32 */
1796 data_type = ctx->ac.v3f32;
1797 break;
1798 case 8: /* v2f32 */
1799 data_type = ctx->ac.v2f32;
1800 break;
1801 case 4: /* f32 */
1802 data_type = ctx->ac.f32;
1803 break;
1804 default:
1805 unreachable("Malformed vector store.");
1806 }
1807 data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");
1808
1809 ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
1810 num_channels, offset,
1811 ctx->ac.i32_0, 0,
1812 cache_policy);
1813 }
1814 }
1815
1816 exit_waterfall(ctx, &wctx, NULL);
1817
1818 if (ctx->ac.postponed_kill)
1819 ac_build_endif(&ctx->ac, 7000);
1820 }
1821
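/* 64-bit SSBO compare-and-swap, emulated with a global cmpxchg: rebuild a
 * 64-bit address from the descriptor (word 0 plus the low 16 bits of word 1,
 * sign-extended), add the byte offset and emit an LLVM atomic cmpxchg. With
 * robust buffer access the offset is first bounds-checked against the size
 * in word 2, and 0 is returned for out-of-bounds accesses.
 */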
1822 static LLVMValueRef emit_ssbo_comp_swap_64(struct ac_nir_context *ctx,
1823 LLVMValueRef descriptor,
1824 LLVMValueRef offset,
1825 LLVMValueRef compare,
1826 LLVMValueRef exchange)
1827 {
1828 LLVMBasicBlockRef start_block = NULL, then_block = NULL;
1829 if (ctx->abi->robust_buffer_access) {
1830 LLVMValueRef size = ac_llvm_extract_elem(&ctx->ac, descriptor, 2);
1831
1832 LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT, offset, size, "");
1833 start_block = LLVMGetInsertBlock(ctx->ac.builder);
1834
1835 ac_build_ifcc(&ctx->ac, cond, -1);
1836
1837 then_block = LLVMGetInsertBlock(ctx->ac.builder);
1838 }
1839
1840 LLVMValueRef ptr_parts[2] = {
1841 ac_llvm_extract_elem(&ctx->ac, descriptor, 0),
1842 LLVMBuildAnd(ctx->ac.builder,
1843 ac_llvm_extract_elem(&ctx->ac, descriptor, 1),
1844 LLVMConstInt(ctx->ac.i32, 65535, 0), "")
1845 };
1846
1847 ptr_parts[1] = LLVMBuildTrunc(ctx->ac.builder, ptr_parts[1], ctx->ac.i16, "");
1848 ptr_parts[1] = LLVMBuildSExt(ctx->ac.builder, ptr_parts[1], ctx->ac.i32, "");
1849
1850 offset = LLVMBuildZExt(ctx->ac.builder, offset, ctx->ac.i64, "");
1851
1852 LLVMValueRef ptr = ac_build_gather_values(&ctx->ac, ptr_parts, 2);
1853 ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ctx->ac.i64, "");
1854 ptr = LLVMBuildAdd(ctx->ac.builder, ptr, offset, "");
1855 ptr = LLVMBuildIntToPtr(ctx->ac.builder, ptr, LLVMPointerType(ctx->ac.i64, AC_ADDR_SPACE_GLOBAL), "");
1856
1857 LLVMValueRef result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, compare, exchange, "singlethread-one-as");
1858 result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
1859
1860 if (ctx->abi->robust_buffer_access) {
1861 ac_build_endif(&ctx->ac, -1);
1862
1863 LLVMBasicBlockRef incoming_blocks[2] = {
1864 start_block,
1865 then_block,
1866 };
1867
1868 LLVMValueRef incoming_values[2] = {
1869 LLVMConstInt(ctx->ac.i64, 0, 0),
1870 result,
1871 };
1872 LLVMValueRef ret = LLVMBuildPhi(ctx->ac.builder, ctx->ac.i64, "");
1873 LLVMAddIncoming(ret, incoming_values, incoming_blocks, 2);
1874 return ret;
1875 } else {
1876 return result;
1877 }
1878 }
1879
1880 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
1881 nir_intrinsic_instr *instr)
1882 {
1883 if (ctx->ac.postponed_kill) {
1884 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
1885 ctx->ac.postponed_kill, "");
1886 ac_build_ifcc(&ctx->ac, cond, 7001);
1887 }
1888
1889 LLVMTypeRef return_type = LLVMTypeOf(get_src(ctx, instr->src[2]));
1890 const char *op;
1891 char name[64], type[8];
1892 LLVMValueRef params[6], descriptor;
1893 LLVMValueRef result;
1894 int arg_count = 0;
1895
1896 struct waterfall_context wctx;
1897 LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
1898
1899 switch (instr->intrinsic) {
1900 case nir_intrinsic_ssbo_atomic_add:
1901 op = "add";
1902 break;
1903 case nir_intrinsic_ssbo_atomic_imin:
1904 op = "smin";
1905 break;
1906 case nir_intrinsic_ssbo_atomic_umin:
1907 op = "umin";
1908 break;
1909 case nir_intrinsic_ssbo_atomic_imax:
1910 op = "smax";
1911 break;
1912 case nir_intrinsic_ssbo_atomic_umax:
1913 op = "umax";
1914 break;
1915 case nir_intrinsic_ssbo_atomic_and:
1916 op = "and";
1917 break;
1918 case nir_intrinsic_ssbo_atomic_or:
1919 op = "or";
1920 break;
1921 case nir_intrinsic_ssbo_atomic_xor:
1922 op = "xor";
1923 break;
1924 case nir_intrinsic_ssbo_atomic_exchange:
1925 op = "swap";
1926 break;
1927 case nir_intrinsic_ssbo_atomic_comp_swap:
1928 op = "cmpswap";
1929 break;
1930 default:
1931 abort();
1932 }
1933
1934 descriptor = ctx->abi->load_ssbo(ctx->abi,
1935 rsrc_base,
1936 true);
1937
1938 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap &&
1939 return_type == ctx->ac.i64) {
1940 result = emit_ssbo_comp_swap_64(ctx, descriptor,
1941 get_src(ctx, instr->src[1]),
1942 get_src(ctx, instr->src[2]),
1943 get_src(ctx, instr->src[3]));
1944 } else {
1945 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1946 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
1947 }
1948 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
1949 params[arg_count++] = descriptor;
1950
1951 if (LLVM_VERSION_MAJOR >= 9) {
1952 /* XXX: The new raw/struct atomic intrinsics are buggy with
1953 * LLVM 8, see r358579.
1954 */
1955 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1956 params[arg_count++] = ctx->ac.i32_0; /* soffset */
1957 params[arg_count++] = ctx->ac.i32_0; /* slc */
1958
1959 ac_build_type_name_for_intr(return_type, type, sizeof(type));
1960 snprintf(name, sizeof(name),
1961 "llvm.amdgcn.raw.buffer.atomic.%s.%s", op, type);
1962 } else {
1963 params[arg_count++] = ctx->ac.i32_0; /* vindex */
1964 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1965 params[arg_count++] = ctx->ac.i1false; /* slc */
1966
1967 assert(return_type == ctx->ac.i32);
1968 snprintf(name, sizeof(name),
1969 "llvm.amdgcn.buffer.atomic.%s", op);
1970 }
1971
1972 result = ac_build_intrinsic(&ctx->ac, name, return_type, params,
1973 arg_count, 0);
1974 }
1975
1976 result = exit_waterfall(ctx, &wctx, result);
1977 if (ctx->ac.postponed_kill)
1978 ac_build_endif(&ctx->ac, 7001);
1979 return result;
1980 }
1981
1982 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
1983 nir_intrinsic_instr *instr)
1984 {
1985 struct waterfall_context wctx;
1986 LLVMValueRef rsrc_base = enter_waterfall_ssbo(ctx, &wctx, instr, instr->src[0]);
1987
1988 int elem_size_bytes = instr->dest.ssa.bit_size / 8;
1989 int num_components = instr->num_components;
1990 enum gl_access_qualifier access = nir_intrinsic_access(instr);
1991 unsigned cache_policy = get_cache_policy(ctx, access, false, false);
1992
1993 LLVMValueRef offset = get_src(ctx, instr->src[1]);
1994 LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi, rsrc_base, false);
1995 LLVMValueRef vindex = ctx->ac.i32_0;
1996
1997 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
1998 LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;
1999
2000 LLVMValueRef results[4];
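	/* Split the load into chunks of at most 16 bytes (4 dwords).
	 * Sub-dword components without dword alignment are loaded one at a
	 * time through the byte/short tbuffer loads.
	 */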
2001 for (int i = 0; i < num_components;) {
2002 int num_elems = num_components - i;
2003 if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
2004 num_elems = 1;
2005 if (num_elems * elem_size_bytes > 16)
2006 num_elems = 16 / elem_size_bytes;
2007 int load_bytes = num_elems * elem_size_bytes;
2008
2009 LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);
2010
2011 LLVMValueRef ret;
2012
2013 if (load_bytes == 1) {
2014 ret = ac_build_tbuffer_load_byte(&ctx->ac,
2015 rsrc,
2016 offset,
2017 ctx->ac.i32_0,
2018 immoffset,
2019 cache_policy);
2020 } else if (load_bytes == 2) {
2021 ret = ac_build_tbuffer_load_short(&ctx->ac,
2022 rsrc,
2023 offset,
2024 ctx->ac.i32_0,
2025 immoffset,
2026 cache_policy);
2027 } else {
2028 int num_channels = util_next_power_of_two(load_bytes) / 4;
2029 bool can_speculate = access & ACCESS_CAN_REORDER;
2030
2031 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
2032 vindex, offset, immoffset, 0,
2033 cache_policy, can_speculate, false);
2034 }
2035
2036 LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
2037 ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
2038 ret = ac_trim_vector(&ctx->ac, ret, load_bytes);
2039
2040 LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
2041 ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");
2042
2043 for (unsigned j = 0; j < num_elems; j++) {
2044 results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
2045 }
2046 i += num_elems;
2047 }
2048
2049 LLVMValueRef ret = ac_build_gather_values(&ctx->ac, results, num_components);
2050 return exit_waterfall(ctx, &wctx, ret);
2051 }
2052
2053 static LLVMValueRef enter_waterfall_ubo(struct ac_nir_context *ctx,
2054 struct waterfall_context *wctx,
2055 const nir_intrinsic_instr *instr)
2056 {
2057 return enter_waterfall(ctx, wctx, get_src(ctx, instr->src[0]),
2058 nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
2059 }
2060
2061 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
2062 nir_intrinsic_instr *instr)
2063 {
2064 struct waterfall_context wctx;
2065 LLVMValueRef rsrc_base = enter_waterfall_ubo(ctx, &wctx, instr);
2066
2067 LLVMValueRef ret;
2068 LLVMValueRef rsrc = rsrc_base;
2069 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2070 int num_components = instr->num_components;
2071
2072 if (ctx->abi->load_ubo)
2073 rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
2074
2075 if (instr->dest.ssa.bit_size == 64)
2076 num_components *= 2;
2077
2078 if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
2079 unsigned load_bytes = instr->dest.ssa.bit_size / 8;
2080 LLVMValueRef results[num_components];
2081 for (unsigned i = 0; i < num_components; ++i) {
2082 LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32,
2083 load_bytes * i, 0);
2084
2085 if (load_bytes == 1) {
2086 results[i] = ac_build_tbuffer_load_byte(&ctx->ac,
2087 rsrc,
2088 offset,
2089 ctx->ac.i32_0,
2090 immoffset,
2091 0);
2092 } else {
2093 assert(load_bytes == 2);
2094 results[i] = ac_build_tbuffer_load_short(&ctx->ac,
2095 rsrc,
2096 offset,
2097 ctx->ac.i32_0,
2098 immoffset,
2099 0);
2100 }
2101 }
2102 ret = ac_build_gather_values(&ctx->ac, results, num_components);
2103 } else {
2104 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
2105 NULL, 0, 0, true, true);
2106
2107 ret = ac_trim_vector(&ctx->ac, ret, num_components);
2108 }
2109
2110 ret = LLVMBuildBitCast(ctx->ac.builder, ret,
2111 get_def_type(ctx, &instr->dest.ssa), "");
2112
2113 return exit_waterfall(ctx, &wctx, ret);
2114 }
2115
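/* Walk a deref chain and split it into a constant attribute-slot offset plus
 * an optional dynamic (indirect) offset in slots. When requested, the leading
 * per-vertex array index is returned separately, either as a constant or as
 * an LLVM value.
 */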
2116 static void
2117 get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr,
2118 bool vs_in, unsigned *vertex_index_out,
2119 LLVMValueRef *vertex_index_ref,
2120 unsigned *const_out, LLVMValueRef *indir_out)
2121 {
2122 nir_variable *var = nir_deref_instr_get_variable(instr);
2123 nir_deref_path path;
2124 unsigned idx_lvl = 1;
2125
2126 nir_deref_path_init(&path, instr, NULL);
2127
2128 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2129 if (vertex_index_ref) {
2130 *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
2131 if (vertex_index_out)
2132 *vertex_index_out = 0;
2133 } else {
2134 *vertex_index_out = nir_src_as_uint(path.path[idx_lvl]->arr.index);
2135 }
2136 ++idx_lvl;
2137 }
2138
2139 uint32_t const_offset = 0;
2140 LLVMValueRef offset = NULL;
2141
2142 if (var->data.compact) {
2143 assert(instr->deref_type == nir_deref_type_array);
2144 const_offset = nir_src_as_uint(instr->arr.index);
2145 goto out;
2146 }
2147
2148 for (; path.path[idx_lvl]; ++idx_lvl) {
2149 const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
2150 if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
2151 unsigned index = path.path[idx_lvl]->strct.index;
2152
2153 for (unsigned i = 0; i < index; i++) {
2154 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2155 const_offset += glsl_count_attribute_slots(ft, vs_in);
2156 }
2157 } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
2158 unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
2159 if (nir_src_is_const(path.path[idx_lvl]->arr.index)) {
2160 const_offset += size *
2161 nir_src_as_uint(path.path[idx_lvl]->arr.index);
2162 } else {
2163 LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
2164 get_src(ctx, path.path[idx_lvl]->arr.index), "");
2165 if (offset)
2166 offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
2167 else
2168 offset = array_off;
2169 }
2170 } else
2171 			unreachable("Unhandled deref type in get_deref_offset");
2172 }
2173
2174 out:
2175 nir_deref_path_finish(&path);
2176
2177 if (const_offset && offset)
2178 offset = LLVMBuildAdd(ctx->ac.builder, offset,
2179 LLVMConstInt(ctx->ac.i32, const_offset, 0),
2180 "");
2181
2182 *const_out = const_offset;
2183 *indir_out = offset;
2184 }
2185
2186 static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
2187 nir_intrinsic_instr *instr,
2188 bool load_inputs)
2189 {
2190 LLVMValueRef result;
2191 LLVMValueRef vertex_index = NULL;
2192 LLVMValueRef indir_index = NULL;
2193 unsigned const_index = 0;
2194
2195 nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));
2196
2197 unsigned location = var->data.location;
2198 unsigned driver_location = var->data.driver_location;
2199 const bool is_patch = var->data.patch ||
2200 var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
2201 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
2202 const bool is_compact = var->data.compact;
2203
2204 get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
2205 false, NULL, is_patch ? NULL : &vertex_index,
2206 &const_index, &indir_index);
2207
2208 LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
2209
2210 LLVMTypeRef src_component_type;
2211 if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
2212 src_component_type = LLVMGetElementType(dest_type);
2213 else
2214 src_component_type = dest_type;
2215
2216 result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
2217 vertex_index, indir_index,
2218 const_index, location, driver_location,
2219 var->data.location_frac,
2220 instr->num_components,
2221 is_patch, is_compact, load_inputs);
2222 if (instr->dest.ssa.bit_size == 16) {
2223 result = ac_to_integer(&ctx->ac, result);
2224 result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
2225 }
2226 return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
2227 }
2228
2229 static unsigned
2230 type_scalar_size_bytes(const struct glsl_type *type)
2231 {
2232 assert(glsl_type_is_vector_or_scalar(type) ||
2233 glsl_type_is_matrix(type));
2234 return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
2235 }
2236
2237 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
2238 nir_intrinsic_instr *instr)
2239 {
2240 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2241 nir_variable *var = nir_deref_instr_get_variable(deref);
2242
2243 LLVMValueRef values[8];
2244 int idx = 0;
2245 int ve = instr->dest.ssa.num_components;
2246 unsigned comp = 0;
2247 LLVMValueRef indir_index;
2248 LLVMValueRef ret;
2249 unsigned const_index;
2250 unsigned stride = 4;
2251 int mode = deref->mode;
2252
2253 if (var) {
2254 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
2255 var->data.mode == nir_var_shader_in;
2256 idx = var->data.driver_location;
2257 comp = var->data.location_frac;
2258 mode = var->data.mode;
2259
2260 get_deref_offset(ctx, deref, vs_in, NULL, NULL,
2261 &const_index, &indir_index);
2262
2263 if (var->data.compact) {
2264 stride = 1;
2265 const_index += comp;
2266 comp = 0;
2267 }
2268 }
2269
2270 if (instr->dest.ssa.bit_size == 64 &&
2271 (deref->mode == nir_var_shader_in ||
2272 deref->mode == nir_var_shader_out ||
2273 deref->mode == nir_var_function_temp))
2274 ve *= 2;
2275
2276 switch (mode) {
2277 case nir_var_shader_in:
2278 /* TODO: remove this after RADV switches to lowered IO */
2279 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
2280 ctx->stage == MESA_SHADER_TESS_EVAL) {
2281 return load_tess_varyings(ctx, instr, true);
2282 }
2283
2284 if (ctx->stage == MESA_SHADER_GEOMETRY) {
2285 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
2286 LLVMValueRef indir_index;
2287 unsigned const_index, vertex_index;
2288 get_deref_offset(ctx, deref, false, &vertex_index, NULL,
2289 &const_index, &indir_index);
2290 assert(indir_index == NULL);
2291
2292 return ctx->abi->load_inputs(ctx->abi, var->data.location,
2293 var->data.driver_location,
2294 var->data.location_frac,
2295 instr->num_components, vertex_index, const_index, type);
2296 }
2297
2298 for (unsigned chan = comp; chan < ve + comp; chan++) {
2299 if (indir_index) {
2300 unsigned count = glsl_count_attribute_slots(
2301 var->type,
2302 ctx->stage == MESA_SHADER_VERTEX);
2303 count -= chan / 4;
2304 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2305 &ctx->ac, ctx->abi->inputs + idx + chan, count,
2306 stride, false, true);
2307
2308 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
2309 tmp_vec,
2310 indir_index, "");
2311 } else
2312 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
2313 }
2314 break;
2315 case nir_var_function_temp:
2316 for (unsigned chan = 0; chan < ve; chan++) {
2317 if (indir_index) {
2318 unsigned count = glsl_count_attribute_slots(
2319 var->type, false);
2320 count -= chan / 4;
2321 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2322 &ctx->ac, ctx->locals + idx + chan, count,
2323 stride, true, true);
2324
2325 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
2326 tmp_vec,
2327 indir_index, "");
2328 } else {
2329 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
2330 }
2331 }
2332 break;
2333 case nir_var_shader_out:
2334 /* TODO: remove this after RADV switches to lowered IO */
2335 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2336 return load_tess_varyings(ctx, instr, false);
2337 }
2338
2339 if (ctx->stage == MESA_SHADER_FRAGMENT &&
2340 var->data.fb_fetch_output &&
2341 ctx->abi->emit_fbfetch)
2342 return ctx->abi->emit_fbfetch(ctx->abi);
2343
2344 for (unsigned chan = comp; chan < ve + comp; chan++) {
2345 if (indir_index) {
2346 unsigned count = glsl_count_attribute_slots(
2347 var->type, false);
2348 count -= chan / 4;
2349 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2350 &ctx->ac, ctx->abi->outputs + idx + chan, count,
2351 stride, true, true);
2352
2353 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
2354 tmp_vec,
2355 indir_index, "");
2356 } else {
2357 values[chan] = LLVMBuildLoad(ctx->ac.builder,
2358 ctx->abi->outputs[idx + chan + const_index * stride],
2359 "");
2360 }
2361 }
2362 break;
2363 case nir_var_mem_global: {
2364 LLVMValueRef address = get_src(ctx, instr->src[0]);
2365 LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
2366 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
2367 unsigned natural_stride = type_scalar_size_bytes(deref->type);
2368 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
2369 int elem_size_bytes = ac_get_elem_bits(&ctx->ac, result_type) / 8;
2370 bool split_loads = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
2371
2372 if (stride != natural_stride || split_loads) {
2373 if (LLVMGetTypeKind(result_type) == LLVMVectorTypeKind)
2374 result_type = LLVMGetElementType(result_type);
2375
2376 LLVMTypeRef ptr_type = LLVMPointerType(result_type,
2377 LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2378 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
2379
2380 for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
2381 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
2382 values[i] = LLVMBuildLoad(ctx->ac.builder,
2383 ac_build_gep_ptr(&ctx->ac, address, offset), "");
2384
2385 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2386 LLVMSetOrdering(values[i], LLVMAtomicOrderingMonotonic);
2387 }
2388 return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
2389 } else {
2390 LLVMTypeRef ptr_type = LLVMPointerType(result_type,
2391 LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2392 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
2393 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
2394
2395 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2396 LLVMSetOrdering(val, LLVMAtomicOrderingMonotonic);
2397 return val;
2398 }
2399 }
2400 default:
2401 		unreachable("unhandled variable mode");
2402 }
2403 ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
2404 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2405 }
2406
2407 static void
2408 visit_store_var(struct ac_nir_context *ctx,
2409 nir_intrinsic_instr *instr)
2410 {
2411 if (ctx->ac.postponed_kill) {
2412 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
2413 ctx->ac.postponed_kill, "");
2414 ac_build_ifcc(&ctx->ac, cond, 7002);
2415 }
2416
2417 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2418 nir_variable *var = nir_deref_instr_get_variable(deref);
2419
2420 LLVMValueRef temp_ptr, value;
2421 int idx = 0;
2422 unsigned comp = 0;
2423 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
2424 int writemask = instr->const_index[0];
2425 LLVMValueRef indir_index;
2426 unsigned const_index;
2427
2428 if (var) {
2429 get_deref_offset(ctx, deref, false,
2430 NULL, NULL, &const_index, &indir_index);
2431 idx = var->data.driver_location;
2432 comp = var->data.location_frac;
2433
2434 if (var->data.compact) {
2435 const_index += comp;
2436 comp = 0;
2437 }
2438 }
2439
2440 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
2441 (deref->mode == nir_var_shader_out ||
2442 deref->mode == nir_var_function_temp)) {
2443
2444 src = LLVMBuildBitCast(ctx->ac.builder, src,
2445 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
2446 "");
2447
2448 writemask = widen_mask(writemask, 2);
2449 }
2450
2451 writemask = writemask << comp;
2452
2453 switch (deref->mode) {
2454 case nir_var_shader_out:
2455 /* TODO: remove this after RADV switches to lowered IO */
2456 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2457 LLVMValueRef vertex_index = NULL;
2458 LLVMValueRef indir_index = NULL;
2459 unsigned const_index = 0;
2460 const bool is_patch = var->data.patch ||
2461 var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
2462 var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER;
2463
2464 get_deref_offset(ctx, deref, false, NULL,
2465 is_patch ? NULL : &vertex_index,
2466 &const_index, &indir_index);
2467
2468 ctx->abi->store_tcs_outputs(ctx->abi, var,
2469 vertex_index, indir_index,
2470 const_index, src, writemask,
2471 var->data.location_frac,
2472 var->data.driver_location);
2473 break;
2474 }
2475
2476 for (unsigned chan = 0; chan < 8; chan++) {
2477 int stride = 4;
2478 if (!(writemask & (1 << chan)))
2479 continue;
2480
2481 value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
2482
2483 if (var->data.compact)
2484 stride = 1;
2485 if (indir_index) {
2486 unsigned count = glsl_count_attribute_slots(
2487 var->type, false);
2488 count -= chan / 4;
2489 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2490 &ctx->ac, ctx->abi->outputs + idx + chan, count,
2491 stride, true, true);
2492
2493 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
2494 value, indir_index, "");
2495 build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
2496 count, stride, tmp_vec);
2497
2498 } else {
2499 temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
2500
2501 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
2502 }
2503 }
2504 break;
2505 case nir_var_function_temp:
2506 for (unsigned chan = 0; chan < 8; chan++) {
2507 if (!(writemask & (1 << chan)))
2508 continue;
2509
2510 value = ac_llvm_extract_elem(&ctx->ac, src, chan);
2511 if (indir_index) {
2512 unsigned count = glsl_count_attribute_slots(
2513 var->type, false);
2514 count -= chan / 4;
2515 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2516 &ctx->ac, ctx->locals + idx + chan, count,
2517 4, true, true);
2518
2519 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
2520 value, indir_index, "");
2521 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
2522 count, 4, tmp_vec);
2523 } else {
2524 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2525
2526 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
2527 }
2528 }
2529 break;
2530
2531 case nir_var_mem_global: {
2532 int writemask = instr->const_index[0];
2533 LLVMValueRef address = get_src(ctx, instr->src[0]);
2534 LLVMValueRef val = get_src(ctx, instr->src[1]);
2535
2536 unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
2537 unsigned natural_stride = type_scalar_size_bytes(deref->type);
2538 unsigned stride = explicit_stride ? explicit_stride : natural_stride;
2539 int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(val)) / 8;
2540 bool split_stores = ctx->ac.chip_class == GFX6 && elem_size_bytes < 4;
2541
2542 LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
2543 LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2544 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
2545
2546 if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
2547 stride == natural_stride && !split_stores) {
2548 LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
2549 LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2550 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
2551
2552 val = LLVMBuildBitCast(ctx->ac.builder, val,
2553 LLVMGetElementType(LLVMTypeOf(address)), "");
2554 LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, val, address);
2555
2556 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2557 LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
2558 } else {
2559 LLVMTypeRef val_type = LLVMTypeOf(val);
2560 if (LLVMGetTypeKind(LLVMTypeOf(val)) == LLVMVectorTypeKind)
2561 val_type = LLVMGetElementType(val_type);
2562
2563 LLVMTypeRef ptr_type = LLVMPointerType(val_type,
2564 LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
2565 address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
2566 for (unsigned chan = 0; chan < 4; chan++) {
2567 if (!(writemask & (1 << chan)))
2568 continue;
2569
2570 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);
2571
2572 LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
2573 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
2574 chan);
2575 src = LLVMBuildBitCast(ctx->ac.builder, src,
2576 LLVMGetElementType(LLVMTypeOf(ptr)), "");
2577 LLVMValueRef store = LLVMBuildStore(ctx->ac.builder, src, ptr);
2578
2579 if (nir_intrinsic_access(instr) & (ACCESS_COHERENT | ACCESS_VOLATILE))
2580 LLVMSetOrdering(store, LLVMAtomicOrderingMonotonic);
2581 }
2582 }
2583 break;
2584 }
2585 default:
2586 abort();
2587 break;
2588 }
2589
2590 if (ctx->ac.postponed_kill)
2591 ac_build_endif(&ctx->ac, 7002);
2592 }
2593
2594 static void
2595 visit_store_output(struct ac_nir_context *ctx, nir_intrinsic_instr *instr)
2596 {
2597 if (ctx->ac.postponed_kill) {
2598 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
2599 ctx->ac.postponed_kill, "");
2600 ac_build_ifcc(&ctx->ac, cond, 7002);
2601 }
2602
2603 unsigned base = nir_intrinsic_base(instr);
2604 unsigned writemask = nir_intrinsic_write_mask(instr);
2605 unsigned component = nir_intrinsic_component(instr);
2606 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2607 nir_src offset = *nir_get_io_offset_src(instr);
2608 LLVMValueRef indir_index = NULL;
2609
2610 if (nir_src_is_const(offset))
2611 assert(nir_src_as_uint(offset) == 0);
2612 else
2613 indir_index = get_src(ctx, offset);
2614
2615 switch (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src))) {
2616 case 32:
2617 break;
2618 case 64:
2619 writemask = widen_mask(writemask, 2);
2620 src = LLVMBuildBitCast(ctx->ac.builder, src,
2621 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
2622 "");
2623 break;
2624 default:
2625 unreachable("unhandled store_output bit size");
2626 return;
2627 }
2628
2629 writemask <<= component;
2630
2631 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2632 nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
2633 LLVMValueRef vertex_index =
2634 vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
2635
2636 ctx->abi->store_tcs_outputs(ctx->abi, NULL,
2637 vertex_index, indir_index,
2638 0, src, writemask,
2639 component, base * 4);
2640 return;
2641 }
2642
2643 /* No indirect indexing is allowed after this point. */
2644 assert(!indir_index);
2645
2646 for (unsigned chan = 0; chan < 8; chan++) {
2647 if (!(writemask & (1 << chan)))
2648 continue;
2649
2650 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
2651 LLVMBuildStore(ctx->ac.builder, value,
2652 ctx->abi->outputs[base * 4 + chan]);
2653 }
2654
2655 if (ctx->ac.postponed_kill)
2656 ac_build_endif(&ctx->ac, 7002);
2657 }
2658
2659 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2660 {
2661 switch (dim) {
2662 case GLSL_SAMPLER_DIM_BUF:
2663 return 1;
2664 case GLSL_SAMPLER_DIM_1D:
2665 return array ? 2 : 1;
2666 case GLSL_SAMPLER_DIM_2D:
2667 return array ? 3 : 2;
2668 case GLSL_SAMPLER_DIM_MS:
2669 return array ? 4 : 3;
2670 case GLSL_SAMPLER_DIM_3D:
2671 case GLSL_SAMPLER_DIM_CUBE:
2672 return 3;
2673 case GLSL_SAMPLER_DIM_RECT:
2674 case GLSL_SAMPLER_DIM_SUBPASS:
2675 return 2;
2676 case GLSL_SAMPLER_DIM_SUBPASS_MS:
2677 return 3;
2678 default:
2679 break;
2680 }
2681 return 0;
2682 }
2683
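/* For compressed MSAA surfaces the FMASK maps each API sample index to the
 * physical sample that actually holds the data; replace the sample
 * coordinate with the value looked up through the FMASK descriptor.
 */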
2684 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
2685 LLVMValueRef coord_x, LLVMValueRef coord_y,
2686 LLVMValueRef coord_z,
2687 LLVMValueRef sample_index,
2688 LLVMValueRef fmask_desc_ptr)
2689 {
2690 unsigned sample_chan = coord_z ? 3 : 2;
2691 LLVMValueRef addr[4] = {coord_x, coord_y, coord_z};
2692 addr[sample_chan] = sample_index;
2693
2694 ac_apply_fmask_to_sample(ctx, fmask_desc_ptr, addr, coord_z != NULL);
2695 return addr[sample_chan];
2696 }
2697
2698 static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
2699 {
2700 assert(instr->src[0].is_ssa);
2701 return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2702 }
2703
2704 static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
2705 const nir_intrinsic_instr *instr,
2706 LLVMValueRef dynamic_index,
2707 enum ac_descriptor_type desc_type,
2708 bool write)
2709 {
2710 nir_deref_instr *deref_instr =
2711 instr->src[0].ssa->parent_instr->type == nir_instr_type_deref ?
2712 nir_instr_as_deref(instr->src[0].ssa->parent_instr) : NULL;
2713
2714 return get_sampler_desc(ctx, deref_instr, desc_type, &instr->instr, dynamic_index, true, write);
2715 }
2716
2717 static void get_image_coords(struct ac_nir_context *ctx,
2718 const nir_intrinsic_instr *instr,
2719 LLVMValueRef dynamic_desc_index,
2720 struct ac_image_args *args,
2721 enum glsl_sampler_dim dim,
2722 bool is_array)
2723 {
2724 LLVMValueRef src0 = get_src(ctx, instr->src[1]);
2725 LLVMValueRef masks[] = {
2726 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
2727 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
2728 };
2729 LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
2730
2731 int count;
2732 ASSERTED bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
2733 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2734 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
2735 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2736 bool gfx9_1d = ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D;
2737 assert(!add_frag_pos && "Input attachments should be lowered by this point.");
2738 count = image_type_to_components_count(dim, is_array);
2739
2740 if (is_ms && (instr->intrinsic == nir_intrinsic_image_deref_load ||
2741 instr->intrinsic == nir_intrinsic_bindless_image_load)) {
2742 LLVMValueRef fmask_load_address[3];
2743
2744 fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2745 fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
2746 if (is_array)
2747 fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
2748 else
2749 fmask_load_address[2] = NULL;
2750
2751 sample_index = adjust_sample_index_using_fmask(&ctx->ac,
2752 fmask_load_address[0],
2753 fmask_load_address[1],
2754 fmask_load_address[2],
2755 sample_index,
2756 get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
2757 AC_DESC_FMASK, &instr->instr, dynamic_desc_index, true, false));
2758 }
2759 if (count == 1 && !gfx9_1d) {
2760 if (instr->src[1].ssa->num_components)
2761 args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2762 else
2763 args->coords[0] = src0;
2764 } else {
2765 int chan;
2766 if (is_ms)
2767 count--;
2768 for (chan = 0; chan < count; ++chan) {
2769 args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
2770 }
2771
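		/* GFX9 addresses 1D images as 2D, so insert a zero Y
		 * coordinate (the layer, if any, moves to Z).
		 */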
2772 if (gfx9_1d) {
2773 if (is_array) {
2774 args->coords[2] = args->coords[1];
2775 args->coords[1] = ctx->ac.i32_0;
2776 } else
2777 args->coords[1] = ctx->ac.i32_0;
2778 count++;
2779 }
2780 if (ctx->ac.chip_class == GFX9 &&
2781 dim == GLSL_SAMPLER_DIM_2D &&
2782 !is_array) {
2783 /* The hw can't bind a slice of a 3D image as a 2D
2784 * image, because it ignores BASE_ARRAY if the target
2785 * is 3D. The workaround is to read BASE_ARRAY and set
2786 * it as the 3rd address operand for all 2D images.
2787 */
2788 LLVMValueRef first_layer, const5, mask;
2789
2790 const5 = LLVMConstInt(ctx->ac.i32, 5, 0);
2791 mask = LLVMConstInt(ctx->ac.i32, S_008F24_BASE_ARRAY(~0), 0);
2792 first_layer = LLVMBuildExtractElement(ctx->ac.builder, args->resource, const5, "");
2793 first_layer = LLVMBuildAnd(ctx->ac.builder, first_layer, mask, "");
2794
2795 args->coords[count] = first_layer;
2796 count++;
2797 }
2798
2799
2800 if (is_ms) {
2801 args->coords[count] = sample_index;
2802 count++;
2803 }
2804 }
2805 }
2806
2807 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
2808 const nir_intrinsic_instr *instr,
2809 LLVMValueRef dynamic_index,
2810 bool write, bool atomic)
2811 {
2812 LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, write);
2813 if (ctx->ac.chip_class == GFX9 && LLVM_VERSION_MAJOR < 9 && atomic) {
2814 LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
2815 LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
2816 stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
2817
2818 LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
2819 LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
2820 elem_count, stride, "");
2821
2822 rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
2823 LLVMConstInt(ctx->ac.i32, 2, 0), "");
2824 }
2825 return rsrc;
2826 }
2827
2828 static LLVMValueRef enter_waterfall_image(struct ac_nir_context *ctx,
2829 struct waterfall_context *wctx,
2830 const nir_intrinsic_instr *instr)
2831 {
2832 nir_deref_instr *deref_instr = NULL;
2833
2834 if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref)
2835 deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2836
2837 LLVMValueRef value = get_sampler_desc_index(ctx, deref_instr, &instr->instr, true);
2838 return enter_waterfall(ctx, wctx, value, nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
2839 }
2840
2841 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
2842 const nir_intrinsic_instr *instr,
2843 bool bindless)
2844 {
2845 LLVMValueRef res;
2846
2847 enum glsl_sampler_dim dim;
2848 enum gl_access_qualifier access = nir_intrinsic_access(instr);
2849 bool is_array;
2850 if (bindless) {
2851 dim = nir_intrinsic_image_dim(instr);
2852 is_array = nir_intrinsic_image_array(instr);
2853 } else {
2854 const nir_deref_instr *image_deref = get_image_deref(instr);
2855 const struct glsl_type *type = image_deref->type;
2856 const nir_variable *var = nir_deref_instr_get_variable(image_deref);
2857 dim = glsl_get_sampler_dim(type);
2858 access |= var->data.access;
2859 is_array = glsl_sampler_type_is_array(type);
2860 }
2861
2862 struct waterfall_context wctx;
2863 LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2864
2865 struct ac_image_args args = {};
2866
2867 args.cache_policy = get_cache_policy(ctx, access, false, false);
2868
2869 if (dim == GLSL_SAMPLER_DIM_BUF) {
2870 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
2871 unsigned num_channels = util_last_bit(mask);
2872 LLVMValueRef rsrc, vindex;
2873
2874 rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, false, false);
2875 vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
2876 ctx->ac.i32_0, "");
2877
2878 assert(instr->dest.is_ssa);
2879 bool can_speculate = access & ACCESS_CAN_REORDER;
2880 res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
2881 ctx->ac.i32_0, num_channels,
2882 args.cache_policy,
2883 can_speculate,
2884 instr->dest.ssa.bit_size == 16);
2885 res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
2886
2887 res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
2888 res = ac_to_integer(&ctx->ac, res);
2889 } else {
2890 bool level_zero = nir_src_is_const(instr->src[3]) && nir_src_as_uint(instr->src[3]) == 0;
2891
2892 args.opcode = level_zero ? ac_image_load : ac_image_load_mip;
2893 args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
2894 get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2895 args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2896 if (!level_zero)
2897 args.lod = get_src(ctx, instr->src[3]);
2898 args.dmask = 15;
2899 args.attributes = AC_FUNC_ATTR_READONLY;
2900
2901 assert(instr->dest.is_ssa);
2902 args.d16 = instr->dest.ssa.bit_size == 16;
2903
2904 res = ac_build_image_opcode(&ctx->ac, &args);
2905 }
2906 return exit_waterfall(ctx, &wctx, res);
2907 }
2908
2909 static void visit_image_store(struct ac_nir_context *ctx,
2910 const nir_intrinsic_instr *instr,
2911 bool bindless)
2912 {
2913 if (ctx->ac.postponed_kill) {
2914 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
2915 ctx->ac.postponed_kill, "");
2916 ac_build_ifcc(&ctx->ac, cond, 7003);
2917 }
2918
2919 enum glsl_sampler_dim dim;
2920 enum gl_access_qualifier access = nir_intrinsic_access(instr);
2921 bool is_array;
2922
2923 if (bindless) {
2924 dim = nir_intrinsic_image_dim(instr);
2925 is_array = nir_intrinsic_image_array(instr);
2926 } else {
2927 const nir_deref_instr *image_deref = get_image_deref(instr);
2928 const struct glsl_type *type = image_deref->type;
2929 const nir_variable *var = nir_deref_instr_get_variable(image_deref);
2930 dim = glsl_get_sampler_dim(type);
2931 access |= var->data.access;
2932 is_array = glsl_sampler_type_is_array(type);
2933 }
2934
2935 struct waterfall_context wctx;
2936 LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
2937
2938 bool writeonly_memory = access & ACCESS_NON_READABLE;
2939 struct ac_image_args args = {};
2940
2941 args.cache_policy = get_cache_policy(ctx, access, true, writeonly_memory);
2942
2943 if (dim == GLSL_SAMPLER_DIM_BUF) {
2944 LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, false);
2945 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
2946 unsigned src_channels = ac_get_llvm_num_components(src);
2947 LLVMValueRef vindex;
2948
2949 if (src_channels == 3)
2950 src = ac_build_expand_to_vec4(&ctx->ac, src, 3);
2951
2952 vindex = LLVMBuildExtractElement(ctx->ac.builder,
2953 get_src(ctx, instr->src[1]),
2954 ctx->ac.i32_0, "");
2955
2956 ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
2957 ctx->ac.i32_0, args.cache_policy);
2958 } else {
2959 bool level_zero = nir_src_is_const(instr->src[4]) && nir_src_as_uint(instr->src[4]) == 0;
2960
2961 args.opcode = level_zero ? ac_image_store : ac_image_store_mip;
2962 args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
2963 args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
2964 get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
2965 args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
2966 if (!level_zero)
2967 args.lod = get_src(ctx, instr->src[4]);
2968 args.dmask = 15;
2969 args.d16 = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(args.data[0])) == 16;
2970
2971 ac_build_image_opcode(&ctx->ac, &args);
2972 }
2973
2974 exit_waterfall(ctx, &wctx, NULL);
2975 if (ctx->ac.postponed_kill)
2976 ac_build_endif(&ctx->ac, 7003);
2977 }
2978
2979 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
2980 const nir_intrinsic_instr *instr,
2981 bool bindless)
2982 {
2983 if (ctx->ac.postponed_kill) {
2984 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
2985 ctx->ac.postponed_kill, "");
2986 ac_build_ifcc(&ctx->ac, cond, 7004);
2987 }
2988
2989 LLVMValueRef params[7];
2990 int param_count = 0;
2991
2992 bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap ||
2993 instr->intrinsic == nir_intrinsic_bindless_image_atomic_comp_swap;
2994 const char *atomic_name;
2995 char intrinsic_name[64];
2996 enum ac_atomic_op atomic_subop;
2997 ASSERTED int length;
2998
2999 enum glsl_sampler_dim dim;
3000 bool is_array;
3001 if (bindless) {
3002 if (instr->intrinsic == nir_intrinsic_bindless_image_atomic_imin ||
3003 instr->intrinsic == nir_intrinsic_bindless_image_atomic_umin ||
3004 instr->intrinsic == nir_intrinsic_bindless_image_atomic_imax ||
3005 instr->intrinsic == nir_intrinsic_bindless_image_atomic_umax) {
3006 ASSERTED const GLenum format = nir_intrinsic_format(instr);
3007 assert(format == GL_R32UI || format == GL_R32I);
3008 }
3009 dim = nir_intrinsic_image_dim(instr);
3010 is_array = nir_intrinsic_image_array(instr);
3011 } else {
3012 const struct glsl_type *type = get_image_deref(instr)->type;
3013 dim = glsl_get_sampler_dim(type);
3014 is_array = glsl_sampler_type_is_array(type);
3015 }
3016
3017 struct waterfall_context wctx;
3018 LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
3019
3020 switch (instr->intrinsic) {
3021 case nir_intrinsic_bindless_image_atomic_add:
3022 case nir_intrinsic_image_deref_atomic_add:
3023 atomic_name = "add";
3024 atomic_subop = ac_atomic_add;
3025 break;
3026 case nir_intrinsic_bindless_image_atomic_imin:
3027 case nir_intrinsic_image_deref_atomic_imin:
3028 atomic_name = "smin";
3029 atomic_subop = ac_atomic_smin;
3030 break;
3031 case nir_intrinsic_bindless_image_atomic_umin:
3032 case nir_intrinsic_image_deref_atomic_umin:
3033 atomic_name = "umin";
3034 atomic_subop = ac_atomic_umin;
3035 break;
3036 case nir_intrinsic_bindless_image_atomic_imax:
3037 case nir_intrinsic_image_deref_atomic_imax:
3038 atomic_name = "smax";
3039 atomic_subop = ac_atomic_smax;
3040 break;
3041 case nir_intrinsic_bindless_image_atomic_umax:
3042 case nir_intrinsic_image_deref_atomic_umax:
3043 atomic_name = "umax";
3044 atomic_subop = ac_atomic_umax;
3045 break;
3046 case nir_intrinsic_bindless_image_atomic_and:
3047 case nir_intrinsic_image_deref_atomic_and:
3048 atomic_name = "and";
3049 atomic_subop = ac_atomic_and;
3050 break;
3051 case nir_intrinsic_bindless_image_atomic_or:
3052 case nir_intrinsic_image_deref_atomic_or:
3053 atomic_name = "or";
3054 atomic_subop = ac_atomic_or;
3055 break;
3056 case nir_intrinsic_bindless_image_atomic_xor:
3057 case nir_intrinsic_image_deref_atomic_xor:
3058 atomic_name = "xor";
3059 atomic_subop = ac_atomic_xor;
3060 break;
3061 case nir_intrinsic_bindless_image_atomic_exchange:
3062 case nir_intrinsic_image_deref_atomic_exchange:
3063 atomic_name = "swap";
3064 atomic_subop = ac_atomic_swap;
3065 break;
3066 case nir_intrinsic_bindless_image_atomic_comp_swap:
3067 case nir_intrinsic_image_deref_atomic_comp_swap:
3068 atomic_name = "cmpswap";
3069 atomic_subop = 0; /* not used */
3070 break;
3071 case nir_intrinsic_bindless_image_atomic_inc_wrap:
3072 case nir_intrinsic_image_deref_atomic_inc_wrap: {
3073 atomic_name = "inc";
3074 atomic_subop = ac_atomic_inc_wrap;
3075 break;
3076 }
3077 case nir_intrinsic_bindless_image_atomic_dec_wrap:
3078 case nir_intrinsic_image_deref_atomic_dec_wrap:
3079 atomic_name = "dec";
3080 atomic_subop = ac_atomic_dec_wrap;
3081 break;
3082 default:
3083 abort();
3084 }
3085
3086 if (cmpswap)
3087 params[param_count++] = get_src(ctx, instr->src[4]);
3088 params[param_count++] = get_src(ctx, instr->src[3]);
3089
3090 LLVMValueRef result;
3091 if (dim == GLSL_SAMPLER_DIM_BUF) {
3092 params[param_count++] = get_image_buffer_descriptor(ctx, instr, dynamic_index, true, true);
3093 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
3094 ctx->ac.i32_0, ""); /* vindex */
3095 params[param_count++] = ctx->ac.i32_0; /* voffset */
3096 if (LLVM_VERSION_MAJOR >= 9) {
3097 /* XXX: The new raw/struct atomic intrinsics are buggy
3098 * with LLVM 8, see r358579.
3099 */
3100 params[param_count++] = ctx->ac.i32_0; /* soffset */
3101 params[param_count++] = ctx->ac.i32_0; /* slc */
3102
3103 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3104 "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
3105 } else {
3106 params[param_count++] = ctx->ac.i1false; /* slc */
3107
3108 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3109 "llvm.amdgcn.buffer.atomic.%s", atomic_name);
3110 }
3111
3112 assert(length < sizeof(intrinsic_name));
3113 result = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
3114 params, param_count, 0);
3115 } else {
3116 struct ac_image_args args = {};
3117 args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
3118 args.atomic = atomic_subop;
3119 args.data[0] = params[0];
3120 if (cmpswap)
3121 args.data[1] = params[1];
3122 args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, true);
3123 get_image_coords(ctx, instr, dynamic_index, &args, dim, is_array);
3124 args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
3125
3126 result = ac_build_image_opcode(&ctx->ac, &args);
3127 }
3128
3129 result = exit_waterfall(ctx, &wctx, result);
3130 if (ctx->ac.postponed_kill)
3131 ac_build_endif(&ctx->ac, 7004);
3132 return result;
3133 }
3134
3135 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
3136 nir_intrinsic_instr *instr)
3137 {
3138 struct waterfall_context wctx;
3139 LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
3140 LLVMValueRef rsrc = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
3141
3142 LLVMValueRef ret = ac_build_image_get_sample_count(&ctx->ac, rsrc);
3143
3144 return exit_waterfall(ctx, &wctx, ret);
3145 }
3146
3147 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
3148 const nir_intrinsic_instr *instr,
3149 bool bindless)
3150 {
3151 LLVMValueRef res;
3152
3153 enum glsl_sampler_dim dim;
3154 bool is_array;
3155 if (bindless) {
3156 dim = nir_intrinsic_image_dim(instr);
3157 is_array = nir_intrinsic_image_array(instr);
3158 } else {
3159 const struct glsl_type *type = get_image_deref(instr)->type;
3160 dim = glsl_get_sampler_dim(type);
3161 is_array = glsl_sampler_type_is_array(type);
3162 }
3163
3164 struct waterfall_context wctx;
3165 LLVMValueRef dynamic_index = enter_waterfall_image(ctx, &wctx, instr);
3166
3167 if (dim == GLSL_SAMPLER_DIM_BUF) {
3168 res = get_buffer_size(ctx, get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_BUFFER, false), true);
3169 } else {
3170
3171 struct ac_image_args args = { 0 };
3172
3173 args.dim = ac_get_image_dim(ctx->ac.chip_class, dim, is_array);
3174 args.dmask = 0xf;
3175 args.resource = get_image_descriptor(ctx, instr, dynamic_index, AC_DESC_IMAGE, false);
3176 args.opcode = ac_image_get_resinfo;
3177 assert(nir_src_as_uint(instr->src[1]) == 0);
3178 args.lod = ctx->ac.i32_0;
3179 args.attributes = AC_FUNC_ATTR_READNONE;
3180
3181 res = ac_build_image_opcode(&ctx->ac, &args);
3182
3183 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
3184
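		/* Resinfo reports the layer count in layer-faces for cube
		 * arrays; divide by 6 to return the number of cubes.
		 */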
3185 if (dim == GLSL_SAMPLER_DIM_CUBE && is_array) {
3186 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
3187 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3188 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
3189 res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
3190 }
3191
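		/* GFX9 treats 1D arrays as 2D arrays, so the layer count
		 * comes back in the third channel; move it to the second.
		 */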
3192 if (ctx->ac.chip_class == GFX9 && dim == GLSL_SAMPLER_DIM_1D && is_array) {
3193 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3194 res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
3195 ctx->ac.i32_1, "");
3196 }
3197 }
3198 return exit_waterfall(ctx, &wctx, res);
3199 }
3200
3201 static void emit_membar(struct ac_llvm_context *ac,
3202 const nir_intrinsic_instr *instr)
3203 {
3204 unsigned wait_flags = 0;
3205
3206 switch (instr->intrinsic) {
3207 case nir_intrinsic_memory_barrier:
3208 case nir_intrinsic_group_memory_barrier:
3209 wait_flags = AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE;
3210 break;
3211 case nir_intrinsic_memory_barrier_buffer:
3212 case nir_intrinsic_memory_barrier_image:
3213 wait_flags = AC_WAIT_VLOAD | AC_WAIT_VSTORE;
3214 break;
3215 case nir_intrinsic_memory_barrier_shared:
3216 wait_flags = AC_WAIT_LGKM;
3217 break;
3218 default:
3219 break;
3220 }
3221
3222 ac_build_waitcnt(ac, wait_flags);
3223 }
3224
3225 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
3226 {
3227 /* GFX6 only (thanks to a hw bug workaround):
3228 * The real barrier instruction isn’t needed, because an entire patch
3229 * always fits into a single wave.
3230 */
3231 if (ac->chip_class == GFX6 && stage == MESA_SHADER_TESS_CTRL) {
3232 ac_build_waitcnt(ac, AC_WAIT_LGKM | AC_WAIT_VLOAD | AC_WAIT_VSTORE);
3233 return;
3234 }
3235 ac_build_s_barrier(ac);
3236 }
3237
3238 static void emit_discard(struct ac_nir_context *ctx,
3239 const nir_intrinsic_instr *instr)
3240 {
3241 LLVMValueRef cond;
3242
3243 if (instr->intrinsic == nir_intrinsic_discard_if) {
3244 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
3245 get_src(ctx, instr->src[0]),
3246 ctx->ac.i32_0, "");
3247 } else {
3248 assert(instr->intrinsic == nir_intrinsic_discard);
3249 cond = ctx->ac.i1false;
3250 }
3251
3252 ac_build_kill_if_false(&ctx->ac, cond);
3253 }
3254
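/* Demote differs from discard: the kill is gated by a WQM vote so helper
 * invocations needed for derivatives survive, and the per-lane demotion is
 * recorded in postponed_kill so later side effects (e.g. atomics) can be
 * skipped for demoted lanes.
 */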
3255 static void emit_demote(struct ac_nir_context *ctx,
3256 const nir_intrinsic_instr *instr)
3257 {
3258 LLVMValueRef cond;
3259
3260 if (instr->intrinsic == nir_intrinsic_demote_if) {
3261 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
3262 get_src(ctx, instr->src[0]),
3263 ctx->ac.i32_0, "");
3264 } else {
3265 assert(instr->intrinsic == nir_intrinsic_demote);
3266 cond = ctx->ac.i1false;
3267 }
3268
3269 /* Kill immediately while maintaining WQM. */
3270 ac_build_kill_if_false(&ctx->ac, ac_build_wqm_vote(&ctx->ac, cond));
3271
3272 LLVMValueRef mask = LLVMBuildLoad(ctx->ac.builder, ctx->ac.postponed_kill, "");
3273 mask = LLVMBuildAnd(ctx->ac.builder, mask, cond, "");
3274 LLVMBuildStore(ctx->ac.builder, mask, ctx->ac.postponed_kill);
3275 return;
3276 }
3277
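/* Flat local invocation index = wave id within the workgroup * wave_size +
 * lane id. tg_size carries the wave id in bits [6:11], so masking with 0xfc0
 * yields wave_id * 64; for wave32 it is shifted right once to get
 * wave_id * 32 before the lane id is added.
 */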
3278 static LLVMValueRef
3279 visit_load_local_invocation_index(struct ac_nir_context *ctx)
3280 {
3281 LLVMValueRef result;
3282 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
3283 result = LLVMBuildAnd(ctx->ac.builder,
3284 ac_get_arg(&ctx->ac, ctx->args->tg_size),
3285 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3286
3287 if (ctx->ac.wave_size == 32)
3288 result = LLVMBuildLShr(ctx->ac.builder, result,
3289 LLVMConstInt(ctx->ac.i32, 1, false), "");
3290
3291 return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
3292 }
3293
3294 static LLVMValueRef
3295 visit_load_subgroup_id(struct ac_nir_context *ctx)
3296 {
3297 if (ctx->stage == MESA_SHADER_COMPUTE) {
3298 LLVMValueRef result;
3299 result = LLVMBuildAnd(ctx->ac.builder,
3300 ac_get_arg(&ctx->ac, ctx->args->tg_size),
3301 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3302 return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
3303 } else {
3304 return LLVMConstInt(ctx->ac.i32, 0, false);
3305 }
3306 }
3307
3308 static LLVMValueRef
3309 visit_load_num_subgroups(struct ac_nir_context *ctx)
3310 {
3311 if (ctx->stage == MESA_SHADER_COMPUTE) {
3312 return LLVMBuildAnd(ctx->ac.builder,
3313 ac_get_arg(&ctx->ac, ctx->args->tg_size),
3314 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
3315 } else {
3316 return LLVMConstInt(ctx->ac.i32, 1, false);
3317 }
3318 }
3319
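/* Index of the first active lane: ballot the active lanes and count the
 * trailing zeros of the resulting wave mask.
 */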
3320 static LLVMValueRef
3321 visit_first_invocation(struct ac_nir_context *ctx)
3322 {
3323 LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
3324 const char *intr = ctx->ac.wave_size == 32 ? "llvm.cttz.i32" : "llvm.cttz.i64";
3325
3326 /* The second argument is whether cttz(0) should be defined, but we do not care. */
3327 LLVMValueRef args[] = {active_set, ctx->ac.i1false};
3328 LLVMValueRef result = ac_build_intrinsic(&ctx->ac, intr,
3329 ctx->ac.iN_wavemask, args, 2,
3330 AC_FUNC_ATTR_NOUNWIND |
3331 AC_FUNC_ATTR_READNONE);
3332
3333 return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
3334 }
3335
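/* LDS load: fetch each component through a per-channel GEP, then gather the
 * scalars back into a vector of the destination type.
 */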
3336 static LLVMValueRef
3337 visit_load_shared(struct ac_nir_context *ctx,
3338 const nir_intrinsic_instr *instr)
3339 {
3340 LLVMValueRef values[4], derived_ptr, index, ret;
3341
3342 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
3343 instr->dest.ssa.bit_size);
3344
3345 for (int chan = 0; chan < instr->num_components; chan++) {
3346 index = LLVMConstInt(ctx->ac.i32, chan, 0);
3347 derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
3348 values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
3349 }
3350
3351 ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
3352 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
3353 }
3354
3355 static void
3356 visit_store_shared(struct ac_nir_context *ctx,
3357 const nir_intrinsic_instr *instr)
3358 {
3359 LLVMValueRef derived_ptr, data, index;
3360 LLVMBuilderRef builder = ctx->ac.builder;
3361
3362 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1],
3363 instr->src[0].ssa->bit_size);
3364 LLVMValueRef src = get_src(ctx, instr->src[0]);
3365
3366 int writemask = nir_intrinsic_write_mask(instr);
3367 for (int chan = 0; chan < 4; chan++) {
3368 if (!(writemask & (1 << chan))) {
3369 continue;
3370 }
3371 data = ac_llvm_extract_elem(&ctx->ac, src, chan);
3372 index = LLVMConstInt(ctx->ac.i32, chan, 0);
3373 derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
3374 LLVMBuildStore(builder, data, derived_ptr);
3375 }
3376 }
3377
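/* Shared/global variable atomics. The operation is skipped for lanes that
 * were demoted (postponed_kill). comp_swap maps to cmpxchg, everything else
 * to atomicrmw; global derefs use the relaxed "singlethread" sync scope,
 * shared memory uses the "workgroup" scope.
 */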
3378 static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
3379 const nir_intrinsic_instr *instr,
3380 LLVMValueRef ptr, int src_idx)
3381 {
3382 if (ctx->ac.postponed_kill) {
3383 LLVMValueRef cond = LLVMBuildLoad(ctx->ac.builder,
3384 ctx->ac.postponed_kill, "");
3385 ac_build_ifcc(&ctx->ac, cond, 7005);
3386 }
3387
3388 LLVMValueRef result;
3389 LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
3390
3391 const char *sync_scope = LLVM_VERSION_MAJOR >= 9 ? "workgroup-one-as" : "workgroup";
3392
3393 if (instr->src[0].ssa->parent_instr->type == nir_instr_type_deref) {
3394 nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
3395 if (deref->mode == nir_var_mem_global) {
3396 /* use "singlethread" sync scope to implement relaxed ordering */
3397 sync_scope = LLVM_VERSION_MAJOR >= 9 ? "singlethread-one-as" : "singlethread";
3398
3399 LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(src), LLVMGetPointerAddressSpace(LLVMTypeOf(ptr)));
3400 ptr = LLVMBuildBitCast(ctx->ac.builder, ptr, ptr_type, "");
3401 }
3402 }
3403
3404 if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
3405 instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
3406 LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
3407 result = ac_build_atomic_cmp_xchg(&ctx->ac, ptr, src, src1, sync_scope);
3408 result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
3409 } else {
3410 LLVMAtomicRMWBinOp op;
3411 switch (instr->intrinsic) {
3412 case nir_intrinsic_shared_atomic_add:
3413 case nir_intrinsic_deref_atomic_add:
3414 op = LLVMAtomicRMWBinOpAdd;
3415 break;
3416 case nir_intrinsic_shared_atomic_umin:
3417 case nir_intrinsic_deref_atomic_umin:
3418 op = LLVMAtomicRMWBinOpUMin;
3419 break;
3420 case nir_intrinsic_shared_atomic_umax:
3421 case nir_intrinsic_deref_atomic_umax:
3422 op = LLVMAtomicRMWBinOpUMax;
3423 break;
3424 case nir_intrinsic_shared_atomic_imin:
3425 case nir_intrinsic_deref_atomic_imin:
3426 op = LLVMAtomicRMWBinOpMin;
3427 break;
3428 case nir_intrinsic_shared_atomic_imax:
3429 case nir_intrinsic_deref_atomic_imax:
3430 op = LLVMAtomicRMWBinOpMax;
3431 break;
3432 case nir_intrinsic_shared_atomic_and:
3433 case nir_intrinsic_deref_atomic_and:
3434 op = LLVMAtomicRMWBinOpAnd;
3435 break;
3436 case nir_intrinsic_shared_atomic_or:
3437 case nir_intrinsic_deref_atomic_or:
3438 op = LLVMAtomicRMWBinOpOr;
3439 break;
3440 case nir_intrinsic_shared_atomic_xor:
3441 case nir_intrinsic_deref_atomic_xor:
3442 op = LLVMAtomicRMWBinOpXor;
3443 break;
3444 case nir_intrinsic_shared_atomic_exchange:
3445 case nir_intrinsic_deref_atomic_exchange:
3446 op = LLVMAtomicRMWBinOpXchg;
3447 break;
3448 #if LLVM_VERSION_MAJOR >= 10
3449 case nir_intrinsic_shared_atomic_fadd:
3450 case nir_intrinsic_deref_atomic_fadd:
3451 op = LLVMAtomicRMWBinOpFAdd;
3452 break;
3453 #endif
3454 default:
3455 return NULL;
3456 }
3457
3458 LLVMValueRef val;
3459
3460 if (instr->intrinsic == nir_intrinsic_shared_atomic_fadd ||
3461 instr->intrinsic == nir_intrinsic_deref_atomic_fadd) {
3462 val = ac_to_float(&ctx->ac, src);
3463 } else {
3464 val = ac_to_integer(&ctx->ac, src);
3465 }
3466
3467 result = ac_build_atomic_rmw(&ctx->ac, op, ptr, val, sync_scope);
3468 }
3469
3470 if (ctx->ac.postponed_kill)
3471 ac_build_endif(&ctx->ac, 7005);
3472 return result;
3473 }
3474
3475 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
3476 {
3477 LLVMValueRef values[2];
3478 LLVMValueRef pos[2];
3479
3480 pos[0] = ac_to_float(&ctx->ac,
3481 ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]));
3482 pos[1] = ac_to_float(&ctx->ac,
3483 ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]));
3484
3485 values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
3486 values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
3487 return ac_build_gather_values(&ctx->ac, values, 2);
3488 }
3489
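/* Select the hardware barycentric argument for a given interpolation mode and
 * location: perspective-correct for SMOOTH/NONE, linear for NOPERSPECTIVE,
 * and NULL for FLAT where no interpolation is needed.
 */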
3490 static LLVMValueRef lookup_interp_param(struct ac_nir_context *ctx,
3491 enum glsl_interp_mode interp, unsigned location)
3492 {
3493 switch (interp) {
3494 case INTERP_MODE_FLAT:
3495 default:
3496 return NULL;
3497 case INTERP_MODE_SMOOTH:
3498 case INTERP_MODE_NONE:
3499 if (location == INTERP_CENTER)
3500 return ac_get_arg(&ctx->ac, ctx->args->persp_center);
3501 else if (location == INTERP_CENTROID)
3502 return ctx->abi->persp_centroid;
3503 else if (location == INTERP_SAMPLE)
3504 return ac_get_arg(&ctx->ac, ctx->args->persp_sample);
3505 break;
3506 case INTERP_MODE_NOPERSPECTIVE:
3507 if (location == INTERP_CENTER)
3508 return ac_get_arg(&ctx->ac, ctx->args->linear_center);
3509 else if (location == INTERP_CENTROID)
3510 return ctx->abi->linear_centroid;
3511 else if (location == INTERP_SAMPLE)
3512 return ac_get_arg(&ctx->ac, ctx->args->linear_sample);
3513 break;
3514 }
3515 return NULL;
3516 }
3517
3518 static LLVMValueRef barycentric_center(struct ac_nir_context *ctx,
3519 unsigned mode)
3520 {
3521 LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3522 return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3523 }
3524
3525 static LLVMValueRef barycentric_offset(struct ac_nir_context *ctx,
3526 unsigned mode,
3527 LLVMValueRef offset)
3528 {
3529 LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTER);
3530 LLVMValueRef src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_0, ""));
3531 LLVMValueRef src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, offset, ctx->ac.i32_1, ""));
3532
3533 LLVMValueRef ij_out[2];
3534 LLVMValueRef ddxy_out = ac_build_ddxy_interp(&ctx->ac, interp_param);
3535
3536 /*
3537 * take the I then J parameters, and the DDX/Y for it, and
3538 * calculate the IJ inputs for the interpolator.
3539 * temp1 = ddx * offset/sample.x + I;
3540 * interp_param.I = ddy * offset/sample.y + temp1;
3541 * temp1 = ddx * offset/sample.x + J;
3542 * interp_param.J = ddy * offset/sample.y + temp1;
3543 */
3544 for (unsigned i = 0; i < 2; i++) {
3545 LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
3546 LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
3547 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
3548 ddxy_out, ix_ll, "");
3549 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
3550 ddxy_out, iy_ll, "");
3551 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
3552 interp_param, ix_ll, "");
3553 LLVMValueRef temp1, temp2;
3554
3555 interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
3556 ctx->ac.f32, "");
3557
3558 temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
3559 temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);
3560
3561 ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
3562 temp2, ctx->ac.i32, "");
3563 }
3564 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
3565 return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3566 }
3567
3568 static LLVMValueRef barycentric_centroid(struct ac_nir_context *ctx,
3569 unsigned mode)
3570 {
3571 LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_CENTROID);
3572 return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3573 }
3574
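/* Interpolate at an explicit sample: look up the sample position, re-center
 * it around the pixel center (subtract 0.5) and reuse the at-offset path.
 * Callers may force plain center interpolation via
 * interp_at_sample_force_center.
 */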
3575 static LLVMValueRef barycentric_at_sample(struct ac_nir_context *ctx,
3576 unsigned mode,
3577 LLVMValueRef sample_id)
3578 {
3579 if (ctx->abi->interp_at_sample_force_center)
3580 return barycentric_center(ctx, mode);
3581
3582 LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
3583
3584 /* fetch the sample position for this sample ID */
3585 LLVMValueRef sample_pos = ctx->abi->load_sample_position(ctx->abi, sample_id);
3586
3587 LLVMValueRef src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_0, "");
3588 src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
3589 LLVMValueRef src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_pos, ctx->ac.i32_1, "");
3590 src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
3591 LLVMValueRef coords[] = { src_c0, src_c1 };
3592 LLVMValueRef offset = ac_build_gather_values(&ctx->ac, coords, 2);
3593
3594 return barycentric_offset(ctx, mode, offset);
3595 }
3596
3597
3598 static LLVMValueRef barycentric_sample(struct ac_nir_context *ctx,
3599 unsigned mode)
3600 {
3601 LLVMValueRef interp_param = lookup_interp_param(ctx, mode, INTERP_SAMPLE);
3602 return LLVMBuildBitCast(ctx->ac.builder, interp_param, ctx->ac.v2i32, "");
3603 }
3604
3605 static LLVMValueRef barycentric_model(struct ac_nir_context *ctx)
3606 {
3607 return LLVMBuildBitCast(ctx->ac.builder,
3608 ac_get_arg(&ctx->ac, ctx->args->pull_model),
3609 ctx->ac.v3i32, "");
3610 }
3611
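/* Emit the interpolation sequence for one input: split the barycentrics into
 * i/j, kill lanes whose coefficients are NaN/Inf (workaround noted below),
 * then interpolate each requested channel at 16 or 32 bits.
 */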
3612 static LLVMValueRef load_interpolated_input(struct ac_nir_context *ctx,
3613 LLVMValueRef interp_param,
3614 unsigned index, unsigned comp_start,
3615 unsigned num_components,
3616 unsigned bitsize)
3617 {
3618 LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, index, false);
3619 LLVMValueRef interp_param_f;
3620
3621 interp_param_f = LLVMBuildBitCast(ctx->ac.builder,
3622 interp_param, ctx->ac.v2f32, "");
3623 LLVMValueRef i = LLVMBuildExtractElement(
3624 ctx->ac.builder, interp_param_f, ctx->ac.i32_0, "");
3625 LLVMValueRef j = LLVMBuildExtractElement(
3626 ctx->ac.builder, interp_param_f, ctx->ac.i32_1, "");
3627
3628 /* Workaround for issue 2647: kill threads with infinite interpolation coeffs */
3629 if (ctx->verified_interp &&
3630 !_mesa_hash_table_search(ctx->verified_interp, interp_param)) {
3631 LLVMValueRef args[2];
3632 args[0] = i;
3633 args[1] = LLVMConstInt(ctx->ac.i32, S_NAN | Q_NAN | N_INFINITY | P_INFINITY, false);
3634 LLVMValueRef cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f32", ctx->ac.i1,
3635 args, 2, AC_FUNC_ATTR_READNONE);
3636 ac_build_kill_if_false(&ctx->ac, LLVMBuildNot(ctx->ac.builder, cond, ""));
3637 _mesa_hash_table_insert(ctx->verified_interp, interp_param, interp_param);
3638 }
3639
3640 LLVMValueRef values[4];
3641 assert(bitsize == 16 || bitsize == 32);
3642 for (unsigned comp = 0; comp < num_components; comp++) {
3643 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, comp_start + comp, false);
3644 if (bitsize == 16) {
3645 values[comp] = ac_build_fs_interp_f16(&ctx->ac, llvm_chan, attr_number,
3646 ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
3647 } else {
3648 values[comp] = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
3649 ac_get_arg(&ctx->ac, ctx->args->prim_mask), i, j);
3650 }
3651 }
3652
3653 return ac_to_integer(&ctx->ac, ac_build_gather_values(&ctx->ac, values, num_components));
3654 }
3655
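/* Generic input/output load. Tessellation stages go through the
 * load_tess_varyings ABI hook, GS inputs through load_inputs, fb-fetch
 * outputs through emit_fbfetch, and other non-fragment stages read the
 * temporaries that hold their I/O. Fragment inputs reaching the end of this
 * function are flat and read a per-vertex value with fs_interp_mov (P0 by
 * default, or the vertex selected by load_input_vertex).
 */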
3656 static LLVMValueRef visit_load(struct ac_nir_context *ctx,
3657 nir_intrinsic_instr *instr, bool is_output)
3658 {
3659 LLVMValueRef values[8];
3660 LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
3661 LLVMTypeRef component_type;
3662 unsigned base = nir_intrinsic_base(instr);
3663 unsigned component = nir_intrinsic_component(instr);
3664 unsigned count = instr->dest.ssa.num_components *
3665 (instr->dest.ssa.bit_size == 64 ? 2 : 1);
3666 nir_src *vertex_index_src = nir_get_io_vertex_index_src(instr);
3667 LLVMValueRef vertex_index =
3668 vertex_index_src ? get_src(ctx, *vertex_index_src) : NULL;
3669 nir_src offset = *nir_get_io_offset_src(instr);
3670 LLVMValueRef indir_index = NULL;
3671
3672 if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
3673 component_type = LLVMGetElementType(dest_type);
3674 else
3675 component_type = dest_type;
3676
3677 if (nir_src_is_const(offset))
3678 assert(nir_src_as_uint(offset) == 0);
3679 else
3680 indir_index = get_src(ctx, offset);
3681
3682 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
3683 (ctx->stage == MESA_SHADER_TESS_EVAL && !is_output)) {
3684 LLVMValueRef result =
3685 ctx->abi->load_tess_varyings(ctx->abi, component_type,
3686 vertex_index, indir_index,
3687 0, 0, base * 4,
3688 component,
3689 instr->num_components,
3690 false, false, !is_output);
3691 if (instr->dest.ssa.bit_size == 16) {
3692 result = ac_to_integer(&ctx->ac, result);
3693 result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
3694 }
3695 return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3696 }
3697
3698 /* No indirect indexing is allowed after this point. */
3699 assert(!indir_index);
3700
3701 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3702 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
3703 assert(nir_src_is_const(*vertex_index_src));
3704
3705 return ctx->abi->load_inputs(ctx->abi, 0, base * 4, component,
3706 instr->num_components,
3707 nir_src_as_uint(*vertex_index_src),
3708 0, type);
3709 }
3710
3711 if (ctx->stage == MESA_SHADER_FRAGMENT && is_output &&
3712 nir_intrinsic_io_semantics(instr).fb_fetch_output)
3713 return ctx->abi->emit_fbfetch(ctx->abi);
3714
3715 /* Other non-fragment cases have inputs and outputs in temporaries. */
3716 if (ctx->stage != MESA_SHADER_FRAGMENT) {
3717 for (unsigned chan = component; chan < count + component; chan++) {
3718 if (is_output) {
3719 values[chan] = LLVMBuildLoad(ctx->ac.builder,
3720 ctx->abi->outputs[base * 4 + chan], "");
3721 } else {
3722 values[chan] = ctx->abi->inputs[base * 4 + chan];
3723 if (!values[chan])
3724 values[chan] = LLVMGetUndef(ctx->ac.i32);
3725 }
3726 }
3727 LLVMValueRef result = ac_build_varying_gather_values(&ctx->ac, values, count, component);
3728 return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3729 }
3730
3731 /* Fragment shader inputs. */
3732 unsigned vertex_id = 2; /* P0 */
3733
3734 if (instr->intrinsic == nir_intrinsic_load_input_vertex) {
3735 nir_const_value *src0 = nir_src_as_const_value(instr->src[0]);
3736
3737 switch (src0[0].i32) {
3738 case 0:
3739 vertex_id = 2;
3740 break;
3741 case 1:
3742 vertex_id = 0;
3743 break;
3744 case 2:
3745 vertex_id = 1;
3746 break;
3747 default:
3748 unreachable("Invalid vertex index");
3749 }
3750 }
3751
3752 LLVMValueRef attr_number = LLVMConstInt(ctx->ac.i32, base, false);
3753
3754 for (unsigned chan = 0; chan < count; chan++) {
3755 if (component + chan > 4)
3756 attr_number = LLVMConstInt(ctx->ac.i32, base + 1, false);
3757 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, (component + chan) % 4, false);
3758 values[chan] = ac_build_fs_interp_mov(&ctx->ac,
3759 LLVMConstInt(ctx->ac.i32, vertex_id, false),
3760 llvm_chan,
3761 attr_number,
3762 ac_get_arg(&ctx->ac, ctx->args->prim_mask));
3763 values[chan] = LLVMBuildBitCast(ctx->ac.builder, values[chan], ctx->ac.i32, "");
3764 values[chan] = LLVMBuildTruncOrBitCast(ctx->ac.builder, values[chan],
3765 instr->dest.ssa.bit_size == 16 ? ctx->ac.i16
3766 : ctx->ac.i32, "");
3767 }
3768
3769 LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, count);
3770 return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
3771 }
3772
3773 static void visit_intrinsic(struct ac_nir_context *ctx,
3774 nir_intrinsic_instr *instr)
3775 {
3776 LLVMValueRef result = NULL;
3777
3778 switch (instr->intrinsic) {
3779 case nir_intrinsic_ballot:
3780 result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
3781 if (ctx->ac.ballot_mask_bits > ctx->ac.wave_size)
3782 result = LLVMBuildZExt(ctx->ac.builder, result, ctx->ac.iN_ballotmask, "");
3783 break;
3784 case nir_intrinsic_read_invocation:
3785 result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
3786 get_src(ctx, instr->src[1]));
3787 break;
3788 case nir_intrinsic_read_first_invocation:
3789 result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
3790 break;
3791 case nir_intrinsic_load_subgroup_invocation:
3792 result = ac_get_thread_id(&ctx->ac);
3793 break;
3794 case nir_intrinsic_load_work_group_id: {
3795 LLVMValueRef values[3];
3796
3797 for (int i = 0; i < 3; i++) {
3798 values[i] = ctx->args->workgroup_ids[i].used ?
3799 ac_get_arg(&ctx->ac, ctx->args->workgroup_ids[i]) : ctx->ac.i32_0;
3800 }
3801
3802 result = ac_build_gather_values(&ctx->ac, values, 3);
3803 break;
3804 }
3805 case nir_intrinsic_load_base_vertex:
3806 case nir_intrinsic_load_first_vertex:
3807 result = ctx->abi->load_base_vertex(ctx->abi);
3808 break;
3809 case nir_intrinsic_load_local_group_size:
3810 result = ctx->abi->load_local_group_size(ctx->abi);
3811 break;
3812 case nir_intrinsic_load_vertex_id:
3813 result = LLVMBuildAdd(ctx->ac.builder,
3814 ac_get_arg(&ctx->ac, ctx->args->vertex_id),
3815 ac_get_arg(&ctx->ac, ctx->args->base_vertex), "");
3816 break;
3817 case nir_intrinsic_load_vertex_id_zero_base: {
3818 result = ctx->abi->vertex_id;
3819 break;
3820 }
3821 case nir_intrinsic_load_local_invocation_id: {
3822 result = ac_get_arg(&ctx->ac, ctx->args->local_invocation_ids);
3823 break;
3824 }
3825 case nir_intrinsic_load_base_instance:
3826 result = ac_get_arg(&ctx->ac, ctx->args->start_instance);
3827 break;
3828 case nir_intrinsic_load_draw_id:
3829 result = ac_get_arg(&ctx->ac, ctx->args->draw_id);
3830 break;
3831 case nir_intrinsic_load_view_index:
3832 result = ac_get_arg(&ctx->ac, ctx->args->view_index);
3833 break;
3834 case nir_intrinsic_load_invocation_id:
3835 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3836 result = ac_unpack_param(&ctx->ac,
3837 ac_get_arg(&ctx->ac, ctx->args->tcs_rel_ids),
3838 8, 5);
3839 } else {
3840 if (ctx->ac.chip_class >= GFX10) {
3841 result = LLVMBuildAnd(ctx->ac.builder,
3842 ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id),
3843 LLVMConstInt(ctx->ac.i32, 127, 0), "");
3844 } else {
3845 result = ac_get_arg(&ctx->ac, ctx->args->gs_invocation_id);
3846 }
3847 }
3848 break;
3849 case nir_intrinsic_load_primitive_id:
3850 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3851 result = ac_get_arg(&ctx->ac, ctx->args->gs_prim_id);
3852 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3853 result = ac_get_arg(&ctx->ac, ctx->args->tcs_patch_id);
3854 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
3855 result = ac_get_arg(&ctx->ac, ctx->args->tes_patch_id);
3856 } else
3857 fprintf(stderr, "Unknown primitive id intrinsic: %d\n", ctx->stage);
3858 break;
3859 case nir_intrinsic_load_sample_id:
3860 result = ac_unpack_param(&ctx->ac,
3861 ac_get_arg(&ctx->ac, ctx->args->ancillary),
3862 8, 4);
3863 break;
3864 case nir_intrinsic_load_sample_pos:
3865 result = load_sample_pos(ctx);
3866 break;
3867 case nir_intrinsic_load_sample_mask_in:
3868 result = ctx->abi->load_sample_mask_in(ctx->abi);
3869 break;
3870 case nir_intrinsic_load_frag_coord: {
3871 LLVMValueRef values[4] = {
3872 ac_get_arg(&ctx->ac, ctx->args->frag_pos[0]),
3873 ac_get_arg(&ctx->ac, ctx->args->frag_pos[1]),
3874 ac_get_arg(&ctx->ac, ctx->args->frag_pos[2]),
3875 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
3876 ac_get_arg(&ctx->ac, ctx->args->frag_pos[3]))
3877 };
3878 result = ac_to_integer(&ctx->ac,
3879 ac_build_gather_values(&ctx->ac, values, 4));
3880 break;
3881 }
3882 case nir_intrinsic_load_layer_id:
3883 result = ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
3884 break;
3885 case nir_intrinsic_load_front_face:
3886 result = ac_get_arg(&ctx->ac, ctx->args->front_face);
3887 break;
3888 case nir_intrinsic_load_helper_invocation:
3889 result = ac_build_load_helper_invocation(&ctx->ac);
3890 break;
3891 case nir_intrinsic_is_helper_invocation:
3892 result = ac_build_is_helper_invocation(&ctx->ac);
3893 break;
3894 case nir_intrinsic_load_color0:
3895 result = ctx->abi->color0;
3896 break;
3897 case nir_intrinsic_load_color1:
3898 result = ctx->abi->color1;
3899 break;
3900 case nir_intrinsic_load_user_data_amd:
3901 assert(LLVMTypeOf(ctx->abi->user_data) == ctx->ac.v4i32);
3902 result = ctx->abi->user_data;
3903 break;
3904 case nir_intrinsic_load_instance_id:
3905 result = ctx->abi->instance_id;
3906 break;
3907 case nir_intrinsic_load_num_work_groups:
3908 result = ac_get_arg(&ctx->ac, ctx->args->num_work_groups);
3909 break;
3910 case nir_intrinsic_load_local_invocation_index:
3911 result = visit_load_local_invocation_index(ctx);
3912 break;
3913 case nir_intrinsic_load_subgroup_id:
3914 result = visit_load_subgroup_id(ctx);
3915 break;
3916 case nir_intrinsic_load_num_subgroups:
3917 result = visit_load_num_subgroups(ctx);
3918 break;
3919 case nir_intrinsic_first_invocation:
3920 result = visit_first_invocation(ctx);
3921 break;
3922 case nir_intrinsic_load_push_constant:
3923 result = visit_load_push_constant(ctx, instr);
3924 break;
3925 case nir_intrinsic_vulkan_resource_index: {
3926 LLVMValueRef index = get_src(ctx, instr->src[0]);
3927 unsigned desc_set = nir_intrinsic_desc_set(instr);
3928 unsigned binding = nir_intrinsic_binding(instr);
3929
3930 result = ctx->abi->load_resource(ctx->abi, index, desc_set,
3931 binding);
3932 break;
3933 }
3934 case nir_intrinsic_vulkan_resource_reindex:
3935 result = visit_vulkan_resource_reindex(ctx, instr);
3936 break;
3937 case nir_intrinsic_store_ssbo:
3938 visit_store_ssbo(ctx, instr);
3939 break;
3940 case nir_intrinsic_load_ssbo:
3941 result = visit_load_buffer(ctx, instr);
3942 break;
3943 case nir_intrinsic_ssbo_atomic_add:
3944 case nir_intrinsic_ssbo_atomic_imin:
3945 case nir_intrinsic_ssbo_atomic_umin:
3946 case nir_intrinsic_ssbo_atomic_imax:
3947 case nir_intrinsic_ssbo_atomic_umax:
3948 case nir_intrinsic_ssbo_atomic_and:
3949 case nir_intrinsic_ssbo_atomic_or:
3950 case nir_intrinsic_ssbo_atomic_xor:
3951 case nir_intrinsic_ssbo_atomic_exchange:
3952 case nir_intrinsic_ssbo_atomic_comp_swap:
3953 result = visit_atomic_ssbo(ctx, instr);
3954 break;
3955 case nir_intrinsic_load_ubo:
3956 result = visit_load_ubo_buffer(ctx, instr);
3957 break;
3958 case nir_intrinsic_get_buffer_size:
3959 result = visit_get_buffer_size(ctx, instr);
3960 break;
3961 case nir_intrinsic_load_deref:
3962 result = visit_load_var(ctx, instr);
3963 break;
3964 case nir_intrinsic_store_deref:
3965 visit_store_var(ctx, instr);
3966 break;
3967 case nir_intrinsic_load_input:
3968 case nir_intrinsic_load_input_vertex:
3969 case nir_intrinsic_load_per_vertex_input:
3970 result = visit_load(ctx, instr, false);
3971 break;
3972 case nir_intrinsic_load_output:
3973 case nir_intrinsic_load_per_vertex_output:
3974 result = visit_load(ctx, instr, true);
3975 break;
3976 case nir_intrinsic_store_output:
3977 case nir_intrinsic_store_per_vertex_output:
3978 visit_store_output(ctx, instr);
3979 break;
3980 case nir_intrinsic_load_shared:
3981 result = visit_load_shared(ctx, instr);
3982 break;
3983 case nir_intrinsic_store_shared:
3984 visit_store_shared(ctx, instr);
3985 break;
3986 case nir_intrinsic_bindless_image_samples:
3987 case nir_intrinsic_image_deref_samples:
3988 result = visit_image_samples(ctx, instr);
3989 break;
3990 case nir_intrinsic_bindless_image_load:
3991 result = visit_image_load(ctx, instr, true);
3992 break;
3993 case nir_intrinsic_image_deref_load:
3994 result = visit_image_load(ctx, instr, false);
3995 break;
3996 case nir_intrinsic_bindless_image_store:
3997 visit_image_store(ctx, instr, true);
3998 break;
3999 case nir_intrinsic_image_deref_store:
4000 visit_image_store(ctx, instr, false);
4001 break;
4002 case nir_intrinsic_bindless_image_atomic_add:
4003 case nir_intrinsic_bindless_image_atomic_imin:
4004 case nir_intrinsic_bindless_image_atomic_umin:
4005 case nir_intrinsic_bindless_image_atomic_imax:
4006 case nir_intrinsic_bindless_image_atomic_umax:
4007 case nir_intrinsic_bindless_image_atomic_and:
4008 case nir_intrinsic_bindless_image_atomic_or:
4009 case nir_intrinsic_bindless_image_atomic_xor:
4010 case nir_intrinsic_bindless_image_atomic_exchange:
4011 case nir_intrinsic_bindless_image_atomic_comp_swap:
4012 case nir_intrinsic_bindless_image_atomic_inc_wrap:
4013 case nir_intrinsic_bindless_image_atomic_dec_wrap:
4014 result = visit_image_atomic(ctx, instr, true);
4015 break;
4016 case nir_intrinsic_image_deref_atomic_add:
4017 case nir_intrinsic_image_deref_atomic_imin:
4018 case nir_intrinsic_image_deref_atomic_umin:
4019 case nir_intrinsic_image_deref_atomic_imax:
4020 case nir_intrinsic_image_deref_atomic_umax:
4021 case nir_intrinsic_image_deref_atomic_and:
4022 case nir_intrinsic_image_deref_atomic_or:
4023 case nir_intrinsic_image_deref_atomic_xor:
4024 case nir_intrinsic_image_deref_atomic_exchange:
4025 case nir_intrinsic_image_deref_atomic_comp_swap:
4026 case nir_intrinsic_image_deref_atomic_inc_wrap:
4027 case nir_intrinsic_image_deref_atomic_dec_wrap:
4028 result = visit_image_atomic(ctx, instr, false);
4029 break;
4030 case nir_intrinsic_bindless_image_size:
4031 result = visit_image_size(ctx, instr, true);
4032 break;
4033 case nir_intrinsic_image_deref_size:
4034 result = visit_image_size(ctx, instr, false);
4035 break;
4036 case nir_intrinsic_shader_clock:
4037 result = ac_build_shader_clock(&ctx->ac,
4038 nir_intrinsic_memory_scope(instr));
4039 break;
4040 case nir_intrinsic_discard:
4041 case nir_intrinsic_discard_if:
4042 emit_discard(ctx, instr);
4043 break;
4044 case nir_intrinsic_demote:
4045 case nir_intrinsic_demote_if:
4046 emit_demote(ctx, instr);
4047 break;
4048 case nir_intrinsic_memory_barrier:
4049 case nir_intrinsic_group_memory_barrier:
4050 case nir_intrinsic_memory_barrier_buffer:
4051 case nir_intrinsic_memory_barrier_image:
4052 case nir_intrinsic_memory_barrier_shared:
4053 emit_membar(&ctx->ac, instr);
4054 break;
4055 case nir_intrinsic_scoped_barrier: {
4056 assert(!(nir_intrinsic_memory_semantics(instr) &
4057 (NIR_MEMORY_MAKE_AVAILABLE | NIR_MEMORY_MAKE_VISIBLE)));
4058
4059 nir_variable_mode modes = nir_intrinsic_memory_modes(instr);
4060
4061 unsigned wait_flags = 0;
4062 if (modes & (nir_var_mem_global | nir_var_mem_ssbo))
4063 wait_flags |= AC_WAIT_VLOAD | AC_WAIT_VSTORE;
4064 if (modes & nir_var_mem_shared)
4065 wait_flags |= AC_WAIT_LGKM;
4066
4067 if (wait_flags)
4068 ac_build_waitcnt(&ctx->ac, wait_flags);
4069
4070 if (nir_intrinsic_execution_scope(instr) == NIR_SCOPE_WORKGROUP)
4071 ac_emit_barrier(&ctx->ac, ctx->stage);
4072 break;
4073 }
4074 case nir_intrinsic_memory_barrier_tcs_patch:
4075 break;
4076 case nir_intrinsic_control_barrier:
4077 ac_emit_barrier(&ctx->ac, ctx->stage);
4078 break;
4079 case nir_intrinsic_shared_atomic_add:
4080 case nir_intrinsic_shared_atomic_imin:
4081 case nir_intrinsic_shared_atomic_umin:
4082 case nir_intrinsic_shared_atomic_imax:
4083 case nir_intrinsic_shared_atomic_umax:
4084 case nir_intrinsic_shared_atomic_and:
4085 case nir_intrinsic_shared_atomic_or:
4086 case nir_intrinsic_shared_atomic_xor:
4087 case nir_intrinsic_shared_atomic_exchange:
4088 case nir_intrinsic_shared_atomic_comp_swap:
4089 case nir_intrinsic_shared_atomic_fadd: {
4090 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0],
4091 instr->src[1].ssa->bit_size);
4092 result = visit_var_atomic(ctx, instr, ptr, 1);
4093 break;
4094 }
4095 case nir_intrinsic_deref_atomic_add:
4096 case nir_intrinsic_deref_atomic_imin:
4097 case nir_intrinsic_deref_atomic_umin:
4098 case nir_intrinsic_deref_atomic_imax:
4099 case nir_intrinsic_deref_atomic_umax:
4100 case nir_intrinsic_deref_atomic_and:
4101 case nir_intrinsic_deref_atomic_or:
4102 case nir_intrinsic_deref_atomic_xor:
4103 case nir_intrinsic_deref_atomic_exchange:
4104 case nir_intrinsic_deref_atomic_comp_swap:
4105 case nir_intrinsic_deref_atomic_fadd: {
4106 LLVMValueRef ptr = get_src(ctx, instr->src[0]);
4107 result = visit_var_atomic(ctx, instr, ptr, 1);
4108 break;
4109 }
4110 case nir_intrinsic_load_barycentric_pixel:
4111 result = barycentric_center(ctx, nir_intrinsic_interp_mode(instr));
4112 break;
4113 case nir_intrinsic_load_barycentric_centroid:
4114 result = barycentric_centroid(ctx, nir_intrinsic_interp_mode(instr));
4115 break;
4116 case nir_intrinsic_load_barycentric_sample:
4117 result = barycentric_sample(ctx, nir_intrinsic_interp_mode(instr));
4118 break;
4119 case nir_intrinsic_load_barycentric_model:
4120 result = barycentric_model(ctx);
4121 break;
4122 case nir_intrinsic_load_barycentric_at_offset: {
4123 LLVMValueRef offset = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
4124 result = barycentric_offset(ctx, nir_intrinsic_interp_mode(instr), offset);
4125 break;
4126 }
4127 case nir_intrinsic_load_barycentric_at_sample: {
4128 LLVMValueRef sample_id = get_src(ctx, instr->src[0]);
4129 result = barycentric_at_sample(ctx, nir_intrinsic_interp_mode(instr), sample_id);
4130 break;
4131 }
4132 case nir_intrinsic_load_interpolated_input: {
4133 /* We assume any indirect loads have been lowered away */
4134 ASSERTED nir_const_value *offset = nir_src_as_const_value(instr->src[1]);
4135 assert(offset);
4136 assert(offset[0].i32 == 0);
4137
4138 LLVMValueRef interp_param = get_src(ctx, instr->src[0]);
4139 unsigned index = nir_intrinsic_base(instr);
4140 unsigned component = nir_intrinsic_component(instr);
4141 result = load_interpolated_input(ctx, interp_param, index,
4142 component,
4143 instr->dest.ssa.num_components,
4144 instr->dest.ssa.bit_size);
4145 break;
4146 }
4147 case nir_intrinsic_emit_vertex:
4148 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
4149 break;
4150 case nir_intrinsic_emit_vertex_with_counter: {
4151 unsigned stream = nir_intrinsic_stream_id(instr);
4152 LLVMValueRef next_vertex = get_src(ctx, instr->src[0]);
4153 ctx->abi->emit_vertex_with_counter(ctx->abi, stream,
4154 next_vertex,
4155 ctx->abi->outputs);
4156 break;
4157 }
4158 case nir_intrinsic_end_primitive:
4159 case nir_intrinsic_end_primitive_with_counter:
4160 ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
4161 break;
4162 case nir_intrinsic_load_tess_coord:
4163 result = ctx->abi->load_tess_coord(ctx->abi);
4164 break;
4165 case nir_intrinsic_load_tess_level_outer:
4166 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, false);
4167 break;
4168 case nir_intrinsic_load_tess_level_inner:
4169 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, false);
4170 break;
4171 case nir_intrinsic_load_tess_level_outer_default:
4172 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER, true);
4173 break;
4174 case nir_intrinsic_load_tess_level_inner_default:
4175 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER, true);
4176 break;
4177 case nir_intrinsic_load_patch_vertices_in:
4178 result = ctx->abi->load_patch_vertices_in(ctx->abi);
4179 break;
4180 case nir_intrinsic_vote_all: {
4181 LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
4182 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4183 break;
4184 }
4185 case nir_intrinsic_vote_any: {
4186 LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
4187 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4188 break;
4189 }
4190 case nir_intrinsic_shuffle:
4191 if (ctx->ac.chip_class == GFX8 ||
4192 ctx->ac.chip_class == GFX9 ||
4193 (ctx->ac.chip_class >= GFX10 && ctx->ac.wave_size == 32)) {
4194 result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
4195 get_src(ctx, instr->src[1]));
4196 } else {
4197 LLVMValueRef src = get_src(ctx, instr->src[0]);
4198 LLVMValueRef index = get_src(ctx, instr->src[1]);
4199 LLVMTypeRef type = LLVMTypeOf(src);
4200 struct waterfall_context wctx;
4201 LLVMValueRef index_val;
4202
4203 index_val = enter_waterfall(ctx, &wctx, index, true);
4204
4205 src = LLVMBuildZExt(ctx->ac.builder, src,
4206 ctx->ac.i32, "");
4207
4208 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.readlane",
4209 ctx->ac.i32,
4210 (LLVMValueRef []) { src, index_val }, 2,
4211 AC_FUNC_ATTR_READNONE |
4212 AC_FUNC_ATTR_CONVERGENT);
4213
4214 result = LLVMBuildTrunc(ctx->ac.builder, result, type, "");
4215
4216 result = exit_waterfall(ctx, &wctx, result);
4217 }
4218 break;
4219 case nir_intrinsic_reduce:
4220 result = ac_build_reduce(&ctx->ac,
4221 get_src(ctx, instr->src[0]),
4222 instr->const_index[0],
4223 instr->const_index[1]);
4224 break;
4225 case nir_intrinsic_inclusive_scan:
4226 result = ac_build_inclusive_scan(&ctx->ac,
4227 get_src(ctx, instr->src[0]),
4228 instr->const_index[0]);
4229 break;
4230 case nir_intrinsic_exclusive_scan:
4231 result = ac_build_exclusive_scan(&ctx->ac,
4232 get_src(ctx, instr->src[0]),
4233 instr->const_index[0]);
4234 break;
4235 case nir_intrinsic_quad_broadcast: {
4236 unsigned lane = nir_src_as_uint(instr->src[1]);
4237 result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
4238 lane, lane, lane, lane);
4239 break;
4240 }
4241 case nir_intrinsic_quad_swap_horizontal:
4242 result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3, 2);
4243 break;
4244 case nir_intrinsic_quad_swap_vertical:
4245 result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0, 1);
4246 break;
4247 case nir_intrinsic_quad_swap_diagonal:
4248 result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1, 0);
4249 break;
4250 case nir_intrinsic_quad_swizzle_amd: {
4251 uint32_t mask = nir_intrinsic_swizzle_mask(instr);
4252 result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
4253 mask & 0x3, (mask >> 2) & 0x3,
4254 (mask >> 4) & 0x3, (mask >> 6) & 0x3);
4255 break;
4256 }
4257 case nir_intrinsic_masked_swizzle_amd: {
4258 uint32_t mask = nir_intrinsic_swizzle_mask(instr);
4259 result = ac_build_ds_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), mask);
4260 break;
4261 }
4262 case nir_intrinsic_write_invocation_amd:
4263 result = ac_build_writelane(&ctx->ac, get_src(ctx, instr->src[0]),
4264 get_src(ctx, instr->src[1]),
4265 get_src(ctx, instr->src[2]));
4266 break;
4267 case nir_intrinsic_mbcnt_amd:
4268 result = ac_build_mbcnt(&ctx->ac, get_src(ctx, instr->src[0]));
4269 break;
4270 case nir_intrinsic_load_scratch: {
4271 LLVMValueRef offset = get_src(ctx, instr->src[0]);
4272 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
4273 offset);
4274 LLVMTypeRef comp_type =
4275 LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
4276 LLVMTypeRef vec_type =
4277 instr->dest.ssa.num_components == 1 ? comp_type :
4278 LLVMVectorType(comp_type, instr->dest.ssa.num_components);
4279 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
4280 ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
4281 LLVMPointerType(vec_type, addr_space), "");
4282 result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
4283 break;
4284 }
4285 case nir_intrinsic_store_scratch: {
4286 LLVMValueRef offset = get_src(ctx, instr->src[1]);
4287 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->scratch,
4288 offset);
4289 LLVMTypeRef comp_type =
4290 LLVMIntTypeInContext(ctx->ac.context, instr->src[0].ssa->bit_size);
4291 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
4292 ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
4293 LLVMPointerType(comp_type, addr_space), "");
4294 LLVMValueRef src = get_src(ctx, instr->src[0]);
4295 unsigned wrmask = nir_intrinsic_write_mask(instr);
4296 while (wrmask) {
4297 int start, count;
4298 u_bit_scan_consecutive_range(&wrmask, &start, &count);
4299
4300 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, start, false);
4301 LLVMValueRef offset_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &offset, 1, "");
4302 LLVMTypeRef vec_type =
4303 count == 1 ? comp_type : LLVMVectorType(comp_type, count);
4304 offset_ptr = LLVMBuildBitCast(ctx->ac.builder,
4305 offset_ptr,
4306 LLVMPointerType(vec_type, addr_space),
4307 "");
4308 LLVMValueRef offset_src =
4309 ac_extract_components(&ctx->ac, src, start, count);
4310 LLVMBuildStore(ctx->ac.builder, offset_src, offset_ptr);
4311 }
4312 break;
4313 }
4314 case nir_intrinsic_load_constant: {
4315 unsigned base = nir_intrinsic_base(instr);
4316 unsigned range = nir_intrinsic_range(instr);
4317
4318 LLVMValueRef offset = get_src(ctx, instr->src[0]);
4319 offset = LLVMBuildAdd(ctx->ac.builder, offset,
4320 LLVMConstInt(ctx->ac.i32, base, false), "");
4321
4322 /* Clamp the offset to avoid out-of-bound access because global
4323 * instructions can't handle them.
4324 */
4325 LLVMValueRef size = LLVMConstInt(ctx->ac.i32, base + range, false);
4326 LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntULT,
4327 offset, size, "");
4328 offset = LLVMBuildSelect(ctx->ac.builder, cond, offset, size, "");
4329
4330 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->constant_data,
4331 offset);
4332 LLVMTypeRef comp_type =
4333 LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
4334 LLVMTypeRef vec_type =
4335 instr->dest.ssa.num_components == 1 ? comp_type :
4336 LLVMVectorType(comp_type, instr->dest.ssa.num_components);
4337 unsigned addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
4338 ptr = LLVMBuildBitCast(ctx->ac.builder, ptr,
4339 LLVMPointerType(vec_type, addr_space), "");
4340 result = LLVMBuildLoad(ctx->ac.builder, ptr, "");
4341 break;
4342 }
4343 default:
4344 fprintf(stderr, "Unknown intrinsic: ");
4345 nir_print_instr(&instr->instr, stderr);
4346 fprintf(stderr, "\n");
4347 break;
4348 }
4349 if (result) {
4350 ctx->ssa_defs[instr->dest.ssa.index] = result;
4351 }
4352 }
4353
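/* Bindless handles declared as uniforms live in the UBO at index 0 of the
 * ABI: the byte offset is base location * 4 plus the (constant + dynamic)
 * index * 8, since each handle occupies a 64-bit slot, and the index dword is
 * loaded from there.
 */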
4354 static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
4355 unsigned base_index,
4356 unsigned constant_index,
4357 LLVMValueRef dynamic_index)
4358 {
4359 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
4360 LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
4361 LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
4362
4363 /* Bindless uniforms are 64-bit, so multiply the index by 8 */
4364 index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
4365 offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
4366
4367 LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
4368
4369 LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset,
4370 NULL, 0, 0, true, true);
4371
4372 return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
4373 }
4374
4375 struct sampler_desc_address {
4376 unsigned descriptor_set;
4377 unsigned base_index; /* binding in vulkan */
4378 unsigned constant_index;
4379 LLVMValueRef dynamic_index;
4380 bool image;
4381 bool bindless;
4382 };
4383
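/* Decompose a texture/image descriptor reference into its address: descriptor
 * set, base binding, a constant index accumulated from the deref chain, and
 * an optional dynamic (possibly divergent) index. Bindless handles come
 * straight from the instruction source or from a uniform.
 */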
4384 static struct sampler_desc_address
4385 get_sampler_desc_internal(struct ac_nir_context *ctx,
4386 nir_deref_instr *deref_instr,
4387 const nir_instr *instr,
4388 bool image)
4389 {
4390 LLVMValueRef index = NULL;
4391 unsigned constant_index = 0;
4392 unsigned descriptor_set;
4393 unsigned base_index;
4394 bool bindless = false;
4395
4396 if (!deref_instr) {
4397 descriptor_set = 0;
4398 if (image) {
4399 nir_intrinsic_instr *img_instr = nir_instr_as_intrinsic(instr);
4400 base_index = 0;
4401 bindless = true;
4402 index = get_src(ctx, img_instr->src[0]);
4403 } else {
4404 nir_tex_instr *tex_instr = nir_instr_as_tex(instr);
4405 int sampSrcIdx = nir_tex_instr_src_index(tex_instr,
4406 nir_tex_src_sampler_handle);
4407 if (sampSrcIdx != -1) {
4408 base_index = 0;
4409 bindless = true;
4410 index = get_src(ctx, tex_instr->src[sampSrcIdx].src);
4411 } else {
4412 assert(tex_instr && !image);
4413 base_index = tex_instr->sampler_index;
4414 }
4415 }
4416 } else {
4417 while (deref_instr->deref_type != nir_deref_type_var) {
4418 if (deref_instr->deref_type == nir_deref_type_array) {
4419 unsigned array_size = glsl_get_aoa_size(deref_instr->type);
4420 if (!array_size)
4421 array_size = 1;
4422
4423 if (nir_src_is_const(deref_instr->arr.index)) {
4424 constant_index += array_size * nir_src_as_uint(deref_instr->arr.index);
4425 } else {
4426 LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);
4427
4428 indirect = LLVMBuildMul(ctx->ac.builder, indirect,
4429 LLVMConstInt(ctx->ac.i32, array_size, false), "");
4430
4431 if (!index)
4432 index = indirect;
4433 else
4434 index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
4435 }
4436
4437 deref_instr = nir_src_as_deref(deref_instr->parent);
4438 } else if (deref_instr->deref_type == nir_deref_type_struct) {
4439 unsigned sidx = deref_instr->strct.index;
4440 deref_instr = nir_src_as_deref(deref_instr->parent);
4441 constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
4442 } else {
4443 unreachable("Unsupported deref type");
4444 }
4445 }
4446 descriptor_set = deref_instr->var->data.descriptor_set;
4447
4448 if (deref_instr->var->data.bindless) {
4449 /* For now just assert on unhandled variable types */
4450 assert(deref_instr->var->data.mode == nir_var_uniform);
4451
4452 base_index = deref_instr->var->data.driver_location;
4453 bindless = true;
4454
4455 index = index ? index : ctx->ac.i32_0;
4456 index = get_bindless_index_from_uniform(ctx, base_index,
4457 constant_index, index);
4458 } else
4459 base_index = deref_instr->var->data.binding;
4460 }
4461 return (struct sampler_desc_address) {
4462 .descriptor_set = descriptor_set,
4463 .base_index = base_index,
4464 .constant_index = constant_index,
4465 .dynamic_index = index,
4466 .image = image,
4467 .bindless = bindless,
4468 };
4469 }
4470
4471 /* Extract any possibly divergent index into a separate value that can be fed
4472 * into get_sampler_desc with the same arguments. */
4473 static LLVMValueRef get_sampler_desc_index(struct ac_nir_context *ctx,
4474 nir_deref_instr *deref_instr,
4475 const nir_instr *instr,
4476 bool image)
4477 {
4478 struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
4479 return addr.dynamic_index;
4480 }
4481
4482 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
4483 nir_deref_instr *deref_instr,
4484 enum ac_descriptor_type desc_type,
4485 const nir_instr *instr,
4486 LLVMValueRef index,
4487 bool image, bool write)
4488 {
4489 struct sampler_desc_address addr = get_sampler_desc_internal(ctx, deref_instr, instr, image);
4490 return ctx->abi->load_sampler_desc(ctx->abi,
4491 addr.descriptor_set,
4492 addr.base_index,
4493 addr.constant_index, index,
4494 desc_type, addr.image, write, addr.bindless);
4495 }
4496
4497 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4498 *
4499 * GFX6-GFX7:
4500 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4501 * filtering manually. The driver sets img7 to a mask clearing
4502 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4503 * s_and_b32 samp0, samp0, img7
4504 *
4505 * GFX8:
4506 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4507 */
4508 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
4509 LLVMValueRef res, LLVMValueRef samp)
4510 {
4511 LLVMBuilderRef builder = ctx->ac.builder;
4512 LLVMValueRef img7, samp0;
4513
4514 if (ctx->ac.chip_class >= GFX8)
4515 return samp;
4516
4517 img7 = LLVMBuildExtractElement(builder, res,
4518 LLVMConstInt(ctx->ac.i32, 7, 0), "");
4519 samp0 = LLVMBuildExtractElement(builder, samp,
4520 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4521 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4522 return LLVMBuildInsertElement(builder, samp, samp0,
4523 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4524 }
4525
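/* Gather the descriptors a texture instruction needs: the resource (image,
 * buffer, plane or fragment mask depending on the op), the sampler (with the
 * GFX6-GFX7 aniso fixup applied), and the FMASK for txf_ms and
 * samples_identical. Non-uniform indices are wrapped in waterfall loops.
 */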
4526 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
4527 nir_tex_instr *instr,
4528 struct waterfall_context *wctx,
4529 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4530 LLVMValueRef *fmask_ptr)
4531 {
4532 nir_deref_instr *texture_deref_instr = NULL;
4533 nir_deref_instr *sampler_deref_instr = NULL;
4534 int plane = -1;
4535
4536 for (unsigned i = 0; i < instr->num_srcs; i++) {
4537 switch (instr->src[i].src_type) {
4538 case nir_tex_src_texture_deref:
4539 texture_deref_instr = nir_src_as_deref(instr->src[i].src);
4540 break;
4541 case nir_tex_src_sampler_deref:
4542 sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
4543 break;
4544 case nir_tex_src_plane:
4545 plane = nir_src_as_int(instr->src[i].src);
4546 break;
4547 default:
4548 break;
4549 }
4550 }
4551
4552 LLVMValueRef texture_dynamic_index = get_sampler_desc_index(ctx, texture_deref_instr,
4553 &instr->instr, false);
4554 if (!sampler_deref_instr)
4555 sampler_deref_instr = texture_deref_instr;
4556
4557 LLVMValueRef sampler_dynamic_index = get_sampler_desc_index(ctx, sampler_deref_instr,
4558 &instr->instr, false);
4559 if (instr->texture_non_uniform)
4560 texture_dynamic_index = enter_waterfall(ctx, wctx + 0, texture_dynamic_index, true);
4561
4562 if (instr->sampler_non_uniform)
4563 sampler_dynamic_index = enter_waterfall(ctx, wctx + 1, sampler_dynamic_index, true);
4564
4565 enum ac_descriptor_type main_descriptor = instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? AC_DESC_BUFFER : AC_DESC_IMAGE;
4566
4567 if (plane >= 0) {
4568 assert(instr->op != nir_texop_txf_ms &&
4569 instr->op != nir_texop_samples_identical);
4570 assert(instr->sampler_dim != GLSL_SAMPLER_DIM_BUF);
4571
4572 main_descriptor = AC_DESC_PLANE_0 + plane;
4573 }
4574
4575 if (instr->op == nir_texop_fragment_mask_fetch) {
4576 /* The fragment mask is fetched from the compressed
4577 * multisampled surface.
4578 */
4579 main_descriptor = AC_DESC_FMASK;
4580 }
4581
4582 *res_ptr = get_sampler_desc(ctx, texture_deref_instr, main_descriptor, &instr->instr,
4583 texture_dynamic_index, false, false);
4584
4585 if (samp_ptr) {
4586 *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, &instr->instr,
4587 sampler_dynamic_index, false, false);
4588 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4589 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4590 }
4591 if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
4592 instr->op == nir_texop_samples_identical))
4593 *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK,
4594 &instr->instr, texture_dynamic_index, false, false);
4595 }
4596
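/* Array layer coordinates are rounded to the nearest integer (in float)
 * before being handed to the image instruction, since the layer is selected
 * by round-to-nearest of the coordinate.
 */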
4597 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
4598 LLVMValueRef coord)
4599 {
4600 coord = ac_to_float(ctx, coord);
4601 coord = ac_build_round(ctx, coord);
4602 coord = ac_to_integer(ctx, coord);
4603 return coord;
4604 }
4605
4606 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4607 {
4608 LLVMValueRef result = NULL;
4609 struct ac_image_args args = { 0 };
4610 LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
4611 LLVMValueRef ddx = NULL, ddy = NULL;
4612 unsigned offset_src = 0;
4613 struct waterfall_context wctx[2] = {{{0}}};
4614
4615 tex_fetch_ptrs(ctx, instr, wctx, &args.resource, &args.sampler, &fmask_ptr);
4616
4617 for (unsigned i = 0; i < instr->num_srcs; i++) {
4618 switch (instr->src[i].src_type) {
4619 case nir_tex_src_coord: {
4620 LLVMValueRef coord = get_src(ctx, instr->src[i].src);
4621 for (unsigned chan = 0; chan < instr->coord_components; ++chan)
4622 args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4623 break;
4624 }
4625 case nir_tex_src_projector:
4626 break;
4627 case nir_tex_src_comparator:
4628 if (instr->is_shadow) {
4629 args.compare = get_src(ctx, instr->src[i].src);
4630 args.compare = ac_to_float(&ctx->ac, args.compare);
4631 }
4632 break;
4633 case nir_tex_src_offset:
4634 args.offset = get_src(ctx, instr->src[i].src);
4635 offset_src = i;
4636 break;
4637 case nir_tex_src_bias:
4638 args.bias = get_src(ctx, instr->src[i].src);
4639 break;
4640 case nir_tex_src_lod: {
4641 if (nir_src_is_const(instr->src[i].src) && nir_src_as_uint(instr->src[i].src) == 0)
4642 args.level_zero = true;
4643 else
4644 args.lod = get_src(ctx, instr->src[i].src);
4645 break;
4646 }
4647 case nir_tex_src_ms_index:
4648 sample_index = get_src(ctx, instr->src[i].src);
4649 break;
4650 case nir_tex_src_ms_mcs:
4651 break;
4652 case nir_tex_src_ddx:
4653 ddx = get_src(ctx, instr->src[i].src);
4654 break;
4655 case nir_tex_src_ddy:
4656 ddy = get_src(ctx, instr->src[i].src);
4657 break;
4658 case nir_tex_src_min_lod:
4659 args.min_lod = get_src(ctx, instr->src[i].src);
4660 break;
4661 case nir_tex_src_texture_offset:
4662 case nir_tex_src_sampler_offset:
4663 case nir_tex_src_plane:
4664 default:
4665 break;
4666 }
4667 }
4668
4669 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4670 result = get_buffer_size(ctx, args.resource, true);
4671 goto write_result;
4672 }
4673
4674 if (instr->op == nir_texop_texture_samples) {
4675 LLVMValueRef res, samples, is_msaa;
4676 LLVMValueRef default_sample;
4677
4678 res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
4679 samples = LLVMBuildExtractElement(ctx->ac.builder, res,
4680 LLVMConstInt(ctx->ac.i32, 3, false), "");
4681 is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
4682 LLVMConstInt(ctx->ac.i32, 28, false), "");
4683 is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
4684 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4685 is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4686 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4687
4688 samples = LLVMBuildLShr(ctx->ac.builder, samples,
4689 LLVMConstInt(ctx->ac.i32, 16, false), "");
4690 samples = LLVMBuildAnd(ctx->ac.builder, samples,
4691 LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4692 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
4693 samples, "");
4694
4695 if (ctx->abi->robust_buffer_access) {
4696 LLVMValueRef dword1, is_null_descriptor;
4697
4698 /* Extract the second dword of the descriptor, if it's
4699 * all zero, then it's a null descriptor.
4700 */
4701 dword1 = LLVMBuildExtractElement(ctx->ac.builder, res,
4702 LLVMConstInt(ctx->ac.i32, 1, false), "");
4703 is_null_descriptor =
4704 LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, dword1,
4705 LLVMConstInt(ctx->ac.i32, 0, false), "");
4706 default_sample =
4707 LLVMBuildSelect(ctx->ac.builder, is_null_descriptor,
4708 ctx->ac.i32_0, ctx->ac.i32_1, "");
4709 } else {
4710 default_sample = ctx->ac.i32_1;
4711 }
4712
4713 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
4714 default_sample, "");
4715 result = samples;
4716 goto write_result;
4717 }
4718
4719 if (args.offset && instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
4720 LLVMValueRef offset[3], pack;
4721 for (unsigned chan = 0; chan < 3; ++chan)
4722 offset[chan] = ctx->ac.i32_0;
4723
4724 unsigned num_components = ac_get_llvm_num_components(args.offset);
4725 for (unsigned chan = 0; chan < num_components; chan++) {
4726 offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
4727 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
4728 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4729 if (chan)
4730 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4731 LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4732 }
4733 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4734 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4735 args.offset = pack;
4736 }
4737
4738 /* Section 8.23.1 (Depth Texture Comparison Mode) of the
4739 * OpenGL 4.5 spec says:
4740 *
4741 * "If the texture’s internal format indicates a fixed-point
4742 * depth texture, then D_t and D_ref are clamped to the
4743 * range [0, 1]; otherwise no clamping is performed."
4744 *
4745 * TC-compatible HTILE promotes Z16 and Z24 to Z32_FLOAT,
4746 * so the depth comparison value isn't clamped for Z16 and
4747 * Z24 anymore. Do it manually here for GFX8-9; GFX10 has
4748 * an explicitly clamped 32-bit float format.
4749 */
4750 if (args.compare &&
4751 ctx->ac.chip_class >= GFX8 &&
4752 ctx->ac.chip_class <= GFX9 &&
4753 ctx->abi->clamp_shadow_reference) {
4754 LLVMValueRef upgraded, clamped;
4755
4756 upgraded = LLVMBuildExtractElement(ctx->ac.builder, args.sampler,
4757 LLVMConstInt(ctx->ac.i32, 3, false), "");
4758 upgraded = LLVMBuildLShr(ctx->ac.builder, upgraded,
4759 LLVMConstInt(ctx->ac.i32, 29, false), "");
4760 upgraded = LLVMBuildTrunc(ctx->ac.builder, upgraded, ctx->ac.i1, "");
4761 clamped = ac_build_clamp(&ctx->ac, args.compare);
4762 args.compare = LLVMBuildSelect(ctx->ac.builder, upgraded, clamped,
4763 args.compare, "");
4764 }
4765
4766 /* pack derivatives */
4767 if (ddx || ddy) {
4768 int num_src_deriv_channels, num_dest_deriv_channels;
4769 switch (instr->sampler_dim) {
4770 case GLSL_SAMPLER_DIM_3D:
4771 case GLSL_SAMPLER_DIM_CUBE:
4772 num_src_deriv_channels = 3;
4773 num_dest_deriv_channels = 3;
4774 break;
4775 case GLSL_SAMPLER_DIM_2D:
4776 default:
4777 num_src_deriv_channels = 2;
4778 num_dest_deriv_channels = 2;
4779 break;
4780 case GLSL_SAMPLER_DIM_1D:
4781 num_src_deriv_channels = 1;
4782 if (ctx->ac.chip_class == GFX9) {
4783 num_dest_deriv_channels = 2;
4784 } else {
4785 num_dest_deriv_channels = 1;
4786 }
4787 break;
4788 }
4789
4790 for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4791 args.derivs[i] = ac_to_float(&ctx->ac,
4792 ac_llvm_extract_elem(&ctx->ac, ddx, i));
4793 args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
4794 ac_llvm_extract_elem(&ctx->ac, ddy, i));
4795 }
4796 for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4797 args.derivs[i] = ctx->ac.f32_0;
4798 args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
4799 }
4800 }
4801
4802 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
4803 for (unsigned chan = 0; chan < instr->coord_components; chan++)
4804 args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
4805 if (instr->coord_components == 3)
4806 args.coords[3] = LLVMGetUndef(ctx->ac.f32);
4807 ac_prepare_cube_coords(&ctx->ac,
4808 instr->op == nir_texop_txd, instr->is_array,
4809 instr->op == nir_texop_lod, args.coords, args.derivs);
4810 }
4811
4812 /* Texture coordinate fixups */
4813 if (instr->coord_components > 1 &&
4814 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4815 instr->is_array &&
4816 instr->op != nir_texop_txf) {
4817 args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
4818 }
4819
4820 if (instr->coord_components > 2 &&
4821 (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
4822 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
4823 instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
4824 instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
4825 instr->is_array &&
4826 instr->op != nir_texop_txf &&
4827 instr->op != nir_texop_txf_ms &&
4828 instr->op != nir_texop_fragment_fetch &&
4829 instr->op != nir_texop_fragment_mask_fetch) {
4830 args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
4831 }
4832
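/* GFX9 has no true 1D images; they are addressed as 2D, so a dummy
* second coordinate is inserted: 0 for integer fetches (txf) and 0.5
* (the texel center) for filtered sampling, with the array layer
* moved up one channel.
*/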
4833 if (ctx->ac.chip_class == GFX9 &&
4834 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4835 instr->op != nir_texop_lod) {
4836 LLVMValueRef filler;
4837 if (instr->op == nir_texop_txf)
4838 filler = ctx->ac.i32_0;
4839 else
4840 filler = LLVMConstReal(ctx->ac.f32, 0.5);
4841
4842 if (instr->is_array)
4843 args.coords[2] = args.coords[1];
4844 args.coords[1] = filler;
4845 }
4846
4847 /* Pack sample index */
4848 if (sample_index && (instr->op == nir_texop_txf_ms ||
4849 instr->op == nir_texop_fragment_fetch))
4850 args.coords[instr->coord_components] = sample_index;
4851
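/* textureSamplesIdentical is implemented as an FMASK fetch: an FMASK
* value of 0 means every sample points at fragment 0, i.e. all samples
* of the texel are identical.
*/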
4852 if (instr->op == nir_texop_samples_identical) {
4853 struct ac_image_args txf_args = { 0 };
4854 memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));
4855
4856 txf_args.dmask = 0xf;
4857 txf_args.resource = fmask_ptr;
4858 txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
4859 result = build_tex_intrinsic(ctx, instr, &txf_args);
4860
4861 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4862 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
4863 goto write_result;
4864 }
4865
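/* For MSAA images (except size queries and raw fragment fetches), the
* sample index supplied by the shader is remapped through the FMASK to
* the physical fragment slot before the actual access.
*/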
4866 if ((instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS ||
4867 instr->sampler_dim == GLSL_SAMPLER_DIM_MS) &&
4868 instr->op != nir_texop_txs &&
4869 instr->op != nir_texop_fragment_fetch &&
4870 instr->op != nir_texop_fragment_mask_fetch) {
4871 unsigned sample_chan = instr->is_array ? 3 : 2;
4872 args.coords[sample_chan] = adjust_sample_index_using_fmask(
4873 &ctx->ac, args.coords[0], args.coords[1],
4874 instr->is_array ? args.coords[2] : NULL,
4875 args.coords[sample_chan], fmask_ptr);
4876 }
4877
4878 if (args.offset && (instr->op == nir_texop_txf || instr->op == nir_texop_txf_ms)) {
4879 int num_offsets = instr->src[offset_src].src.ssa->num_components;
4880 num_offsets = MIN2(num_offsets, instr->coord_components);
4881 for (unsigned i = 0; i < num_offsets; ++i) {
4882 args.coords[i] = LLVMBuildAdd(
4883 ctx->ac.builder, args.coords[i],
4884 LLVMConstInt(ctx->ac.i32, nir_src_comp_as_uint(instr->src[offset_src].src, i), false), "");
4885 }
4886 args.offset = NULL;
4887 }
4888
4889 /* DMASK was repurposed for GATHER4. 4 components are always
4890 * returned and DMASK works like a swizzle - it selects
4891 * the component to fetch. The only valid DMASK values are
4892 * 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
4893 * (red,red,red,red) etc.) The ISA document doesn't mention
4894 * this.
4895 */
4896 args.dmask = 0xf;
4897 if (instr->op == nir_texop_tg4) {
4898 if (instr->is_shadow)
4899 args.dmask = 1;
4900 else
4901 args.dmask = 1 << instr->component;
4902 }
4903
4904 if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF) {
4905 args.dim = ac_get_sampler_dim(ctx->ac.chip_class, instr->sampler_dim, instr->is_array);
4906 args.unorm = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
4907 }
4908
4909 /* Adjust the number of coordinates because we only need (x,y) for 2D
4910 * multisampled images and (x,y,layer) for 2D multisampled layered
4911 * images or for multisampled input attachments.
4912 */
4913 if (instr->op == nir_texop_fragment_mask_fetch) {
4914 if (args.dim == ac_image_2dmsaa) {
4915 args.dim = ac_image_2d;
4916 } else {
4917 assert(args.dim == ac_image_2darraymsaa);
4918 args.dim = ac_image_2darray;
4919 }
4920 }
4921
4922 assert(instr->dest.is_ssa);
4923 args.d16 = instr->dest.ssa.bit_size == 16;
4924
4925 result = build_tex_intrinsic(ctx, instr, &args);
4926
4927 if (instr->op == nir_texop_query_levels)
4928 result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
4929 else if (instr->is_shadow && instr->is_new_style_shadow &&
4930 instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
4931 instr->op != nir_texop_tg4)
4932 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4933 else if (instr->op == nir_texop_txs &&
4934 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4935 instr->is_array) {
4936 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4937 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
4938 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4939 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
4940 result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
4941 } else if (ctx->ac.chip_class == GFX9 &&
4942 instr->op == nir_texop_txs &&
4943 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4944 instr->is_array) {
4945 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4946 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4947 result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
4948 ctx->ac.i32_1, "");
4949 } else if (instr->dest.ssa.num_components != 4)
4950 result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
4951
4952 write_result:
4953 if (result) {
4954 assert(instr->dest.is_ssa);
4955 result = ac_to_integer(&ctx->ac, result);
4956
4957 for (int i = ARRAY_SIZE(wctx); --i >= 0;) {
4958 result = exit_waterfall(ctx, wctx + i, result);
4959 }
4960
4961 ctx->ssa_defs[instr->dest.ssa.index] = result;
4962 }
4963 }
4964
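/* Phis are handled in two passes: visit_phi creates an empty LLVM phi
* while blocks are still being emitted (predecessor blocks may not
* exist yet), and phi_post_pass/visit_post_phi add the incoming values
* once the whole function has been translated.
*/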
4965 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
4966 {
4967 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4968 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
4969
4970 ctx->ssa_defs[instr->dest.ssa.index] = result;
4971 _mesa_hash_table_insert(ctx->phis, instr, result);
4972 }
4973
4974 static void visit_post_phi(struct ac_nir_context *ctx,
4975 nir_phi_instr *instr,
4976 LLVMValueRef llvm_phi)
4977 {
4978 nir_foreach_phi_src(src, instr) {
4979 LLVMBasicBlockRef block = get_block(ctx, src->pred);
4980 LLVMValueRef llvm_src = get_src(ctx, src->src);
4981
4982 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
4983 }
4984 }
4985
4986 static void phi_post_pass(struct ac_nir_context *ctx)
4987 {
4988 hash_table_foreach(ctx->phis, entry) {
4989 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
4990 (LLVMValueRef)entry->data);
4991 }
4992 }
4993
4994
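/* Returns true if the def can reach a store_deref, possibly through a
* chain of nir_op_vec4 instructions, i.e. it may end up in an exported
* output.
*/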
4995 static bool is_def_used_in_an_export(const nir_ssa_def* def) {
4996 nir_foreach_use(use_src, def) {
4997 if (use_src->parent_instr->type == nir_instr_type_intrinsic) {
4998 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(use_src->parent_instr);
4999 if (instr->intrinsic == nir_intrinsic_store_deref)
5000 return true;
5001 } else if (use_src->parent_instr->type == nir_instr_type_alu) {
5002 nir_alu_instr *instr = nir_instr_as_alu(use_src->parent_instr);
5003 if (instr->op == nir_op_vec4 &&
5004 is_def_used_in_an_export(&instr->dest.dest.ssa)) {
5005 return true;
5006 }
5007 }
5008 }
5009 return false;
5010 }
5011
5012 static void visit_ssa_undef(struct ac_nir_context *ctx,
5013 const nir_ssa_undef_instr *instr)
5014 {
5015 unsigned num_components = instr->def.num_components;
5016 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
5017
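/* With convert_undef_to_zero, undefs are folded to 0 unless the value
* may feed an output store (see is_def_used_in_an_export), in which
* case a real LLVM undef is kept.
*/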
5018 if (!ctx->abi->convert_undef_to_zero || is_def_used_in_an_export(&instr->def)) {
5019 LLVMValueRef undef;
5020
5021 if (num_components == 1)
5022 undef = LLVMGetUndef(type);
5023 else {
5024 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
5025 }
5026 ctx->ssa_defs[instr->def.index] = undef;
5027 } else {
5028 LLVMValueRef zero = LLVMConstInt(type, 0, false);
5029 if (num_components > 1) {
5030 zero = ac_build_gather_values_extended(
5031 &ctx->ac, &zero, num_components, 0, false, false);
5032 }
5033 ctx->ssa_defs[instr->def.index] = zero;
5034 }
5035 }
5036
5037 static void visit_jump(struct ac_llvm_context *ctx,
5038 const nir_jump_instr *instr)
5039 {
5040 switch (instr->type) {
5041 case nir_jump_break:
5042 ac_build_break(ctx);
5043 break;
5044 case nir_jump_continue:
5045 ac_build_continue(ctx);
5046 break;
5047 default:
5048 fprintf(stderr, "Unknown NIR jump instr: ");
5049 nir_print_instr(&instr->instr, stderr);
5050 fprintf(stderr, "\n");
5051 abort();
5052 }
5053 }
5054
5055 static LLVMTypeRef
5056 glsl_base_to_llvm_type(struct ac_llvm_context *ac,
5057 enum glsl_base_type type)
5058 {
5059 switch (type) {
5060 case GLSL_TYPE_INT:
5061 case GLSL_TYPE_UINT:
5062 case GLSL_TYPE_BOOL:
5063 case GLSL_TYPE_SUBROUTINE:
5064 return ac->i32;
5065 case GLSL_TYPE_INT8:
5066 case GLSL_TYPE_UINT8:
5067 return ac->i8;
5068 case GLSL_TYPE_INT16:
5069 case GLSL_TYPE_UINT16:
5070 return ac->i16;
5071 case GLSL_TYPE_FLOAT:
5072 return ac->f32;
5073 case GLSL_TYPE_FLOAT16:
5074 return ac->f16;
5075 case GLSL_TYPE_INT64:
5076 case GLSL_TYPE_UINT64:
5077 return ac->i64;
5078 case GLSL_TYPE_DOUBLE:
5079 return ac->f64;
5080 default:
5081 unreachable("unknown GLSL type");
5082 }
5083 }
5084
5085 static LLVMTypeRef
5086 glsl_to_llvm_type(struct ac_llvm_context *ac,
5087 const struct glsl_type *type)
5088 {
5089 if (glsl_type_is_scalar(type)) {
5090 return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
5091 }
5092
5093 if (glsl_type_is_vector(type)) {
5094 return LLVMVectorType(
5095 glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
5096 glsl_get_vector_elements(type));
5097 }
5098
5099 if (glsl_type_is_matrix(type)) {
5100 return LLVMArrayType(
5101 glsl_to_llvm_type(ac, glsl_get_column_type(type)),
5102 glsl_get_matrix_columns(type));
5103 }
5104
5105 if (glsl_type_is_array(type)) {
5106 return LLVMArrayType(
5107 glsl_to_llvm_type(ac, glsl_get_array_element(type)),
5108 glsl_get_length(type));
5109 }
5110
5111 assert(glsl_type_is_struct_or_ifc(type));
5112
5113 LLVMTypeRef member_types[glsl_get_length(type)];
5114
5115 for (unsigned i = 0; i < glsl_get_length(type); i++) {
5116 member_types[i] =
5117 glsl_to_llvm_type(ac,
5118 glsl_get_struct_field(type, i));
5119 }
5120
5121 return LLVMStructTypeInContext(ac->context, member_types,
5122 glsl_get_length(type), false);
5123 }
5124
5125 static void visit_deref(struct ac_nir_context *ctx,
5126 nir_deref_instr *instr)
5127 {
5128 if (instr->mode != nir_var_mem_shared &&
5129 instr->mode != nir_var_mem_global)
5130 return;
5131
5132 LLVMValueRef result = NULL;
5133 switch(instr->deref_type) {
5134 case nir_deref_type_var: {
5135 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
5136 result = entry->data;
5137 break;
5138 }
5139 case nir_deref_type_struct:
5140 if (instr->mode == nir_var_mem_global) {
5141 nir_deref_instr *parent = nir_deref_instr_parent(instr);
5142 uint64_t offset = glsl_get_struct_field_offset(parent->type,
5143 instr->strct.index);
5144 result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
5145 LLVMConstInt(ctx->ac.i32, offset, 0));
5146 } else {
5147 result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
5148 LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
5149 }
5150 break;
5151 case nir_deref_type_array:
5152 if (instr->mode == nir_var_mem_global) {
5153 nir_deref_instr *parent = nir_deref_instr_parent(instr);
5154 unsigned stride = glsl_get_explicit_stride(parent->type);
5155
5156 if ((glsl_type_is_matrix(parent->type) &&
5157 glsl_matrix_type_is_row_major(parent->type)) ||
5158 (glsl_type_is_vector(parent->type) && stride == 0))
5159 stride = type_scalar_size_bytes(parent->type);
5160
5161 assert(stride > 0);
5162 LLVMValueRef index = get_src(ctx, instr->arr.index);
5163 if (LLVMTypeOf(index) != ctx->ac.i64)
5164 index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
5165
5166 LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
5167
5168 result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
5169 } else {
5170 result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
5171 get_src(ctx, instr->arr.index));
5172 }
5173 break;
5174 case nir_deref_type_ptr_as_array:
5175 if (instr->mode == nir_var_mem_global) {
5176 unsigned stride = nir_deref_instr_array_stride(instr);
5177
5178 LLVMValueRef index = get_src(ctx, instr->arr.index);
5179 if (LLVMTypeOf(index) != ctx->ac.i64)
5180 index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");
5181
5182 LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");
5183
5184 result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
5185 } else {
5186 result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
5187 get_src(ctx, instr->arr.index));
5188 }
5189 break;
5190 case nir_deref_type_cast: {
5191 result = get_src(ctx, instr->parent);
5192
5193 /* We can't use the structs from LLVM because the shader
5194 * specifies its own offsets. */
5195 LLVMTypeRef pointee_type = ctx->ac.i8;
5196 if (instr->mode == nir_var_mem_shared)
5197 pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);
5198
5199 unsigned address_space;
5200
5201 switch(instr->mode) {
5202 case nir_var_mem_shared:
5203 address_space = AC_ADDR_SPACE_LDS;
5204 break;
5205 case nir_var_mem_global:
5206 address_space = AC_ADDR_SPACE_GLOBAL;
5207 break;
5208 default:
5209 unreachable("Unhandled address space");
5210 }
5211
5212 LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);
5213
5214 if (LLVMTypeOf(result) != type) {
5215 if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
5216 result = LLVMBuildBitCast(ctx->ac.builder, result,
5217 type, "");
5218 } else {
5219 result = LLVMBuildIntToPtr(ctx->ac.builder, result,
5220 type, "");
5221 }
5222 }
5223 break;
5224 }
5225 default:
5226 unreachable("Unhandled deref_instr deref type");
5227 }
5228
5229 ctx->ssa_defs[instr->dest.ssa.index] = result;
5230 }
5231
5232 static void visit_cf_list(struct ac_nir_context *ctx,
5233 struct exec_list *list);
5234
5235 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
5236 {
5237 nir_foreach_instr(instr, block)
5238 {
5239 switch (instr->type) {
5240 case nir_instr_type_alu:
5241 visit_alu(ctx, nir_instr_as_alu(instr));
5242 break;
5243 case nir_instr_type_load_const:
5244 visit_load_const(ctx, nir_instr_as_load_const(instr));
5245 break;
5246 case nir_instr_type_intrinsic:
5247 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5248 break;
5249 case nir_instr_type_tex:
5250 visit_tex(ctx, nir_instr_as_tex(instr));
5251 break;
5252 case nir_instr_type_phi:
5253 visit_phi(ctx, nir_instr_as_phi(instr));
5254 break;
5255 case nir_instr_type_ssa_undef:
5256 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
5257 break;
5258 case nir_instr_type_jump:
5259 visit_jump(&ctx->ac, nir_instr_as_jump(instr));
5260 break;
5261 case nir_instr_type_deref:
5262 visit_deref(ctx, nir_instr_as_deref(instr));
5263 break;
5264 default:
5265 fprintf(stderr, "Unknown NIR instr type: ");
5266 nir_print_instr(instr, stderr);
5267 fprintf(stderr, "\n");
5268 abort();
5269 }
5270 }
5271
5272 _mesa_hash_table_insert(ctx->defs, block,
5273 LLVMGetInsertBlock(ctx->ac.builder));
5274 }
5275
5276 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
5277 {
5278 LLVMValueRef value = get_src(ctx, if_stmt->condition);
5279
5280 nir_block *then_block =
5281 (nir_block *) exec_list_get_head(&if_stmt->then_list);
5282
5283 ac_build_uif(&ctx->ac, value, then_block->index);
5284
5285 visit_cf_list(ctx, &if_stmt->then_list);
5286
5287 if (!exec_list_is_empty(&if_stmt->else_list)) {
5288 nir_block *else_block =
5289 (nir_block *) exec_list_get_head(&if_stmt->else_list);
5290
5291 ac_build_else(&ctx->ac, else_block->index);
5292 visit_cf_list(ctx, &if_stmt->else_list);
5293 }
5294
5295 ac_build_endif(&ctx->ac, then_block->index);
5296 }
5297
5298 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
5299 {
5300 nir_block *first_loop_block =
5301 (nir_block *) exec_list_get_head(&loop->body);
5302
5303 ac_build_bgnloop(&ctx->ac, first_loop_block->index);
5304
5305 visit_cf_list(ctx, &loop->body);
5306
5307 ac_build_endloop(&ctx->ac, first_loop_block->index);
5308 }
5309
5310 static void visit_cf_list(struct ac_nir_context *ctx,
5311 struct exec_list *list)
5312 {
5313 foreach_list_typed(nir_cf_node, node, node, list)
5314 {
5315 switch (node->type) {
5316 case nir_cf_node_block:
5317 visit_block(ctx, nir_cf_node_as_block(node));
5318 break;
5319
5320 case nir_cf_node_if:
5321 visit_if(ctx, nir_cf_node_as_if(node));
5322 break;
5323
5324 case nir_cf_node_loop:
5325 visit_loop(ctx, nir_cf_node_as_loop(node));
5326 break;
5327
5328 default:
5329 assert(0);
5330 }
5331 }
5332 }
5333
5334 void
5335 ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
5336 struct ac_shader_abi *abi,
5337 struct nir_shader *nir,
5338 struct nir_variable *variable,
5339 gl_shader_stage stage)
5340 {
5341 unsigned output_loc = variable->data.driver_location / 4;
5342 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5343
5344 /* tess ctrl has its own load/store paths for outputs */
5345 if (stage == MESA_SHADER_TESS_CTRL)
5346 return;
5347
5348 if (stage == MESA_SHADER_VERTEX ||
5349 stage == MESA_SHADER_TESS_EVAL ||
5350 stage == MESA_SHADER_GEOMETRY) {
5351 int idx = variable->data.location + variable->data.index;
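/* Clip and cull distances share the VARYING_SLOT_CLIP_DIST0/1 slots:
* up to 8 floats combined, so they occupy one vec4 attribute when the
* total fits in 4 components and two otherwise.
*/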
5352 if (idx == VARYING_SLOT_CLIP_DIST0) {
5353 int length = nir->info.clip_distance_array_size +
5354 nir->info.cull_distance_array_size;
5355
5356 if (length > 4)
5357 attrib_count = 2;
5358 else
5359 attrib_count = 1;
5360 }
5361 }
5362
5363 bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
5364 LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
5365 for (unsigned i = 0; i < attrib_count; ++i) {
5366 for (unsigned chan = 0; chan < 4; chan++) {
5367 abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
5368 ac_build_alloca_undef(ctx, type, "");
5369 }
5370 }
5371 }
5372
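/* Function-temp variables are lowered to scalar f32 allocas, four per
* attribute slot (one per vec4 channel), addressed through the
* driver_location assigned below.
*/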
5373 static void
5374 setup_locals(struct ac_nir_context *ctx,
5375 struct nir_function *func)
5376 {
5377 int i, j;
5378 ctx->num_locals = 0;
5379 nir_foreach_function_temp_variable(variable, func->impl) {
5380 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5381 variable->data.driver_location = ctx->num_locals * 4;
5382 variable->data.location_frac = 0;
5383 ctx->num_locals += attrib_count;
5384 }
5385 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
5386 if (!ctx->locals)
5387 return;
5388
5389 for (i = 0; i < ctx->num_locals; i++) {
5390 for (j = 0; j < 4; j++) {
5391 ctx->locals[i * 4 + j] =
5392 ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
5393 }
5394 }
5395 }
5396
5397 static void
5398 setup_scratch(struct ac_nir_context *ctx,
5399 struct nir_shader *shader)
5400 {
5401 if (shader->scratch_size == 0)
5402 return;
5403
5404 ctx->scratch = ac_build_alloca_undef(&ctx->ac,
5405 LLVMArrayType(ctx->ac.i8, shader->scratch_size),
5406 "scratch");
5407 }
5408
5409 static void
5410 setup_constant_data(struct ac_nir_context *ctx,
5411 struct nir_shader *shader)
5412 {
5413 if (!shader->constant_data)
5414 return;
5415
5416 LLVMValueRef data =
5417 LLVMConstStringInContext(ctx->ac.context,
5418 shader->constant_data,
5419 shader->constant_data_size,
5420 true);
5421 LLVMTypeRef type = LLVMArrayType(ctx->ac.i8, shader->constant_data_size);
5422
5423 /* We want to put the constant data in the CONST address space so that
5424 * we can use scalar loads. However, LLVM versions before 10 put these
5425 * variables in the same section as the code, which is unacceptable
5426 * for RadeonSI as it needs to relocate all the data sections after
5427 * the code sections. See https://reviews.llvm.org/D65813.
5428 */
5429 unsigned address_space =
5430 LLVM_VERSION_MAJOR < 10 ? AC_ADDR_SPACE_GLOBAL : AC_ADDR_SPACE_CONST;
5431
5432 LLVMValueRef global =
5433 LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
5434 "const_data",
5435 address_space);
5436
5437 LLVMSetInitializer(global, data);
5438 LLVMSetGlobalConstant(global, true);
5439 LLVMSetVisibility(global, LLVMHiddenVisibility);
5440 ctx->constant_data = global;
5441 }
5442
5443 static void
5444 setup_shared(struct ac_nir_context *ctx,
5445 struct nir_shader *nir)
5446 {
5447 if (ctx->ac.lds)
5448 return;
5449
5450 LLVMTypeRef type = LLVMArrayType(ctx->ac.i8,
5451 nir->info.cs.shared_size);
5452
5453 LLVMValueRef lds =
5454 LLVMAddGlobalInAddressSpace(ctx->ac.module, type,
5455 "compute_lds",
5456 AC_ADDR_SPACE_LDS);
5457 LLVMSetAlignment(lds, 64 * 1024);
5458
5459 ctx->ac.lds = LLVMBuildBitCast(ctx->ac.builder, lds,
5460 LLVMPointerType(ctx->ac.i8,
5461 AC_ADDR_SPACE_LDS), "");
5462 }
5463
5464 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
5465 const struct ac_shader_args *args, struct nir_shader *nir)
5466 {
5467 struct ac_nir_context ctx = {};
5468 struct nir_function *func;
5469
5470 ctx.ac = *ac;
5471 ctx.abi = abi;
5472 ctx.args = args;
5473
5474 ctx.stage = nir->info.stage;
5475 ctx.info = &nir->info;
5476
5477 ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
5478
5479 /* TODO: remove this after RADV switches to lowered IO */
5480 if (!nir->info.io_lowered) {
5481 nir_foreach_shader_out_variable(variable, nir) {
5482 ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
5483 ctx.stage);
5484 }
5485 }
5486
5487 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5488 _mesa_key_pointer_equal);
5489 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5490 _mesa_key_pointer_equal);
5491 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5492 _mesa_key_pointer_equal);
5493
5494 if (ctx.abi->kill_ps_if_inf_interp)
5495 ctx.verified_interp = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5496 _mesa_key_pointer_equal);
5497
5498 func = (struct nir_function *)exec_list_get_head(&nir->functions);
5499
5500 nir_index_ssa_defs(func->impl);
5501 ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));
5502
5503 setup_locals(&ctx, func);
5504 setup_scratch(&ctx, nir);
5505 setup_constant_data(&ctx, nir);
5506
5507 if (gl_shader_stage_is_compute(nir->info.stage))
5508 setup_shared(&ctx, nir);
5509
5510 if (nir->info.stage == MESA_SHADER_FRAGMENT && nir->info.fs.uses_demote) {
5511 ctx.ac.postponed_kill = ac_build_alloca_undef(&ctx.ac, ac->i1, "");
5512 /* true = don't kill. */
5513 LLVMBuildStore(ctx.ac.builder, ctx.ac.i1true, ctx.ac.postponed_kill);
5514 }
5515
5516 visit_cf_list(&ctx, &func->impl->body);
5517 phi_post_pass(&ctx);
5518
5519 if (ctx.ac.postponed_kill)
5520 ac_build_kill_if_false(&ctx.ac, LLVMBuildLoad(ctx.ac.builder,
5521 ctx.ac.postponed_kill, ""));
5522
5523 if (!gl_shader_stage_is_compute(nir->info.stage))
5524 ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
5525 ctx.abi->outputs);
5526
5527 free(ctx.locals);
5528 free(ctx.ssa_defs);
5529 ralloc_free(ctx.defs);
5530 ralloc_free(ctx.phis);
5531 ralloc_free(ctx.vars);
5532 if (ctx.abi->kill_ps_if_inf_interp)
5533 ralloc_free(ctx.verified_interp);
5534 }
5535
5536 bool
5537 ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
5538 {
5539 bool progress = false;
5540
5541 /* Lower large variables to scratch first so that we won't bloat the
5542 * shader by generating large if ladders for them. We later lower
5543 * scratch to allocas, assuming LLVM won't generate VGPR indexing.
5544 */
5545 NIR_PASS(progress, nir, nir_lower_vars_to_scratch,
5546 nir_var_function_temp,
5547 256,
5548 glsl_get_natural_size_align_bytes);
5549
5550 /* While it would be nice not to have this flag, we are constrained
5551 * by the reality that LLVM 9.0 has buggy VGPR indexing on GFX9.
5552 */
5553 bool llvm_has_working_vgpr_indexing = chip_class != GFX9;
5554
5555 /* TODO: Indirect indexing of GS inputs is unimplemented.
5556 *
5557 * TCS and TES load inputs directly from LDS or offchip memory, so
5558 * indirect indexing is trivial.
5559 */
5560 nir_variable_mode indirect_mask = 0;
5561 if (nir->info.stage == MESA_SHADER_GEOMETRY ||
5562 (nir->info.stage != MESA_SHADER_TESS_CTRL &&
5563 nir->info.stage != MESA_SHADER_TESS_EVAL &&
5564 !llvm_has_working_vgpr_indexing)) {
5565 indirect_mask |= nir_var_shader_in;
5566 }
5567 if (!llvm_has_working_vgpr_indexing &&
5568 nir->info.stage != MESA_SHADER_TESS_CTRL)
5569 indirect_mask |= nir_var_shader_out;
5570
5571 /* TODO: We shouldn't need to do this; however, LLVM isn't currently
5572 * smart enough to handle indirects without excess spilling, which
5573 * causes the GPU to hang.
5574 *
5575 * See the following thread for more details of the problem:
5576 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
5577 */
5578 indirect_mask |= nir_var_function_temp;
5579
5580 progress |= nir_lower_indirect_derefs(nir, indirect_mask, UINT32_MAX);
5581 return progress;
5582 }
5583
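/* Helper for ac_are_tessfactors_def_in_all_invocs below: the combined
* writemask uses bits [3:0] for gl_TessLevelInner and bits [7:4] for
* gl_TessLevelOuter, e.g. writing outer components x and y yields
* 0x3 << 4 = 0x30.
*/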
5584 static unsigned
5585 get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
5586 {
5587 if (intrin->intrinsic != nir_intrinsic_store_output)
5588 return 0;
5589
5590 unsigned writemask = nir_intrinsic_write_mask(intrin) <<
5591 nir_intrinsic_component(intrin);
5592 unsigned location = nir_intrinsic_io_semantics(intrin).location;
5593
5594 if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
5595 return writemask << 4;
5596 else if (location == VARYING_SLOT_TESS_LEVEL_INNER)
5597 return writemask;
5598
5599 return 0;
5600 }
5601
5602 static void
5603 scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
5604 unsigned *cond_block_tf_writemask,
5605 bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
5606 {
5607 switch (cf_node->type) {
5608 case nir_cf_node_block: {
5609 nir_block *block = nir_cf_node_as_block(cf_node);
5610 nir_foreach_instr(instr, block) {
5611 if (instr->type != nir_instr_type_intrinsic)
5612 continue;
5613
5614 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
5615 if (intrin->intrinsic == nir_intrinsic_control_barrier) {
5616
5617 /* If we find a barrier in nested control flow, put this in the
5618 * too-hard basket. In GLSL this is not possible, but it is in
5619 * SPIR-V.
5620 */
5621 if (is_nested_cf) {
5622 *tessfactors_are_def_in_all_invocs = false;
5623 return;
5624 }
5625
5626 /* The following case must be prevented:
5627 * gl_TessLevelInner = ...;
5628 * barrier();
5629 * if (gl_InvocationID == 1)
5630 * gl_TessLevelInner = ...;
5631 *
5632 * If you consider disjoint code segments separated by barriers, each
5633 * such segment that writes tess factor channels should write the same
5634 * channels in all codepaths within that segment.
5635 */
5636 if (upper_block_tf_writemask || cond_block_tf_writemask) {
5637 /* Accumulate the result: */
5638 *tessfactors_are_def_in_all_invocs &=
5639 !(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));
5640
5641 /* Analyze the next code segment from scratch. */
5642 *upper_block_tf_writemask = 0;
5643 *cond_block_tf_writemask = 0;
5644 }
5645 } else
5646 *upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
5647 }
5648
5649 break;
5650 }
5651 case nir_cf_node_if: {
5652 unsigned then_tessfactor_writemask = 0;
5653 unsigned else_tessfactor_writemask = 0;
5654
5655 nir_if *if_stmt = nir_cf_node_as_if(cf_node);
5656 foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) {
5657 scan_tess_ctrl(nested_node, &then_tessfactor_writemask,
5658 cond_block_tf_writemask,
5659 tessfactors_are_def_in_all_invocs, true);
5660 }
5661
5662 foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) {
5663 scan_tess_ctrl(nested_node, &else_tessfactor_writemask,
5664 cond_block_tf_writemask,
5665 tessfactors_are_def_in_all_invocs, true);
5666 }
5667
5668 if (then_tessfactor_writemask || else_tessfactor_writemask) {
5669 /* If both statements write the same tess factor channels,
5670 * we can say that the upper block writes them too.
5671 */
5672 *upper_block_tf_writemask |= then_tessfactor_writemask &
5673 else_tessfactor_writemask;
5674 *cond_block_tf_writemask |= then_tessfactor_writemask |
5675 else_tessfactor_writemask;
5676 }
5677
5678 break;
5679 }
5680 case nir_cf_node_loop: {
5681 nir_loop *loop = nir_cf_node_as_loop(cf_node);
5682 foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
5683 scan_tess_ctrl(nested_node, cond_block_tf_writemask,
5684 cond_block_tf_writemask,
5685 tessfactors_are_def_in_all_invocs, true);
5686 }
5687
5688 break;
5689 }
5690 default:
5691 unreachable("unknown cf node type");
5692 }
5693 }
5694
5695 bool
5696 ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
5697 {
5698 assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
5699
5700 /* The pass works as follows:
5701 * If all codepaths write tess factors, we can say that all
5702 * invocations define tess factors.
5703 *
5704 * Each tess factor channel is tracked separately.
5705 */
5706 unsigned main_block_tf_writemask = 0; /* which tess factors the main block writes */
5707 unsigned cond_block_tf_writemask = 0; /* which tess factors conditional blocks write */
5708
5709 /* Initial value = true. Here the pass will accumulate results from
5710 * multiple segments surrounded by barriers. If tess factors aren't
5711 * written at all, it's a shader bug and we don't care if this will be
5712 * true.
5713 */
5714 bool tessfactors_are_def_in_all_invocs = true;
5715
5716 nir_foreach_function(function, nir) {
5717 if (function->impl) {
5718 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
5719 scan_tess_ctrl(node, &main_block_tf_writemask,
5720 &cond_block_tf_writemask,
5721 &tessfactors_are_def_in_all_invocs,
5722 false);
5723 }
5724 }
5725 }
5726
5727 /* Accumulate the result for the last code segment separated by a
5728 * barrier.
5729 */
5730 if (main_block_tf_writemask || cond_block_tf_writemask) {
5731 tessfactors_are_def_in_all_invocs &=
5732 !(cond_block_tf_writemask & ~main_block_tf_writemask);
5733 }
5734
5735 return tessfactors_are_def_in_all_invocs;
5736 }