3d2f738edec417efc7291afb13e0e86079575bdf
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_build.h"
26 #include "ac_llvm_util.h"
27 #include "ac_binary.h"
28 #include "sid.h"
29 #include "nir/nir.h"
30 #include "nir/nir_deref.h"
31 #include "util/bitscan.h"
32 #include "util/u_math.h"
33 #include "ac_shader_abi.h"
34 #include "ac_shader_util.h"
35
/* Per-shader state used while translating a NIR shader into LLVM IR. */
struct ac_nir_context {
	struct ac_llvm_context ac;
	struct ac_shader_abi *abi;

	/* Shader stage currently being translated. */
	gl_shader_stage stage;

	/* Flat array mapping nir_ssa_def::index -> emitted LLVM value
	 * (see get_src()). */
	LLVMValueRef *ssa_defs;

	/* nir_block -> LLVMBasicBlockRef (see get_block()). */
	struct hash_table *defs;
	struct hash_table *phis;
	struct hash_table *vars;

	LLVMValueRef main_function;
	/* Jump targets for the innermost loop being translated. */
	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	int num_locals;
	LLVMValueRef *locals;
};
55
/* Forward declaration; defined later in this file. */
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
				     nir_deref_instr *deref_instr,
				     enum ac_descriptor_type desc_type,
				     const nir_tex_instr *instr,
				     bool image, bool write);
61
62 static void
63 build_store_values_extended(struct ac_llvm_context *ac,
64 LLVMValueRef *values,
65 unsigned value_count,
66 unsigned value_stride,
67 LLVMValueRef vec)
68 {
69 LLVMBuilderRef builder = ac->builder;
70 unsigned i;
71
72 for (i = 0; i < value_count; i++) {
73 LLVMValueRef ptr = values[i * value_stride];
74 LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
75 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
76 LLVMBuildStore(builder, value, ptr);
77 }
78 }
79
80 static enum ac_image_dim
81 get_ac_sampler_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim dim,
82 bool is_array)
83 {
84 switch (dim) {
85 case GLSL_SAMPLER_DIM_1D:
86 if (ctx->chip_class >= GFX9)
87 return is_array ? ac_image_2darray : ac_image_2d;
88 return is_array ? ac_image_1darray : ac_image_1d;
89 case GLSL_SAMPLER_DIM_2D:
90 case GLSL_SAMPLER_DIM_RECT:
91 case GLSL_SAMPLER_DIM_EXTERNAL:
92 return is_array ? ac_image_2darray : ac_image_2d;
93 case GLSL_SAMPLER_DIM_3D:
94 return ac_image_3d;
95 case GLSL_SAMPLER_DIM_CUBE:
96 return ac_image_cube;
97 case GLSL_SAMPLER_DIM_MS:
98 return is_array ? ac_image_2darraymsaa : ac_image_2dmsaa;
99 case GLSL_SAMPLER_DIM_SUBPASS:
100 return ac_image_2darray;
101 case GLSL_SAMPLER_DIM_SUBPASS_MS:
102 return ac_image_2darraymsaa;
103 default:
104 unreachable("bad sampler dim");
105 }
106 }
107
108 static enum ac_image_dim
109 get_ac_image_dim(const struct ac_llvm_context *ctx, enum glsl_sampler_dim sdim,
110 bool is_array)
111 {
112 enum ac_image_dim dim = get_ac_sampler_dim(ctx, sdim, is_array);
113
114 if (dim == ac_image_cube ||
115 (ctx->chip_class <= VI && dim == ac_image_3d))
116 dim = ac_image_2darray;
117
118 return dim;
119 }
120
121 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
122 const nir_ssa_def *def)
123 {
124 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
125 if (def->num_components > 1) {
126 type = LLVMVectorType(type, def->num_components);
127 }
128 return type;
129 }
130
/* Returns the LLVM value previously emitted for an SSA source. */
static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
	assert(src.is_ssa);
	return nir->ssa_defs[src.ssa->index];
}
136
/* Builds an i32* into LDS for the address in |src|: GEPs ctx->ac.lds by the
 * source value, then bitcasts the result to an i32 pointer in the same
 * address space. */
static LLVMValueRef
get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
{
	LLVMValueRef ptr = get_src(ctx, src);
	ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));

	return LLVMBuildBitCast(ctx->ac.builder, ptr,
				LLVMPointerType(ctx->ac.i32, addr_space), "");
}
147
148 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
149 const struct nir_block *b)
150 {
151 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
152 return (LLVMBasicBlockRef)entry->data;
153 }
154
/* Fetches an ALU source and applies its swizzle, returning a value with
 * exactly |num_components| components. */
static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
				nir_alu_src src,
				unsigned num_components)
{
	LLVMValueRef value = get_src(ctx, src.src);
	bool need_swizzle = false;

	assert(value);
	unsigned src_components = ac_get_llvm_num_components(value);
	/* An identity swizzle on a matching component count needs no work. */
	for (unsigned i = 0; i < num_components; ++i) {
		assert(src.swizzle[i] < src_components);
		if (src.swizzle[i] != i)
			need_swizzle = true;
	}

	if (need_swizzle || num_components != src_components) {
		LLVMValueRef masks[] = {
		    LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
		    LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
		    LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
		    LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};

		if (src_components > 1 && num_components == 1) {
			/* Vector -> scalar: extract the swizzled element. */
			value = LLVMBuildExtractElement(ctx->ac.builder, value,
							masks[0], "");
		} else if (src_components == 1 && num_components > 1) {
			/* Scalar -> vector: replicate the value. */
			LLVMValueRef values[] = {value, value, value, value};
			value = ac_build_gather_values(&ctx->ac, values, num_components);
		} else {
			/* General case: shuffle per the swizzle. */
			LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
			value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
		                                       swizzle, "");
		}
	}
	/* Source modifiers are expected to have been lowered in NIR. */
	assert(!src.negate);
	assert(!src.abs);
	return value;
}
193
194 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
195 LLVMIntPredicate pred, LLVMValueRef src0,
196 LLVMValueRef src1)
197 {
198 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
199 return LLVMBuildSelect(ctx->builder, result,
200 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
201 ctx->i32_0, "");
202 }
203
204 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
205 LLVMRealPredicate pred, LLVMValueRef src0,
206 LLVMValueRef src1)
207 {
208 LLVMValueRef result;
209 src0 = ac_to_float(ctx, src0);
210 src1 = ac_to_float(ctx, src1);
211 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
212 return LLVMBuildSelect(ctx->builder, result,
213 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
214 ctx->i32_0, "");
215 }
216
217 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
218 const char *intrin,
219 LLVMTypeRef result_type,
220 LLVMValueRef src0)
221 {
222 char name[64];
223 LLVMValueRef params[] = {
224 ac_to_float(ctx, src0),
225 };
226
227 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
228 ac_get_elem_bits(ctx, result_type));
229 assert(length < sizeof(name));
230 return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
231 }
232
233 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
234 const char *intrin,
235 LLVMTypeRef result_type,
236 LLVMValueRef src0, LLVMValueRef src1)
237 {
238 char name[64];
239 LLVMValueRef params[] = {
240 ac_to_float(ctx, src0),
241 ac_to_float(ctx, src1),
242 };
243
244 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
245 ac_get_elem_bits(ctx, result_type));
246 assert(length < sizeof(name));
247 return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
248 }
249
250 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
251 const char *intrin,
252 LLVMTypeRef result_type,
253 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
254 {
255 char name[64];
256 LLVMValueRef params[] = {
257 ac_to_float(ctx, src0),
258 ac_to_float(ctx, src1),
259 ac_to_float(ctx, src2),
260 };
261
262 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
263 ac_get_elem_bits(ctx, result_type));
264 assert(length < sizeof(name));
265 return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
266 }
267
268 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
269 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
270 {
271 assert(LLVMGetTypeKind(LLVMTypeOf(src0)) != LLVMVectorTypeKind);
272
273 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
274 ctx->i32_0, "");
275 return LLVMBuildSelect(ctx->builder, v,
276 ac_to_integer_or_pointer(ctx, src1),
277 ac_to_integer_or_pointer(ctx, src2), "");
278 }
279
280 static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
281 LLVMIntPredicate pred,
282 LLVMValueRef src0, LLVMValueRef src1)
283 {
284 return LLVMBuildSelect(ctx->builder,
285 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
286 src0,
287 src1, "");
288
289 }
290 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
291 LLVMValueRef src0)
292 {
293 return emit_minmax_int(ctx, LLVMIntSGT, src0,
294 LLVMBuildNeg(ctx->builder, src0, ""));
295 }
296
297 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
298 const char *intrin,
299 LLVMValueRef src0, LLVMValueRef src1)
300 {
301 LLVMTypeRef ret_type;
302 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
303 LLVMValueRef res;
304 LLVMValueRef params[] = { src0, src1 };
305 ret_type = LLVMStructTypeInContext(ctx->context, types,
306 2, true);
307
308 res = ac_build_intrinsic(ctx, intrin, ret_type,
309 params, 2, AC_FUNC_ATTR_READNONE);
310
311 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
312 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
313 return res;
314 }
315
/* Converts a NIR 32-bit boolean (0 / ~0u) to a float 0.0 / 1.0 of the
 * requested bit size.  ANDing the mask with the bit pattern of 1.0f yields
 * either 0x0 (0.0f) or 0x3f800000 (1.0f) without a branch. */
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
			     LLVMValueRef src0,
			     unsigned bitsize)
{
	LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0,
					   LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
					   "");
	result = LLVMBuildBitCast(ctx->builder, result, ctx->f32, "");

	/* The mask trick above works at 32 bits; convert to the final size. */
	switch (bitsize) {
	case 16:
		return LLVMBuildFPTrunc(ctx->builder, result, ctx->f16, "");
	case 32:
		return result;
	case 64:
		return LLVMBuildFPExt(ctx->builder, result, ctx->f64, "");
	default:
		unreachable("Unsupported bit size.");
	}
}
336
337 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
338 LLVMValueRef src0)
339 {
340 src0 = ac_to_float(ctx, src0);
341 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
342 return LLVMBuildSExt(ctx->builder,
343 LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
344 ctx->i32, "");
345 }
346
347 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
348 LLVMValueRef src0,
349 unsigned bitsize)
350 {
351 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
352
353 switch (bitsize) {
354 case 8:
355 return LLVMBuildTrunc(ctx->builder, result, ctx->i8, "");
356 case 16:
357 return LLVMBuildTrunc(ctx->builder, result, ctx->i16, "");
358 case 32:
359 return result;
360 case 64:
361 return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
362 default:
363 unreachable("Unsupported bit size.");
364 }
365 }
366
367 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
368 LLVMValueRef src0)
369 {
370 LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
371 return LLVMBuildSExt(ctx->builder,
372 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
373 ctx->i32, "");
374 }
375
/* Quantizes a float to half precision and returns it widened back to f32,
 * flushing half-float denormals to zero.  VI+ can classify the f16 result
 * directly; SI/CIK approximate the check by comparing the magnitude against
 * the smallest normal half-float. */
static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
			       LLVMValueRef src0)
{
	LLVMValueRef result;
	LLVMValueRef cond = NULL;

	src0 = ac_to_float(ctx, src0);
	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

	if (ctx->chip_class >= VI) {
		LLVMValueRef args[2];
		/* Check if the result is a denormal - and flush to 0 if so. */
		args[0] = result;
		args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
		cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
	}

	/* need to convert back up to f32 */
	result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");

	if (ctx->chip_class >= VI)
		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
	else {
		/* for SI/CIK */
		/* 0x38800000 is smallest half float value (2^-14) in 32-bit float,
		 * so compare the result and flush to 0 if it's smaller.
		 */
		LLVMValueRef temp, cond2;
		temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
		cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
				     LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
				     temp, "");
		cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
				      temp, ctx->f32_0, "");
		/* Flush only when |result| is below the threshold AND non-zero. */
		cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
	}
	return result;
}
415
416 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
417 LLVMValueRef src0, LLVMValueRef src1)
418 {
419 LLVMValueRef dst64, result;
420 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
421 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
422
423 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
424 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
425 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
426 return result;
427 }
428
429 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
430 LLVMValueRef src0, LLVMValueRef src1)
431 {
432 LLVMValueRef dst64, result;
433 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
434 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
435
436 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
437 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
438 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
439 return result;
440 }
441
/* Lowers nir_op_{i,u}bitfield_extract: pull |srcs[2]| bits starting at bit
 * offset |srcs[1]| out of |srcs[0]|, sign- or zero-extended per
 * |is_signed|.  A width of 32 must return the source unchanged, so it is
 * selected around the BFE; pre-8.0 LLVM additionally needs width == 0
 * forced to zero (see the FIXME below). */
static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
					  bool is_signed,
					  const LLVMValueRef srcs[3])
{
	LLVMValueRef result;

	if (HAVE_LLVM >= 0x0800) {
		LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
		result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
		result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
	} else {
		/* FIXME: LLVM 7+ returns incorrect result when count is 0.
		 * https://bugs.freedesktop.org/show_bug.cgi?id=107276
		 */
		LLVMValueRef zero = ctx->i32_0;
		LLVMValueRef icond1 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
		LLVMValueRef icond2 = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], zero, "");

		result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
		result = LLVMBuildSelect(ctx->builder, icond1, srcs[0], result, "");
		result = LLVMBuildSelect(ctx->builder, icond2, zero, result, "");
	}

	return result;
}
467
/* Lowers nir_op_bitfield_insert: insert the low |src3| bits of |src1| into
 * |src0| at bit offset |src2|.  Builds the mask ((1 << src3) - 1) << src2
 * and blends using the XOR identity below so the backend can match V_BFI;
 * a width of 32 simply selects |src1| (the shifted mask would be wrong). */
static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
					 LLVMValueRef src0, LLVMValueRef src1,
					 LLVMValueRef src2, LLVMValueRef src3)
{
	LLVMValueRef bfi_args[3], result;

	/* bfi_args[0] = ((1 << width) - 1) << offset  (the insertion mask) */
	bfi_args[0] = LLVMBuildShl(ctx->builder,
				   LLVMBuildSub(ctx->builder,
						LLVMBuildShl(ctx->builder,
							     ctx->i32_1,
							     src3, ""),
						ctx->i32_1, ""),
				   src2, "");
	/* bfi_args[1] = the inserted bits, moved into position */
	bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
	bfi_args[2] = src0;

	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");

	/* Calculate:
	 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	result = LLVMBuildXor(ctx->builder, bfi_args[2],
			      LLVMBuildAnd(ctx->builder, bfi_args[0],
					   LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");

	result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
	return result;
}
497
498 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
499 LLVMValueRef src0)
500 {
501 LLVMValueRef comp[2];
502
503 src0 = ac_to_float(ctx, src0);
504 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
505 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
506
507 return LLVMBuildBitCast(ctx->builder, ac_build_cvt_pkrtz_f16(ctx, comp),
508 ctx->i32, "");
509 }
510
511 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
512 LLVMValueRef src0)
513 {
514 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
515 LLVMValueRef temps[2], val;
516 int i;
517
518 for (i = 0; i < 2; i++) {
519 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
520 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
521 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
522 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
523 }
524 return ac_build_gather_values(ctx, temps, 2);
525 }
526
527 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
528 nir_op op,
529 LLVMValueRef src0)
530 {
531 unsigned mask;
532 int idx;
533 LLVMValueRef result;
534
535 if (op == nir_op_fddx_fine)
536 mask = AC_TID_MASK_LEFT;
537 else if (op == nir_op_fddy_fine)
538 mask = AC_TID_MASK_TOP;
539 else
540 mask = AC_TID_MASK_TOP_LEFT;
541
542 /* for DDX we want to next X pixel, DDY next Y pixel. */
543 if (op == nir_op_fddx_fine ||
544 op == nir_op_fddx_coarse ||
545 op == nir_op_fddx)
546 idx = 1;
547 else
548 idx = 2;
549
550 result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
551 return result;
552 }
553
554 /*
555 * this takes an I,J coordinate pair,
556 * and works out the X and Y derivatives.
557 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
558 */
559 static LLVMValueRef emit_ddxy_interp(
560 struct ac_nir_context *ctx,
561 LLVMValueRef interp_ij)
562 {
563 LLVMValueRef result[4], a;
564 unsigned i;
565
566 for (i = 0; i < 2; i++) {
567 a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
568 LLVMConstInt(ctx->ac.i32, i, false), "");
569 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
570 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
571 }
572 return ac_build_gather_values(&ctx->ac, result, 4);
573 }
574
575 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
576 {
577 LLVMValueRef src[4], result = NULL;
578 unsigned num_components = instr->dest.dest.ssa.num_components;
579 unsigned src_components;
580 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
581
582 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
583 switch (instr->op) {
584 case nir_op_vec2:
585 case nir_op_vec3:
586 case nir_op_vec4:
587 src_components = 1;
588 break;
589 case nir_op_pack_half_2x16:
590 src_components = 2;
591 break;
592 case nir_op_unpack_half_2x16:
593 src_components = 1;
594 break;
595 case nir_op_cube_face_coord:
596 case nir_op_cube_face_index:
597 src_components = 3;
598 break;
599 default:
600 src_components = num_components;
601 break;
602 }
603 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
604 src[i] = get_alu_src(ctx, instr->src[i], src_components);
605
606 switch (instr->op) {
607 case nir_op_fmov:
608 case nir_op_imov:
609 result = src[0];
610 break;
611 case nir_op_fneg:
612 src[0] = ac_to_float(&ctx->ac, src[0]);
613 result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
614 break;
615 case nir_op_ineg:
616 result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
617 break;
618 case nir_op_inot:
619 result = LLVMBuildNot(ctx->ac.builder, src[0], "");
620 break;
621 case nir_op_iadd:
622 result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
623 break;
624 case nir_op_fadd:
625 src[0] = ac_to_float(&ctx->ac, src[0]);
626 src[1] = ac_to_float(&ctx->ac, src[1]);
627 result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
628 break;
629 case nir_op_fsub:
630 src[0] = ac_to_float(&ctx->ac, src[0]);
631 src[1] = ac_to_float(&ctx->ac, src[1]);
632 result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
633 break;
634 case nir_op_isub:
635 result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
636 break;
637 case nir_op_imul:
638 result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
639 break;
640 case nir_op_imod:
641 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
642 break;
643 case nir_op_umod:
644 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
645 break;
646 case nir_op_fmod:
647 src[0] = ac_to_float(&ctx->ac, src[0]);
648 src[1] = ac_to_float(&ctx->ac, src[1]);
649 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
650 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
651 ac_to_float_type(&ctx->ac, def_type), result);
652 result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, "");
653 result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
654 break;
655 case nir_op_frem:
656 src[0] = ac_to_float(&ctx->ac, src[0]);
657 src[1] = ac_to_float(&ctx->ac, src[1]);
658 result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
659 break;
660 case nir_op_irem:
661 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
662 break;
663 case nir_op_idiv:
664 result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
665 break;
666 case nir_op_udiv:
667 result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
668 break;
669 case nir_op_fmul:
670 src[0] = ac_to_float(&ctx->ac, src[0]);
671 src[1] = ac_to_float(&ctx->ac, src[1]);
672 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
673 break;
674 case nir_op_frcp:
675 src[0] = ac_to_float(&ctx->ac, src[0]);
676 result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(src[0]), 1.0), src[0]);
677 break;
678 case nir_op_iand:
679 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
680 break;
681 case nir_op_ior:
682 result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
683 break;
684 case nir_op_ixor:
685 result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
686 break;
687 case nir_op_ishl:
688 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
689 src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
690 LLVMTypeOf(src[0]), "");
691 else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
692 src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
693 LLVMTypeOf(src[0]), "");
694 result = LLVMBuildShl(ctx->ac.builder, src[0], src[1], "");
695 break;
696 case nir_op_ishr:
697 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
698 src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
699 LLVMTypeOf(src[0]), "");
700 else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
701 src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
702 LLVMTypeOf(src[0]), "");
703 result = LLVMBuildAShr(ctx->ac.builder, src[0], src[1], "");
704 break;
705 case nir_op_ushr:
706 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) < ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
707 src[1] = LLVMBuildZExt(ctx->ac.builder, src[1],
708 LLVMTypeOf(src[0]), "");
709 else if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[1])) > ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])))
710 src[1] = LLVMBuildTrunc(ctx->ac.builder, src[1],
711 LLVMTypeOf(src[0]), "");
712 result = LLVMBuildLShr(ctx->ac.builder, src[0], src[1], "");
713 break;
714 case nir_op_ilt32:
715 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
716 break;
717 case nir_op_ine32:
718 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
719 break;
720 case nir_op_ieq32:
721 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
722 break;
723 case nir_op_ige32:
724 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
725 break;
726 case nir_op_ult32:
727 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
728 break;
729 case nir_op_uge32:
730 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
731 break;
732 case nir_op_feq32:
733 result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
734 break;
735 case nir_op_fne32:
736 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
737 break;
738 case nir_op_flt32:
739 result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
740 break;
741 case nir_op_fge32:
742 result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
743 break;
744 case nir_op_fabs:
745 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
746 ac_to_float_type(&ctx->ac, def_type), src[0]);
747 break;
748 case nir_op_iabs:
749 result = emit_iabs(&ctx->ac, src[0]);
750 break;
751 case nir_op_imax:
752 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
753 break;
754 case nir_op_imin:
755 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
756 break;
757 case nir_op_umax:
758 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
759 break;
760 case nir_op_umin:
761 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
762 break;
763 case nir_op_isign:
764 result = ac_build_isign(&ctx->ac, src[0],
765 instr->dest.dest.ssa.bit_size);
766 break;
767 case nir_op_fsign:
768 src[0] = ac_to_float(&ctx->ac, src[0]);
769 result = ac_build_fsign(&ctx->ac, src[0],
770 instr->dest.dest.ssa.bit_size);
771 break;
772 case nir_op_ffloor:
773 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
774 ac_to_float_type(&ctx->ac, def_type), src[0]);
775 break;
776 case nir_op_ftrunc:
777 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
778 ac_to_float_type(&ctx->ac, def_type), src[0]);
779 break;
780 case nir_op_fceil:
781 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
782 ac_to_float_type(&ctx->ac, def_type), src[0]);
783 break;
784 case nir_op_fround_even:
785 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
786 ac_to_float_type(&ctx->ac, def_type),src[0]);
787 break;
788 case nir_op_ffract:
789 src[0] = ac_to_float(&ctx->ac, src[0]);
790 result = ac_build_fract(&ctx->ac, src[0],
791 instr->dest.dest.ssa.bit_size);
792 break;
793 case nir_op_fsin:
794 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
795 ac_to_float_type(&ctx->ac, def_type), src[0]);
796 break;
797 case nir_op_fcos:
798 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
799 ac_to_float_type(&ctx->ac, def_type), src[0]);
800 break;
801 case nir_op_fsqrt:
802 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
803 ac_to_float_type(&ctx->ac, def_type), src[0]);
804 break;
805 case nir_op_fexp2:
806 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
807 ac_to_float_type(&ctx->ac, def_type), src[0]);
808 break;
809 case nir_op_flog2:
810 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
811 ac_to_float_type(&ctx->ac, def_type), src[0]);
812 break;
813 case nir_op_frsq:
814 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
815 ac_to_float_type(&ctx->ac, def_type), src[0]);
816 result = ac_build_fdiv(&ctx->ac, LLVMConstReal(LLVMTypeOf(result), 1.0), result);
817 break;
818 case nir_op_frexp_exp:
819 src[0] = ac_to_float(&ctx->ac, src[0]);
820 result = ac_build_frexp_exp(&ctx->ac, src[0],
821 ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])));
822 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 16)
823 result = LLVMBuildSExt(ctx->ac.builder, result,
824 ctx->ac.i32, "");
825 break;
826 case nir_op_frexp_sig:
827 src[0] = ac_to_float(&ctx->ac, src[0]);
828 result = ac_build_frexp_mant(&ctx->ac, src[0],
829 instr->dest.dest.ssa.bit_size);
830 break;
831 case nir_op_fpow:
832 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
833 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
834 break;
835 case nir_op_fmax:
836 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
837 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
838 if (ctx->ac.chip_class < GFX9 &&
839 instr->dest.dest.ssa.bit_size == 32) {
840 /* Only pre-GFX9 chips do not flush denorms. */
841 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
842 ac_to_float_type(&ctx->ac, def_type),
843 result);
844 }
845 break;
846 case nir_op_fmin:
847 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
848 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
849 if (ctx->ac.chip_class < GFX9 &&
850 instr->dest.dest.ssa.bit_size == 32) {
851 /* Only pre-GFX9 chips do not flush denorms. */
852 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
853 ac_to_float_type(&ctx->ac, def_type),
854 result);
855 }
856 break;
857 case nir_op_ffma:
858 result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
859 ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
860 break;
861 case nir_op_ldexp:
862 src[0] = ac_to_float(&ctx->ac, src[0]);
863 if (ac_get_elem_bits(&ctx->ac, def_type) == 32)
864 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
865 else if (ac_get_elem_bits(&ctx->ac, def_type) == 16)
866 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f16", ctx->ac.f16, src, 2, AC_FUNC_ATTR_READNONE);
867 else
868 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
869 break;
870 case nir_op_ibitfield_extract:
871 result = emit_bitfield_extract(&ctx->ac, true, src);
872 break;
873 case nir_op_ubitfield_extract:
874 result = emit_bitfield_extract(&ctx->ac, false, src);
875 break;
876 case nir_op_bitfield_insert:
877 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
878 break;
879 case nir_op_bitfield_reverse:
880 result = ac_build_bitfield_reverse(&ctx->ac, src[0]);
881 break;
882 case nir_op_bit_count:
883 result = ac_build_bit_count(&ctx->ac, src[0]);
884 break;
885 case nir_op_vec2:
886 case nir_op_vec3:
887 case nir_op_vec4:
888 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
889 src[i] = ac_to_integer(&ctx->ac, src[i]);
890 result = ac_build_gather_values(&ctx->ac, src, num_components);
891 break;
892 case nir_op_f2i8:
893 case nir_op_f2i16:
894 case nir_op_f2i32:
895 case nir_op_f2i64:
896 src[0] = ac_to_float(&ctx->ac, src[0]);
897 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
898 break;
899 case nir_op_f2u8:
900 case nir_op_f2u16:
901 case nir_op_f2u32:
902 case nir_op_f2u64:
903 src[0] = ac_to_float(&ctx->ac, src[0]);
904 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
905 break;
906 case nir_op_i2f16:
907 case nir_op_i2f32:
908 case nir_op_i2f64:
909 src[0] = ac_to_integer(&ctx->ac, src[0]);
910 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
911 break;
912 case nir_op_u2f16:
913 case nir_op_u2f32:
914 case nir_op_u2f64:
915 src[0] = ac_to_integer(&ctx->ac, src[0]);
916 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
917 break;
918 case nir_op_f2f16_rtz:
919 src[0] = ac_to_float(&ctx->ac, src[0]);
920 if (LLVMTypeOf(src[0]) == ctx->ac.f64)
921 src[0] = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ctx->ac.f32, "");
922 LLVMValueRef param[2] = { src[0], ctx->ac.f32_0 };
923 result = ac_build_cvt_pkrtz_f16(&ctx->ac, param);
924 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
925 break;
926 case nir_op_f2f16_rtne:
927 case nir_op_f2f16:
928 case nir_op_f2f32:
929 case nir_op_f2f64:
930 src[0] = ac_to_float(&ctx->ac, src[0]);
931 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
932 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
933 else
934 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
935 break;
936 case nir_op_u2u8:
937 case nir_op_u2u16:
938 case nir_op_u2u32:
939 case nir_op_u2u64:
940 src[0] = ac_to_integer(&ctx->ac, src[0]);
941 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
942 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
943 else
944 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
945 break;
946 case nir_op_i2i8:
947 case nir_op_i2i16:
948 case nir_op_i2i32:
949 case nir_op_i2i64:
950 src[0] = ac_to_integer(&ctx->ac, src[0]);
951 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
952 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
953 else
954 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
955 break;
956 case nir_op_b32csel:
957 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
958 break;
959 case nir_op_find_lsb:
960 src[0] = ac_to_integer(&ctx->ac, src[0]);
961 result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
962 break;
963 case nir_op_ufind_msb:
964 src[0] = ac_to_integer(&ctx->ac, src[0]);
965 result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
966 break;
967 case nir_op_ifind_msb:
968 src[0] = ac_to_integer(&ctx->ac, src[0]);
969 result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
970 break;
971 case nir_op_uadd_carry:
972 src[0] = ac_to_integer(&ctx->ac, src[0]);
973 src[1] = ac_to_integer(&ctx->ac, src[1]);
974 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
975 break;
976 case nir_op_usub_borrow:
977 src[0] = ac_to_integer(&ctx->ac, src[0]);
978 src[1] = ac_to_integer(&ctx->ac, src[1]);
979 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
980 break;
981 case nir_op_b2f16:
982 case nir_op_b2f32:
983 case nir_op_b2f64:
984 result = emit_b2f(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
985 break;
986 case nir_op_f2b32:
987 result = emit_f2b(&ctx->ac, src[0]);
988 break;
989 case nir_op_b2i8:
990 case nir_op_b2i16:
991 case nir_op_b2i32:
992 case nir_op_b2i64:
993 result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
994 break;
995 case nir_op_i2b32:
996 src[0] = ac_to_integer(&ctx->ac, src[0]);
997 result = emit_i2b(&ctx->ac, src[0]);
998 break;
999 case nir_op_fquantize2f16:
1000 result = emit_f2f16(&ctx->ac, src[0]);
1001 break;
1002 case nir_op_umul_high:
1003 src[0] = ac_to_integer(&ctx->ac, src[0]);
1004 src[1] = ac_to_integer(&ctx->ac, src[1]);
1005 result = emit_umul_high(&ctx->ac, src[0], src[1]);
1006 break;
1007 case nir_op_imul_high:
1008 src[0] = ac_to_integer(&ctx->ac, src[0]);
1009 src[1] = ac_to_integer(&ctx->ac, src[1]);
1010 result = emit_imul_high(&ctx->ac, src[0], src[1]);
1011 break;
1012 case nir_op_pack_half_2x16:
1013 result = emit_pack_half_2x16(&ctx->ac, src[0]);
1014 break;
1015 case nir_op_unpack_half_2x16:
1016 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1017 break;
1018 case nir_op_fddx:
1019 case nir_op_fddy:
1020 case nir_op_fddx_fine:
1021 case nir_op_fddy_fine:
1022 case nir_op_fddx_coarse:
1023 case nir_op_fddy_coarse:
1024 result = emit_ddxy(ctx, instr->op, src[0]);
1025 break;
1026
1027 case nir_op_unpack_64_2x32_split_x: {
1028 assert(ac_get_llvm_num_components(src[0]) == 1);
1029 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1030 ctx->ac.v2i32,
1031 "");
1032 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1033 ctx->ac.i32_0, "");
1034 break;
1035 }
1036
1037 case nir_op_unpack_64_2x32_split_y: {
1038 assert(ac_get_llvm_num_components(src[0]) == 1);
1039 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1040 ctx->ac.v2i32,
1041 "");
1042 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1043 ctx->ac.i32_1, "");
1044 break;
1045 }
1046
1047 case nir_op_pack_64_2x32_split: {
1048 LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
1049 tmp = ac_build_gather_values(&ctx->ac, src, 2);
1050 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
1051 break;
1052 }
1053
1054 case nir_op_pack_32_2x16_split: {
1055 LLVMValueRef tmp = ac_build_gather_values(&ctx->ac, src, 2);
1056 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i32, "");
1057 break;
1058 }
1059
1060 case nir_op_unpack_32_2x16_split_x: {
1061 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1062 ctx->ac.v2i16,
1063 "");
1064 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1065 ctx->ac.i32_0, "");
1066 break;
1067 }
1068
1069 case nir_op_unpack_32_2x16_split_y: {
1070 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1071 ctx->ac.v2i16,
1072 "");
1073 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1074 ctx->ac.i32_1, "");
1075 break;
1076 }
1077
1078 case nir_op_cube_face_coord: {
1079 src[0] = ac_to_float(&ctx->ac, src[0]);
1080 LLVMValueRef results[2];
1081 LLVMValueRef in[3];
1082 for (unsigned chan = 0; chan < 3; chan++)
1083 in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1084 results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
1085 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1086 results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
1087 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1088 result = ac_build_gather_values(&ctx->ac, results, 2);
1089 break;
1090 }
1091
1092 case nir_op_cube_face_index: {
1093 src[0] = ac_to_float(&ctx->ac, src[0]);
1094 LLVMValueRef in[3];
1095 for (unsigned chan = 0; chan < 3; chan++)
1096 in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
1097 result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid",
1098 ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
1099 break;
1100 }
1101
1102 case nir_op_fmin3:
1103 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1104 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1105 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1106 ac_to_float_type(&ctx->ac, def_type), result, src[2]);
1107 break;
1108 case nir_op_umin3:
1109 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
1110 result = emit_minmax_int(&ctx->ac, LLVMIntULT, result, src[2]);
1111 break;
1112 case nir_op_imin3:
1113 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1114 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, result, src[2]);
1115 break;
1116 case nir_op_fmax3:
1117 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1118 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1119 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1120 ac_to_float_type(&ctx->ac, def_type), result, src[2]);
1121 break;
1122 case nir_op_umax3:
1123 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
1124 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, result, src[2]);
1125 break;
1126 case nir_op_imax3:
1127 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
1128 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, result, src[2]);
1129 break;
1130 case nir_op_fmed3: {
1131 src[0] = ac_to_float(&ctx->ac, src[0]);
1132 src[1] = ac_to_float(&ctx->ac, src[1]);
1133 src[2] = ac_to_float(&ctx->ac, src[2]);
1134 result = ac_build_fmed3(&ctx->ac, src[0], src[1], src[2],
1135 instr->dest.dest.ssa.bit_size);
1136 break;
1137 }
1138 case nir_op_imed3: {
1139 LLVMValueRef tmp1 = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1140 LLVMValueRef tmp2 = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
1141 tmp2 = emit_minmax_int(&ctx->ac, LLVMIntSLT, tmp2, src[2]);
1142 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, tmp1, tmp2);
1143 break;
1144 }
1145 case nir_op_umed3: {
1146 LLVMValueRef tmp1 = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
1147 LLVMValueRef tmp2 = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
1148 tmp2 = emit_minmax_int(&ctx->ac, LLVMIntULT, tmp2, src[2]);
1149 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, tmp1, tmp2);
1150 break;
1151 }
1152
1153 default:
1154 fprintf(stderr, "Unknown NIR alu instr: ");
1155 nir_print_instr(&instr->instr, stderr);
1156 fprintf(stderr, "\n");
1157 abort();
1158 }
1159
1160 if (result) {
1161 assert(instr->dest.dest.is_ssa);
1162 result = ac_to_integer_or_pointer(&ctx->ac, result);
1163 ctx->ssa_defs[instr->dest.dest.ssa.index] = result;
1164 }
1165 }
1166
1167 static void visit_load_const(struct ac_nir_context *ctx,
1168 const nir_load_const_instr *instr)
1169 {
1170 LLVMValueRef values[4], value = NULL;
1171 LLVMTypeRef element_type =
1172 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
1173
1174 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1175 switch (instr->def.bit_size) {
1176 case 8:
1177 values[i] = LLVMConstInt(element_type,
1178 instr->value.u8[i], false);
1179 break;
1180 case 16:
1181 values[i] = LLVMConstInt(element_type,
1182 instr->value.u16[i], false);
1183 break;
1184 case 32:
1185 values[i] = LLVMConstInt(element_type,
1186 instr->value.u32[i], false);
1187 break;
1188 case 64:
1189 values[i] = LLVMConstInt(element_type,
1190 instr->value.u64[i], false);
1191 break;
1192 default:
1193 fprintf(stderr,
1194 "unsupported nir load_const bit_size: %d\n",
1195 instr->def.bit_size);
1196 abort();
1197 }
1198 }
1199 if (instr->def.num_components > 1) {
1200 value = LLVMConstVector(values, instr->def.num_components);
1201 } else
1202 value = values[0];
1203
1204 ctx->ssa_defs[instr->def.index] = value;
1205 }
1206
1207 static LLVMValueRef
1208 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
1209 {
1210 LLVMValueRef size =
1211 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
1212 LLVMConstInt(ctx->ac.i32, 2, false), "");
1213
1214 /* VI only */
1215 if (ctx->ac.chip_class == VI && in_elements) {
1216 /* On VI, the descriptor contains the size in bytes,
1217 * but TXQ must return the size in elements.
1218 * The stride is always non-zero for resources using TXQ.
1219 */
1220 LLVMValueRef stride =
1221 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
1222 ctx->ac.i32_1, "");
1223 stride = LLVMBuildLShr(ctx->ac.builder, stride,
1224 LLVMConstInt(ctx->ac.i32, 16, false), "");
1225 stride = LLVMBuildAnd(ctx->ac.builder, stride,
1226 LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
1227
1228 size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
1229 }
1230 return size;
1231 }
1232
/* Workaround for gather4 on integer-format samplers (callers gate this on
 * chip_class <= VI).
 *
 * The main fix queries the texture size via resinfo and biases the x/y
 * coordinates by -0.5 texel before gathering.  Cube maps with an 8_8_8_8
 * integer DATA_FORMAT get an alternate path: the descriptor's NUM_FORMAT is
 * rewritten to the corresponding *SCALED type, the coordinates are left
 * unmodified, and the float results are converted back to integers after
 * the gather.
 */
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
					  nir_variable *var,
					  struct ac_image_args *args,
					  const nir_tex_instr *instr)
{
	const struct glsl_type *type = glsl_without_array(var->type);
	enum glsl_base_type stype = glsl_get_sampler_result_type(type);
	LLVMValueRef half_texel[2];
	LLVMValueRef compare_cube_wa = NULL;
	LLVMValueRef result;

	//TODO Rect
	{
		/* Query the texture size (resinfo) and compute
		 * half_texel[c] = -0.5 / size[c] for the x/y channels. */
		struct ac_image_args txq_args = { 0 };

		txq_args.dim = get_ac_sampler_dim(ctx, instr->sampler_dim, instr->is_array);
		txq_args.opcode = ac_image_get_resinfo;
		txq_args.dmask = 0xf;
		txq_args.lod = ctx->i32_0;
		txq_args.resource = args->resource;
		txq_args.attributes = AC_FUNC_ATTR_READNONE;
		LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);

		for (unsigned c = 0; c < 2; c++) {
			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
								LLVMConstInt(ctx->i32, c, false), "");
			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
			half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
						      LLVMConstReal(ctx->f32, -0.5), "");
		}
	}

	/* Keep the unbiased coordinates around; the cube path below selects
	 * them back in when its descriptor workaround is active. */
	LLVMValueRef orig_coords[2] = { args->coords[0], args->coords[1] };

	for (unsigned c = 0; c < 2; c++) {
		LLVMValueRef tmp;
		tmp = LLVMBuildBitCast(ctx->builder, args->coords[c], ctx->f32, "");
		args->coords[c] = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
	}

	/*
	 * Apparently cube has an issue with integer types that the workaround
	 * doesn't solve, so this tests if the format is 8_8_8_8 and an integer
	 * type and does an alternate workaround by sampling using a scaled
	 * type and converting.
	 * This is taken from amdgpu-pro shaders.
	 */
	/* NOTE this produces some ugly code compared to amdgpu-pro,
	 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
	 * and then reads them back. -pro generates two selects,
	 * one s_cmp for the descriptor rewriting
	 * one v_cmp for the coordinate and result changes.
	 */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
		LLVMValueRef tmp, tmp2;

		/* workaround 8/8/8/8 uint/sint cube gather bug */
		/* first detect it then change to a scaled read and f2i */
		tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
		tmp2 = tmp;

		/* extract the DATA_FORMAT field (bits [25:20] of word 1) */
		tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
				   LLVMConstInt(ctx->i32, 6, false), false);

		/* is the DATA_FORMAT == 8_8_8_8 */
		compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");

		if (stype == GLSL_TYPE_UINT)
			/* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
			tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
					      LLVMConstInt(ctx->i32, 0x10000000, false), "");
		else
			/* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
			tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
					      LLVMConstInt(ctx->i32, 0x14000000, false), "");

		/* replace the NUM FORMAT in the descriptor */
		tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
		tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");

		args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");

		/* don't modify the coordinates for this case */
		for (unsigned c = 0; c < 2; ++c)
			args->coords[c] = LLVMBuildSelect(
				ctx->builder, compare_cube_wa,
				orig_coords[c], args->coords[c], "");
	}

	args->attributes = AC_FUNC_ATTR_READNONE;
	result = ac_build_image_opcode(ctx, args);

	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
		LLVMValueRef tmp, tmp2;

		/* if the cube workaround is in place, f2i the result. */
		for (unsigned c = 0; c < 4; c++) {
			tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
			if (stype == GLSL_TYPE_UINT)
				tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
			else
				tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
			tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
			tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
			/* Select the converted value only where the workaround
			 * descriptor rewrite actually happened. */
			tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
			tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
			result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
		}
	}
	return result;
}
1345
1346 static nir_deref_instr *get_tex_texture_deref(const nir_tex_instr *instr)
1347 {
1348 nir_deref_instr *texture_deref_instr = NULL;
1349
1350 for (unsigned i = 0; i < instr->num_srcs; i++) {
1351 switch (instr->src[i].src_type) {
1352 case nir_tex_src_texture_deref:
1353 texture_deref_instr = nir_src_as_deref(instr->src[i].src);
1354 break;
1355 default:
1356 break;
1357 }
1358 }
1359 return texture_deref_instr;
1360 }
1361
1362 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
1363 const nir_tex_instr *instr,
1364 struct ac_image_args *args)
1365 {
1366 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
1367 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
1368
1369 if (ctx->abi->gfx9_stride_size_workaround) {
1370 return ac_build_buffer_load_format_gfx9_safe(&ctx->ac,
1371 args->resource,
1372 args->coords[0],
1373 ctx->ac.i32_0,
1374 util_last_bit(mask),
1375 false, true);
1376 } else {
1377 return ac_build_buffer_load_format(&ctx->ac,
1378 args->resource,
1379 args->coords[0],
1380 ctx->ac.i32_0,
1381 util_last_bit(mask),
1382 false, true);
1383 }
1384 }
1385
1386 args->opcode = ac_image_sample;
1387
1388 switch (instr->op) {
1389 case nir_texop_txf:
1390 case nir_texop_txf_ms:
1391 case nir_texop_samples_identical:
1392 args->opcode = args->level_zero ||
1393 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
1394 ac_image_load : ac_image_load_mip;
1395 args->level_zero = false;
1396 break;
1397 case nir_texop_txs:
1398 case nir_texop_query_levels:
1399 args->opcode = ac_image_get_resinfo;
1400 if (!args->lod)
1401 args->lod = ctx->ac.i32_0;
1402 args->level_zero = false;
1403 break;
1404 case nir_texop_tex:
1405 if (ctx->stage != MESA_SHADER_FRAGMENT) {
1406 assert(!args->lod);
1407 args->level_zero = true;
1408 }
1409 break;
1410 case nir_texop_tg4:
1411 args->opcode = ac_image_gather4;
1412 args->level_zero = true;
1413 break;
1414 case nir_texop_lod:
1415 args->opcode = ac_image_get_lod;
1416 break;
1417 default:
1418 break;
1419 }
1420
1421 if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
1422 nir_deref_instr *texture_deref_instr = get_tex_texture_deref(instr);
1423 nir_variable *var = nir_deref_instr_get_variable(texture_deref_instr);
1424 const struct glsl_type *type = glsl_without_array(var->type);
1425 enum glsl_base_type stype = glsl_get_sampler_result_type(type);
1426 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1427 return lower_gather4_integer(&ctx->ac, var, args, instr);
1428 }
1429 }
1430
1431 /* Fixup for GFX9 which allocates 1D textures as 2D. */
1432 if (instr->op == nir_texop_lod && ctx->ac.chip_class >= GFX9) {
1433 if ((args->dim == ac_image_2darray ||
1434 args->dim == ac_image_2d) && !args->coords[1]) {
1435 args->coords[1] = ctx->ac.i32_0;
1436 }
1437 }
1438
1439 args->attributes = AC_FUNC_ATTR_READNONE;
1440 return ac_build_image_opcode(&ctx->ac, args);
1441 }
1442
1443 static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
1444 nir_intrinsic_instr *instr)
1445 {
1446 LLVMValueRef ptr = get_src(ctx, instr->src[0]);
1447 LLVMValueRef index = get_src(ctx, instr->src[1]);
1448
1449 LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
1450 LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
1451 return result;
1452 }
1453
/* Load push-constant data.
 *
 * If the offset is a compile-time constant and the destination is 32-bit,
 * the values are returned directly from the inline push constants when they
 * fall inside that range; otherwise the data is loaded through the
 * push-constant pointer.  8- and 16-bit destinations load whole dwords and
 * extract the requested bytes/shorts, since the byte offset may not be
 * dword-aligned.
 */
static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
                                             nir_intrinsic_instr *instr)
{
	LLVMValueRef ptr, addr;
	LLVMValueRef src0 = get_src(ctx, instr->src[0]);
	unsigned index = nir_intrinsic_base(instr);

	/* addr = base + dynamic offset, in bytes. */
	addr = LLVMConstInt(ctx->ac.i32, index, 0);
	addr = LLVMBuildAdd(ctx->ac.builder, addr, src0, "");

	/* Load constant values from user SGPRS when possible, otherwise
	 * fallback to the default path that loads directly from memory.
	 */
	if (LLVMIsConstant(src0) &&
	    instr->dest.ssa.bit_size == 32) {
		unsigned count = instr->dest.ssa.num_components;
		unsigned offset = index;

		/* Convert the byte offset to a dword index relative to the
		 * first inlined push constant. */
		offset += LLVMConstIntGetZExtValue(src0);
		offset /= 4;

		offset -= ctx->abi->base_inline_push_consts;

		if (offset + count <= ctx->abi->num_inline_push_consts) {
			return ac_build_gather_values(&ctx->ac,
						      ctx->abi->inline_push_consts + offset,
						      count);
		}
	}

	ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);

	if (instr->dest.ssa.bit_size == 8) {
		/* Load 1 dword (2 for multi-component results), then use
		 * alignbyte with the byte address to rotate the wanted bytes
		 * into the low end before truncating. */
		unsigned load_dwords = instr->dest.ssa.num_components > 1 ? 2 : 1;
		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), 4 * load_dwords);
		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");

		LLVMValueRef params[3];
		if (load_dwords > 1) {
			LLVMValueRef res_vec = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(ctx->ac.i32, 2), "");
			params[0] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 1, false), "");
			params[1] = LLVMBuildExtractElement(ctx->ac.builder, res_vec, LLVMConstInt(ctx->ac.i32, 0, false), "");
		} else {
			res = LLVMBuildBitCast(ctx->ac.builder, res, ctx->ac.i32, "");
			params[0] = ctx->ac.i32_0;
			params[1] = res;
		}
		params[2] = addr;
		res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.alignbyte", ctx->ac.i32, params, 3, 0);

		res = LLVMBuildTrunc(ctx->ac.builder, res, LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.num_components * 8), "");
		if (instr->dest.ssa.num_components > 1)
			res = LLVMBuildBitCast(ctx->ac.builder, res, LLVMVectorType(LLVMInt8TypeInContext(ctx->ac.context), instr->dest.ssa.num_components), "");
		return res;
	} else if (instr->dest.ssa.bit_size == 16) {
		/* Load enough dwords as an i16 vector, then pick between the
		 * aligned shuffle and the one-short-shifted shuffle based on
		 * bit 1 of the byte address. */
		unsigned load_dwords = instr->dest.ssa.num_components / 2 + 1;
		LLVMTypeRef vec_type = LLVMVectorType(LLVMInt16TypeInContext(ctx->ac.context), 2 * load_dwords);
		ptr = ac_cast_ptr(&ctx->ac, ptr, vec_type);
		LLVMValueRef res = LLVMBuildLoad(ctx->ac.builder, ptr, "");
		res = LLVMBuildBitCast(ctx->ac.builder, res, vec_type, "");
		LLVMValueRef cond = LLVMBuildLShr(ctx->ac.builder, addr, ctx->ac.i32_1, "");
		cond = LLVMBuildTrunc(ctx->ac.builder, cond, ctx->ac.i1, "");
		LLVMValueRef mask[] = { LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
					LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
					LLVMConstInt(ctx->ac.i32, 4, false)};
		LLVMValueRef swizzle_aligned = LLVMConstVector(&mask[0], instr->dest.ssa.num_components);
		LLVMValueRef swizzle_unaligned = LLVMConstVector(&mask[1], instr->dest.ssa.num_components);
		LLVMValueRef shuffle_aligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_aligned, "");
		LLVMValueRef shuffle_unaligned = LLVMBuildShuffleVector(ctx->ac.builder, res, res, swizzle_unaligned, "");
		res = LLVMBuildSelect(ctx->ac.builder, cond, shuffle_unaligned, shuffle_aligned, "");
		return LLVMBuildBitCast(ctx->ac.builder, res, get_def_type(ctx, &instr->dest.ssa), "");
	}

	/* 32/64-bit: load directly at the requested type. */
	ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));

	return LLVMBuildLoad(ctx->ac.builder, ptr, "");
}
1532
1533 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
1534 const nir_intrinsic_instr *instr)
1535 {
1536 LLVMValueRef index = get_src(ctx, instr->src[0]);
1537
1538 return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
1539 }
1540
/* Expand each set bit i of 'mask' into 'multiplier' consecutive set bits
 * starting at bit i * multiplier, e.g. widen_mask(0b101, 2) == 0b110011. */
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
	const uint32_t group = (1u << multiplier) - 1u;
	uint32_t result = 0;

	for (unsigned bit = 0; bit < 32; ++bit) {
		if ((1u << bit) > mask)
			break;
		if (mask & (1u << bit))
			result |= group << (bit * multiplier);
	}
	return result;
}
1549
1550 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
1551 unsigned start, unsigned count)
1552 {
1553 LLVMValueRef mask[] = {
1554 ctx->i32_0, ctx->i32_1,
1555 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false) };
1556
1557 unsigned src_elements = ac_get_llvm_num_components(src);
1558
1559 if (count == src_elements) {
1560 assert(start == 0);
1561 return src;
1562 } else if (count == 1) {
1563 assert(start < src_elements);
1564 return LLVMBuildExtractElement(ctx->builder, src, mask[start], "");
1565 } else {
1566 assert(start + count <= src_elements);
1567 assert(count <= 4);
1568 LLVMValueRef swizzle = LLVMConstVector(&mask[start], count);
1569 return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
1570 }
1571 }
1572
1573 static unsigned get_cache_policy(struct ac_nir_context *ctx,
1574 enum gl_access_qualifier access,
1575 bool may_store_unaligned,
1576 bool writeonly_memory)
1577 {
1578 unsigned cache_policy = 0;
1579
1580 /* SI has a TC L1 bug causing corruption of 8bit/16bit stores. All
1581 * store opcodes not aligned to a dword are affected. The only way to
1582 * get unaligned stores is through shader images.
1583 */
1584 if (((may_store_unaligned && ctx->ac.chip_class == SI) ||
1585 /* If this is write-only, don't keep data in L1 to prevent
1586 * evicting L1 cache lines that may be needed by other
1587 * instructions.
1588 */
1589 writeonly_memory ||
1590 access & (ACCESS_COHERENT | ACCESS_VOLATILE))) {
1591 cache_policy |= ac_glc;
1592 }
1593
1594 return cache_policy;
1595 }
1596
/* Store to an SSBO.
 *
 * The write mask is consumed as contiguous runs; each run is split further
 * to satisfy hardware/LLVM limits (no 3-element dword stores, at most 4
 * dwords per store, 16-bit stores must start dword-aligned for multi-element
 * writes) and emitted as a byte, short, or dword buffer store.  Bits are
 * pushed back into the mask whenever a run has to be shortened.
 */
static void visit_store_ssbo(struct ac_nir_context *ctx,
                             nir_intrinsic_instr *instr)
{
	LLVMValueRef src_data = get_src(ctx, instr->src[0]);
	int elem_size_bytes = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 8;
	unsigned writemask = nir_intrinsic_write_mask(instr);
	enum gl_access_qualifier access = nir_intrinsic_access(instr);
	bool writeonly_memory = access & ACCESS_NON_READABLE;
	unsigned cache_policy = get_cache_policy(ctx, access, false, writeonly_memory);

	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
				        get_src(ctx, instr->src[1]), true);
	LLVMValueRef base_data = src_data;
	base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
	LLVMValueRef base_offset = get_src(ctx, instr->src[2]);

	while (writemask) {
		int start, count;
		LLVMValueRef data, offset;
		LLVMTypeRef data_type;

		u_bit_scan_consecutive_range(&writemask, &start, &count);

		/* Due to an LLVM limitation, split 3-element writes
		 * into a 2-element and a 1-element write. */
		if (count == 3) {
			writemask |= 1 << (start + 2);
			count = 2;
		}
		int num_bytes = count * elem_size_bytes; /* count in bytes */

		/* we can only store 4 DWords at the same time.
		 * can only happen for 64 Bit vectors. */
		if (num_bytes > 16) {
			writemask |= ((1u << (count - 2)) - 1u) << (start + 2);
			count = 2;
			num_bytes = 16;
		}

		/* check alignment of 16 Bit stores */
		if (elem_size_bytes == 2 && num_bytes > 2 && (start % 2) == 1) {
			writemask |= ((1u << (count - 1)) - 1u) << (start + 1);
			count = 1;
			num_bytes = 2;
		}
		data = extract_vector_range(&ctx->ac, base_data, start, count);

		/* Byte offset of this run within the store. */
		offset = LLVMBuildAdd(ctx->ac.builder, base_offset,
				      LLVMConstInt(ctx->ac.i32, start * elem_size_bytes, false), "");

		if (num_bytes == 1) {
			ac_build_tbuffer_store_byte(&ctx->ac, rsrc, data,
						    offset, ctx->ac.i32_0,
						    cache_policy & ac_glc,
						    writeonly_memory);
		} else if (num_bytes == 2) {
			ac_build_tbuffer_store_short(&ctx->ac, rsrc, data,
						     offset, ctx->ac.i32_0,
						     cache_policy & ac_glc,
						     writeonly_memory);
		} else {
			int num_channels = num_bytes / 4;

			switch (num_bytes) {
			case 16: /* v4f32 */
				data_type = ctx->ac.v4f32;
				break;
			case 8: /* v2f32 */
				data_type = ctx->ac.v2f32;
				break;
			case 4: /* f32 */
				data_type = ctx->ac.f32;
				break;
			default:
				unreachable("Malformed vector store.");
			}
			data = LLVMBuildBitCast(ctx->ac.builder, data, data_type, "");

			ac_build_buffer_store_dword(&ctx->ac, rsrc, data,
						    num_channels, offset,
						    ctx->ac.i32_0, 0,
						    cache_policy & ac_glc,
						    false, writeonly_memory,
						    false);
		}
	}
}
1684
1685 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
1686 const nir_intrinsic_instr *instr)
1687 {
1688 const char *op;
1689 char name[64];
1690 LLVMValueRef params[6];
1691 int arg_count = 0;
1692
1693 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1694 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
1695 }
1696 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
1697 params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
1698 get_src(ctx, instr->src[0]),
1699 true);
1700 params[arg_count++] = ctx->ac.i32_0; /* vindex */
1701 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1702 params[arg_count++] = ctx->ac.i1false; /* slc */
1703
1704 switch (instr->intrinsic) {
1705 case nir_intrinsic_ssbo_atomic_add:
1706 op = "add";
1707 break;
1708 case nir_intrinsic_ssbo_atomic_imin:
1709 op = "smin";
1710 break;
1711 case nir_intrinsic_ssbo_atomic_umin:
1712 op = "umin";
1713 break;
1714 case nir_intrinsic_ssbo_atomic_imax:
1715 op = "smax";
1716 break;
1717 case nir_intrinsic_ssbo_atomic_umax:
1718 op = "umax";
1719 break;
1720 case nir_intrinsic_ssbo_atomic_and:
1721 op = "and";
1722 break;
1723 case nir_intrinsic_ssbo_atomic_or:
1724 op = "or";
1725 break;
1726 case nir_intrinsic_ssbo_atomic_xor:
1727 op = "xor";
1728 break;
1729 case nir_intrinsic_ssbo_atomic_exchange:
1730 op = "swap";
1731 break;
1732 case nir_intrinsic_ssbo_atomic_comp_swap:
1733 op = "cmpswap";
1734 break;
1735 default:
1736 abort();
1737 }
1738
1739 if (HAVE_LLVM >= 0x900 &&
1740 instr->intrinsic != nir_intrinsic_ssbo_atomic_comp_swap) {
1741 snprintf(name, sizeof(name),
1742 "llvm.amdgcn.buffer.atomic.%s.i32", op);
1743 } else {
1744 snprintf(name, sizeof(name),
1745 "llvm.amdgcn.buffer.atomic.%s", op);
1746 }
1747
1748 return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0);
1749 }
1750
/* Load from an SSBO.
 *
 * The result is assembled component by component from chunked loads: up to
 * 16 bytes per chunk, and only one sub-dword element per chunk when the
 * access is not dword-aligned (those go through typed byte/short loads).
 */
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
                                      const nir_intrinsic_instr *instr)
{
	int elem_size_bytes = instr->dest.ssa.bit_size / 8;
	int num_components = instr->num_components;
	enum gl_access_qualifier access = nir_intrinsic_access(instr);
	unsigned cache_policy = get_cache_policy(ctx, access, false, false);

	LLVMValueRef offset = get_src(ctx, instr->src[1]);
	LLVMValueRef rsrc = ctx->abi->load_ssbo(ctx->abi,
						get_src(ctx, instr->src[0]), false);
	LLVMValueRef vindex = ctx->ac.i32_0;

	LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.ssa);
	LLVMTypeRef def_elem_type = num_components > 1 ? LLVMGetElementType(def_type) : def_type;

	LLVMValueRef results[4];
	for (int i = 0; i < num_components;) {
		/* Size this chunk: all remaining components, clamped to one
		 * element for unaligned sub-dword loads and to 16 bytes max. */
		int num_elems = num_components - i;
		if (elem_size_bytes < 4 && nir_intrinsic_align(instr) % 4 != 0)
			num_elems = 1;
		if (num_elems * elem_size_bytes > 16)
			num_elems = 16 / elem_size_bytes;
		int load_bytes = num_elems * elem_size_bytes;

		LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32, i * elem_size_bytes, false);

		LLVMValueRef ret;

		if (load_bytes == 1) {
			ret = ac_build_tbuffer_load_byte(&ctx->ac,
							 rsrc,
							 offset,
							 ctx->ac.i32_0,
							 immoffset,
							 cache_policy & ac_glc);
		} else if (load_bytes == 2) {
			ret = ac_build_tbuffer_load_short(&ctx->ac,
							  rsrc,
							  offset,
							  ctx->ac.i32_0,
							  immoffset,
							  cache_policy & ac_glc);
		} else {
			int num_channels = util_next_power_of_two(load_bytes) / 4;

			ret = ac_build_buffer_load(&ctx->ac, rsrc, num_channels,
						   vindex, offset, immoffset, 0,
						   cache_policy & ac_glc, 0,
						   false, false);
		}

		/* Reinterpret the chunk as bytes, drop any over-read padding,
		 * then recast to the destination element type. */
		LLVMTypeRef byte_vec = LLVMVectorType(ctx->ac.i8, ac_get_type_size(LLVMTypeOf(ret)));
		ret = LLVMBuildBitCast(ctx->ac.builder, ret, byte_vec, "");
		ret = ac_trim_vector(&ctx->ac, ret, load_bytes);

		LLVMTypeRef ret_type = LLVMVectorType(def_elem_type, num_elems);
		ret = LLVMBuildBitCast(ctx->ac.builder, ret, ret_type, "");

		for (unsigned j = 0; j < num_elems; j++) {
			results[i + j] = LLVMBuildExtractElement(ctx->ac.builder, ret, LLVMConstInt(ctx->ac.i32, j, false), "");
		}
		i += num_elems;
	}

	return ac_build_gather_values(&ctx->ac, results, num_components);
}
1818
1819 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
1820 const nir_intrinsic_instr *instr)
1821 {
1822 LLVMValueRef ret;
1823 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
1824 LLVMValueRef offset = get_src(ctx, instr->src[1]);
1825 int num_components = instr->num_components;
1826
1827 if (ctx->abi->load_ubo)
1828 rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
1829
1830 if (instr->dest.ssa.bit_size == 64)
1831 num_components *= 2;
1832
1833 if (instr->dest.ssa.bit_size == 16 || instr->dest.ssa.bit_size == 8) {
1834 unsigned load_bytes = instr->dest.ssa.bit_size / 8;
1835 LLVMValueRef results[num_components];
1836 for (unsigned i = 0; i < num_components; ++i) {
1837 LLVMValueRef immoffset = LLVMConstInt(ctx->ac.i32,
1838 load_bytes * i, 0);
1839
1840 if (load_bytes == 1) {
1841 results[i] = ac_build_tbuffer_load_byte(&ctx->ac,
1842 rsrc,
1843 offset,
1844 ctx->ac.i32_0,
1845 immoffset,
1846 false);
1847 } else {
1848 assert(load_bytes == 2);
1849 results[i] = ac_build_tbuffer_load_short(&ctx->ac,
1850 rsrc,
1851 offset,
1852 ctx->ac.i32_0,
1853 immoffset,
1854 false);
1855 }
1856 }
1857 ret = ac_build_gather_values(&ctx->ac, results, num_components);
1858 } else {
1859 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
1860 NULL, 0, false, false, true, true);
1861
1862 ret = ac_trim_vector(&ctx->ac, ret, num_components);
1863 }
1864
1865 return LLVMBuildBitCast(ctx->ac.builder, ret,
1866 get_def_type(ctx, &instr->dest.ssa), "");
1867 }
1868
1869 static void
1870 get_deref_offset(struct ac_nir_context *ctx, nir_deref_instr *instr,
1871 bool vs_in, unsigned *vertex_index_out,
1872 LLVMValueRef *vertex_index_ref,
1873 unsigned *const_out, LLVMValueRef *indir_out)
1874 {
1875 nir_variable *var = nir_deref_instr_get_variable(instr);
1876 nir_deref_path path;
1877 unsigned idx_lvl = 1;
1878
1879 nir_deref_path_init(&path, instr, NULL);
1880
1881 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
1882 if (vertex_index_ref) {
1883 *vertex_index_ref = get_src(ctx, path.path[idx_lvl]->arr.index);
1884 if (vertex_index_out)
1885 *vertex_index_out = 0;
1886 } else {
1887 nir_const_value *v = nir_src_as_const_value(path.path[idx_lvl]->arr.index);
1888 assert(v);
1889 *vertex_index_out = v->u32[0];
1890 }
1891 ++idx_lvl;
1892 }
1893
1894 uint32_t const_offset = 0;
1895 LLVMValueRef offset = NULL;
1896
1897 if (var->data.compact) {
1898 assert(instr->deref_type == nir_deref_type_array);
1899 nir_const_value *v = nir_src_as_const_value(instr->arr.index);
1900 assert(v);
1901 const_offset = v->u32[0];
1902 goto out;
1903 }
1904
1905 for (; path.path[idx_lvl]; ++idx_lvl) {
1906 const struct glsl_type *parent_type = path.path[idx_lvl - 1]->type;
1907 if (path.path[idx_lvl]->deref_type == nir_deref_type_struct) {
1908 unsigned index = path.path[idx_lvl]->strct.index;
1909
1910 for (unsigned i = 0; i < index; i++) {
1911 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
1912 const_offset += glsl_count_attribute_slots(ft, vs_in);
1913 }
1914 } else if(path.path[idx_lvl]->deref_type == nir_deref_type_array) {
1915 unsigned size = glsl_count_attribute_slots(path.path[idx_lvl]->type, vs_in);
1916 LLVMValueRef array_off = LLVMBuildMul(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, size, 0),
1917 get_src(ctx, path.path[idx_lvl]->arr.index), "");
1918 if (offset)
1919 offset = LLVMBuildAdd(ctx->ac.builder, offset, array_off, "");
1920 else
1921 offset = array_off;
1922 } else
1923 unreachable("Uhandled deref type in get_deref_instr_offset");
1924 }
1925
1926 out:
1927 nir_deref_path_finish(&path);
1928
1929 if (const_offset && offset)
1930 offset = LLVMBuildAdd(ctx->ac.builder, offset,
1931 LLVMConstInt(ctx->ac.i32, const_offset, 0),
1932 "");
1933
1934 *const_out = const_offset;
1935 *indir_out = offset;
1936 }
1937
/* Load tessellation-stage varyings through the ABI callback.
 *
 * load_inputs: true to read stage inputs, false to read (TCS) outputs.
 */
static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
				       nir_intrinsic_instr *instr,
				       bool load_inputs)
{
	LLVMValueRef result;
	LLVMValueRef vertex_index = NULL;
	LLVMValueRef indir_index = NULL;
	unsigned const_index = 0;

	nir_variable *var = nir_deref_instr_get_variable(nir_instr_as_deref(instr->src[0].ssa->parent_instr));

	unsigned location = var->data.location;
	unsigned driver_location = var->data.driver_location;
	const bool is_patch = var->data.patch;
	const bool is_compact = var->data.compact;

	/* Per-patch variables carry no per-vertex index. */
	get_deref_offset(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
	                 false, NULL, is_patch ? NULL : &vertex_index,
	                 &const_index, &indir_index);

	LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);

	/* Scalar element type of the destination (for vector dests). */
	LLVMTypeRef src_component_type;
	if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
		src_component_type = LLVMGetElementType(dest_type);
	else
		src_component_type = dest_type;

	result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
					      vertex_index, indir_index,
					      const_index, location, driver_location,
					      var->data.location_frac,
					      instr->num_components,
					      is_patch, is_compact, load_inputs);
	if (instr->dest.ssa.bit_size == 16) {
		/* The loaded value is wider than the 16-bit destination;
		 * truncate it to the dest type.
		 */
		result = ac_to_integer(&ctx->ac, result);
		result = LLVMBuildTrunc(ctx->ac.builder, result, dest_type, "");
	}
	return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
}
1978
/* Size in bytes of one scalar element of a vector/scalar/matrix type.
 * Booleans occupy a full 32-bit dword.
 */
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
	assert(glsl_type_is_vector_or_scalar(type) ||
	       glsl_type_is_matrix(type));
	if (glsl_type_is_boolean(type))
		return 4;
	return glsl_get_bit_size(type) / 8;
}
1986
/* Lower nir_intrinsic_load_deref: read a variable for all supported
 * variable modes (shader inputs/outputs, function temporaries, shared and
 * global memory).
 */
static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
				   nir_intrinsic_instr *instr)
{
	nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
	nir_variable *var = nir_deref_instr_get_variable(deref);

	/* Per-channel results; 8 covers dvec4 split into 32-bit halves. */
	LLVMValueRef values[8];
	int idx = 0;
	int ve = instr->dest.ssa.num_components; /* 32-bit channels to read */
	unsigned comp = 0;
	LLVMValueRef indir_index;
	LLVMValueRef ret;
	unsigned const_index;
	unsigned stride = 4; /* channels per attribute slot (vec4) */
	int mode = deref->mode;

	if (var) {
		bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
			     var->data.mode == nir_var_shader_in;
		idx = var->data.driver_location;
		comp = var->data.location_frac;
		mode = var->data.mode;

		get_deref_offset(ctx, deref, vs_in, NULL, NULL,
				 &const_index, &indir_index);

		if (var->data.compact) {
			/* Compact variables are addressed per component,
			 * so fold the component into the constant index.
			 */
			stride = 1;
			const_index += comp;
			comp = 0;
		}
	}

	/* 64-bit values are read as pairs of 32-bit channels. */
	if (instr->dest.ssa.bit_size == 64 &&
	    (deref->mode == nir_var_shader_in ||
	     deref->mode == nir_var_shader_out ||
	     deref->mode == nir_var_function_temp))
		ve *= 2;

	switch (mode) {
	case nir_var_shader_in:
		/* Tess stages read inputs through the ABI callback. */
		if (ctx->stage == MESA_SHADER_TESS_CTRL ||
		    ctx->stage == MESA_SHADER_TESS_EVAL) {
			return load_tess_varyings(ctx, instr, true);
		}

		/* GS inputs are per-vertex and also go through the ABI. */
		if (ctx->stage == MESA_SHADER_GEOMETRY) {
			LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
			LLVMValueRef indir_index;
			unsigned const_index, vertex_index;
			get_deref_offset(ctx, deref, false, &vertex_index, NULL,
					 &const_index, &indir_index);

			return ctx->abi->load_inputs(ctx->abi, var->data.location,
						     var->data.driver_location,
						     var->data.location_frac,
						     instr->num_components, vertex_index, const_index, type);
		}

		for (unsigned chan = comp; chan < ve + comp; chan++) {
			if (indir_index) {
				/* Dynamically indexed input: gather all
				 * candidate channels into a vector and
				 * extract the wanted one.
				 */
				unsigned count = glsl_count_attribute_slots(
						var->type,
						ctx->stage == MESA_SHADER_VERTEX);
				count -= chan / 4;
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->abi->inputs + idx + chan, count,
						stride, false, true);

				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
								       tmp_vec,
								       indir_index, "");
			} else
				values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
		}
		break;
	case nir_var_function_temp:
		/* Function temporaries live in ctx->locals as allocas. */
		for (unsigned chan = 0; chan < ve; chan++) {
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
						var->type, false);
				count -= chan / 4;
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->locals + idx + chan, count,
						stride, true, true);

				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
								       tmp_vec,
								       indir_index, "");
			} else {
				values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
			}
		}
		break;
	case nir_var_mem_shared: {
		/* Shared memory: the deref source is already a pointer. */
		LLVMValueRef address = get_src(ctx, instr->src[0]);
		LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
		return LLVMBuildBitCast(ctx->ac.builder, val,
					get_def_type(ctx, &instr->dest.ssa),
					"");
	}
	case nir_var_shader_out:
		/* TCS reads its own outputs through the ABI. */
		if (ctx->stage == MESA_SHADER_TESS_CTRL) {
			return load_tess_varyings(ctx, instr, false);
		}

		for (unsigned chan = comp; chan < ve + comp; chan++) {
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
						var->type, false);
				count -= chan / 4;
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->abi->outputs + idx + chan, count,
						stride, true, true);

				values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
								       tmp_vec,
								       indir_index, "");
			} else {
				values[chan] = LLVMBuildLoad(ctx->ac.builder,
						     ctx->abi->outputs[idx + chan + const_index * stride],
						     "");
			}
		}
		break;
	case nir_var_mem_global: {
		LLVMValueRef address = get_src(ctx, instr->src[0]);
		unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
		unsigned natural_stride = type_scalar_size_bytes(deref->type);
		unsigned stride = explicit_stride ? explicit_stride : natural_stride;

		LLVMTypeRef result_type = get_def_type(ctx, &instr->dest.ssa);
		if (stride != natural_stride) {
			/* Padded layout: components are not contiguous, so
			 * load them one by one at the explicit stride.
			 */
			LLVMTypeRef ptr_type =  LLVMPointerType(LLVMGetElementType(result_type),
			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");

			for (unsigned i = 0; i < instr->dest.ssa.num_components; ++i) {
				LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * stride / natural_stride, 0);
				values[i] = LLVMBuildLoad(ctx->ac.builder,
				                          ac_build_gep_ptr(&ctx->ac, address, offset), "");
			}
			return ac_build_gather_values(&ctx->ac, values, instr->dest.ssa.num_components);
		} else {
			/* Tightly packed: a single vector load suffices. */
			LLVMTypeRef ptr_type =  LLVMPointerType(result_type,
			                                        LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
			LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
			return val;
		}
	}
	default:
		unreachable("unhandle variable mode");
	}
	ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
	return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
}
2144
/* Lower nir_intrinsic_store_deref: write a variable for all supported
 * variable modes, honoring the per-component writemask.
 */
static void
visit_store_var(struct ac_nir_context *ctx,
		nir_intrinsic_instr *instr)
{
	nir_deref_instr *deref = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
	nir_variable *var = nir_deref_instr_get_variable(deref);

	LLVMValueRef temp_ptr, value;
	int idx = 0;
	unsigned comp = 0;
	LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[1]));
	int writemask = instr->const_index[0];
	LLVMValueRef indir_index;
	unsigned const_index;

	if (var) {
		get_deref_offset(ctx, deref, false,
		                 NULL, NULL, &const_index, &indir_index);
		idx = var->data.driver_location;
		comp = var->data.location_frac;

		if (var->data.compact) {
			/* Compact variables are addressed per component. */
			const_index += comp;
			comp = 0;
		}
	}

	/* 64-bit stores are split into pairs of 32-bit channels; widen the
	 * writemask accordingly (each bit becomes two bits).
	 */
	if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64 &&
	    (deref->mode == nir_var_shader_out ||
	     deref->mode == nir_var_function_temp)) {

		src = LLVMBuildBitCast(ctx->ac.builder, src,
		                       LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
		                       "");

		writemask = widen_mask(writemask, 2);
	}

	/* Shift the mask to the variable's first component. */
	writemask = writemask << comp;

	switch (deref->mode) {
	case nir_var_shader_out:

		/* TCS outputs go through the ABI callback. */
		if (ctx->stage == MESA_SHADER_TESS_CTRL) {
			LLVMValueRef vertex_index = NULL;
			LLVMValueRef indir_index = NULL;
			unsigned const_index = 0;
			const bool is_patch = var->data.patch;

			get_deref_offset(ctx, deref, false, NULL,
			                 is_patch ? NULL : &vertex_index,
			                 &const_index, &indir_index);

			ctx->abi->store_tcs_outputs(ctx->abi, var,
						    vertex_index, indir_index,
						    const_index, src, writemask);
			return;
		}

		for (unsigned chan = 0; chan < 8; chan++) {
			int stride = 4;
			if (!(writemask & (1 << chan)))
				continue;

			value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);

			if (var->data.compact)
				stride = 1;
			if (indir_index) {
				/* Dynamic index: read-modify-write the whole
				 * candidate range via a gathered vector.
				 */
				unsigned count = glsl_count_attribute_slots(
						var->type, false);
				count -= chan / 4;
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
						&ctx->ac, ctx->abi->outputs + idx + chan, count,
						stride, true, true);

				tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
							         value, indir_index, "");
				build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
							    count, stride, tmp_vec);

			} else {
				temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];

				LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
			}
		}
		break;
	case nir_var_function_temp:
		for (unsigned chan = 0; chan < 8; chan++) {
			if (!(writemask & (1 << chan)))
				continue;

			value = ac_llvm_extract_elem(&ctx->ac, src, chan);
			if (indir_index) {
				unsigned count = glsl_count_attribute_slots(
					var->type, false);
				count -= chan / 4;
				LLVMValueRef tmp_vec = ac_build_gather_values_extended(
					&ctx->ac, ctx->locals + idx + chan, count,
					4, true, true);

				tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
								 value, indir_index, "");
				build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
							    count, 4, tmp_vec);
			} else {
				temp_ptr = ctx->locals[idx + chan + const_index * 4];

				LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
			}
		}
		break;

	case nir_var_mem_global:
	case nir_var_mem_shared: {
		int writemask = instr->const_index[0];
		LLVMValueRef address = get_src(ctx, instr->src[0]);
		LLVMValueRef val = get_src(ctx, instr->src[1]);

		unsigned explicit_stride = glsl_get_explicit_stride(deref->type);
		unsigned natural_stride = type_scalar_size_bytes(deref->type);
		unsigned stride = explicit_stride ? explicit_stride : natural_stride;

		LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
						       LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
		address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");

		if (writemask == (1u << ac_get_llvm_num_components(val)) - 1 &&
		    stride == natural_stride) {
			/* Full write, tightly packed: one vector store.
			 * NOTE(review): this bitcast repeats the one done
			 * just above with the same types — looks redundant;
			 * confirm before cleaning up.
			 */
			LLVMTypeRef ptr_type = LLVMPointerType(LLVMTypeOf(val),
			                                       LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");

			val = LLVMBuildBitCast(ctx->ac.builder, val,
			                       LLVMGetElementType(LLVMTypeOf(address)), "");
			LLVMBuildStore(ctx->ac.builder, val, address);
		} else {
			/* Partial or strided write: store each enabled
			 * component individually at the explicit stride.
			 */
			LLVMTypeRef ptr_type = LLVMPointerType(LLVMGetElementType(LLVMTypeOf(val)),
			                                       LLVMGetPointerAddressSpace(LLVMTypeOf(address)));
			address = LLVMBuildBitCast(ctx->ac.builder, address, ptr_type , "");
			for (unsigned chan = 0; chan < 4; chan++) {
				if (!(writemask & (1 << chan)))
					continue;

				LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, chan * stride / natural_stride, 0);

				LLVMValueRef ptr = ac_build_gep_ptr(&ctx->ac, address, offset);
				LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
									chan);
				src = LLVMBuildBitCast(ctx->ac.builder, src,
				                       LLVMGetElementType(LLVMTypeOf(ptr)), "");
				LLVMBuildStore(ctx->ac.builder, src, ptr);
			}
		}
		break;
	}
	default:
		abort();
		break;
	}
}
2307
2308 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2309 {
2310 switch (dim) {
2311 case GLSL_SAMPLER_DIM_BUF:
2312 return 1;
2313 case GLSL_SAMPLER_DIM_1D:
2314 return array ? 2 : 1;
2315 case GLSL_SAMPLER_DIM_2D:
2316 return array ? 3 : 2;
2317 case GLSL_SAMPLER_DIM_MS:
2318 return array ? 4 : 3;
2319 case GLSL_SAMPLER_DIM_3D:
2320 case GLSL_SAMPLER_DIM_CUBE:
2321 return 3;
2322 case GLSL_SAMPLER_DIM_RECT:
2323 case GLSL_SAMPLER_DIM_SUBPASS:
2324 return 2;
2325 case GLSL_SAMPLER_DIM_SUBPASS_MS:
2326 return 3;
2327 default:
2328 break;
2329 }
2330 return 0;
2331 }
2332
2333
2334 /* Adjust the sample index according to FMASK.
2335 *
2336 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
2337 * which is the identity mapping. Each nibble says which physical sample
2338 * should be fetched to get that sample.
2339 *
2340 * For example, 0x11111100 means there are only 2 samples stored and
2341 * the second sample covers 3/4 of the pixel. When reading samples 0
2342 * and 1, return physical sample 0 (determined by the first two 0s
2343 * in FMASK), otherwise return physical sample 1.
2344 *
2345 * The sample index should be adjusted as follows:
2346 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
2347 */
static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
						    LLVMValueRef coord_x, LLVMValueRef coord_y,
						    LLVMValueRef coord_z,
						    LLVMValueRef sample_index,
						    LLVMValueRef fmask_desc_ptr)
{
	struct ac_image_args args = {0};
	LLVMValueRef res;

	args.coords[0] = coord_x;
	args.coords[1] = coord_y;
	/* coord_z (array layer) is optional; it selects 2D vs 2D-array. */
	if (coord_z)
		args.coords[2] = coord_z;

	/* Load the FMASK word for this pixel. */
	args.opcode = ac_image_load;
	args.dim = coord_z ? ac_image_2darray : ac_image_2d;
	args.resource = fmask_desc_ptr;
	args.dmask = 0xf;
	args.attributes = AC_FUNC_ATTR_READNONE;

	res = ac_build_image_opcode(ctx, &args);

	res = ac_to_integer(ctx, res);
	LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
	LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);

	LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
						     res,
						     ctx->i32_0, "");

	/* sample_index = (fmask >> (sample_index * 4)) & 0xF
	 * (see the function comment above for the FMASK encoding).
	 */
	LLVMValueRef sample_index4 =
		LLVMBuildMul(ctx->builder, sample_index, four, "");
	LLVMValueRef shifted_fmask =
		LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
	LLVMValueRef final_sample =
		LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");

	/* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
	 * resource descriptor is 0 (invalid),
	 */
	LLVMValueRef fmask_desc =
		LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
				 ctx->v8i32, "");

	LLVMValueRef fmask_word1 =
		LLVMBuildExtractElement(ctx->builder, fmask_desc,
					ctx->i32_1, "");

	LLVMValueRef word1_is_nonzero =
		LLVMBuildICmp(ctx->builder, LLVMIntNE,
			      fmask_word1, ctx->i32_0, "");

	/* Replace the MSAA sample index. */
	sample_index =
		LLVMBuildSelect(ctx->builder, word1_is_nonzero,
				final_sample, sample_index, "");
	return sample_index;
}
2406
2407 static nir_deref_instr *get_image_deref(const nir_intrinsic_instr *instr)
2408 {
2409 assert(instr->src[0].is_ssa);
2410 return nir_instr_as_deref(instr->src[0].ssa->parent_instr);
2411 }
2412
2413 static LLVMValueRef get_image_descriptor(struct ac_nir_context *ctx,
2414 const nir_intrinsic_instr *instr,
2415 enum ac_descriptor_type desc_type,
2416 bool write)
2417 {
2418 return get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr), desc_type, NULL, true, write);
2419 }
2420
/* Fill args->coords for an image intrinsic from src[1] (coordinates) and
 * src[2] (sample index), handling subpass fragment-position offsets,
 * FMASK-based MSAA sample remapping, and the GFX9 1D-as-2D layout.
 */
static void get_image_coords(struct ac_nir_context *ctx,
			     const nir_intrinsic_instr *instr,
			     struct ac_image_args *args)
{
	const struct glsl_type *type = get_image_deref(instr)->type;

	LLVMValueRef src0 = get_src(ctx, instr->src[1]);
	LLVMValueRef masks[] = {
		LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
		LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
	};
	LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);

	int count;
	enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
	bool is_array = glsl_sampler_type_is_array(type);
	/* Subpass inputs are addressed relative to the fragment position. */
	bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
			     dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
	bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
		      dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
	/* On GFX9, 1D images are laid out as 2D (y coordinate forced to 0). */
	bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
	count = image_type_to_components_count(dim, is_array);

	if (is_ms && instr->intrinsic == nir_intrinsic_image_deref_load) {
		/* MSAA loads must remap the sample index through FMASK. */
		LLVMValueRef fmask_load_address[3];
		int chan;

		fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
		fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
		if (is_array)
			fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
		else
			fmask_load_address[2] = NULL;
		if (add_frag_pos) {
			/* Subpass: offset x/y by the fragment position and
			 * use the current layer as the array slice.
			 */
			for (chan = 0; chan < 2; ++chan)
				fmask_load_address[chan] =
					LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan],
						LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
								ctx->ac.i32, ""), "");
			fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
		}
		sample_index = adjust_sample_index_using_fmask(&ctx->ac,
							       fmask_load_address[0],
							       fmask_load_address[1],
							       fmask_load_address[2],
							       sample_index,
							       get_sampler_desc(ctx, nir_instr_as_deref(instr->src[0].ssa->parent_instr),
										AC_DESC_FMASK, NULL, false, false));
	}
	if (count == 1 && !gfx9_1d) {
		/* Single coordinate: may be scalar or a 1-wide vector. */
		if (instr->src[1].ssa->num_components)
			args->coords[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
		else
			args->coords[0] = src0;
	} else {
		int chan;
		if (is_ms)
			count--; /* sample index is appended separately below */
		for (chan = 0; chan < count; ++chan) {
			args->coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
		}
		if (add_frag_pos) {
			for (chan = 0; chan < 2; ++chan) {
				args->coords[chan] = LLVMBuildAdd(
					ctx->ac.builder, args->coords[chan],
					LLVMBuildFPToUI(
						ctx->ac.builder, ctx->abi->frag_pos[chan],
						ctx->ac.i32, ""), "");
			}
			args->coords[2] = ac_to_integer(&ctx->ac,
				ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
			count++;
		}

		if (gfx9_1d) {
			/* Insert y = 0 to address the 1D image as 2D. */
			if (is_array) {
				args->coords[2] = args->coords[1];
				args->coords[1] = ctx->ac.i32_0;
			} else
				args->coords[1] = ctx->ac.i32_0;
			count++;
		}

		if (is_ms) {
			args->coords[count] = sample_index;
			count++;
		}
	}
}
2510
2511 static LLVMValueRef get_image_buffer_descriptor(struct ac_nir_context *ctx,
2512 const nir_intrinsic_instr *instr, bool write)
2513 {
2514 LLVMValueRef rsrc = get_image_descriptor(ctx, instr, AC_DESC_BUFFER, write);
2515 if (ctx->abi->gfx9_stride_size_workaround) {
2516 LLVMValueRef elem_count = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 2, 0), "");
2517 LLVMValueRef stride = LLVMBuildExtractElement(ctx->ac.builder, rsrc, LLVMConstInt(ctx->ac.i32, 1, 0), "");
2518 stride = LLVMBuildLShr(ctx->ac.builder, stride, LLVMConstInt(ctx->ac.i32, 16, 0), "");
2519
2520 LLVMValueRef new_elem_count = LLVMBuildSelect(ctx->ac.builder,
2521 LLVMBuildICmp(ctx->ac.builder, LLVMIntUGT, elem_count, stride, ""),
2522 elem_count, stride, "");
2523
2524 rsrc = LLVMBuildInsertElement(ctx->ac.builder, rsrc, new_elem_count,
2525 LLVMConstInt(ctx->ac.i32, 2, 0), "");
2526 }
2527 return rsrc;
2528 }
2529
/* Lower nir_intrinsic_image_deref_load: buffer images use a typed buffer
 * load; all other dimensionalities use the image.load opcode.
 */
static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
				     const nir_intrinsic_instr *instr)
{
	LLVMValueRef res;
	const nir_deref_instr *image_deref = get_image_deref(instr);
	const struct glsl_type *type = image_deref->type;
	const nir_variable *var = nir_deref_instr_get_variable(image_deref);
	struct ac_image_args args = {};

	args.cache_policy =
		get_cache_policy(ctx, var->data.image.access, false, false);

	const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
	if (dim == GLSL_SAMPLER_DIM_BUF) {
		/* Only load the channels the shader actually reads. */
		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
		unsigned num_channels = util_last_bit(mask);
		LLVMValueRef rsrc, vindex;

		rsrc = get_image_buffer_descriptor(ctx, instr, false);
		vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
						 ctx->ac.i32_0, "");

		/* TODO: set "can_speculate" when OpenGL needs it. */
		res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
						  ctx->ac.i32_0, num_channels,
						  !!(args.cache_policy & ac_glc),
						  false);
		res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);

		res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
		res = ac_to_integer(&ctx->ac, res);
	} else {
		args.opcode = ac_image_load;
		get_image_coords(ctx, instr, &args);
		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
		args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
					    glsl_sampler_type_is_array(type));
		args.dmask = 15;
		args.attributes = AC_FUNC_ATTR_READONLY;

		res = ac_build_image_opcode(&ctx->ac, &args);
	}
	return ac_to_integer(&ctx->ac, res);
}
2574
/* Lower nir_intrinsic_image_deref_store: buffer images use a typed buffer
 * store; all other dimensionalities use the image.store opcode. src[1] is
 * the coordinate, src[3] the data to store.
 */
static void visit_image_store(struct ac_nir_context *ctx,
			      nir_intrinsic_instr *instr)
{
	const nir_deref_instr *image_deref = get_image_deref(instr);
	const struct glsl_type *type = image_deref->type;
	const nir_variable *var = nir_deref_instr_get_variable(image_deref);
	const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
	bool writeonly_memory = var->data.image.access & ACCESS_NON_READABLE;
	struct ac_image_args args = {};

	args.cache_policy = get_cache_policy(ctx, var->data.image.access, true,
					     writeonly_memory);

	if (dim == GLSL_SAMPLER_DIM_BUF) {
		LLVMValueRef rsrc = get_image_buffer_descriptor(ctx, instr, true);
		LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
		unsigned src_channels = ac_get_llvm_num_components(src);
		LLVMValueRef vindex;

		/* 3-channel stores are padded out to vec4. */
		if (src_channels == 3)
			src = ac_build_expand_to_vec4(&ctx->ac, src, 3);

		vindex = LLVMBuildExtractElement(ctx->ac.builder,
						 get_src(ctx, instr->src[1]),
						 ctx->ac.i32_0, "");

		ac_build_buffer_store_format(&ctx->ac, rsrc, src, vindex,
					     ctx->ac.i32_0, src_channels,
					     args.cache_policy & ac_glc,
					     writeonly_memory);
	} else {
		args.opcode = ac_image_store;
		args.data[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[3]));
		get_image_coords(ctx, instr, &args);
		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
		args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
					    glsl_sampler_type_is_array(type));
		args.dmask = 15;

		ac_build_image_opcode(&ctx->ac, &args);
	}

}
2618
/* Lower the image atomic intrinsics. Buffer images call the buffer-atomic
 * LLVM intrinsic directly (name built from the operation); other
 * dimensionalities use the image-atomic opcode path. src[3] is the operand
 * (and src[4] the comparator for comp_swap).
 */
static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
                                       const nir_intrinsic_instr *instr)
{
	LLVMValueRef params[7];
	int param_count = 0;
	const struct glsl_type *type = get_image_deref(instr)->type;

	bool cmpswap = instr->intrinsic == nir_intrinsic_image_deref_atomic_comp_swap;
	const char *atomic_name;
	char intrinsic_name[64];
	enum ac_atomic_op atomic_subop;
	MAYBE_UNUSED int length;

	bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;

	/* Select the operation name (buffer path) and subop (image path). */
	switch (instr->intrinsic) {
	case nir_intrinsic_image_deref_atomic_add:
		atomic_name = "add";
		atomic_subop = ac_atomic_add;
		break;
	case nir_intrinsic_image_deref_atomic_min:
		atomic_name = is_unsigned ? "umin" : "smin";
		atomic_subop = is_unsigned ? ac_atomic_umin : ac_atomic_smin;
		break;
	case nir_intrinsic_image_deref_atomic_max:
		atomic_name = is_unsigned ? "umax" : "smax";
		atomic_subop = is_unsigned ? ac_atomic_umax : ac_atomic_smax;
		break;
	case nir_intrinsic_image_deref_atomic_and:
		atomic_name = "and";
		atomic_subop = ac_atomic_and;
		break;
	case nir_intrinsic_image_deref_atomic_or:
		atomic_name = "or";
		atomic_subop = ac_atomic_or;
		break;
	case nir_intrinsic_image_deref_atomic_xor:
		atomic_name = "xor";
		atomic_subop = ac_atomic_xor;
		break;
	case nir_intrinsic_image_deref_atomic_exchange:
		atomic_name = "swap";
		atomic_subop = ac_atomic_swap;
		break;
	case nir_intrinsic_image_deref_atomic_comp_swap:
		atomic_name = "cmpswap";
		atomic_subop = 0; /* not used */
		break;
	default:
		abort();
	}

	/* cmpswap takes the comparator first, then the new value. */
	if (cmpswap)
		params[param_count++] = get_src(ctx, instr->src[4]);
	params[param_count++] = get_src(ctx, instr->src[3]);

	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
		params[param_count++] = get_image_buffer_descriptor(ctx, instr, true);
		params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[1]),
								ctx->ac.i32_0, ""); /* vindex */
		params[param_count++] = ctx->ac.i32_0; /* voffset */
		if (HAVE_LLVM >= 0x800) {
			/* LLVM 8+ uses the struct.buffer.atomic form with
			 * extra soffset/slc operands.
			 */
			params[param_count++] = ctx->ac.i32_0; /* soffset */
			params[param_count++] = ctx->ac.i32_0; /* slc */

			length = snprintf(intrinsic_name, sizeof(intrinsic_name),
			                  "llvm.amdgcn.struct.buffer.atomic.%s.i32", atomic_name);
		} else {
			params[param_count++] = ctx->ac.i1false; /* slc */

			length = snprintf(intrinsic_name, sizeof(intrinsic_name),
			                  "llvm.amdgcn.buffer.atomic.%s", atomic_name);
		}

		assert(length < sizeof(intrinsic_name));
		return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32,
					  params, param_count, 0);
	} else {
		struct ac_image_args args = {};
		args.opcode = cmpswap ? ac_image_atomic_cmpswap : ac_image_atomic;
		args.atomic = atomic_subop;
		args.data[0] = params[0];
		if (cmpswap)
			args.data[1] = params[1];
		get_image_coords(ctx, instr, &args);
		args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, true);
		args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
					    glsl_sampler_type_is_array(type));

		return ac_build_image_opcode(&ctx->ac, &args);
	}
}
2711
2712 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
2713 const nir_intrinsic_instr *instr)
2714 {
2715 const struct glsl_type *type = get_image_deref(instr)->type;
2716
2717 struct ac_image_args args = { 0 };
2718 args.dim = get_ac_sampler_dim(&ctx->ac, glsl_get_sampler_dim(type),
2719 glsl_sampler_type_is_array(type));
2720 args.dmask = 0xf;
2721 args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
2722 args.opcode = ac_image_get_resinfo;
2723 args.lod = ctx->ac.i32_0;
2724 args.attributes = AC_FUNC_ATTR_READNONE;
2725
2726 return ac_build_image_opcode(&ctx->ac, &args);
2727 }
2728
/* Query image dimensions via resinfo, with fixups for cube arrays (layer
 * count divided by 6) and GFX9 1D arrays (layer count moved to component 1
 * because 1D images are laid out as 2D there).
 */
static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
				     const nir_intrinsic_instr *instr)
{
	LLVMValueRef res;
	const struct glsl_type *type = get_image_deref(instr)->type;

	/* Buffer images report their element count instead. */
	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
		return get_buffer_size(ctx, get_image_descriptor(ctx, instr, AC_DESC_BUFFER, false), true);

	struct ac_image_args args = { 0 };

	args.dim = get_ac_image_dim(&ctx->ac, glsl_get_sampler_dim(type),
				    glsl_sampler_type_is_array(type));
	args.dmask = 0xf;
	args.resource = get_image_descriptor(ctx, instr, AC_DESC_IMAGE, false);
	args.opcode = ac_image_get_resinfo;
	args.lod = ctx->ac.i32_0;
	args.attributes = AC_FUNC_ATTR_READNONE;

	res = ac_build_image_opcode(&ctx->ac, &args);

	LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);

	if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
	    glsl_sampler_type_is_array(type)) {
		/* Cube arrays: hw reports faces, API wants cubes. */
		LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
		LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
		z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
		res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
	}
	if (ctx->ac.chip_class >= GFX9 &&
	    glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
	    glsl_sampler_type_is_array(type)) {
		/* GFX9 1D arrays: layer count is in component 2, but the
		 * API expects it in component 1.
		 */
		LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
		res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
						ctx->ac.i32_1, "");

	}
	return res;
}
2769
2770 static void emit_membar(struct ac_llvm_context *ac,
2771 const nir_intrinsic_instr *instr)
2772 {
2773 unsigned waitcnt = NOOP_WAITCNT;
2774
2775 switch (instr->intrinsic) {
2776 case nir_intrinsic_memory_barrier:
2777 case nir_intrinsic_group_memory_barrier:
2778 waitcnt &= VM_CNT & LGKM_CNT;
2779 break;
2780 case nir_intrinsic_memory_barrier_atomic_counter:
2781 case nir_intrinsic_memory_barrier_buffer:
2782 case nir_intrinsic_memory_barrier_image:
2783 waitcnt &= VM_CNT;
2784 break;
2785 case nir_intrinsic_memory_barrier_shared:
2786 waitcnt &= LGKM_CNT;
2787 break;
2788 default:
2789 break;
2790 }
2791 if (waitcnt != NOOP_WAITCNT)
2792 ac_build_waitcnt(ac, waitcnt);
2793 }
2794
2795 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
2796 {
2797 /* SI only (thanks to a hw bug workaround):
2798 * The real barrier instruction isn’t needed, because an entire patch
2799 * always fits into a single wave.
2800 */
2801 if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
2802 ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
2803 return;
2804 }
2805 ac_build_s_barrier(ac);
2806 }
2807
2808 static void emit_discard(struct ac_nir_context *ctx,
2809 const nir_intrinsic_instr *instr)
2810 {
2811 LLVMValueRef cond;
2812
2813 if (instr->intrinsic == nir_intrinsic_discard_if) {
2814 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
2815 get_src(ctx, instr->src[0]),
2816 ctx->ac.i32_0, "");
2817 } else {
2818 assert(instr->intrinsic == nir_intrinsic_discard);
2819 cond = ctx->ac.i1false;
2820 }
2821
2822 ctx->abi->emit_kill(ctx->abi, cond);
2823 }
2824
2825 static LLVMValueRef
2826 visit_load_helper_invocation(struct ac_nir_context *ctx)
2827 {
2828 LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
2829 "llvm.amdgcn.ps.live",
2830 ctx->ac.i1, NULL, 0,
2831 AC_FUNC_ATTR_READNONE);
2832 result = LLVMBuildNot(ctx->ac.builder, result, "");
2833 return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
2834 }
2835
2836 static LLVMValueRef
2837 visit_load_local_invocation_index(struct ac_nir_context *ctx)
2838 {
2839 LLVMValueRef result;
2840 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
2841 result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
2842 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
2843
2844 return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
2845 }
2846
2847 static LLVMValueRef
2848 visit_load_subgroup_id(struct ac_nir_context *ctx)
2849 {
2850 if (ctx->stage == MESA_SHADER_COMPUTE) {
2851 LLVMValueRef result;
2852 result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
2853 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
2854 return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
2855 } else {
2856 return LLVMConstInt(ctx->ac.i32, 0, false);
2857 }
2858 }
2859
2860 static LLVMValueRef
2861 visit_load_num_subgroups(struct ac_nir_context *ctx)
2862 {
2863 if (ctx->stage == MESA_SHADER_COMPUTE) {
2864 return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
2865 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
2866 } else {
2867 return LLVMConstInt(ctx->ac.i32, 1, false);
2868 }
2869 }
2870
2871 static LLVMValueRef
2872 visit_first_invocation(struct ac_nir_context *ctx)
2873 {
2874 LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
2875
2876 /* The second argument is whether cttz(0) should be defined, but we do not care. */
2877 LLVMValueRef args[] = {active_set, ctx->ac.i1false};
2878 LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
2879 "llvm.cttz.i64",
2880 ctx->ac.i64, args, 2,
2881 AC_FUNC_ATTR_NOUNWIND |
2882 AC_FUNC_ATTR_READNONE);
2883
2884 return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
2885 }
2886
2887 static LLVMValueRef
2888 visit_load_shared(struct ac_nir_context *ctx,
2889 const nir_intrinsic_instr *instr)
2890 {
2891 LLVMValueRef values[4], derived_ptr, index, ret;
2892
2893 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
2894
2895 for (int chan = 0; chan < instr->num_components; chan++) {
2896 index = LLVMConstInt(ctx->ac.i32, chan, 0);
2897 derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
2898 values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
2899 }
2900
2901 ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
2902 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2903 }
2904
2905 static void
2906 visit_store_shared(struct ac_nir_context *ctx,
2907 const nir_intrinsic_instr *instr)
2908 {
2909 LLVMValueRef derived_ptr, data,index;
2910 LLVMBuilderRef builder = ctx->ac.builder;
2911
2912 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]);
2913 LLVMValueRef src = get_src(ctx, instr->src[0]);
2914
2915 int writemask = nir_intrinsic_write_mask(instr);
2916 for (int chan = 0; chan < 4; chan++) {
2917 if (!(writemask & (1 << chan))) {
2918 continue;
2919 }
2920 data = ac_llvm_extract_elem(&ctx->ac, src, chan);
2921 index = LLVMConstInt(ctx->ac.i32, chan, 0);
2922 derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
2923 LLVMBuildStore(builder, data, derived_ptr);
2924 }
2925 }
2926
/* Lower a shared-memory / deref atomic intrinsic to an LLVM atomic
 * instruction on "ptr".
 *
 * src_idx is the index of the first data source; comp_swap additionally
 * consumes src[src_idx + 1] as the replacement value.  Returns the value
 * the memory location held before the operation, or NULL for an
 * unrecognized intrinsic.
 */
static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
				     const nir_intrinsic_instr *instr,
				     LLVMValueRef ptr, int src_idx)
{
	LLVMValueRef result;
	LLVMValueRef src = get_src(ctx, instr->src[src_idx]);

	if (instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap ||
	    instr->intrinsic == nir_intrinsic_deref_atomic_comp_swap) {
		LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
		result = LLVMBuildAtomicCmpXchg(ctx->ac.builder,
						ptr, src, src1,
						LLVMAtomicOrderingSequentiallyConsistent,
						LLVMAtomicOrderingSequentiallyConsistent,
						false);
		/* cmpxchg returns { old value, success flag }; only the old
		 * value is wanted here. */
		result = LLVMBuildExtractValue(ctx->ac.builder, result, 0, "");
	} else {
		/* Map the NIR intrinsic onto the matching LLVM RMW opcode. */
		LLVMAtomicRMWBinOp op;
		switch (instr->intrinsic) {
		case nir_intrinsic_shared_atomic_add:
		case nir_intrinsic_deref_atomic_add:
			op = LLVMAtomicRMWBinOpAdd;
			break;
		case nir_intrinsic_shared_atomic_umin:
		case nir_intrinsic_deref_atomic_umin:
			op = LLVMAtomicRMWBinOpUMin;
			break;
		case nir_intrinsic_shared_atomic_umax:
		case nir_intrinsic_deref_atomic_umax:
			op = LLVMAtomicRMWBinOpUMax;
			break;
		case nir_intrinsic_shared_atomic_imin:
		case nir_intrinsic_deref_atomic_imin:
			op = LLVMAtomicRMWBinOpMin;
			break;
		case nir_intrinsic_shared_atomic_imax:
		case nir_intrinsic_deref_atomic_imax:
			op = LLVMAtomicRMWBinOpMax;
			break;
		case nir_intrinsic_shared_atomic_and:
		case nir_intrinsic_deref_atomic_and:
			op = LLVMAtomicRMWBinOpAnd;
			break;
		case nir_intrinsic_shared_atomic_or:
		case nir_intrinsic_deref_atomic_or:
			op = LLVMAtomicRMWBinOpOr;
			break;
		case nir_intrinsic_shared_atomic_xor:
		case nir_intrinsic_deref_atomic_xor:
			op = LLVMAtomicRMWBinOpXor;
			break;
		case nir_intrinsic_shared_atomic_exchange:
		case nir_intrinsic_deref_atomic_exchange:
			op = LLVMAtomicRMWBinOpXchg;
			break;
		default:
			return NULL;
		}

		/* The RMW operand must be an integer value. */
		result = LLVMBuildAtomicRMW(ctx->ac.builder, op, ptr, ac_to_integer(&ctx->ac, src),
					    LLVMAtomicOrderingSequentiallyConsistent,
					    false);
	}
	return result;
}
2992
2993 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
2994 {
2995 LLVMValueRef values[2];
2996 LLVMValueRef pos[2];
2997
2998 pos[0] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[0]);
2999 pos[1] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[1]);
3000
3001 values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
3002 values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
3003 return ac_build_gather_values(&ctx->ac, values, 2);
3004 }
3005
/* Lower the interpolateAt* intrinsics (interp_deref_at_centroid /
 * _at_sample / _at_offset) for fragment-shader inputs.
 *
 * For center interpolation with an explicit offset or sample position,
 * new barycentric I/J coordinates are derived from the base interp
 * parameters and their derivatives; the input is then fetched per channel
 * with v_interp instructions and the requested array element selected.
 */
static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
				 const nir_intrinsic_instr *instr)
{
	LLVMValueRef result[4];
	LLVMValueRef interp_param;
	unsigned location;
	unsigned chan;
	LLVMValueRef src_c0 = NULL;
	LLVMValueRef src_c1 = NULL;
	LLVMValueRef src0 = NULL;

	/* NOTE(review): indexing assumes the interpolated variable is a user
	 * varying (location >= VARYING_SLOT_VAR0) - confirm against callers. */
	nir_deref_instr *deref_instr = nir_instr_as_deref(instr->src[0].ssa->parent_instr);
	nir_variable *var = nir_deref_instr_get_variable(deref_instr);
	int input_base = ctx->abi->fs_input_attr_indices[var->data.location - VARYING_SLOT_VAR0];
	switch (instr->intrinsic) {
	case nir_intrinsic_interp_deref_at_centroid:
		location = INTERP_CENTROID;
		break;
	case nir_intrinsic_interp_deref_at_sample:
	case nir_intrinsic_interp_deref_at_offset:
		location = INTERP_CENTER;
		src0 = get_src(ctx, instr->src[1]);
		break;
	default:
		/* NOTE(review): only the three interp_deref_at_* intrinsics
		 * reach this function; any other opcode would leave
		 * "location" uninitialized. */
		break;
	}

	if (instr->intrinsic == nir_intrinsic_interp_deref_at_offset) {
		/* The offset source is a vec2 in pixel space. */
		src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_0, ""));
		src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_1, ""));
	} else if (instr->intrinsic == nir_intrinsic_interp_deref_at_sample) {
		LLVMValueRef sample_position;
		LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);

		/* fetch sample ID */
		sample_position = ctx->abi->load_sample_position(ctx->abi, src0);

		/* Re-center the sample position around the pixel center. */
		src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_0, "");
		src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
		src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_1, "");
		src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
	}
	interp_param = ctx->abi->lookup_interp_param(ctx->abi, var->data.interpolation, location);

	if (location == INTERP_CENTER) {
		LLVMValueRef ij_out[2];
		LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);

		/*
		 * take the I then J parameters, and the DDX/Y for it, and
		 * calculate the IJ inputs for the interpolator.
		 * temp1 = ddx * offset/sample.x + I;
		 * interp_param.I = ddy * offset/sample.y + temp1;
		 * temp1 = ddx * offset/sample.x + J;
		 * interp_param.J = ddy * offset/sample.y + temp1;
		 */
		for (unsigned i = 0; i < 2; i++) {
			LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
			LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
			LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
								      ddxy_out, ix_ll, "");
			LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
								      ddxy_out, iy_ll, "");
			LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
									 interp_param, ix_ll, "");
			LLVMValueRef temp1, temp2;

			interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
						     ctx->ac.f32, "");

			temp1 = ac_build_fmad(&ctx->ac, ddx_el, src_c0, interp_el);
			temp2 = ac_build_fmad(&ctx->ac, ddy_el, src_c1, temp1);

			ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
						     temp2, ctx->ac.i32, "");
		}
		interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);

	}

	/* Walk the deref chain to turn array / struct indexing into a flat
	 * attribute index relative to the variable's base slot. */
	LLVMValueRef attrib_idx = ctx->ac.i32_0;
	while(deref_instr->deref_type != nir_deref_type_var) {
		if (deref_instr->deref_type == nir_deref_type_array) {
			unsigned array_size = glsl_count_attribute_slots(deref_instr->type, false);

			LLVMValueRef offset;
			nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index);
			if (const_value) {
				offset = LLVMConstInt(ctx->ac.i32, array_size * const_value->u32[0], false);
			} else {
				LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);

				offset = LLVMBuildMul(ctx->ac.builder, indirect,
						      LLVMConstInt(ctx->ac.i32, array_size, false), "");
			}

			attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, offset, "");
			deref_instr = nir_src_as_deref(deref_instr->parent);
		} else if (deref_instr->deref_type == nir_deref_type_struct) {
			LLVMValueRef offset;
			unsigned sidx = deref_instr->strct.index;
			deref_instr = nir_src_as_deref(deref_instr->parent);
			offset = LLVMConstInt(ctx->ac.i32, glsl_get_struct_location_offset(deref_instr->type, sidx), false);
			attrib_idx = LLVMBuildAdd(ctx->ac.builder, attrib_idx, offset, "");
		} else {
			unreachable("Unsupported deref type");
		}

	}

	/* Interpolate every attribute slot of the variable for each channel
	 * and pick the requested slot afterwards (attrib_idx may be a
	 * runtime value). */
	unsigned attrib_size = glsl_count_attribute_slots(var->type, false);
	for (chan = 0; chan < 4; chan++) {
		LLVMValueRef gather = LLVMGetUndef(LLVMVectorType(ctx->ac.f32, attrib_size));
		LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);

		for (unsigned idx = 0; idx < attrib_size; ++idx) {
			LLVMValueRef v, attr_number;

			attr_number = LLVMConstInt(ctx->ac.i32, input_base + idx, false);
			if (interp_param) {
				interp_param = LLVMBuildBitCast(ctx->ac.builder,
							interp_param, ctx->ac.v2f32, "");
				LLVMValueRef i = LLVMBuildExtractElement(
					ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
				LLVMValueRef j = LLVMBuildExtractElement(
					ctx->ac.builder, interp_param, ctx->ac.i32_1, "");

				v = ac_build_fs_interp(&ctx->ac, llvm_chan, attr_number,
						       ctx->abi->prim_mask, i, j);
			} else {
				/* No interp param: flat-shaded input, read the
				 * provoking vertex's value directly. */
				v = ac_build_fs_interp_mov(&ctx->ac, LLVMConstInt(ctx->ac.i32, 2, false),
							   llvm_chan, attr_number, ctx->abi->prim_mask);
			}

			gather = LLVMBuildInsertElement(ctx->ac.builder, gather, v,
							LLVMConstInt(ctx->ac.i32, idx, false), "");
		}

		result[chan] = LLVMBuildExtractElement(ctx->ac.builder, gather, attrib_idx, "");

	}
	return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
					      var->data.location_frac);
}
3150
/* Translate one NIR intrinsic instruction into LLVM IR.
 *
 * System values are read from the shader ABI, memory/image/atomic
 * operations are dispatched to the visit_* helpers above.  If the
 * intrinsic produces a value, it is recorded in the SSA def table so
 * later instructions can reference it.
 */
static void visit_intrinsic(struct ac_nir_context *ctx,
                            nir_intrinsic_instr *instr)
{
	LLVMValueRef result = NULL;

	switch (instr->intrinsic) {
	case nir_intrinsic_ballot:
		result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
		break;
	case nir_intrinsic_read_invocation:
		result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]),
				get_src(ctx, instr->src[1]));
		break;
	case nir_intrinsic_read_first_invocation:
		result = ac_build_readlane(&ctx->ac, get_src(ctx, instr->src[0]), NULL);
		break;
	case nir_intrinsic_load_subgroup_invocation:
		result = ac_get_thread_id(&ctx->ac);
		break;
	case nir_intrinsic_load_work_group_id: {
		LLVMValueRef values[3];

		/* Unused workgroup-id components may be NULL in the ABI;
		 * substitute zero so the gather is always well-formed. */
		for (int i = 0; i < 3; i++) {
			values[i] = ctx->abi->workgroup_ids[i] ?
				    ctx->abi->workgroup_ids[i] : ctx->ac.i32_0;
		}

		result = ac_build_gather_values(&ctx->ac, values, 3);
		break;
	}
	case nir_intrinsic_load_base_vertex:
	case nir_intrinsic_load_first_vertex:
		result = ctx->abi->load_base_vertex(ctx->abi);
		break;
	case nir_intrinsic_load_local_group_size:
		result = ctx->abi->load_local_group_size(ctx->abi);
		break;
	case nir_intrinsic_load_vertex_id:
		/* gl_VertexID includes the base vertex. */
		result = LLVMBuildAdd(ctx->ac.builder, ctx->abi->vertex_id,
				      ctx->abi->base_vertex, "");
		break;
	case nir_intrinsic_load_vertex_id_zero_base: {
		result = ctx->abi->vertex_id;
		break;
	}
	case nir_intrinsic_load_local_invocation_id: {
		result = ctx->abi->local_invocation_ids;
		break;
	}
	case nir_intrinsic_load_base_instance:
		result = ctx->abi->start_instance;
		break;
	case nir_intrinsic_load_draw_id:
		result = ctx->abi->draw_id;
		break;
	case nir_intrinsic_load_view_index:
		result = ctx->abi->view_index;
		break;
	case nir_intrinsic_load_invocation_id:
		if (ctx->stage == MESA_SHADER_TESS_CTRL)
			result = ac_unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
		else
			result = ctx->abi->gs_invocation_id;
		break;
	case nir_intrinsic_load_primitive_id:
		/* gl_PrimitiveID comes from a stage-specific ABI register. */
		if (ctx->stage == MESA_SHADER_GEOMETRY) {
			result = ctx->abi->gs_prim_id;
		} else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
			result = ctx->abi->tcs_patch_id;
		} else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
			result = ctx->abi->tes_patch_id;
		} else
			fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
		break;
	case nir_intrinsic_load_sample_id:
		result = ac_unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
		break;
	case nir_intrinsic_load_sample_pos:
		result = load_sample_pos(ctx);
		break;
	case nir_intrinsic_load_sample_mask_in:
		result = ctx->abi->load_sample_mask_in(ctx->abi);
		break;
	case nir_intrinsic_load_frag_coord: {
		/* gl_FragCoord.w is 1/w as required by GLSL. */
		LLVMValueRef values[4] = {
			ctx->abi->frag_pos[0],
			ctx->abi->frag_pos[1],
			ctx->abi->frag_pos[2],
			ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
		};
		result = ac_to_integer(&ctx->ac,
		                       ac_build_gather_values(&ctx->ac, values, 4));
		break;
	}
	case nir_intrinsic_load_front_face:
		result = ctx->abi->front_face;
		break;
	case nir_intrinsic_load_helper_invocation:
		result = visit_load_helper_invocation(ctx);
		break;
	case nir_intrinsic_load_instance_id:
		result = ctx->abi->instance_id;
		break;
	case nir_intrinsic_load_num_work_groups:
		result = ctx->abi->num_work_groups;
		break;
	case nir_intrinsic_load_local_invocation_index:
		result = visit_load_local_invocation_index(ctx);
		break;
	case nir_intrinsic_load_subgroup_id:
		result = visit_load_subgroup_id(ctx);
		break;
	case nir_intrinsic_load_num_subgroups:
		result = visit_load_num_subgroups(ctx);
		break;
	case nir_intrinsic_first_invocation:
		result = visit_first_invocation(ctx);
		break;
	case nir_intrinsic_load_push_constant:
		result = visit_load_push_constant(ctx, instr);
		break;
	case nir_intrinsic_vulkan_resource_index: {
		LLVMValueRef index = get_src(ctx, instr->src[0]);
		unsigned desc_set = nir_intrinsic_desc_set(instr);
		unsigned binding = nir_intrinsic_binding(instr);

		result = ctx->abi->load_resource(ctx->abi, index, desc_set,
						 binding);
		break;
	}
	case nir_intrinsic_vulkan_resource_reindex:
		result = visit_vulkan_resource_reindex(ctx, instr);
		break;
	case nir_intrinsic_store_ssbo:
		visit_store_ssbo(ctx, instr);
		break;
	case nir_intrinsic_load_ssbo:
		result = visit_load_buffer(ctx, instr);
		break;
	case nir_intrinsic_ssbo_atomic_add:
	case nir_intrinsic_ssbo_atomic_imin:
	case nir_intrinsic_ssbo_atomic_umin:
	case nir_intrinsic_ssbo_atomic_imax:
	case nir_intrinsic_ssbo_atomic_umax:
	case nir_intrinsic_ssbo_atomic_and:
	case nir_intrinsic_ssbo_atomic_or:
	case nir_intrinsic_ssbo_atomic_xor:
	case nir_intrinsic_ssbo_atomic_exchange:
	case nir_intrinsic_ssbo_atomic_comp_swap:
		result = visit_atomic_ssbo(ctx, instr);
		break;
	case nir_intrinsic_load_ubo:
		result = visit_load_ubo_buffer(ctx, instr);
		break;
	case nir_intrinsic_get_buffer_size:
		result = visit_get_buffer_size(ctx, instr);
		break;
	case nir_intrinsic_load_deref:
		result = visit_load_var(ctx, instr);
		break;
	case nir_intrinsic_store_deref:
		visit_store_var(ctx, instr);
		break;
	case nir_intrinsic_load_shared:
		result = visit_load_shared(ctx, instr);
		break;
	case nir_intrinsic_store_shared:
		visit_store_shared(ctx, instr);
		break;
	case nir_intrinsic_image_deref_samples:
		result = visit_image_samples(ctx, instr);
		break;
	case nir_intrinsic_image_deref_load:
		result = visit_image_load(ctx, instr);
		break;
	case nir_intrinsic_image_deref_store:
		visit_image_store(ctx, instr);
		break;
	case nir_intrinsic_image_deref_atomic_add:
	case nir_intrinsic_image_deref_atomic_min:
	case nir_intrinsic_image_deref_atomic_max:
	case nir_intrinsic_image_deref_atomic_and:
	case nir_intrinsic_image_deref_atomic_or:
	case nir_intrinsic_image_deref_atomic_xor:
	case nir_intrinsic_image_deref_atomic_exchange:
	case nir_intrinsic_image_deref_atomic_comp_swap:
		result = visit_image_atomic(ctx, instr);
		break;
	case nir_intrinsic_image_deref_size:
		result = visit_image_size(ctx, instr);
		break;
	case nir_intrinsic_shader_clock:
		result = ac_build_shader_clock(&ctx->ac);
		break;
	case nir_intrinsic_discard:
	case nir_intrinsic_discard_if:
		emit_discard(ctx, instr);
		break;
	case nir_intrinsic_memory_barrier:
	case nir_intrinsic_group_memory_barrier:
	case nir_intrinsic_memory_barrier_atomic_counter:
	case nir_intrinsic_memory_barrier_buffer:
	case nir_intrinsic_memory_barrier_image:
	case nir_intrinsic_memory_barrier_shared:
		emit_membar(&ctx->ac, instr);
		break;
	case nir_intrinsic_barrier:
		ac_emit_barrier(&ctx->ac, ctx->stage);
		break;
	/* Shared atomics use an LDS pointer; src[0] is the offset. */
	case nir_intrinsic_shared_atomic_add:
	case nir_intrinsic_shared_atomic_imin:
	case nir_intrinsic_shared_atomic_umin:
	case nir_intrinsic_shared_atomic_imax:
	case nir_intrinsic_shared_atomic_umax:
	case nir_intrinsic_shared_atomic_and:
	case nir_intrinsic_shared_atomic_or:
	case nir_intrinsic_shared_atomic_xor:
	case nir_intrinsic_shared_atomic_exchange:
	case nir_intrinsic_shared_atomic_comp_swap: {
		LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
		result = visit_var_atomic(ctx, instr, ptr, 1);
		break;
	}
	/* Deref atomics already carry the pointer as src[0]. */
	case nir_intrinsic_deref_atomic_add:
	case nir_intrinsic_deref_atomic_imin:
	case nir_intrinsic_deref_atomic_umin:
	case nir_intrinsic_deref_atomic_imax:
	case nir_intrinsic_deref_atomic_umax:
	case nir_intrinsic_deref_atomic_and:
	case nir_intrinsic_deref_atomic_or:
	case nir_intrinsic_deref_atomic_xor:
	case nir_intrinsic_deref_atomic_exchange:
	case nir_intrinsic_deref_atomic_comp_swap: {
		LLVMValueRef ptr = get_src(ctx, instr->src[0]);
		result = visit_var_atomic(ctx, instr, ptr, 1);
		break;
	}
	case nir_intrinsic_interp_deref_at_centroid:
	case nir_intrinsic_interp_deref_at_sample:
	case nir_intrinsic_interp_deref_at_offset:
		result = visit_interp(ctx, instr);
		break;
	case nir_intrinsic_emit_vertex:
		ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
		break;
	case nir_intrinsic_end_primitive:
		ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
		break;
	case nir_intrinsic_load_tess_coord:
		result = ctx->abi->load_tess_coord(ctx->abi);
		break;
	case nir_intrinsic_load_tess_level_outer:
		result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
		break;
	case nir_intrinsic_load_tess_level_inner:
		result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
		break;
	case nir_intrinsic_load_patch_vertices_in:
		result = ctx->abi->load_patch_vertices_in(ctx->abi);
		break;
	case nir_intrinsic_vote_all: {
		LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
		result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
		break;
	}
	case nir_intrinsic_vote_any: {
		LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
		result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
		break;
	}
	case nir_intrinsic_shuffle:
		result = ac_build_shuffle(&ctx->ac, get_src(ctx, instr->src[0]),
				get_src(ctx, instr->src[1]));
		break;
	case nir_intrinsic_reduce:
		result = ac_build_reduce(&ctx->ac,
				get_src(ctx, instr->src[0]),
				instr->const_index[0],
				instr->const_index[1]);
		break;
	case nir_intrinsic_inclusive_scan:
		result = ac_build_inclusive_scan(&ctx->ac,
				get_src(ctx, instr->src[0]),
				instr->const_index[0]);
		break;
	case nir_intrinsic_exclusive_scan:
		result = ac_build_exclusive_scan(&ctx->ac,
				get_src(ctx, instr->src[0]),
				instr->const_index[0]);
		break;
	case nir_intrinsic_quad_broadcast: {
		/* The broadcast lane must be a compile-time constant. */
		unsigned lane = nir_src_as_const_value(instr->src[1])->u32[0];
		result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]),
				lane, lane, lane, lane);
		break;
	}
	case nir_intrinsic_quad_swap_horizontal:
		result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 1, 0, 3 ,2);
		break;
	case nir_intrinsic_quad_swap_vertical:
		result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 2, 3, 0 ,1);
		break;
	case nir_intrinsic_quad_swap_diagonal:
		result = ac_build_quad_swizzle(&ctx->ac, get_src(ctx, instr->src[0]), 3, 2, 1 ,0);
		break;
	default:
		fprintf(stderr, "Unknown intrinsic: ");
		nir_print_instr(&instr->instr, stderr);
		fprintf(stderr, "\n");
		break;
	}
	/* Publish the value for later SSA uses, if the intrinsic has one. */
	if (result) {
		ctx->ssa_defs[instr->dest.ssa.index] = result;
	}
}
3466
3467 static LLVMValueRef get_bindless_index_from_uniform(struct ac_nir_context *ctx,
3468 unsigned base_index,
3469 unsigned constant_index,
3470 LLVMValueRef dynamic_index)
3471 {
3472 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, base_index * 4, 0);
3473 LLVMValueRef index = LLVMBuildAdd(ctx->ac.builder, dynamic_index,
3474 LLVMConstInt(ctx->ac.i32, constant_index, 0), "");
3475
3476 /* Bindless uniforms are 64bit so multiple index by 8 */
3477 index = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i32, 8, 0), "");
3478 offset = LLVMBuildAdd(ctx->ac.builder, offset, index, "");
3479
3480 LLVMValueRef ubo_index = ctx->abi->load_ubo(ctx->abi, ctx->ac.i32_0);
3481
3482 LLVMValueRef ret = ac_build_buffer_load(&ctx->ac, ubo_index, 1, NULL, offset,
3483 NULL, 0, false, false, true, true);
3484
3485 return LLVMBuildBitCast(ctx->ac.builder, ret, ctx->ac.i32, "");
3486 }
3487
/* Resolve a texture/image/sampler deref chain to a descriptor.
 *
 * Walks the deref chain to split the access into a constant part
 * (constant_index) and an optional dynamic part (index), then asks the
 * ABI to load the descriptor of the requested type.  For bindless
 * variables the handle is first fetched from the uniform buffer.
 *
 * deref_instr may be NULL for texture instructions without a deref
 * source, in which case tex_instr->sampler_index is used directly.
 */
static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
				     nir_deref_instr *deref_instr,
				     enum ac_descriptor_type desc_type,
				     const nir_tex_instr *tex_instr,
				     bool image, bool write)
{
	LLVMValueRef index = NULL;
	unsigned constant_index = 0;
	unsigned descriptor_set;
	unsigned base_index;
	bool bindless = false;

	if (!deref_instr) {
		assert(tex_instr && !image);
		descriptor_set = 0;
		base_index = tex_instr->sampler_index;
	} else {
		/* Accumulate array/struct indexing until we reach the
		 * variable at the root of the chain. */
		while(deref_instr->deref_type != nir_deref_type_var) {
			if (deref_instr->deref_type == nir_deref_type_array) {
				unsigned array_size = glsl_get_aoa_size(deref_instr->type);
				if (!array_size)
					array_size = 1;

				nir_const_value *const_value = nir_src_as_const_value(deref_instr->arr.index);
				if (const_value) {
					constant_index += array_size * const_value->u32[0];
				} else {
					LLVMValueRef indirect = get_src(ctx, deref_instr->arr.index);

					indirect = LLVMBuildMul(ctx->ac.builder, indirect,
						LLVMConstInt(ctx->ac.i32, array_size, false), "");

					if (!index)
						index = indirect;
					else
						index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
				}

				deref_instr = nir_src_as_deref(deref_instr->parent);
			} else if (deref_instr->deref_type == nir_deref_type_struct) {
				unsigned sidx = deref_instr->strct.index;
				deref_instr = nir_src_as_deref(deref_instr->parent);
				constant_index += glsl_get_struct_location_offset(deref_instr->type, sidx);
			} else {
				unreachable("Unsupported deref type");
			}
		}
		descriptor_set = deref_instr->var->data.descriptor_set;

		if (deref_instr->var->data.bindless) {
			/* For now just assert on unhandled variable types */
			assert(deref_instr->var->data.mode == nir_var_uniform);

			base_index = deref_instr->var->data.driver_location;
			bindless = true;

			/* Fetch the 32-bit bindless handle; it replaces the
			 * computed dynamic index. */
			index = index ? index : ctx->ac.i32_0;
			index = get_bindless_index_from_uniform(ctx, base_index,
								constant_index, index);
		} else
			base_index = deref_instr->var->data.binding;
	}

	return ctx->abi->load_sampler_desc(ctx->abi,
					   descriptor_set,
					   base_index,
					   constant_index, index,
					   desc_type, image, write, bindless);
}
3557
3558 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3559 *
3560 * SI-CI:
3561 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3562 * filtering manually. The driver sets img7 to a mask clearing
3563 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3564 * s_and_b32 samp0, samp0, img7
3565 *
3566 * VI:
3567 * The ANISO_OVERRIDE sampler field enables this fix in TA.
3568 */
3569 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
3570 LLVMValueRef res, LLVMValueRef samp)
3571 {
3572 LLVMBuilderRef builder = ctx->ac.builder;
3573 LLVMValueRef img7, samp0;
3574
3575 if (ctx->ac.chip_class >= VI)
3576 return samp;
3577
3578 img7 = LLVMBuildExtractElement(builder, res,
3579 LLVMConstInt(ctx->ac.i32, 7, 0), "");
3580 samp0 = LLVMBuildExtractElement(builder, samp,
3581 LLVMConstInt(ctx->ac.i32, 0, 0), "");
3582 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3583 return LLVMBuildInsertElement(builder, samp, samp0,
3584 LLVMConstInt(ctx->ac.i32, 0, 0), "");
3585 }
3586
3587 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
3588 nir_tex_instr *instr,
3589 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3590 LLVMValueRef *fmask_ptr)
3591 {
3592 nir_deref_instr *texture_deref_instr = NULL;
3593 nir_deref_instr *sampler_deref_instr = NULL;
3594
3595 for (unsigned i = 0; i < instr->num_srcs; i++) {
3596 switch (instr->src[i].src_type) {
3597 case nir_tex_src_texture_deref:
3598 texture_deref_instr = nir_src_as_deref(instr->src[i].src);
3599 break;
3600 case nir_tex_src_sampler_deref:
3601 sampler_deref_instr = nir_src_as_deref(instr->src[i].src);
3602 break;
3603 default:
3604 break;
3605 }
3606 }
3607
3608 if (!sampler_deref_instr)
3609 sampler_deref_instr = texture_deref_instr;
3610
3611 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
3612 *res_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_BUFFER, instr, false, false);
3613 else
3614 *res_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_IMAGE, instr, false, false);
3615 if (samp_ptr) {
3616 *samp_ptr = get_sampler_desc(ctx, sampler_deref_instr, AC_DESC_SAMPLER, instr, false, false);
3617 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3618 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3619 }
3620 if (fmask_ptr && (instr->op == nir_texop_txf_ms ||
3621 instr->op == nir_texop_samples_identical))
3622 *fmask_ptr = get_sampler_desc(ctx, texture_deref_instr, AC_DESC_FMASK, instr, false, false);
3623 }
3624
3625 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
3626 LLVMValueRef coord)
3627 {
3628 coord = ac_to_float(ctx, coord);
3629 coord = ac_build_round(ctx, coord);
3630 coord = ac_to_integer(ctx, coord);
3631 return coord;
3632 }
3633
/* Translate a NIR texture instruction (sampling, fetches and queries)
 * into an AMD image intrinsic via ac_image_args, applying all the
 * hardware-specific coordinate/offset/derivative fixups, and record the
 * result as the instruction's SSA def.
 *
 * NOTE(review): the fixups below are order-dependent; do not reorder.
 */
static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
{
	LLVMValueRef result = NULL;
	struct ac_image_args args = { 0 };
	LLVMValueRef fmask_ptr = NULL, sample_index = NULL;
	LLVMValueRef ddx = NULL, ddy = NULL;
	unsigned offset_src = 0;

	/* Load the resource/sampler/FMASK descriptors. */
	tex_fetch_ptrs(ctx, instr, &args.resource, &args.sampler, &fmask_ptr);

	/* Gather the instruction's sources into args / local variables. */
	for (unsigned i = 0; i < instr->num_srcs; i++) {
		switch (instr->src[i].src_type) {
		case nir_tex_src_coord: {
			LLVMValueRef coord = get_src(ctx, instr->src[i].src);
			for (unsigned chan = 0; chan < instr->coord_components; ++chan)
				args.coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
			break;
		}
		case nir_tex_src_projector:
			/* Ignored here; presumably lowered earlier — TODO confirm. */
			break;
		case nir_tex_src_comparator:
			if (instr->is_shadow)
				args.compare = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_offset:
			args.offset = get_src(ctx, instr->src[i].src);
			/* Remember which source holds the offset; the txf
			 * path below re-reads it as a constant. */
			offset_src = i;
			break;
		case nir_tex_src_bias:
			if (instr->op == nir_texop_txb)
				args.bias = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_lod: {
			nir_const_value *val = nir_src_as_const_value(instr->src[i].src);

			/* A constant LOD of 0 selects the cheaper
			 * level-zero intrinsic variant. */
			if (val && val->i32[0] == 0)
				args.level_zero = true;
			else
				args.lod = get_src(ctx, instr->src[i].src);
			break;
		}
		case nir_tex_src_ms_index:
			sample_index = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_ms_mcs:
			/* Not used by this backend. */
			break;
		case nir_tex_src_ddx:
			ddx = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_ddy:
			ddy = get_src(ctx, instr->src[i].src);
			break;
		case nir_tex_src_texture_offset:
		case nir_tex_src_sampler_offset:
		case nir_tex_src_plane:
		default:
			break;
		}
	}

	/* Buffer size query: answered from the descriptor, no image op. */
	if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
		result = get_buffer_size(ctx, args.resource, true);
		goto write_result;
	}

	/* Sample-count query: decode it from the resource descriptor.
	 * Non-MSAA resources report 1. */
	if (instr->op == nir_texop_texture_samples) {
		LLVMValueRef res, samples, is_msaa;
		res = LLVMBuildBitCast(ctx->ac.builder, args.resource, ctx->ac.v8i32, "");
		samples = LLVMBuildExtractElement(ctx->ac.builder, res,
						  LLVMConstInt(ctx->ac.i32, 3, false), "");
		/* Descriptor word 3, bits 28+: resource type; 0xe marks
		 * an MSAA resource — TODO confirm against the register spec. */
		is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
					LLVMConstInt(ctx->ac.i32, 28, false), "");
		is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
				       LLVMConstInt(ctx->ac.i32, 0xe, false), "");
		is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
					LLVMConstInt(ctx->ac.i32, 0xe, false), "");

		/* Bits 16..19 hold log2(samples). */
		samples = LLVMBuildLShr(ctx->ac.builder, samples,
					LLVMConstInt(ctx->ac.i32, 16, false), "");
		samples = LLVMBuildAnd(ctx->ac.builder, samples,
				       LLVMConstInt(ctx->ac.i32, 0xf, false), "");
		samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
				       samples, "");
		samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
					  ctx->ac.i32_1, "");
		result = samples;
		goto write_result;
	}

	/* Pack up to three 6-bit texel offsets into one dword, as the
	 * sample instructions expect (txf keeps its offsets separate and
	 * folds them into the coordinates later). */
	if (args.offset && instr->op != nir_texop_txf) {
		LLVMValueRef offset[3], pack;
		for (unsigned chan = 0; chan < 3; ++chan)
			offset[chan] = ctx->ac.i32_0;

		unsigned num_components = ac_get_llvm_num_components(args.offset);
		for (unsigned chan = 0; chan < num_components; chan++) {
			offset[chan] = ac_llvm_extract_elem(&ctx->ac, args.offset, chan);
			/* Each offset occupies 6 bits (byte-aligned). */
			offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
						    LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
			if (chan)
				offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
							    LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
		}
		pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
		pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
		args.offset = pack;
	}

	/* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
	 * so the depth comparison value isn't clamped for Z16 and
	 * Z24 anymore. Do it manually here.
	 *
	 * It's unnecessary if the original texture format was
	 * Z32_FLOAT, but we don't know that here.
	 */
	if (args.compare && ctx->ac.chip_class >= VI && ctx->abi->clamp_shadow_reference)
		args.compare = ac_build_clamp(&ctx->ac, ac_to_float(&ctx->ac, args.compare));

	/* pack derivatives */
	if (ddx || ddy) {
		int num_src_deriv_channels, num_dest_deriv_channels;
		switch (instr->sampler_dim) {
		case GLSL_SAMPLER_DIM_3D:
		case GLSL_SAMPLER_DIM_CUBE:
			num_src_deriv_channels = 3;
			num_dest_deriv_channels = 3;
			break;
		case GLSL_SAMPLER_DIM_2D:
		default:
			num_src_deriv_channels = 2;
			num_dest_deriv_channels = 2;
			break;
		case GLSL_SAMPLER_DIM_1D:
			num_src_deriv_channels = 1;
			/* GFX9 treats 1D textures as 2D, so the
			 * derivatives need a second (zero) channel. */
			if (ctx->ac.chip_class >= GFX9) {
				num_dest_deriv_channels = 2;
			} else {
				num_dest_deriv_channels = 1;
			}
			break;
		}

		for (unsigned i = 0; i < num_src_deriv_channels; i++) {
			args.derivs[i] = ac_to_float(&ctx->ac,
				ac_llvm_extract_elem(&ctx->ac, ddx, i));
			args.derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac,
				ac_llvm_extract_elem(&ctx->ac, ddy, i));
		}
		/* Zero-fill padding channels. */
		for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
			args.derivs[i] = ctx->ac.f32_0;
			args.derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
		}
	}

	/* Convert cube coordinates (and derivatives) to the face-id +
	 * face-coordinate form the hardware expects. */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && args.coords[0]) {
		for (unsigned chan = 0; chan < instr->coord_components; chan++)
			args.coords[chan] = ac_to_float(&ctx->ac, args.coords[chan]);
		if (instr->coord_components == 3)
			args.coords[3] = LLVMGetUndef(ctx->ac.f32);
		ac_prepare_cube_coords(&ctx->ac,
			instr->op == nir_texop_txd, instr->is_array,
			instr->op == nir_texop_lod, args.coords, args.derivs);
	}

	/* Texture coordinates fixups */
	/* Round the array layer of 1D arrays (txf already has an
	 * integer layer). */
	if (instr->coord_components > 1 &&
	    instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
	    instr->is_array &&
	    instr->op != nir_texop_txf) {
		args.coords[1] = apply_round_slice(&ctx->ac, args.coords[1]);
	}

	/* Same for the layer of 2D-shaped arrays. */
	if (instr->coord_components > 2 &&
	    (instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
	     instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
	     instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
	     instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
	    instr->is_array &&
	    instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
		args.coords[2] = apply_round_slice(&ctx->ac, args.coords[2]);
	}

	/* GFX9 has no 1D image ops; promote 1D to 2D by inserting a
	 * dummy Y coordinate (0 texel for fetches, 0.5 — the texel
	 * center — for filtered ops). */
	if (ctx->ac.chip_class >= GFX9 &&
	    instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
	    instr->op != nir_texop_lod) {
		LLVMValueRef filler;
		if (instr->op == nir_texop_txf)
			filler = ctx->ac.i32_0;
		else
			filler = LLVMConstReal(ctx->ac.f32, 0.5);

		if (instr->is_array)
			args.coords[2] = args.coords[1];
		args.coords[1] = filler;
	}

	/* Pack sample index */
	if (instr->op == nir_texop_txf_ms && sample_index)
		args.coords[instr->coord_components] = sample_index;

	/* samples_identical: fetch the FMASK value and report whether it
	 * is zero (all samples point at sample 0). */
	if (instr->op == nir_texop_samples_identical) {
		struct ac_image_args txf_args = { 0 };
		memcpy(txf_args.coords, args.coords, sizeof(txf_args.coords));

		txf_args.dmask = 0xf;
		txf_args.resource = fmask_ptr;
		txf_args.dim = instr->is_array ? ac_image_2darray : ac_image_2d;
		result = build_tex_intrinsic(ctx, instr, &txf_args);

		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
		result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
		goto write_result;
	}

	/* Remap the sample index through FMASK for compressed MSAA. */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
	    instr->op != nir_texop_txs) {
		unsigned sample_chan = instr->is_array ? 3 : 2;
		args.coords[sample_chan] = adjust_sample_index_using_fmask(
			&ctx->ac, args.coords[0], args.coords[1],
			instr->is_array ? args.coords[2] : NULL,
			args.coords[sample_chan], fmask_ptr);
	}

	/* txf has no offset operand in hardware; fold the (constant)
	 * offsets directly into the integer coordinates. */
	if (args.offset && instr->op == nir_texop_txf) {
		nir_const_value *const_offset =
			nir_src_as_const_value(instr->src[offset_src].src);
		int num_offsets = instr->src[offset_src].src.ssa->num_components;
		assert(const_offset);
		num_offsets = MIN2(num_offsets, instr->coord_components);
		for (unsigned i = 0; i < num_offsets; ++i) {
			args.coords[i] = LLVMBuildAdd(
				ctx->ac.builder, args.coords[i],
				LLVMConstInt(ctx->ac.i32, const_offset->i32[i], false), "");
		}
		args.offset = NULL;
	}

	/* TODO TG4 support */
	/* Gather (tg4) returns a single channel in all four lanes. */
	args.dmask = 0xf;
	if (instr->op == nir_texop_tg4) {
		if (instr->is_shadow)
			args.dmask = 1;
		else
			args.dmask = 1 << instr->component;
	}

	if (instr->sampler_dim != GLSL_SAMPLER_DIM_BUF)
		args.dim = get_ac_sampler_dim(&ctx->ac, instr->sampler_dim, instr->is_array);
	result = build_tex_intrinsic(ctx, instr, &args);

	/* Post-process the raw intrinsic result into the shape NIR expects. */
	if (instr->op == nir_texop_query_levels)
		/* Level count lives in component 3 of the txs-style result. */
		result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
	else if (instr->is_shadow && instr->is_new_style_shadow &&
		   instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
		   instr->op != nir_texop_tg4)
		/* New-style shadow returns a single scalar. */
		result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
	else if (instr->op == nir_texop_txs &&
		 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
		 instr->is_array) {
		/* Cube arrays report layer-faces; divide by 6 to get layers. */
		LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
		LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
		LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
		z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
		result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
	} else if (ctx->ac.chip_class >= GFX9 &&
		   instr->op == nir_texop_txs &&
		   instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
		   instr->is_array) {
		/* Undo the GFX9 1D->2D promotion: move the layer count
		 * from component 2 back into component 1. */
		LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
		LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
		result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
						ctx->ac.i32_1, "");
	} else if (instr->dest.ssa.num_components != 4)
		result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);

write_result:
	if (result) {
		assert(instr->dest.is_ssa);
		/* NIR SSA defs are integer-typed; bitcast before storing. */
		result = ac_to_integer(&ctx->ac, result);
		ctx->ssa_defs[instr->dest.ssa.index] = result;
	}
}
3916
3917
3918 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
3919 {
3920 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3921 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
3922
3923 ctx->ssa_defs[instr->dest.ssa.index] = result;
3924 _mesa_hash_table_insert(ctx->phis, instr, result);
3925 }
3926
3927 static void visit_post_phi(struct ac_nir_context *ctx,
3928 nir_phi_instr *instr,
3929 LLVMValueRef llvm_phi)
3930 {
3931 nir_foreach_phi_src(src, instr) {
3932 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3933 LLVMValueRef llvm_src = get_src(ctx, src->src);
3934
3935 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3936 }
3937 }
3938
3939 static void phi_post_pass(struct ac_nir_context *ctx)
3940 {
3941 hash_table_foreach(ctx->phis, entry) {
3942 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3943 (LLVMValueRef)entry->data);
3944 }
3945 }
3946
3947
3948 static void visit_ssa_undef(struct ac_nir_context *ctx,
3949 const nir_ssa_undef_instr *instr)
3950 {
3951 unsigned num_components = instr->def.num_components;
3952 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
3953 LLVMValueRef undef;
3954
3955 if (num_components == 1)
3956 undef = LLVMGetUndef(type);
3957 else {
3958 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
3959 }
3960 ctx->ssa_defs[instr->def.index] = undef;
3961 }
3962
3963 static void visit_jump(struct ac_llvm_context *ctx,
3964 const nir_jump_instr *instr)
3965 {
3966 switch (instr->type) {
3967 case nir_jump_break:
3968 ac_build_break(ctx);
3969 break;
3970 case nir_jump_continue:
3971 ac_build_continue(ctx);
3972 break;
3973 default:
3974 fprintf(stderr, "Unknown NIR jump instr: ");
3975 nir_print_instr(&instr->instr, stderr);
3976 fprintf(stderr, "\n");
3977 abort();
3978 }
3979 }
3980
3981 static LLVMTypeRef
3982 glsl_base_to_llvm_type(struct ac_llvm_context *ac,
3983 enum glsl_base_type type)
3984 {
3985 switch (type) {
3986 case GLSL_TYPE_INT:
3987 case GLSL_TYPE_UINT:
3988 case GLSL_TYPE_BOOL:
3989 case GLSL_TYPE_SUBROUTINE:
3990 return ac->i32;
3991 case GLSL_TYPE_INT8:
3992 case GLSL_TYPE_UINT8:
3993 return ac->i8;
3994 case GLSL_TYPE_INT16:
3995 case GLSL_TYPE_UINT16:
3996 return ac->i16;
3997 case GLSL_TYPE_FLOAT:
3998 return ac->f32;
3999 case GLSL_TYPE_FLOAT16:
4000 return ac->f16;
4001 case GLSL_TYPE_INT64:
4002 case GLSL_TYPE_UINT64:
4003 return ac->i64;
4004 case GLSL_TYPE_DOUBLE:
4005 return ac->f64;
4006 default:
4007 unreachable("unknown GLSL type");
4008 }
4009 }
4010
4011 static LLVMTypeRef
4012 glsl_to_llvm_type(struct ac_llvm_context *ac,
4013 const struct glsl_type *type)
4014 {
4015 if (glsl_type_is_scalar(type)) {
4016 return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
4017 }
4018
4019 if (glsl_type_is_vector(type)) {
4020 return LLVMVectorType(
4021 glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
4022 glsl_get_vector_elements(type));
4023 }
4024
4025 if (glsl_type_is_matrix(type)) {
4026 return LLVMArrayType(
4027 glsl_to_llvm_type(ac, glsl_get_column_type(type)),
4028 glsl_get_matrix_columns(type));
4029 }
4030
4031 if (glsl_type_is_array(type)) {
4032 return LLVMArrayType(
4033 glsl_to_llvm_type(ac, glsl_get_array_element(type)),
4034 glsl_get_length(type));
4035 }
4036
4037 assert(glsl_type_is_struct_or_ifc(type));
4038
4039 LLVMTypeRef member_types[glsl_get_length(type)];
4040
4041 for (unsigned i = 0; i < glsl_get_length(type); i++) {
4042 member_types[i] =
4043 glsl_to_llvm_type(ac,
4044 glsl_get_struct_field(type, i));
4045 }
4046
4047 return LLVMStructTypeInContext(ac->context, member_types,
4048 glsl_get_length(type), false);
4049 }
4050
/* Translate a NIR deref instruction into an LLVM pointer value.
 *
 * Only shared-memory (LDS) and global-memory derefs are handled here;
 * other variable modes are accessed through dedicated load/store paths
 * elsewhere, so their deref chains produce no LLVM value.
 *
 * For global memory the shader supplies explicit byte offsets/strides,
 * so addresses are computed manually instead of relying on LLVM struct
 * layout; LDS derefs use plain GEPs on the LLVM-typed variable.
 */
static void visit_deref(struct ac_nir_context *ctx,
			nir_deref_instr *instr)
{
	if (instr->mode != nir_var_mem_shared &&
	    instr->mode != nir_var_mem_global)
		return;

	LLVMValueRef result = NULL;
	switch(instr->deref_type) {
	case nir_deref_type_var: {
		/* Base of the chain: look up the LDS global created in
		 * setup_shared(). */
		struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, instr->var);
		result = entry->data;
		break;
	}
	case nir_deref_type_struct:
		if (instr->mode == nir_var_mem_global) {
			/* Global: add the field's explicit byte offset. */
			nir_deref_instr *parent = nir_deref_instr_parent(instr);
			uint64_t offset = glsl_get_struct_field_offset(parent->type,
								       instr->strct.index);
			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
						  LLVMConstInt(ctx->ac.i32, offset, 0));
		} else {
			/* LDS: index the LLVM struct member directly. */
			result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
					       LLVMConstInt(ctx->ac.i32, instr->strct.index, 0));
		}
		break;
	case nir_deref_type_array:
		if (instr->mode == nir_var_mem_global) {
			nir_deref_instr *parent = nir_deref_instr_parent(instr);
			unsigned stride = glsl_get_explicit_stride(parent->type);

			/* Row-major matrices and stride-less vectors fall
			 * back to the scalar size as the element stride. */
			if ((glsl_type_is_matrix(parent->type) &&
			     glsl_matrix_type_is_row_major(parent->type)) ||
			    (glsl_type_is_vector(parent->type) && stride == 0))
				stride = type_scalar_size_bytes(parent->type);

			assert(stride > 0);
			/* Widen the index to i64 for 64-bit address math. */
			LLVMValueRef index = get_src(ctx, instr->arr.index);
			if (LLVMTypeOf(index) != ctx->ac.i64)
				index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");

			LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");

			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
		} else {
			result = ac_build_gep0(&ctx->ac, get_src(ctx, instr->parent),
					       get_src(ctx, instr->arr.index));
		}
		break;
	case nir_deref_type_ptr_as_array:
		if (instr->mode == nir_var_mem_global) {
			/* Same as the array case, but the stride comes from
			 * the deref itself rather than the parent type. */
			unsigned stride = nir_deref_instr_ptr_as_array_stride(instr);

			LLVMValueRef index = get_src(ctx, instr->arr.index);
			if (LLVMTypeOf(index) != ctx->ac.i64)
				index = LLVMBuildZExt(ctx->ac.builder, index, ctx->ac.i64, "");

			LLVMValueRef offset = LLVMBuildMul(ctx->ac.builder, index, LLVMConstInt(ctx->ac.i64, stride, 0), "");

			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent), offset);
		} else {
			result = ac_build_gep_ptr(&ctx->ac, get_src(ctx, instr->parent),
						  get_src(ctx, instr->arr.index));
		}
		break;
	case nir_deref_type_cast: {
		result = get_src(ctx, instr->parent);

		/* We can't use the structs from LLVM because the shader
		 * specifies its own offsets. */
		LLVMTypeRef pointee_type = ctx->ac.i8;
		if (instr->mode == nir_var_mem_shared)
			pointee_type = glsl_to_llvm_type(&ctx->ac, instr->type);

		unsigned address_space;

		switch(instr->mode) {
		case nir_var_mem_shared:
			address_space = AC_ADDR_SPACE_LDS;
			break;
		case nir_var_mem_global:
			address_space = AC_ADDR_SPACE_GLOBAL;
			break;
		default:
			unreachable("Unhandled address space");
		}

		LLVMTypeRef type = LLVMPointerType(pointee_type, address_space);

		/* Coerce the parent value to the right pointer type:
		 * pointers are bitcast, integer addresses use inttoptr.
		 * NOTE(review): the vector-kind check looks like it is
		 * meant to distinguish pointer-typed values — confirm. */
		if (LLVMTypeOf(result) != type) {
			if (LLVMGetTypeKind(LLVMTypeOf(result)) == LLVMVectorTypeKind) {
				result = LLVMBuildBitCast(ctx->ac.builder, result,
							  type, "");
			} else {
				result = LLVMBuildIntToPtr(ctx->ac.builder, result,
							   type, "");
			}
		}
		break;
	}
	default:
		unreachable("Unhandled deref_instr deref type");
	}

	ctx->ssa_defs[instr->dest.ssa.index] = result;
}
4157
4158 static void visit_cf_list(struct ac_nir_context *ctx,
4159 struct exec_list *list);
4160
4161 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
4162 {
4163 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
4164 nir_foreach_instr(instr, block)
4165 {
4166 switch (instr->type) {
4167 case nir_instr_type_alu:
4168 visit_alu(ctx, nir_instr_as_alu(instr));
4169 break;
4170 case nir_instr_type_load_const:
4171 visit_load_const(ctx, nir_instr_as_load_const(instr));
4172 break;
4173 case nir_instr_type_intrinsic:
4174 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4175 break;
4176 case nir_instr_type_tex:
4177 visit_tex(ctx, nir_instr_as_tex(instr));
4178 break;
4179 case nir_instr_type_phi:
4180 visit_phi(ctx, nir_instr_as_phi(instr));
4181 break;
4182 case nir_instr_type_ssa_undef:
4183 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
4184 break;
4185 case nir_instr_type_jump:
4186 visit_jump(&ctx->ac, nir_instr_as_jump(instr));
4187 break;
4188 case nir_instr_type_deref:
4189 visit_deref(ctx, nir_instr_as_deref(instr));
4190 break;
4191 default:
4192 fprintf(stderr, "Unknown NIR instr type: ");
4193 nir_print_instr(instr, stderr);
4194 fprintf(stderr, "\n");
4195 abort();
4196 }
4197 }
4198
4199 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
4200 }
4201
4202 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
4203 {
4204 LLVMValueRef value = get_src(ctx, if_stmt->condition);
4205
4206 nir_block *then_block =
4207 (nir_block *) exec_list_get_head(&if_stmt->then_list);
4208
4209 ac_build_uif(&ctx->ac, value, then_block->index);
4210
4211 visit_cf_list(ctx, &if_stmt->then_list);
4212
4213 if (!exec_list_is_empty(&if_stmt->else_list)) {
4214 nir_block *else_block =
4215 (nir_block *) exec_list_get_head(&if_stmt->else_list);
4216
4217 ac_build_else(&ctx->ac, else_block->index);
4218 visit_cf_list(ctx, &if_stmt->else_list);
4219 }
4220
4221 ac_build_endif(&ctx->ac, then_block->index);
4222 }
4223
4224 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
4225 {
4226 nir_block *first_loop_block =
4227 (nir_block *) exec_list_get_head(&loop->body);
4228
4229 ac_build_bgnloop(&ctx->ac, first_loop_block->index);
4230
4231 visit_cf_list(ctx, &loop->body);
4232
4233 ac_build_endloop(&ctx->ac, first_loop_block->index);
4234 }
4235
4236 static void visit_cf_list(struct ac_nir_context *ctx,
4237 struct exec_list *list)
4238 {
4239 foreach_list_typed(nir_cf_node, node, node, list)
4240 {
4241 switch (node->type) {
4242 case nir_cf_node_block:
4243 visit_block(ctx, nir_cf_node_as_block(node));
4244 break;
4245
4246 case nir_cf_node_if:
4247 visit_if(ctx, nir_cf_node_as_if(node));
4248 break;
4249
4250 case nir_cf_node_loop:
4251 visit_loop(ctx, nir_cf_node_as_loop(node));
4252 break;
4253
4254 default:
4255 assert(0);
4256 }
4257 }
4258 }
4259
/* Allocate the per-channel output allocas for one shader output
 * variable and record them in abi->outputs.
 *
 * Outputs are stored as f32 (or f16 for 16-bit types) allocas indexed
 * by (location, channel); emit_outputs reads them back at the end of
 * the shader.
 */
void
ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
			     struct ac_shader_abi *abi,
			     struct nir_shader *nir,
			     struct nir_variable *variable,
			     gl_shader_stage stage)
{
	unsigned output_loc = variable->data.driver_location / 4;
	unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);

	/* tess ctrl has its own load/store paths for outputs */
	if (stage == MESA_SHADER_TESS_CTRL)
		return;

	if (stage == MESA_SHADER_VERTEX ||
	    stage == MESA_SHADER_TESS_EVAL ||
	    stage == MESA_SHADER_GEOMETRY) {
		int idx = variable->data.location + variable->data.index;
		if (idx == VARYING_SLOT_CLIP_DIST0) {
			/* Clip and cull distances share the clip-dist
			 * slots; allocate one vec4 slot per group of up
			 * to four distances. */
			int length = nir->info.clip_distance_array_size +
			             nir->info.cull_distance_array_size;

			if (length > 4)
				attrib_count = 2;
			else
				attrib_count = 1;
		}
	}

	bool is_16bit = glsl_type_is_16bit(glsl_without_array(variable->type));
	LLVMTypeRef type = is_16bit ? ctx->f16 : ctx->f32;
	for (unsigned i = 0; i < attrib_count; ++i) {
		for (unsigned chan = 0; chan < 4; chan++) {
			abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
		        ac_build_alloca_undef(ctx, type, "");
		}
	}
}
4298
4299 static void
4300 setup_locals(struct ac_nir_context *ctx,
4301 struct nir_function *func)
4302 {
4303 int i, j;
4304 ctx->num_locals = 0;
4305 nir_foreach_variable(variable, &func->impl->locals) {
4306 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4307 variable->data.driver_location = ctx->num_locals * 4;
4308 variable->data.location_frac = 0;
4309 ctx->num_locals += attrib_count;
4310 }
4311 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
4312 if (!ctx->locals)
4313 return;
4314
4315 for (i = 0; i < ctx->num_locals; i++) {
4316 for (j = 0; j < 4; j++) {
4317 ctx->locals[i * 4 + j] =
4318 ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
4319 }
4320 }
4321 }
4322
4323 static void
4324 setup_shared(struct ac_nir_context *ctx,
4325 struct nir_shader *nir)
4326 {
4327 nir_foreach_variable(variable, &nir->shared) {
4328 LLVMValueRef shared =
4329 LLVMAddGlobalInAddressSpace(
4330 ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type),
4331 variable->name ? variable->name : "",
4332 AC_ADDR_SPACE_LDS);
4333 _mesa_hash_table_insert(ctx->vars, variable, shared);
4334 }
4335 }
4336
/* Top-level entry point: translate one NIR shader into LLVM IR using
 * the given llvm context and shader ABI. Assumes the builder is already
 * positioned inside the shader's main function.
 */
void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
		      struct nir_shader *nir)
{
	struct ac_nir_context ctx = {};
	struct nir_function *func;

	ctx.ac = *ac;
	ctx.abi = abi;

	ctx.stage = nir->info.stage;

	/* The caller positioned the builder; recover the function. */
	ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));

	/* Allocate output allocas before emitting any code. */
	nir_foreach_variable(variable, &nir->outputs)
		ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
					     ctx.stage);

	ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
	                                   _mesa_key_pointer_equal);
	ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
	                                   _mesa_key_pointer_equal);
	ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
	                                   _mesa_key_pointer_equal);

	/* Only a single (entry-point) function is expected here. */
	func = (struct nir_function *)exec_list_get_head(&nir->functions);

	nir_index_ssa_defs(func->impl);
	ctx.ssa_defs = calloc(func->impl->ssa_alloc, sizeof(LLVMValueRef));

	setup_locals(&ctx, func);

	if (gl_shader_stage_is_compute(nir->info.stage))
		setup_shared(&ctx, nir);

	/* Emit the body, then complete phis once all blocks exist. */
	visit_cf_list(&ctx, &func->impl->body);
	phi_post_pass(&ctx);

	/* Compute shaders have no output exports. */
	if (!gl_shader_stage_is_compute(nir->info.stage))
		ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
				      ctx.abi->outputs);

	free(ctx.locals);
	free(ctx.ssa_defs);
	ralloc_free(ctx.defs);
	ralloc_free(ctx.phis);
	ralloc_free(ctx.vars);
}
4384
4385 void
4386 ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
4387 {
4388 /* While it would be nice not to have this flag, we are constrained
4389 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
4390 * on GFX9.
4391 */
4392 bool llvm_has_working_vgpr_indexing = chip_class <= VI;
4393
4394 /* TODO: Indirect indexing of GS inputs is unimplemented.
4395 *
4396 * TCS and TES load inputs directly from LDS or offchip memory, so
4397 * indirect indexing is trivial.
4398 */
4399 nir_variable_mode indirect_mask = 0;
4400 if (nir->info.stage == MESA_SHADER_GEOMETRY ||
4401 (nir->info.stage != MESA_SHADER_TESS_CTRL &&
4402 nir->info.stage != MESA_SHADER_TESS_EVAL &&
4403 !llvm_has_working_vgpr_indexing)) {
4404 indirect_mask |= nir_var_shader_in;
4405 }
4406 if (!llvm_has_working_vgpr_indexing &&
4407 nir->info.stage != MESA_SHADER_TESS_CTRL)
4408 indirect_mask |= nir_var_shader_out;
4409
4410 /* TODO: We shouldn't need to do this, however LLVM isn't currently
4411 * smart enough to handle indirects without causing excess spilling
4412 * causing the gpu to hang.
4413 *
4414 * See the following thread for more details of the problem:
4415 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
4416 */
4417 indirect_mask |= nir_var_function_temp;
4418
4419 nir_lower_indirect_derefs(nir, indirect_mask);
4420 }
4421
4422 static unsigned
4423 get_inst_tessfactor_writemask(nir_intrinsic_instr *intrin)
4424 {
4425 if (intrin->intrinsic != nir_intrinsic_store_deref)
4426 return 0;
4427
4428 nir_variable *var =
4429 nir_deref_instr_get_variable(nir_src_as_deref(intrin->src[0]));
4430
4431 if (var->data.mode != nir_var_shader_out)
4432 return 0;
4433
4434 unsigned writemask = 0;
4435 const int location = var->data.location;
4436 unsigned first_component = var->data.location_frac;
4437 unsigned num_comps = intrin->dest.ssa.num_components;
4438
4439 if (location == VARYING_SLOT_TESS_LEVEL_INNER)
4440 writemask = ((1 << (num_comps + 1)) - 1) << first_component;
4441 else if (location == VARYING_SLOT_TESS_LEVEL_OUTER)
4442 writemask = (((1 << (num_comps + 1)) - 1) << first_component) << 4;
4443
4444 return writemask;
4445 }
4446
/* Recursively scan TCS control flow, tracking which tess-factor
 * channels are written unconditionally (*upper_block_tf_writemask) and
 * which are written under a condition or inside a loop
 * (*cond_block_tf_writemask). Whenever a barrier closes a code segment,
 * the accumulated masks are folded into
 * *tessfactors_are_def_in_all_invocs and reset.
 */
static void
scan_tess_ctrl(nir_cf_node *cf_node, unsigned *upper_block_tf_writemask,
	       unsigned *cond_block_tf_writemask,
	       bool *tessfactors_are_def_in_all_invocs, bool is_nested_cf)
{
	switch (cf_node->type) {
	case nir_cf_node_block: {
		nir_block *block = nir_cf_node_as_block(cf_node);
		nir_foreach_instr(instr, block) {
			if (instr->type != nir_instr_type_intrinsic)
				continue;

			nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
			if (intrin->intrinsic == nir_intrinsic_barrier) {

				/* If we find a barrier in nested control flow put this in the
				 * too hard basket. In GLSL this is not possible but it is in
				 * SPIR-V.
				 */
				if (is_nested_cf) {
					*tessfactors_are_def_in_all_invocs = false;
					return;
				}

				/* The following case must be prevented:
				 *    gl_TessLevelInner = ...;
				 *    barrier();
				 *    if (gl_InvocationID == 1)
				 *       gl_TessLevelInner = ...;
				 *
				 * If you consider disjoint code segments separated by barriers, each
				 * such segment that writes tess factor channels should write the same
				 * channels in all codepaths within that segment.
				 */
				if (upper_block_tf_writemask || cond_block_tf_writemask) {
					/* Accumulate the result: */
					*tessfactors_are_def_in_all_invocs &=
						!(*cond_block_tf_writemask & ~(*upper_block_tf_writemask));

					/* Analyze the next code segment from scratch. */
					*upper_block_tf_writemask = 0;
					*cond_block_tf_writemask = 0;
				}
			} else
				*upper_block_tf_writemask |= get_inst_tessfactor_writemask(intrin);
		}

		break;
	}
	case nir_cf_node_if: {
		unsigned then_tessfactor_writemask = 0;
		unsigned else_tessfactor_writemask = 0;

		nir_if *if_stmt = nir_cf_node_as_if(cf_node);
		foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) {
			scan_tess_ctrl(nested_node, &then_tessfactor_writemask,
				       cond_block_tf_writemask,
				       tessfactors_are_def_in_all_invocs, true);
		}

		foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) {
			scan_tess_ctrl(nested_node, &else_tessfactor_writemask,
				       cond_block_tf_writemask,
				       tessfactors_are_def_in_all_invocs, true);
		}

		if (then_tessfactor_writemask || else_tessfactor_writemask) {
			/* If both statements write the same tess factor channels,
			 * we can say that the upper block writes them too.
			 */
			*upper_block_tf_writemask |= then_tessfactor_writemask &
						     else_tessfactor_writemask;
			*cond_block_tf_writemask |= then_tessfactor_writemask |
						    else_tessfactor_writemask;
		}

		break;
	}
	case nir_cf_node_loop: {
		/* Loop bodies may execute zero times, so all their writes
		 * are conditional: accumulate into the cond mask only. */
		nir_loop *loop = nir_cf_node_as_loop(cf_node);
		foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) {
			scan_tess_ctrl(nested_node, cond_block_tf_writemask,
				       cond_block_tf_writemask,
				       tessfactors_are_def_in_all_invocs, true);
		}

		break;
	}
	default:
		unreachable("unknown cf node type");
	}
}
4539
4540 bool
4541 ac_are_tessfactors_def_in_all_invocs(const struct nir_shader *nir)
4542 {
4543 assert(nir->info.stage == MESA_SHADER_TESS_CTRL);
4544
4545 /* The pass works as follows:
4546 * If all codepaths write tess factors, we can say that all
4547 * invocations define tess factors.
4548 *
4549 * Each tess factor channel is tracked separately.
4550 */
4551 unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
4552 unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
4553
4554 /* Initial value = true. Here the pass will accumulate results from
4555 * multiple segments surrounded by barriers. If tess factors aren't
4556 * written at all, it's a shader bug and we don't care if this will be
4557 * true.
4558 */
4559 bool tessfactors_are_def_in_all_invocs = true;
4560
4561 nir_foreach_function(function, nir) {
4562 if (function->impl) {
4563 foreach_list_typed(nir_cf_node, node, node, &function->impl->body) {
4564 scan_tess_ctrl(node, &main_block_tf_writemask,
4565 &cond_block_tf_writemask,
4566 &tessfactors_are_def_in_all_invocs,
4567 false);
4568 }
4569 }
4570 }
4571
4572 /* Accumulate the result for the last code segment separated by a
4573 * barrier.
4574 */
4575 if (main_block_tf_writemask || cond_block_tf_writemask) {
4576 tessfactors_are_def_in_all_invocs &=
4577 !(cond_block_tf_writemask & ~main_block_tf_writemask);
4578 }
4579
4580 return tessfactors_are_def_in_all_invocs;
4581 }