/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_nir_to_llvm.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "ac_binary.h"
#include "sid.h"
#include "nir/nir.h"
#include "util/bitscan.h"
#include "ac_shader_abi.h"
#include "ac_shader_util.h"

struct ac_nir_context {
	struct ac_llvm_context ac;
	struct ac_shader_abi *abi;

	gl_shader_stage stage;

	struct hash_table *defs;
	struct hash_table *phis;
	struct hash_table *vars;

	LLVMValueRef main_function;
	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	int num_locals;
	LLVMValueRef *locals;
};

static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
				     const nir_deref_var *deref,
				     enum ac_descriptor_type desc_type,
				     const nir_tex_instr *instr,
				     bool image, bool write);

static void
build_store_values_extended(struct ac_llvm_context *ac,
			    LLVMValueRef *values,
			    unsigned value_count,
			    unsigned value_stride,
			    LLVMValueRef vec)
{
	LLVMBuilderRef builder = ac->builder;
	unsigned i;

	for (i = 0; i < value_count; i++) {
		LLVMValueRef ptr = values[i * value_stride];
		LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
		LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
		LLVMBuildStore(builder, value, ptr);
	}
}

static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
				const nir_ssa_def *def)
{
	LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
	if (def->num_components > 1) {
		type = LLVMVectorType(type, def->num_components);
	}
	return type;
}

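/* Fetch the LLVM value previously emitted for a NIR SSA source; every
 * def is recorded in ctx->defs as its instruction is translated, so
 * uses always find their definitions.
 */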
static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
	assert(src.is_ssa);
	struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
	return (LLVMValueRef)entry->data;
}

static LLVMValueRef
get_memory_ptr(struct ac_nir_context *ctx, nir_src src)
{
	LLVMValueRef ptr = get_src(ctx, src);
	ptr = LLVMBuildGEP(ctx->ac.builder, ctx->ac.lds, &ptr, 1, "");
	int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));

	return LLVMBuildBitCast(ctx->ac.builder, ptr,
				LLVMPointerType(ctx->ac.i32, addr_space), "");
}

static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
				   const struct nir_block *b)
{
	struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
	return (LLVMBasicBlockRef)entry->data;
}

static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
				nir_alu_src src,
				unsigned num_components)
{
	LLVMValueRef value = get_src(ctx, src.src);
	bool need_swizzle = false;

	assert(value);
	unsigned src_components = ac_get_llvm_num_components(value);
	for (unsigned i = 0; i < num_components; ++i) {
		assert(src.swizzle[i] < src_components);
		if (src.swizzle[i] != i)
			need_swizzle = true;
	}

	if (need_swizzle || num_components != src_components) {
		LLVMValueRef masks[] = {
		    LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
		    LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
		    LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
		    LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};

		if (src_components > 1 && num_components == 1) {
			value = LLVMBuildExtractElement(ctx->ac.builder, value,
							masks[0], "");
		} else if (src_components == 1 && num_components > 1) {
			LLVMValueRef values[] = {value, value, value, value};
			value = ac_build_gather_values(&ctx->ac, values, num_components);
		} else {
			LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
			value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
						       swizzle, "");
		}
	}
	assert(!src.negate);
	assert(!src.abs);
	return value;
}

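/* NIR booleans here are 32-bit 0 / ~0 values, so comparison results are
 * widened into a select between 0xffffffff and 0 instead of being left
 * as an i1.
 */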
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
				 LLVMIntPredicate pred, LLVMValueRef src0,
				 LLVMValueRef src1)
{
	LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
	return LLVMBuildSelect(ctx->builder, result,
			       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
			       ctx->i32_0, "");
}

static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
				   LLVMRealPredicate pred, LLVMValueRef src0,
				   LLVMValueRef src1)
{
	LLVMValueRef result;
	src0 = ac_to_float(ctx, src0);
	src1 = ac_to_float(ctx, src1);
	result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
	return LLVMBuildSelect(ctx->builder, result,
			       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
			       ctx->i32_0, "");
}

static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0)
{
	char name[64];
	LLVMValueRef params[] = {
		ac_to_float(ctx, src0),
	};

	MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
						 ac_get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));
	return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0, LLVMValueRef src1)
{
	char name[64];
	LLVMValueRef params[] = {
		ac_to_float(ctx, src0),
		ac_to_float(ctx, src1),
	};

	MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
						 ac_get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));
	return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
	char name[64];
	LLVMValueRef params[] = {
		ac_to_float(ctx, src0),
		ac_to_float(ctx, src1),
		ac_to_float(ctx, src2),
	};

	MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
						 ac_get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));
	return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
	LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
				       ctx->i32_0, "");
	return LLVMBuildSelect(ctx->builder, v, ac_to_integer(ctx, src1),
			       ac_to_integer(ctx, src2), "");
}

static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
				    LLVMIntPredicate pred,
				    LLVMValueRef src0, LLVMValueRef src1)
{
	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
			       src0,
			       src1, "");
}

static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
			      LLVMValueRef src0)
{
	return emit_minmax_int(ctx, LLVMIntSGT, src0,
			       LLVMBuildNeg(ctx->builder, src0, ""));
}

static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
				    const char *intrin,
				    LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMTypeRef ret_type;
	LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
	LLVMValueRef res;
	LLVMValueRef params[] = { src0, src1 };
	ret_type = LLVMStructTypeInContext(ctx->context, types,
					   2, true);

	res = ac_build_intrinsic(ctx, intrin, ret_type,
				 params, 2, AC_FUNC_ATTR_READNONE);

	res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
	res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
	return res;
}

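/* With the 0 / ~0 boolean representation, ANDing with the bit pattern
 * of 1.0f (0x3f800000) turns a boolean into 0.0f or 1.0f without any
 * compare or select.
 */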
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	return LLVMBuildAnd(ctx->builder, src0,
			    LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""),
			    "");
}

static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	src0 = ac_to_float(ctx, src0);
	LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
	return LLVMBuildSExt(ctx->builder,
			     LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, zero, ""),
			     ctx->i32, "");
}

static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
			     LLVMValueRef src0,
			     unsigned bitsize)
{
	LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");

	if (bitsize == 32)
		return result;

	return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
}

static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	LLVMValueRef zero = LLVMConstNull(LLVMTypeOf(src0));
	return LLVMBuildSExt(ctx->builder,
			     LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, zero, ""),
			     ctx->i32, "");
}

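/* fquantize2f16: round to f16 precision and back, flushing values that
 * are denormal in f16 to zero. VI+ can classify the f16 result
 * directly; SI/CIK compare |x| against the smallest f16 normal (2^-14)
 * instead.
 */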
static LLVMValueRef emit_f2f16(struct ac_llvm_context *ctx,
			       LLVMValueRef src0)
{
	LLVMValueRef result;
	LLVMValueRef cond = NULL;

	src0 = ac_to_float(ctx, src0);
	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

	if (ctx->chip_class >= VI) {
		LLVMValueRef args[2];
		/* Check if the result is a denormal and flush it to 0 if so. */
		args[0] = result;
		args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
		cond = ac_build_intrinsic(ctx, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
	}

	/* Need to convert back up to f32. */
	result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");

	if (ctx->chip_class >= VI)
		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
	else {
		/* for SI/CIK */
		/* 0x38800000 is the smallest normal half-float value (2^-14)
		 * as a 32-bit float, so compare the result and flush to 0 if
		 * it's smaller.
		 */
		LLVMValueRef temp, cond2;
		temp = emit_intrin_1f_param(ctx, "llvm.fabs", ctx->f32, result);
		cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
				     LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->i32, 0x38800000, false), ctx->f32, ""),
				     temp, "");
		cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
				      temp, ctx->f32_0, "");
		cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32_0, result, "");
	}
	return result;
}

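/* High 32 bits of a 32x32 multiply, computed by widening to 64 bits;
 * the LLVM backend can match this pattern to a single mul_hi
 * instruction.
 */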
static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
				   LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMValueRef dst64, result;
	src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
	src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");

	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
	dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
	return result;
}

static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
				   LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMValueRef dst64, result;
	src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
	src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
	dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
	return result;
}

static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
					  bool is_signed,
					  const LLVMValueRef srcs[3])
{
	LLVMValueRef result;
	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");

	result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
	return result;
}

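/* bitfield_insert with bits == 32 must return the insert value itself;
 * the (1 << bits) - 1 mask built below cannot express that case, hence
 * the explicit select on src3 == 32.
 */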
static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
					 LLVMValueRef src0, LLVMValueRef src1,
					 LLVMValueRef src2, LLVMValueRef src3)
{
	LLVMValueRef bfi_args[3], result;

	bfi_args[0] = LLVMBuildShl(ctx->builder,
				   LLVMBuildSub(ctx->builder,
						LLVMBuildShl(ctx->builder,
							     ctx->i32_1,
							     src3, ""),
						ctx->i32_1, ""),
				   src2, "");
	bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
	bfi_args[2] = src0;

	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");

	/* Calculate:
	 *   (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	result = LLVMBuildXor(ctx->builder, bfi_args[2],
			      LLVMBuildAnd(ctx->builder, bfi_args[0],
					   LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");

	result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
	return result;
}

static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
					LLVMValueRef src0)
{
	LLVMValueRef comp[2];

	src0 = ac_to_float(ctx, src0);
	comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
	comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");

	return ac_build_cvt_pkrtz_f16(ctx, comp);
}

static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
					  LLVMValueRef src0)
{
	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
	LLVMValueRef temps[2], result, val;
	int i;

	for (i = 0; i < 2; i++) {
		val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
		val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
	}

	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
					ctx->i32_0, "");
	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
					ctx->i32_1, "");
	return result;
}

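/* Screen-space derivatives are taken within the 2x2 pixel quad: the
 * mask selects which quad lane each pixel reads its neighbour's value
 * from, and idx is the offset within the quad (1 = next pixel in X,
 * 2 = next pixel in Y).
 */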
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
			      nir_op op,
			      LLVMValueRef src0)
{
	unsigned mask;
	int idx;
	LLVMValueRef result;

	if (op == nir_op_fddx_fine)
		mask = AC_TID_MASK_LEFT;
	else if (op == nir_op_fddy_fine)
		mask = AC_TID_MASK_TOP;
	else
		mask = AC_TID_MASK_TOP_LEFT;

	/* For DDX we want the next X pixel, for DDY the next Y pixel. */
	if (op == nir_op_fddx_fine ||
	    op == nir_op_fddx_coarse ||
	    op == nir_op_fddx)
		idx = 1;
	else
		idx = 2;

	result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
	return result;
}

/*
 * This takes an I,J coordinate pair and works out the X and Y
 * derivatives. It returns DDX(I), DDX(J), DDY(I), DDY(J).
 */
static LLVMValueRef emit_ddxy_interp(
	struct ac_nir_context *ctx,
	LLVMValueRef interp_ij)
{
	LLVMValueRef result[4], a;
	unsigned i;

	for (i = 0; i < 2; i++) {
		a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
					    LLVMConstInt(ctx->ac.i32, i, false), "");
		result[i] = emit_ddxy(ctx, nir_op_fddx, a);
		result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
	}
	return ac_build_gather_values(&ctx->ac, result, 4);
}

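/* Translate a single NIR ALU instruction into LLVM IR and record the
 * result in ctx->defs for later get_src() lookups. Sources are first
 * swizzled/resized to the operand width the op expects.
 */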
static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
	LLVMValueRef src[4], result = NULL;
	unsigned num_components = instr->dest.dest.ssa.num_components;
	unsigned src_components;
	LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

	assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
	switch (instr->op) {
	case nir_op_vec2:
	case nir_op_vec3:
	case nir_op_vec4:
		src_components = 1;
		break;
	case nir_op_pack_half_2x16:
		src_components = 2;
		break;
	case nir_op_unpack_half_2x16:
		src_components = 1;
		break;
	case nir_op_cube_face_coord:
	case nir_op_cube_face_index:
		src_components = 3;
		break;
	default:
		src_components = num_components;
		break;
	}
	for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
		src[i] = get_alu_src(ctx, instr->src[i], src_components);

	switch (instr->op) {
	case nir_op_fmov:
	case nir_op_imov:
		result = src[0];
		break;
	case nir_op_fneg:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
		break;
	case nir_op_ineg:
		result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
		break;
	case nir_op_inot:
		result = LLVMBuildNot(ctx->ac.builder, src[0], "");
		break;
	case nir_op_iadd:
		result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fadd:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fsub:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_isub:
		result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_imul:
		result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_imod:
		result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_umod:
		result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fmod:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
		result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
					      ac_to_float_type(&ctx->ac, def_type), result);
		result = LLVMBuildFMul(ctx->ac.builder, src[1], result, "");
		result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
		break;
	case nir_op_frem:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_irem:
		result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_idiv:
		result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_udiv:
		result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_fmul:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		src[1] = ac_to_float(&ctx->ac, src[1]);
		result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_frcp:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
				       src[0]);
		break;
	case nir_op_iand:
		result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_ior:
		result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_ixor:
		result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
		break;
	case nir_op_ishl:
		result = LLVMBuildShl(ctx->ac.builder, src[0],
				      LLVMBuildZExt(ctx->ac.builder, src[1],
						    LLVMTypeOf(src[0]), ""),
				      "");
		break;
	case nir_op_ishr:
		result = LLVMBuildAShr(ctx->ac.builder, src[0],
				       LLVMBuildZExt(ctx->ac.builder, src[1],
						     LLVMTypeOf(src[0]), ""),
				       "");
		break;
	case nir_op_ushr:
		result = LLVMBuildLShr(ctx->ac.builder, src[0],
				       LLVMBuildZExt(ctx->ac.builder, src[1],
						     LLVMTypeOf(src[0]), ""),
				       "");
		break;
	case nir_op_ilt:
		result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
		break;
	case nir_op_ine:
		result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
		break;
	case nir_op_ieq:
		result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
		break;
	case nir_op_ige:
		result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
		break;
	case nir_op_ult:
		result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
		break;
	case nir_op_uge:
		result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
		break;
	case nir_op_feq:
		result = emit_float_cmp(&ctx->ac, LLVMRealOEQ, src[0], src[1]);
		break;
	case nir_op_fne:
		result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
		break;
	case nir_op_flt:
		result = emit_float_cmp(&ctx->ac, LLVMRealOLT, src[0], src[1]);
		break;
	case nir_op_fge:
		result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
		break;
	case nir_op_fabs:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_iabs:
		result = emit_iabs(&ctx->ac, src[0]);
		break;
	case nir_op_imax:
		result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
		break;
	case nir_op_imin:
		result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
		break;
	case nir_op_umax:
		result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
		break;
	case nir_op_umin:
		result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
		break;
	case nir_op_isign:
		result = ac_build_isign(&ctx->ac, src[0],
					instr->dest.dest.ssa.bit_size);
		break;
	case nir_op_fsign:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_fsign(&ctx->ac, src[0],
					instr->dest.dest.ssa.bit_size);
		break;
	case nir_op_ffloor:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_ftrunc:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fceil:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fround_even:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_ffract:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_fract(&ctx->ac, src[0],
					instr->dest.dest.ssa.bit_size);
		break;
	case nir_op_fsin:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fcos:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fsqrt:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_fexp2:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_flog2:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		break;
	case nir_op_frsq:
		result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
					      ac_to_float_type(&ctx->ac, def_type), src[0]);
		result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
				       result);
		break;
	case nir_op_frexp_exp:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.exp.i32.f64",
					    ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
		break;
	case nir_op_frexp_sig:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.frexp.mant.f64",
					    ctx->ac.f64, src, 1, AC_FUNC_ATTR_READNONE);
		break;
	case nir_op_fmax:
		result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
					      ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
		if (ctx->ac.chip_class < GFX9 &&
		    instr->dest.dest.ssa.bit_size == 32) {
			/* Only pre-GFX9 chips do not flush denorms. */
			result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
						      ac_to_float_type(&ctx->ac, def_type),
						      result);
		}
		break;
	case nir_op_fmin:
		result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
					      ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
		if (ctx->ac.chip_class < GFX9 &&
		    instr->dest.dest.ssa.bit_size == 32) {
			/* Only pre-GFX9 chips do not flush denorms. */
			result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
						      ac_to_float_type(&ctx->ac, def_type),
						      result);
		}
		break;
	case nir_op_ffma:
		result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
					      ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
		break;
	case nir_op_ldexp:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32)
			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f32", ctx->ac.f32, src, 2, AC_FUNC_ATTR_READNONE);
		else
			result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.ldexp.f64", ctx->ac.f64, src, 2, AC_FUNC_ATTR_READNONE);
		break;
	case nir_op_ibitfield_extract:
		result = emit_bitfield_extract(&ctx->ac, true, src);
		break;
	case nir_op_ubitfield_extract:
		result = emit_bitfield_extract(&ctx->ac, false, src);
		break;
	case nir_op_bitfield_insert:
		result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
		break;
	case nir_op_bitfield_reverse:
		result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
		break;
	case nir_op_bit_count:
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) == 32)
			result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
		else {
			result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i64", ctx->ac.i64, src, 1, AC_FUNC_ATTR_READNONE);
			result = LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
		}
		break;
	case nir_op_vec2:
	case nir_op_vec3:
	case nir_op_vec4:
		for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
			src[i] = ac_to_integer(&ctx->ac, src[i]);
		result = ac_build_gather_values(&ctx->ac, src, num_components);
		break;
	case nir_op_f2i32:
	case nir_op_f2i64:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
		break;
	case nir_op_f2u32:
	case nir_op_f2u64:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
		break;
	case nir_op_i2f32:
	case nir_op_i2f64:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		break;
	case nir_op_u2f32:
	case nir_op_u2f64:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		break;
	case nir_op_f2f64:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		break;
	case nir_op_f2f32:
		src[0] = ac_to_float(&ctx->ac, src[0]);
		result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
		break;
	case nir_op_u2u32:
	case nir_op_u2u64:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
			result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
		else
			result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
		break;
	case nir_op_i2i32:
	case nir_op_i2i64:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
			result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
		else
			result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
		break;
	case nir_op_bcsel:
		result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
		break;
	case nir_op_find_lsb:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
		break;
	case nir_op_ufind_msb:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
		break;
	case nir_op_ifind_msb:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
		break;
	case nir_op_uadd_carry:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		src[1] = ac_to_integer(&ctx->ac, src[1]);
		result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
		break;
	case nir_op_usub_borrow:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		src[1] = ac_to_integer(&ctx->ac, src[1]);
		result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
		break;
	case nir_op_b2f:
		result = emit_b2f(&ctx->ac, src[0]);
		break;
	case nir_op_f2b:
		result = emit_f2b(&ctx->ac, src[0]);
		break;
	case nir_op_b2i:
		result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
		break;
	case nir_op_i2b:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		result = emit_i2b(&ctx->ac, src[0]);
		break;
	case nir_op_fquantize2f16:
		result = emit_f2f16(&ctx->ac, src[0]);
		break;
	case nir_op_umul_high:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		src[1] = ac_to_integer(&ctx->ac, src[1]);
		result = emit_umul_high(&ctx->ac, src[0], src[1]);
		break;
	case nir_op_imul_high:
		src[0] = ac_to_integer(&ctx->ac, src[0]);
		src[1] = ac_to_integer(&ctx->ac, src[1]);
		result = emit_imul_high(&ctx->ac, src[0], src[1]);
		break;
	case nir_op_pack_half_2x16:
		result = emit_pack_half_2x16(&ctx->ac, src[0]);
		break;
	case nir_op_unpack_half_2x16:
		result = emit_unpack_half_2x16(&ctx->ac, src[0]);
		break;
	case nir_op_fddx:
	case nir_op_fddy:
	case nir_op_fddx_fine:
	case nir_op_fddy_fine:
	case nir_op_fddx_coarse:
	case nir_op_fddy_coarse:
		result = emit_ddxy(ctx, instr->op, src[0]);
		break;

	case nir_op_unpack_64_2x32_split_x: {
		assert(ac_get_llvm_num_components(src[0]) == 1);
		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
						    ctx->ac.v2i32,
						    "");
		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
						 ctx->ac.i32_0, "");
		break;
	}

	case nir_op_unpack_64_2x32_split_y: {
		assert(ac_get_llvm_num_components(src[0]) == 1);
		LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
						    ctx->ac.v2i32,
						    "");
		result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
						 ctx->ac.i32_1, "");
		break;
	}

	case nir_op_pack_64_2x32_split: {
		LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
		tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
					     src[0], ctx->ac.i32_0, "");
		tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
					     src[1], ctx->ac.i32_1, "");
		result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
		break;
	}

	case nir_op_cube_face_coord: {
		src[0] = ac_to_float(&ctx->ac, src[0]);
		LLVMValueRef results[2];
		LLVMValueRef in[3];
		for (unsigned chan = 0; chan < 3; chan++)
			in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
		results[0] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubetc",
						ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
		results[1] = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubesc",
						ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
		result = ac_build_gather_values(&ctx->ac, results, 2);
		break;
	}

	case nir_op_cube_face_index: {
		src[0] = ac_to_float(&ctx->ac, src[0]);
		LLVMValueRef in[3];
		for (unsigned chan = 0; chan < 3; chan++)
			in[chan] = ac_llvm_extract_elem(&ctx->ac, src[0], chan);
		result = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.cubeid",
					    ctx->ac.f32, in, 3, AC_FUNC_ATTR_READNONE);
		break;
	}

	default:
		fprintf(stderr, "Unknown NIR alu instr: ");
		nir_print_instr(&instr->instr, stderr);
		fprintf(stderr, "\n");
		abort();
	}

	if (result) {
		assert(instr->dest.dest.is_ssa);
		result = ac_to_integer(&ctx->ac, result);
		_mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
					result);
	}
}

static void visit_load_const(struct ac_nir_context *ctx,
			     const nir_load_const_instr *instr)
{
	LLVMValueRef values[4], value = NULL;
	LLVMTypeRef element_type =
	    LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);

	for (unsigned i = 0; i < instr->def.num_components; ++i) {
		switch (instr->def.bit_size) {
		case 32:
			values[i] = LLVMConstInt(element_type,
						 instr->value.u32[i], false);
			break;
		case 64:
			values[i] = LLVMConstInt(element_type,
						 instr->value.u64[i], false);
			break;
		default:
			fprintf(stderr,
				"unsupported nir load_const bit_size: %d\n",
				instr->def.bit_size);
			abort();
		}
	}
	if (instr->def.num_components > 1) {
		value = LLVMConstVector(values, instr->def.num_components);
	} else
		value = values[0];

	_mesa_hash_table_insert(ctx->defs, &instr->def, value);
}

static LLVMValueRef
get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
{
	LLVMValueRef size =
		LLVMBuildExtractElement(ctx->ac.builder, descriptor,
					LLVMConstInt(ctx->ac.i32, 2, false), "");

	/* VI only */
	if (ctx->ac.chip_class == VI && in_elements) {
		/* On VI, the descriptor contains the size in bytes,
		 * but TXQ must return the size in elements.
		 * The stride is always non-zero for resources using TXQ.
		 */
		LLVMValueRef stride =
			LLVMBuildExtractElement(ctx->ac.builder, descriptor,
						ctx->ac.i32_1, "");
		stride = LLVMBuildLShr(ctx->ac.builder, stride,
				       LLVMConstInt(ctx->ac.i32, 16, false), "");
		stride = LLVMBuildAnd(ctx->ac.builder, stride,
				      LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");

		size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
	}
	return size;
}

/**
 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
 * intrinsic names).
 */
static void build_int_type_name(
	LLVMTypeRef type,
	char *buf, unsigned bufsize)
{
	assert(bufsize >= 6);

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
		snprintf(buf, bufsize, "v%ui32",
			 LLVMGetVectorSize(type));
	else
		strcpy(buf, "i32");
}

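/* Lower gather4 on integer formats for pre-GFX9 parts: the gather
 * coordinates are offset by minus half a texel so the intended texels
 * are selected, and for 8_8_8_8 integer cube maps the descriptor is
 * rewritten to a scaled format with the result converted back to
 * integer (see the comments below).
 */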
static LLVMValueRef lower_gather4_integer(struct ac_llvm_context *ctx,
					  struct ac_image_args *args,
					  const nir_tex_instr *instr)
{
	enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
	LLVMValueRef coord = args->addr;
	LLVMValueRef half_texel[2];
	LLVMValueRef compare_cube_wa = NULL;
	LLVMValueRef result;
	int c;
	unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;

	//TODO Rect
	{
		struct ac_image_args txq_args = { 0 };

		txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
		txq_args.opcode = ac_image_get_resinfo;
		txq_args.dmask = 0xf;
		txq_args.addr = ctx->i32_0;
		txq_args.resource = args->resource;
		LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);

		for (c = 0; c < 2; c++) {
			half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
								LLVMConstInt(ctx->i32, c, false), "");
			half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
			half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
			half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
						      LLVMConstReal(ctx->f32, -0.5), "");
		}
	}

	LLVMValueRef orig_coords = args->addr;

	for (c = 0; c < 2; c++) {
		LLVMValueRef tmp;
		LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
		tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
		tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
		tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
		coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
	}

	/*
	 * Apparently cube maps have an issue with integer types that the
	 * workaround above doesn't solve, so this tests whether the format
	 * is 8_8_8_8 with an integer type and, if so, applies an alternate
	 * workaround: sample using a scaled type and convert the result.
	 * This is taken from amdgpu-pro shaders.
	 */
	/* NOTE: this produces some ugly code compared to amdgpu-pro;
	 * LLVM ends up dumping SGPRs into VGPRs to deal with the
	 * compare/select, and then reads them back. -pro generates two
	 * selects: one s_cmp for the descriptor rewriting and one v_cmp
	 * for the coordinate and result changes.
	 */
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
		LLVMValueRef tmp, tmp2;

		/* Work around the 8/8/8/8 uint/sint cube gather bug:
		 * first detect it, then change to a scaled read and f2i.
		 */
		tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
		tmp2 = tmp;

		/* extract the DATA_FORMAT */
		tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
				   LLVMConstInt(ctx->i32, 6, false), false);

		/* is the DATA_FORMAT == 8_8_8_8? */
		compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");

		if (stype == GLSL_TYPE_UINT)
			/* Create a NUM_FORMAT of 0x2 or 0x4 (USCALED or UINT). */
			tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
					      LLVMConstInt(ctx->i32, 0x10000000, false), "");
		else
			/* Create a NUM_FORMAT of 0x3 or 0x5 (SSCALED or SINT). */
			tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
					      LLVMConstInt(ctx->i32, 0x14000000, false), "");

		/* replace the NUM_FORMAT in the descriptor */
		tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
		tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");

		args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");

		/* don't modify the coordinates for this case */
		coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
	}
	args->addr = coord;
	result = ac_build_image_opcode(ctx, args);

	if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
		LLVMValueRef tmp, tmp2;

		/* if the cube workaround is in place, f2i the result */
		for (c = 0; c < 4; c++) {
			tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
			if (stype == GLSL_TYPE_UINT)
				tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
			else
				tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
			tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
			tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
			tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
			tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
			result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
		}
	}
	return result;
}

static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
					const nir_tex_instr *instr,
					bool lod_is_zero,
					struct ac_image_args *args)
{
	if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
		unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);

		return ac_build_buffer_load_format(&ctx->ac,
						   args->resource,
						   args->addr,
						   ctx->ac.i32_0,
						   util_last_bit(mask),
						   false, true);
	}

	args->opcode = ac_image_sample;
	args->compare = instr->is_shadow;

	switch (instr->op) {
	case nir_texop_txf:
	case nir_texop_txf_ms:
	case nir_texop_samples_identical:
		args->opcode = lod_is_zero ||
			       instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
					ac_image_load : ac_image_load_mip;
		args->compare = false;
		args->offset = false;
		break;
	case nir_texop_txb:
		args->bias = true;
		break;
	case nir_texop_txl:
		if (lod_is_zero)
			args->level_zero = true;
		else
			args->lod = true;
		break;
	case nir_texop_txs:
	case nir_texop_query_levels:
		args->opcode = ac_image_get_resinfo;
		break;
	case nir_texop_tex:
		if (ctx->stage != MESA_SHADER_FRAGMENT)
			args->level_zero = true;
		break;
	case nir_texop_txd:
		args->deriv = true;
		break;
	case nir_texop_tg4:
		args->opcode = ac_image_gather4;
		args->level_zero = true;
		break;
	case nir_texop_lod:
		args->opcode = ac_image_get_lod;
		args->compare = false;
		args->offset = false;
		break;
	default:
		break;
	}

	if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
		enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
		if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
			return lower_gather4_integer(&ctx->ac, args, instr);
		}
	}
	return ac_build_image_opcode(&ctx->ac, args);
}

static LLVMValueRef visit_vulkan_resource_reindex(struct ac_nir_context *ctx,
						  nir_intrinsic_instr *instr)
{
	LLVMValueRef ptr = get_src(ctx, instr->src[0]);
	LLVMValueRef index = get_src(ctx, instr->src[1]);

	LLVMValueRef result = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
	LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
	return result;
}

static LLVMValueRef visit_load_push_constant(struct ac_nir_context *ctx,
					     nir_intrinsic_instr *instr)
{
	LLVMValueRef ptr, addr;

	addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
	addr = LLVMBuildAdd(ctx->ac.builder, addr,
			    get_src(ctx, instr->src[0]), "");

	ptr = ac_build_gep0(&ctx->ac, ctx->abi->push_constants, addr);
	ptr = ac_cast_ptr(&ctx->ac, ptr, get_def_type(ctx, &instr->dest.ssa));

	return LLVMBuildLoad(ctx->ac.builder, ptr, "");
}

static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
					  const nir_intrinsic_instr *instr)
{
	LLVMValueRef index = get_src(ctx, instr->src[0]);

	return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
}

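/* Expand each bit of a writemask to 'multiplier' consecutive bits,
 * e.g. widen_mask(0b101, 2) == 0b110011. Used when a write of 64-bit
 * components is expressed in terms of 32-bit channels.
 */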
static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
{
	uint32_t new_mask = 0;
	for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
		if (mask & (1u << i))
			new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
	return new_mask;
}

static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
					 unsigned start, unsigned count)
{
	LLVMTypeRef type = LLVMTypeOf(src);

	if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
		assert(start == 0);
		assert(count == 1);
		return src;
	}

	unsigned src_elements = LLVMGetVectorSize(type);
	assert(start < src_elements);
	assert(start + count <= src_elements);

	if (start == 0 && count == src_elements)
		return src;

	if (count == 1)
		return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");

	assert(count <= 8);
	LLVMValueRef indices[8];
	for (unsigned i = 0; i < count; ++i)
		indices[i] = LLVMConstInt(ctx->i32, start + i, false);

	LLVMValueRef swizzle = LLVMConstVector(indices, count);
	return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
}

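/* Store to an SSBO. The writemask is scanned for runs of consecutive
 * enabled 32-bit channels, and each run becomes one buffer.store of
 * 1, 2 or 4 dwords (3-dword runs are split below due to an LLVM
 * limitation).
 */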
static void visit_store_ssbo(struct ac_nir_context *ctx,
			     nir_intrinsic_instr *instr)
{
	const char *store_name;
	LLVMValueRef src_data = get_src(ctx, instr->src[0]);
	LLVMTypeRef data_type = ctx->ac.f32;
	int elem_size_mult = ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
	int components_32bit = elem_size_mult * instr->num_components;
	unsigned writemask = nir_intrinsic_write_mask(instr);
	LLVMValueRef base_data, base_offset;
	LLVMValueRef params[6];

	params[1] = ctx->abi->load_ssbo(ctx->abi,
					get_src(ctx, instr->src[1]), true);
	params[2] = ctx->ac.i32_0; /* vindex */
	params[4] = ctx->ac.i1false; /* glc */
	params[5] = ctx->ac.i1false; /* slc */

	if (components_32bit > 1)
		data_type = LLVMVectorType(ctx->ac.f32, components_32bit);

	writemask = widen_mask(writemask, elem_size_mult);

	base_data = ac_to_float(&ctx->ac, src_data);
	base_data = ac_trim_vector(&ctx->ac, base_data, instr->num_components);
	base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
				     data_type, "");
	base_offset = get_src(ctx, instr->src[2]); /* voffset */
	while (writemask) {
		int start, count;
		LLVMValueRef data;
		LLVMValueRef offset;

		u_bit_scan_consecutive_range(&writemask, &start, &count);

		/* Due to an LLVM limitation, split 3-element writes
		 * into a 2-element and a 1-element write. */
		if (count == 3) {
			writemask |= 1 << (start + 2);
			count = 2;
		}

		if (count > 4) {
			writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
			count = 4;
		}

		if (count == 4) {
			store_name = "llvm.amdgcn.buffer.store.v4f32";
		} else if (count == 2) {
			store_name = "llvm.amdgcn.buffer.store.v2f32";
		} else {
			assert(count == 1);
			store_name = "llvm.amdgcn.buffer.store.f32";
		}
		data = extract_vector_range(&ctx->ac, base_data, start, count);

		offset = base_offset;
		if (start != 0) {
			offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), "");
		}
		params[0] = data;
		params[3] = offset;
		ac_build_intrinsic(&ctx->ac, store_name,
				   ctx->ac.voidt, params, 6, 0);
	}
}

static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
				      const nir_intrinsic_instr *instr)
{
	const char *name;
	LLVMValueRef params[6];
	int arg_count = 0;

	if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
		params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
	}
	params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
	params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
						  get_src(ctx, instr->src[0]),
						  true);
	params[arg_count++] = ctx->ac.i32_0; /* vindex */
	params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
	params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */

	switch (instr->intrinsic) {
	case nir_intrinsic_ssbo_atomic_add:
		name = "llvm.amdgcn.buffer.atomic.add";
		break;
	case nir_intrinsic_ssbo_atomic_imin:
		name = "llvm.amdgcn.buffer.atomic.smin";
		break;
	case nir_intrinsic_ssbo_atomic_umin:
		name = "llvm.amdgcn.buffer.atomic.umin";
		break;
	case nir_intrinsic_ssbo_atomic_imax:
		name = "llvm.amdgcn.buffer.atomic.smax";
		break;
	case nir_intrinsic_ssbo_atomic_umax:
		name = "llvm.amdgcn.buffer.atomic.umax";
		break;
	case nir_intrinsic_ssbo_atomic_and:
		name = "llvm.amdgcn.buffer.atomic.and";
		break;
	case nir_intrinsic_ssbo_atomic_or:
		name = "llvm.amdgcn.buffer.atomic.or";
		break;
	case nir_intrinsic_ssbo_atomic_xor:
		name = "llvm.amdgcn.buffer.atomic.xor";
		break;
	case nir_intrinsic_ssbo_atomic_exchange:
		name = "llvm.amdgcn.buffer.atomic.swap";
		break;
	case nir_intrinsic_ssbo_atomic_comp_swap:
		name = "llvm.amdgcn.buffer.atomic.cmpswap";
		break;
	default:
		abort();
	}

	return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0);
}

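/* SSBO loads are split into chunks of at most 4 dwords; when two
 * chunks are needed, the results are shuffled back together into one
 * vector of up to 8 dwords.
 */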
static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
				      const nir_intrinsic_instr *instr)
{
	LLVMValueRef results[2];
	int load_components;
	int num_components = instr->num_components;
	if (instr->dest.ssa.bit_size == 64)
		num_components *= 2;

	for (int i = 0; i < num_components; i += load_components) {
		load_components = MIN2(num_components - i, 4);
		const char *load_name;
		LLVMTypeRef data_type = ctx->ac.f32;
		LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false);
		offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, "");

		if (load_components == 3)
			data_type = LLVMVectorType(ctx->ac.f32, 4);
		else if (load_components > 1)
			data_type = LLVMVectorType(ctx->ac.f32, load_components);

		if (load_components >= 3)
			load_name = "llvm.amdgcn.buffer.load.v4f32";
		else if (load_components == 2)
			load_name = "llvm.amdgcn.buffer.load.v2f32";
		else if (load_components == 1)
			load_name = "llvm.amdgcn.buffer.load.f32";
		else
			unreachable("unhandled number of components");

		LLVMValueRef params[] = {
			ctx->abi->load_ssbo(ctx->abi,
					    get_src(ctx, instr->src[0]),
					    false),
			ctx->ac.i32_0,
			offset,
			ctx->ac.i1false,
			ctx->ac.i1false,
		};

		results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
	}

	assume(results[0]);
	LLVMValueRef ret = results[0];
	if (num_components > 4 || num_components == 3) {
		LLVMValueRef masks[] = {
			LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
			LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
			LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false),
			LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false)
		};

		LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
		ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
					     results[num_components > 4 ? 1 : 0], swizzle, "");
	}

	return LLVMBuildBitCast(ctx->ac.builder, ret,
				get_def_type(ctx, &instr->dest.ssa), "");
}

static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
					  const nir_intrinsic_instr *instr)
{
	LLVMValueRef ret;
	LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
	LLVMValueRef offset = get_src(ctx, instr->src[1]);
	int num_components = instr->num_components;

	if (ctx->abi->load_ubo)
		rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);

	if (instr->dest.ssa.bit_size == 64)
		num_components *= 2;

	ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
				   NULL, 0, false, false, true, true);
	ret = ac_trim_vector(&ctx->ac, ret, num_components);
	return LLVMBuildBitCast(ctx->ac.builder, ret,
				get_def_type(ctx, &instr->dest.ssa), "");
}

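/* Walk a variable dereference chain and split the total offset into a
 * constant part (*const_out, in attribute slots) and an optional LLVM
 * value for the indirectly addressed part (*indir_out). For per-vertex
 * IO, the leading vertex index is peeled off first.
 */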
static void
get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref,
		 bool vs_in, unsigned *vertex_index_out,
		 LLVMValueRef *vertex_index_ref,
		 unsigned *const_out, LLVMValueRef *indir_out)
{
	unsigned const_offset = 0;
	nir_deref *tail = &deref->deref;
	LLVMValueRef offset = NULL;

	if (vertex_index_out != NULL || vertex_index_ref != NULL) {
		tail = tail->child;
		nir_deref_array *deref_array = nir_deref_as_array(tail);
		if (vertex_index_out)
			*vertex_index_out = deref_array->base_offset;

		if (vertex_index_ref) {
			LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false);
			if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
				vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), "");
			}
			*vertex_index_ref = vtx;
		}
	}

	if (deref->var->data.compact) {
		assert(tail->child->deref_type == nir_deref_type_array);
		assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
		nir_deref_array *deref_array = nir_deref_as_array(tail->child);
		/* We always lower indirect dereferences for "compact" array vars. */
		assert(deref_array->deref_array_type == nir_deref_array_type_direct);

		const_offset = deref_array->base_offset;
		goto out;
	}

	while (tail->child != NULL) {
		const struct glsl_type *parent_type = tail->type;
		tail = tail->child;

		if (tail->deref_type == nir_deref_type_array) {
			nir_deref_array *deref_array = nir_deref_as_array(tail);
			LLVMValueRef index, stride, local_offset;
			unsigned size = glsl_count_attribute_slots(tail->type, vs_in);

			const_offset += size * deref_array->base_offset;
			if (deref_array->deref_array_type == nir_deref_array_type_direct)
				continue;

			assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
			index = get_src(ctx, deref_array->indirect);
			stride = LLVMConstInt(ctx->ac.i32, size, 0);
			local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, "");

			if (offset)
				offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, "");
			else
				offset = local_offset;
		} else if (tail->deref_type == nir_deref_type_struct) {
			nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

			for (unsigned i = 0; i < deref_struct->index; i++) {
				const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
				const_offset += glsl_count_attribute_slots(ft, vs_in);
			}
		} else
			unreachable("unsupported deref type");
	}
out:
	if (const_offset && offset)
		offset = LLVMBuildAdd(ctx->ac.builder, offset,
				      LLVMConstInt(ctx->ac.i32, const_offset, 0),
				      "");

	*const_out = const_offset;
	*indir_out = offset;
}

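/* Turn a NIR variable deref chain into a chain of GEPs, starting from
 * the base pointer recorded for the variable in ctx->vars (used for
 * shared/LDS variables).
 */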
1601 static LLVMValueRef
1602 build_gep_for_deref(struct ac_nir_context *ctx,
1603 nir_deref_var *deref)
1604 {
1605 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
1606 assert(entry->data);
1607 LLVMValueRef val = entry->data;
1608 nir_deref *tail = deref->deref.child;
1609 while (tail != NULL) {
1610 LLVMValueRef offset;
1611 switch (tail->deref_type) {
1612 case nir_deref_type_array: {
1613 nir_deref_array *array = nir_deref_as_array(tail);
1614 offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0);
1615 if (array->deref_array_type ==
1616 nir_deref_array_type_indirect) {
1617 offset = LLVMBuildAdd(ctx->ac.builder, offset,
1618 get_src(ctx,
1619 array->indirect),
1620 "");
1621 }
1622 break;
1623 }
1624 case nir_deref_type_struct: {
1625 nir_deref_struct *deref_struct =
1626 nir_deref_as_struct(tail);
1627 offset = LLVMConstInt(ctx->ac.i32,
1628 deref_struct->index, 0);
1629 break;
1630 }
1631 default:
1632 unreachable("bad deref type");
1633 }
1634 val = ac_build_gep0(&ctx->ac, val, offset);
1635 tail = tail->child;
1636 }
1637 return val;
1638 }
1639
1640 static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
1641 nir_intrinsic_instr *instr,
1642 bool load_inputs)
1643 {
1644 LLVMValueRef result;
1645 LLVMValueRef vertex_index = NULL;
1646 LLVMValueRef indir_index = NULL;
1647 unsigned const_index = 0;
1648 unsigned location = instr->variables[0]->var->data.location;
1649 unsigned driver_location = instr->variables[0]->var->data.driver_location;
1650 const bool is_patch = instr->variables[0]->var->data.patch;
1651 const bool is_compact = instr->variables[0]->var->data.compact;
1652
1653 get_deref_offset(ctx, instr->variables[0],
1654 false, NULL, is_patch ? NULL : &vertex_index,
1655 &const_index, &indir_index);
1656
1657 LLVMTypeRef dest_type = get_def_type(ctx, &instr->dest.ssa);
1658
1659 LLVMTypeRef src_component_type;
1660 if (LLVMGetTypeKind(dest_type) == LLVMVectorTypeKind)
1661 src_component_type = LLVMGetElementType(dest_type);
1662 else
1663 src_component_type = dest_type;
1664
1665 result = ctx->abi->load_tess_varyings(ctx->abi, src_component_type,
1666 vertex_index, indir_index,
1667 const_index, location, driver_location,
1668 instr->variables[0]->var->data.location_frac,
1669 instr->num_components,
1670 is_patch, is_compact, load_inputs);
1671 return LLVMBuildBitCast(ctx->ac.builder, result, dest_type, "");
1672 }
1673
1674 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
1675 nir_intrinsic_instr *instr)
1676 {
1677 LLVMValueRef values[8];
1678 int idx = instr->variables[0]->var->data.driver_location;
1679 int ve = instr->dest.ssa.num_components;
1680 unsigned comp = instr->variables[0]->var->data.location_frac;
1681 LLVMValueRef indir_index;
1682 LLVMValueRef ret;
1683 unsigned const_index;
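/* Each non-compact variable occupies a full vec4 slot (4 channels) per
 * location, hence stride 4; compact variables (e.g. clip distances) are
 * scalar-packed with stride 1. */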
1684 unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
1685 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
1686 instr->variables[0]->var->data.mode == nir_var_shader_in;
1687 get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
1688 &const_index, &indir_index);
1689
1690 if (instr->dest.ssa.bit_size == 64)
1691 ve *= 2;
1692
1693 switch (instr->variables[0]->var->data.mode) {
1694 case nir_var_shader_in:
1695 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
1696 ctx->stage == MESA_SHADER_TESS_EVAL) {
1697 return load_tess_varyings(ctx, instr, true);
1698 }
1699
1700 if (ctx->stage == MESA_SHADER_GEOMETRY) {
1701 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->dest.ssa.bit_size);
1702 LLVMValueRef indir_index;
1703 unsigned const_index, vertex_index;
1704 get_deref_offset(ctx, instr->variables[0],
1705 false, &vertex_index, NULL,
1706 &const_index, &indir_index);
1707
1708 return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location,
1709 instr->variables[0]->var->data.driver_location,
1710 instr->variables[0]->var->data.location_frac,
1711 instr->num_components, vertex_index, const_index, type);
1712 }
1713
1714 for (unsigned chan = comp; chan < ve + comp; chan++) {
1715 if (indir_index) {
1716 unsigned count = glsl_count_attribute_slots(
1717 instr->variables[0]->var->type,
1718 ctx->stage == MESA_SHADER_VERTEX);
1719 count -= chan / 4;
1720 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
1721 &ctx->ac, ctx->abi->inputs + idx + chan, count,
1722 stride, false, true);
1723
1724 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
1725 tmp_vec,
1726 indir_index, "");
1727 } else
1728 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
1729 }
1730 break;
1731 case nir_var_local:
1732 for (unsigned chan = 0; chan < ve; chan++) {
1733 if (indir_index) {
1734 unsigned count = glsl_count_attribute_slots(
1735 instr->variables[0]->var->type, false);
1736 count -= chan / 4;
1737 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
1738 &ctx->ac, ctx->locals + idx + chan, count,
1739 stride, true, true);
1740
1741 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
1742 tmp_vec,
1743 indir_index, "");
1744 } else {
1745 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
1746 }
1747 }
1748 break;
1749 case nir_var_shared: {
1750 LLVMValueRef address = build_gep_for_deref(ctx,
1751 instr->variables[0]);
1752 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
1753 return LLVMBuildBitCast(ctx->ac.builder, val,
1754 get_def_type(ctx, &instr->dest.ssa),
1755 "");
1756 }
1757 case nir_var_shader_out:
1758 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1759 return load_tess_varyings(ctx, instr, false);
1760 }
1761
1762 for (unsigned chan = comp; chan < ve + comp; chan++) {
1763 if (indir_index) {
1764 unsigned count = glsl_count_attribute_slots(
1765 instr->variables[0]->var->type, false);
1766 count -= chan / 4;
1767 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
1768 &ctx->ac, ctx->abi->outputs + idx + chan, count,
1769 stride, true, true);
1770
1771 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
1772 tmp_vec,
1773 indir_index, "");
1774 } else {
1775 values[chan] = LLVMBuildLoad(ctx->ac.builder,
1776 ctx->abi->outputs[idx + chan + const_index * stride],
1777 "");
1778 }
1779 }
1780 break;
1781 default:
1782 unreachable("unhandle variable mode");
1783 }
1784 ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
1785 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
1786 }
1787
1788 static void
1789 visit_store_var(struct ac_nir_context *ctx,
1790 nir_intrinsic_instr *instr)
1791 {
1792 LLVMValueRef temp_ptr, value;
1793 int idx = instr->variables[0]->var->data.driver_location;
1794 unsigned comp = instr->variables[0]->var->data.location_frac;
1795 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
1796 int writemask = instr->const_index[0];
1797 LLVMValueRef indir_index;
1798 unsigned const_index;
1799 get_deref_offset(ctx, instr->variables[0], false,
1800 NULL, NULL, &const_index, &indir_index);
1801
1802 if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
1803
1804 src = LLVMBuildBitCast(ctx->ac.builder, src,
1805 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
1806 "");
1807
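/* Each 64-bit component occupies two 32-bit channels, so every writemask
 * bit must cover two channels; e.g. widen_mask(0b0101, 2) is expected to
 * yield 0b00110011 (illustrative sketch of the helper's behaviour). */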
1808 writemask = widen_mask(writemask, 2);
1809 }
1810
1811 writemask = writemask << comp;
1812
1813 switch (instr->variables[0]->var->data.mode) {
1814 case nir_var_shader_out:
1815
1816 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
1817 LLVMValueRef vertex_index = NULL;
1818 LLVMValueRef indir_index = NULL;
1819 unsigned const_index = 0;
1820 const bool is_patch = instr->variables[0]->var->data.patch;
1821
1822 get_deref_offset(ctx, instr->variables[0],
1823 false, NULL, is_patch ? NULL : &vertex_index,
1824 &const_index, &indir_index);
1825
1826 ctx->abi->store_tcs_outputs(ctx->abi, instr->variables[0]->var,
1827 vertex_index, indir_index,
1828 const_index, src, writemask);
1829 return;
1830 }
1831
1832 for (unsigned chan = 0; chan < 8; chan++) {
1833 int stride = 4;
1834 if (!(writemask & (1 << chan)))
1835 continue;
1836
1837 value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
1838
1839 if (instr->variables[0]->var->data.compact)
1840 stride = 1;
1841 if (indir_index) {
1842 unsigned count = glsl_count_attribute_slots(
1843 instr->variables[0]->var->type, false);
1844 count -= chan / 4;
1845 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
1846 &ctx->ac, ctx->abi->outputs + idx + chan, count,
1847 stride, true, true);
1848
1849 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
1850 value, indir_index, "");
1851 build_store_values_extended(&ctx->ac, ctx->abi->outputs + idx + chan,
1852 count, stride, tmp_vec);
1853
1854 } else {
1855 temp_ptr = ctx->abi->outputs[idx + chan + const_index * stride];
1856
1857 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
1858 }
1859 }
1860 break;
1861 case nir_var_local:
1862 for (unsigned chan = 0; chan < 8; chan++) {
1863 if (!(writemask & (1 << chan)))
1864 continue;
1865
1866 value = ac_llvm_extract_elem(&ctx->ac, src, chan);
1867 if (indir_index) {
1868 unsigned count = glsl_count_attribute_slots(
1869 instr->variables[0]->var->type, false);
1870 count -= chan / 4;
1871 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
1872 &ctx->ac, ctx->locals + idx + chan, count,
1873 4, true, true);
1874
1875 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
1876 value, indir_index, "");
1877 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
1878 count, 4, tmp_vec);
1879 } else {
1880 temp_ptr = ctx->locals[idx + chan + const_index * 4];
1881
1882 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
1883 }
1884 }
1885 break;
1886 case nir_var_shared: {
1887 int writemask = instr->const_index[0];
1888 LLVMValueRef address = build_gep_for_deref(ctx,
1889 instr->variables[0]);
1890 LLVMValueRef val = get_src(ctx, instr->src[0]);
1891 unsigned components =
1892 glsl_get_vector_elements(
1893 nir_deref_tail(&instr->variables[0]->deref)->type);
1894 if (writemask == (1 << components) - 1) {
1895 val = LLVMBuildBitCast(
1896 ctx->ac.builder, val,
1897 LLVMGetElementType(LLVMTypeOf(address)), "");
1898 LLVMBuildStore(ctx->ac.builder, val, address);
1899 } else {
1900 for (unsigned chan = 0; chan < 4; chan++) {
1901 if (!(writemask & (1 << chan)))
1902 continue;
1903 LLVMValueRef ptr =
1904 LLVMBuildStructGEP(ctx->ac.builder,
1905 address, chan, "");
1906 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
1907 chan);
1908 src = LLVMBuildBitCast(
1909 ctx->ac.builder, src,
1910 LLVMGetElementType(LLVMTypeOf(ptr)), "");
1911 LLVMBuildStore(ctx->ac.builder, src, ptr);
1912 }
1913 }
1914 break;
1915 }
1916 default:
1917 break;
1918 }
1919 }
1920
1921 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
1922 {
1923 switch (dim) {
1924 case GLSL_SAMPLER_DIM_BUF:
1925 return 1;
1926 case GLSL_SAMPLER_DIM_1D:
1927 return array ? 2 : 1;
1928 case GLSL_SAMPLER_DIM_2D:
1929 return array ? 3 : 2;
1930 case GLSL_SAMPLER_DIM_MS:
1931 return array ? 4 : 3;
1932 case GLSL_SAMPLER_DIM_3D:
1933 case GLSL_SAMPLER_DIM_CUBE:
1934 return 3;
1935 case GLSL_SAMPLER_DIM_RECT:
1936 case GLSL_SAMPLER_DIM_SUBPASS:
1937 return 2;
1938 case GLSL_SAMPLER_DIM_SUBPASS_MS:
1939 return 3;
1940 default:
1941 break;
1942 }
1943 return 0;
1944 }
1945
1946 static bool
1947 glsl_is_array_image(const struct glsl_type *type)
1948 {
1949 const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
1950
1951 if (glsl_sampler_type_is_array(type))
1952 return true;
1953
1954 return dim == GLSL_SAMPLER_DIM_CUBE ||
1955 dim == GLSL_SAMPLER_DIM_3D ||
1956 dim == GLSL_SAMPLER_DIM_SUBPASS ||
1957 dim == GLSL_SAMPLER_DIM_SUBPASS_MS;
1958 }
1959
1960
1961 /* Adjust the sample index according to FMASK.
1962 *
1963 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
1964 * which is the identity mapping. Each nibble says which physical sample
1965 * should be fetched to get that sample.
1966 *
1967 * For example, 0x11111100 means there are only 2 samples stored and
1968 * the second sample covers 3/4 of the pixel. When reading samples 0
1969 * and 1, return physical sample 0 (determined by the first two 0s
1970 * in FMASK), otherwise return physical sample 1.
1971 *
1972 * The sample index should be adjusted as follows:
1973 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
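*
* Worked example: with the FMASK value 0x11111100 above, requesting
* sample_index = 3 selects nibble 3: (0x11111100 >> 12) & 0xF = 1,
* so physical sample 1 is fetched.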
1974 */
1975 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
1976 LLVMValueRef coord_x, LLVMValueRef coord_y,
1977 LLVMValueRef coord_z,
1978 LLVMValueRef sample_index,
1979 LLVMValueRef fmask_desc_ptr)
1980 {
1981 LLVMValueRef fmask_load_address[4];
1982 LLVMValueRef res;
1983
1984 fmask_load_address[0] = coord_x;
1985 fmask_load_address[1] = coord_y;
1986 if (coord_z) {
1987 fmask_load_address[2] = coord_z;
1988 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
1989 }
1990
1991 struct ac_image_args args = {0};
1992
1993 args.opcode = ac_image_load;
1994 args.da = coord_z ? true : false;
1995 args.resource = fmask_desc_ptr;
1996 args.dmask = 0xf;
1997 args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
1998
1999 res = ac_build_image_opcode(ctx, &args);
2000
2001 res = ac_to_integer(ctx, res);
2002 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
2003 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
2004
2005 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
2006 res,
2007 ctx->i32_0, "");
2008
2009 LLVMValueRef sample_index4 =
2010 LLVMBuildMul(ctx->builder, sample_index, four, "");
2011 LLVMValueRef shifted_fmask =
2012 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
2013 LLVMValueRef final_sample =
2014 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
2015
2016 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
2017 * resource descriptor is 0 (invalid).
2018 */
2019 LLVMValueRef fmask_desc =
2020 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
2021 ctx->v8i32, "");
2022
2023 LLVMValueRef fmask_word1 =
2024 LLVMBuildExtractElement(ctx->builder, fmask_desc,
2025 ctx->i32_1, "");
2026
2027 LLVMValueRef word1_is_nonzero =
2028 LLVMBuildICmp(ctx->builder, LLVMIntNE,
2029 fmask_word1, ctx->i32_0, "");
2030
2031 /* Replace the MSAA sample index. */
2032 sample_index =
2033 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
2034 final_sample, sample_index, "");
2035 return sample_index;
2036 }
2037
2038 static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
2039 const nir_intrinsic_instr *instr)
2040 {
2041 const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type);
2042
2043 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2044 LLVMValueRef coords[4];
2045 LLVMValueRef masks[] = {
2046 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
2047 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
2048 };
2049 LLVMValueRef res;
2050 LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
2051
2052 int count;
2053 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
2054 bool is_array = glsl_sampler_type_is_array(type);
2055 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
2056 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2057 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
2058 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2059 bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
2060 count = image_type_to_components_count(dim, is_array);
2061
2062 if (is_ms) {
2063 LLVMValueRef fmask_load_address[3];
2064 int chan;
2065
2066 fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2067 fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
2068 if (is_array)
2069 fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
2070 else
2071 fmask_load_address[2] = NULL;
2072 if (add_frag_pos) {
2073 for (chan = 0; chan < 2; ++chan)
2074 fmask_load_address[chan] =
2075 LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan],
2076 LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
2077 ctx->ac.i32, ""), "");
2078 fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
2079 }
2080 sample_index = adjust_sample_index_using_fmask(&ctx->ac,
2081 fmask_load_address[0],
2082 fmask_load_address[1],
2083 fmask_load_address[2],
2084 sample_index,
2085 get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false));
2086 }
2087 if (count == 1 && !gfx9_1d) {
2088 if (instr->src[0].ssa->num_components)
2089 res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
2090 else
2091 res = src0;
2092 } else {
2093 int chan;
2094 if (is_ms)
2095 count--;
2096 for (chan = 0; chan < count; ++chan) {
2097 coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
2098 }
2099 if (add_frag_pos) {
2100 for (chan = 0; chan < 2; ++chan)
2101 coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
2102 ctx->ac.i32, ""), "");
2103 coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[ac_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
2104 count++;
2105 }
2106
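/* GFX9 allocates 1D textures as 2D internally, so a zero Y coordinate is
 * inserted (and, for arrays, the layer index moves from Y to Z). */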
2107 if (gfx9_1d) {
2108 if (is_array) {
2109 coords[2] = coords[1];
2110 coords[1] = ctx->ac.i32_0;
2111 } else
2112 coords[1] = ctx->ac.i32_0;
2113 count++;
2114 }
2115
2116 if (is_ms) {
2117 coords[count] = sample_index;
2118 count++;
2119 }
2120
2121 if (count == 3) {
2122 coords[3] = LLVMGetUndef(ctx->ac.i32);
2123 count = 4;
2124 }
2125 res = ac_build_gather_values(&ctx->ac, coords, count);
2126 }
2127 return res;
2128 }
2129
2130 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
2131 const nir_intrinsic_instr *instr)
2132 {
2133 LLVMValueRef params[7];
2134 LLVMValueRef res;
2135 char intrinsic_name[64];
2136 const nir_variable *var = instr->variables[0]->var;
2137 const struct glsl_type *type = var->type;
2138
2139 if (instr->variables[0]->deref.child)
2140 type = instr->variables[0]->deref.child->type;
2141
2142 type = glsl_without_array(type);
2143
2144 const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
2145 if (dim == GLSL_SAMPLER_DIM_BUF) {
2146 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
2147 unsigned num_channels = util_last_bit(mask);
2148 LLVMValueRef rsrc, vindex;
2149
2150 rsrc = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false);
2151 vindex = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
2152 ctx->ac.i32_0, "");
2153
2154 /* TODO: set "glc" and "can_speculate" when OpenGL needs it. */
2155 res = ac_build_buffer_load_format(&ctx->ac, rsrc, vindex,
2156 ctx->ac.i32_0, num_channels,
2157 false, false);
2158 res = ac_build_expand_to_vec4(&ctx->ac, res, num_channels);
2159
2160 res = ac_trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
2161 res = ac_to_integer(&ctx->ac, res);
2162 } else {
2163 LLVMValueRef da = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false;
2164 LLVMValueRef slc = ctx->ac.i1false;
2165
2166 params[0] = get_image_coords(ctx, instr);
2167 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
2168 params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
2169 params[3] = (var->data.image._volatile || var->data.image.coherent) ?
2170 ctx->ac.i1true : ctx->ac.i1false;
2171 params[4] = slc;
2172 params[5] = ctx->ac.i1false;
2173 params[6] = da;
2174
2175 ac_get_image_intr_name("llvm.amdgcn.image.load",
2176 ctx->ac.v4f32, /* vdata */
2177 LLVMTypeOf(params[0]), /* coords */
2178 LLVMTypeOf(params[1]), /* rsrc */
2179 intrinsic_name, sizeof(intrinsic_name));
2180
2181 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32,
2182 params, 7, AC_FUNC_ATTR_READONLY);
2183 }
2184 return ac_to_integer(&ctx->ac, res);
2185 }
2186
2187 static void visit_image_store(struct ac_nir_context *ctx,
2188 nir_intrinsic_instr *instr)
2189 {
2190 LLVMValueRef params[8];
2191 char intrinsic_name[64];
2192 const nir_variable *var = instr->variables[0]->var;
2193 const struct glsl_type *type = glsl_without_array(var->type);
2194 const enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
2195 LLVMValueRef glc = ctx->ac.i1false;
2196 bool force_glc = ctx->ac.chip_class == SI;
2197 if (force_glc)
2198 glc = ctx->ac.i1true;
2199
2200 if (dim == GLSL_SAMPLER_DIM_BUF) {
2201 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
2202 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
2203 params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
2204 ctx->ac.i32_0, ""); /* vindex */
2205 params[3] = ctx->ac.i32_0; /* voffset */
2206 params[4] = glc; /* glc */
2207 params[5] = ctx->ac.i1false; /* slc */
2208 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
2209 params, 6, 0);
2210 } else {
2211 LLVMValueRef da = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false;
2212 LLVMValueRef slc = ctx->ac.i1false;
2213
2214 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2]));
2215 params[1] = get_image_coords(ctx, instr); /* coords */
2216 params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true);
2217 params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
2218 params[4] = (force_glc || var->data.image._volatile || var->data.image.coherent) ?
2219 ctx->ac.i1true : ctx->ac.i1false;
2220 params[5] = slc;
2221 params[6] = ctx->ac.i1false;
2222 params[7] = da;
2223
2224 ac_get_image_intr_name("llvm.amdgcn.image.store",
2225 LLVMTypeOf(params[0]), /* vdata */
2226 LLVMTypeOf(params[1]), /* coords */
2227 LLVMTypeOf(params[2]), /* rsrc */
2228 intrinsic_name, sizeof(intrinsic_name));
2229
2230 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt,
2231 params, 8, 0);
2232 }
2233
2234 }
2235
2236 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
2237 const nir_intrinsic_instr *instr)
2238 {
2239 LLVMValueRef params[7];
2240 int param_count = 0;
2241 const nir_variable *var = instr->variables[0]->var;
2242
2243 const char *atomic_name;
2244 char intrinsic_name[41];
2245 const struct glsl_type *type = glsl_without_array(var->type);
2246 MAYBE_UNUSED int length;
2247
2248 bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
2249
2250 switch (instr->intrinsic) {
2251 case nir_intrinsic_image_var_atomic_add:
2252 atomic_name = "add";
2253 break;
2254 case nir_intrinsic_image_var_atomic_min:
2255 atomic_name = is_unsigned ? "umin" : "smin";
2256 break;
2257 case nir_intrinsic_image_var_atomic_max:
2258 atomic_name = is_unsigned ? "umax" : "smax";
2259 break;
2260 case nir_intrinsic_image_var_atomic_and:
2261 atomic_name = "and";
2262 break;
2263 case nir_intrinsic_image_var_atomic_or:
2264 atomic_name = "or";
2265 break;
2266 case nir_intrinsic_image_var_atomic_xor:
2267 atomic_name = "xor";
2268 break;
2269 case nir_intrinsic_image_var_atomic_exchange:
2270 atomic_name = "swap";
2271 break;
2272 case nir_intrinsic_image_var_atomic_comp_swap:
2273 atomic_name = "cmpswap";
2274 break;
2275 default:
2276 abort();
2277 }
2278
2279 if (instr->intrinsic == nir_intrinsic_image_var_atomic_comp_swap)
2280 params[param_count++] = get_src(ctx, instr->src[3]);
2281 params[param_count++] = get_src(ctx, instr->src[2]);
2282
2283 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2284 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
2285 NULL, true, true);
2286 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
2287 ctx->ac.i32_0, ""); /* vindex */
2288 params[param_count++] = ctx->ac.i32_0; /* voffset */
2289 params[param_count++] = ctx->ac.i1false; /* slc */
2290
2291 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
2292 "llvm.amdgcn.buffer.atomic.%s", atomic_name);
2293 } else {
2294 char coords_type[8];
2295
2296 LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr);
2297 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE,
2298 NULL, true, true);
2299 params[param_count++] = ctx->ac.i1false; /* r128 */
2300 params[param_count++] = glsl_is_array_image(type) ? ctx->ac.i1true : ctx->ac.i1false; /* da */
2301 params[param_count++] = ctx->ac.i1false; /* slc */
2302
2303 build_int_type_name(LLVMTypeOf(coords),
2304 coords_type, sizeof(coords_type));
2305
2306 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
2307 "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type);
2308 }
2309
2310 assert(length < sizeof(intrinsic_name));
2311 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
2312 }
2313
2314 static LLVMValueRef visit_image_samples(struct ac_nir_context *ctx,
2315 const nir_intrinsic_instr *instr)
2316 {
2317 const nir_variable *var = instr->variables[0]->var;
2318 const struct glsl_type *type = glsl_without_array(var->type);
2319
2320 struct ac_image_args args = { 0 };
2321 args.da = glsl_is_array_image(type);
2322 args.dmask = 0xf;
2323 args.resource = get_sampler_desc(ctx, instr->variables[0],
2324 AC_DESC_IMAGE, NULL, true, false);
2325 args.opcode = ac_image_get_resinfo;
2326 args.addr = ctx->ac.i32_0;
2327
2328 return ac_build_image_opcode(&ctx->ac, &args);
2329 }
2330
2331 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
2332 const nir_intrinsic_instr *instr)
2333 {
2334 LLVMValueRef res;
2335 const nir_variable *var = instr->variables[0]->var;
2336 const struct glsl_type *type = glsl_without_array(var->type);
2337
2338 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
2339 return get_buffer_size(ctx,
2340 get_sampler_desc(ctx, instr->variables[0],
2341 AC_DESC_BUFFER, NULL, true, false), true);
2342
2343 struct ac_image_args args = { 0 };
2344
2345 args.da = glsl_is_array_image(type);
2346 args.dmask = 0xf;
2347 args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
2348 args.opcode = ac_image_get_resinfo;
2349 args.addr = ctx->ac.i32_0;
2350
2351 res = ac_build_image_opcode(&ctx->ac, &args);
2352
2353 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
2354
2355 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
2356 glsl_sampler_type_is_array(type)) {
2357 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
2358 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
2359 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
2360 res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
2361 }
2362 if (ctx->ac.chip_class >= GFX9 &&
2363 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
2364 glsl_sampler_type_is_array(type)) {
2365 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
2366 res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
2367 ctx->ac.i32_1, "");
2368
2369 }
2370 return res;
2371 }
2372
2373 #define NOOP_WAITCNT 0xf7f
2374 #define LGKM_CNT 0x07f
2375 #define VM_CNT 0xf70
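/* A sketch of the s_waitcnt encoding these masks assume: vmcnt sits in the
 * low bits, expcnt above it and lgkmcnt higher still, with a zeroed field
 * meaning "wait until that counter drains". ANDing NOOP_WAITCNT (every
 * field at its no-op maximum) with LGKM_CNT and/or VM_CNT thus selects
 * which counters to wait on. */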
2376
2377 static void emit_membar(struct ac_llvm_context *ac,
2378 const nir_intrinsic_instr *instr)
2379 {
2380 unsigned waitcnt = NOOP_WAITCNT;
2381
2382 switch (instr->intrinsic) {
2383 case nir_intrinsic_memory_barrier:
2384 case nir_intrinsic_group_memory_barrier:
2385 waitcnt &= VM_CNT & LGKM_CNT;
2386 break;
2387 case nir_intrinsic_memory_barrier_atomic_counter:
2388 case nir_intrinsic_memory_barrier_buffer:
2389 case nir_intrinsic_memory_barrier_image:
2390 waitcnt &= VM_CNT;
2391 break;
2392 case nir_intrinsic_memory_barrier_shared:
2393 waitcnt &= LGKM_CNT;
2394 break;
2395 default:
2396 break;
2397 }
2398 if (waitcnt != NOOP_WAITCNT)
2399 ac_build_waitcnt(ac, waitcnt);
2400 }
2401
2402 void ac_emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
2403 {
2404 /* SI only (thanks to a hw bug workaround):
2405 * The real barrier instruction isn't needed, because an entire patch
2406 * always fits into a single wave.
2407 */
2408 if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
2409 ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
2410 return;
2411 }
2412 ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier",
2413 ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
2414 }
2415
2416 static void emit_discard(struct ac_nir_context *ctx,
2417 const nir_intrinsic_instr *instr)
2418 {
2419 LLVMValueRef cond;
2420
2421 if (instr->intrinsic == nir_intrinsic_discard_if) {
2422 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
2423 get_src(ctx, instr->src[0]),
2424 ctx->ac.i32_0, "");
2425 } else {
2426 assert(instr->intrinsic == nir_intrinsic_discard);
2427 cond = LLVMConstInt(ctx->ac.i1, false, 0);
2428 }
2429
2430 ctx->abi->emit_kill(ctx->abi, cond);
2431 }
2432
2433 static LLVMValueRef
2434 visit_load_helper_invocation(struct ac_nir_context *ctx)
2435 {
2436 LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
2437 "llvm.amdgcn.ps.live",
2438 ctx->ac.i1, NULL, 0,
2439 AC_FUNC_ATTR_READNONE);
2440 result = LLVMBuildNot(ctx->ac.builder, result, "");
2441 return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
2442 }
2443
2444 static LLVMValueRef
2445 visit_load_local_invocation_index(struct ac_nir_context *ctx)
2446 {
2447 LLVMValueRef result;
2448 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
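/* tg_size is assumed to pack the wave ID within the threadgroup in bits
 * [11:6], so masking with 0xfc0 yields wave_id * 64, to which the lane's
 * thread ID is added to form a flat invocation index. */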
2449 result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
2450 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
2451
2452 return LLVMBuildAdd(ctx->ac.builder, result, thread_id, "");
2453 }
2454
2455 static LLVMValueRef
2456 visit_load_subgroup_id(struct ac_nir_context *ctx)
2457 {
2458 if (ctx->stage == MESA_SHADER_COMPUTE) {
2459 LLVMValueRef result;
2460 result = LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
2461 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
2462 return LLVMBuildLShr(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 6, false), "");
2463 } else {
2464 return LLVMConstInt(ctx->ac.i32, 0, false);
2465 }
2466 }
2467
2468 static LLVMValueRef
2469 visit_load_num_subgroups(struct ac_nir_context *ctx)
2470 {
2471 if (ctx->stage == MESA_SHADER_COMPUTE) {
2472 return LLVMBuildAnd(ctx->ac.builder, ctx->abi->tg_size,
2473 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
2474 } else {
2475 return LLVMConstInt(ctx->ac.i32, 1, false);
2476 }
2477 }
2478
2479 static LLVMValueRef
2480 visit_first_invocation(struct ac_nir_context *ctx)
2481 {
2482 LLVMValueRef active_set = ac_build_ballot(&ctx->ac, ctx->ac.i32_1);
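/* cttz picks the lowest set bit of the ballot, i.e. the first active lane;
 * e.g. an active_set of 0xc8 (lanes 3, 6 and 7 active) yields 3. */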
2483
2484 /* The second argument is whether cttz(0) should be defined, but we do not care. */
2485 LLVMValueRef args[] = {active_set, LLVMConstInt(ctx->ac.i1, 0, false)};
2486 LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
2487 "llvm.cttz.i64",
2488 ctx->ac.i64, args, 2,
2489 AC_FUNC_ATTR_NOUNWIND |
2490 AC_FUNC_ATTR_READNONE);
2491
2492 return LLVMBuildTrunc(ctx->ac.builder, result, ctx->ac.i32, "");
2493 }
2494
2495 static LLVMValueRef
2496 visit_load_shared(struct ac_nir_context *ctx,
2497 const nir_intrinsic_instr *instr)
2498 {
2499 LLVMValueRef values[4], derived_ptr, index, ret;
2500
2501 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
2502
2503 for (int chan = 0; chan < instr->num_components; chan++) {
2504 index = LLVMConstInt(ctx->ac.i32, chan, 0);
2505 derived_ptr = LLVMBuildGEP(ctx->ac.builder, ptr, &index, 1, "");
2506 values[chan] = LLVMBuildLoad(ctx->ac.builder, derived_ptr, "");
2507 }
2508
2509 ret = ac_build_gather_values(&ctx->ac, values, instr->num_components);
2510 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2511 }
2512
2513 static void
2514 visit_store_shared(struct ac_nir_context *ctx,
2515 const nir_intrinsic_instr *instr)
2516 {
2517 LLVMValueRef derived_ptr, data, index;
2518 LLVMBuilderRef builder = ctx->ac.builder;
2519
2520 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[1]);
2521 LLVMValueRef src = get_src(ctx, instr->src[0]);
2522
2523 int writemask = nir_intrinsic_write_mask(instr);
2524 for (int chan = 0; chan < 4; chan++) {
2525 if (!(writemask & (1 << chan))) {
2526 continue;
2527 }
2528 data = ac_llvm_extract_elem(&ctx->ac, src, chan);
2529 index = LLVMConstInt(ctx->ac.i32, chan, 0);
2530 derived_ptr = LLVMBuildGEP(builder, ptr, &index, 1, "");
2531 LLVMBuildStore(builder, data, derived_ptr);
2532 }
2533 }
2534
2535 static LLVMValueRef visit_var_atomic(struct ac_nir_context *ctx,
2536 const nir_intrinsic_instr *instr,
2537 LLVMValueRef ptr, int src_idx)
2538 {
2539 LLVMValueRef result;
2540 LLVMValueRef src = get_src(ctx, instr->src[src_idx]);
2541
2542 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap ||
2543 instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap) {
2544 LLVMValueRef src1 = get_src(ctx, instr->src[src_idx + 1]);
2545 result = LLVMBuildAtomicCmpXchg(ctx->ac.builder,
2546 ptr, src, src1,
2547 LLVMAtomicOrderingSequentiallyConsistent,
2548 LLVMAtomicOrderingSequentiallyConsistent,
2549 false);
2550 } else {
2551 LLVMAtomicRMWBinOp op;
2552 switch (instr->intrinsic) {
2553 case nir_intrinsic_var_atomic_add:
2554 case nir_intrinsic_shared_atomic_add:
2555 op = LLVMAtomicRMWBinOpAdd;
2556 break;
2557 case nir_intrinsic_var_atomic_umin:
2558 case nir_intrinsic_shared_atomic_umin:
2559 op = LLVMAtomicRMWBinOpUMin;
2560 break;
2561 case nir_intrinsic_var_atomic_umax:
2562 case nir_intrinsic_shared_atomic_umax:
2563 op = LLVMAtomicRMWBinOpUMax;
2564 break;
2565 case nir_intrinsic_var_atomic_imin:
2566 case nir_intrinsic_shared_atomic_imin:
2567 op = LLVMAtomicRMWBinOpMin;
2568 break;
2569 case nir_intrinsic_var_atomic_imax:
2570 case nir_intrinsic_shared_atomic_imax:
2571 op = LLVMAtomicRMWBinOpMax;
2572 break;
2573 case nir_intrinsic_var_atomic_and:
2574 case nir_intrinsic_shared_atomic_and:
2575 op = LLVMAtomicRMWBinOpAnd;
2576 break;
2577 case nir_intrinsic_var_atomic_or:
2578 case nir_intrinsic_shared_atomic_or:
2579 op = LLVMAtomicRMWBinOpOr;
2580 break;
2581 case nir_intrinsic_var_atomic_xor:
2582 case nir_intrinsic_shared_atomic_xor:
2583 op = LLVMAtomicRMWBinOpXor;
2584 break;
2585 case nir_intrinsic_var_atomic_exchange:
2586 case nir_intrinsic_shared_atomic_exchange:
2587 op = LLVMAtomicRMWBinOpXchg;
2588 break;
2589 default:
2590 return NULL;
2591 }
2592
2593 result = LLVMBuildAtomicRMW(ctx->ac.builder, op, ptr, ac_to_integer(&ctx->ac, src),
2594 LLVMAtomicOrderingSequentiallyConsistent,
2595 false);
2596 }
2597 return result;
2598 }
2599
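/* The fractional part of the per-sample fragment position is the sample
 * location within the pixel, e.g. frag_pos.x = 12.4375 gives a sample
 * position of 0.4375 (illustrative values). */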
2600 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
2601 {
2602 LLVMValueRef values[2];
2603 LLVMValueRef pos[2];
2604
2605 pos[0] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[0]);
2606 pos[1] = ac_to_float(&ctx->ac, ctx->abi->frag_pos[1]);
2607
2608 values[0] = ac_build_fract(&ctx->ac, pos[0], 32);
2609 values[1] = ac_build_fract(&ctx->ac, pos[1], 32);
2610 return ac_build_gather_values(&ctx->ac, values, 2);
2611 }
2612
2613 static LLVMValueRef visit_interp(struct ac_nir_context *ctx,
2614 const nir_intrinsic_instr *instr)
2615 {
2616 LLVMValueRef result[4];
2617 LLVMValueRef interp_param, attr_number;
2618 unsigned location;
2619 unsigned chan;
2620 LLVMValueRef src_c0 = NULL;
2621 LLVMValueRef src_c1 = NULL;
2622 LLVMValueRef src0 = NULL;
2623 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2624 switch (instr->intrinsic) {
2625 case nir_intrinsic_interp_var_at_centroid:
2626 location = INTERP_CENTROID;
2627 break;
2628 case nir_intrinsic_interp_var_at_sample:
2629 case nir_intrinsic_interp_var_at_offset:
2630 location = INTERP_CENTER;
2631 src0 = get_src(ctx, instr->src[0]);
2632 break;
2633 default:
2634 break;
2635 }
2636
2637 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2638 src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_0, ""));
2639 src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->ac.builder, src0, ctx->ac.i32_1, ""));
2640 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2641 LLVMValueRef sample_position;
2642 LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
2643
2644 /* fetch sample ID */
2645 sample_position = ctx->abi->load_sample_position(ctx->abi, src0);
2646
2647 src_c0 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_0, "");
2648 src_c0 = LLVMBuildFSub(ctx->ac.builder, src_c0, halfval, "");
2649 src_c1 = LLVMBuildExtractElement(ctx->ac.builder, sample_position, ctx->ac.i32_1, "");
2650 src_c1 = LLVMBuildFSub(ctx->ac.builder, src_c1, halfval, "");
2651 }
2652 interp_param = ctx->abi->lookup_interp_param(ctx->abi, instr->variables[0]->var->data.interpolation, location);
2653 attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
2654
2655 if (location == INTERP_CENTER) {
2656 LLVMValueRef ij_out[2];
2657 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2658
2659 /*
2660 * take the I then J parameters, and the DDX/Y for it, and
2661 * calculate the IJ inputs for the interpolator.
2662 * temp1 = ddx * offset/sample.x + I;
2663 * interp_param.I = ddy * offset/sample.y + temp1;
2664 * temp1 = ddx * offset/sample.x + J;
2665 * interp_param.J = ddy * offset/sample.y + temp1;
2666 */
2667 for (unsigned i = 0; i < 2; i++) {
2668 LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
2669 LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
2670 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->ac.builder,
2671 ddxy_out, ix_ll, "");
2672 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->ac.builder,
2673 ddxy_out, iy_ll, "");
2674 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->ac.builder,
2675 interp_param, ix_ll, "");
2676 LLVMValueRef temp1, temp2;
2677
2678 interp_el = LLVMBuildBitCast(ctx->ac.builder, interp_el,
2679 ctx->ac.f32, "");
2680
2681 temp1 = LLVMBuildFMul(ctx->ac.builder, ddx_el, src_c0, "");
2682 temp1 = LLVMBuildFAdd(ctx->ac.builder, temp1, interp_el, "");
2683
2684 temp2 = LLVMBuildFMul(ctx->ac.builder, ddy_el, src_c1, "");
2685 temp2 = LLVMBuildFAdd(ctx->ac.builder, temp2, temp1, "");
2686
2687 ij_out[i] = LLVMBuildBitCast(ctx->ac.builder,
2688 temp2, ctx->ac.i32, "");
2689 }
2690 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
2691
2692 }
2693
2694 for (chan = 0; chan < 4; chan++) {
2695 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
2696
2697 if (interp_param) {
2698 interp_param = LLVMBuildBitCast(ctx->ac.builder,
2699 interp_param, ctx->ac.v2f32, "");
2700 LLVMValueRef i = LLVMBuildExtractElement(
2701 ctx->ac.builder, interp_param, ctx->ac.i32_0, "");
2702 LLVMValueRef j = LLVMBuildExtractElement(
2703 ctx->ac.builder, interp_param, ctx->ac.i32_1, "");
2704
2705 result[chan] = ac_build_fs_interp(&ctx->ac,
2706 llvm_chan, attr_number,
2707 ctx->abi->prim_mask, i, j);
2708 } else {
2709 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
2710 LLVMConstInt(ctx->ac.i32, 2, false),
2711 llvm_chan, attr_number,
2712 ctx->abi->prim_mask);
2713 }
2714 }
2715 return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
2716 instr->variables[0]->var->data.location_frac);
2717 }
2718
2719 static void visit_intrinsic(struct ac_nir_context *ctx,
2720 nir_intrinsic_instr *instr)
2721 {
2722 LLVMValueRef result = NULL;
2723
2724 switch (instr->intrinsic) {
2725 case nir_intrinsic_ballot:
2726 result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
2727 break;
2728 case nir_intrinsic_read_invocation:
2729 case nir_intrinsic_read_first_invocation: {
2730 LLVMValueRef args[2];
2731
2732 /* Value */
2733 args[0] = get_src(ctx, instr->src[0]);
2734
2735 unsigned num_args;
2736 const char *intr_name;
2737 if (instr->intrinsic == nir_intrinsic_read_invocation) {
2738 num_args = 2;
2739 intr_name = "llvm.amdgcn.readlane";
2740
2741 /* Invocation */
2742 args[1] = get_src(ctx, instr->src[1]);
2743 } else {
2744 num_args = 1;
2745 intr_name = "llvm.amdgcn.readfirstlane";
2746 }
2747
2748 /* We currently have no other way to prevent LLVM from lifting the icmp
2749 * calls to a dominating basic block.
2750 */
2751 ac_build_optimization_barrier(&ctx->ac, &args[0]);
2752
2753 result = ac_build_intrinsic(&ctx->ac, intr_name,
2754 ctx->ac.i32, args, num_args,
2755 AC_FUNC_ATTR_READNONE |
2756 AC_FUNC_ATTR_CONVERGENT);
2757 break;
2758 }
2759 case nir_intrinsic_load_subgroup_invocation:
2760 result = ac_get_thread_id(&ctx->ac);
2761 break;
2762 case nir_intrinsic_load_work_group_id: {
2763 LLVMValueRef values[3];
2764
2765 for (int i = 0; i < 3; i++) {
2766 values[i] = ctx->abi->workgroup_ids[i] ?
2767 ctx->abi->workgroup_ids[i] : ctx->ac.i32_0;
2768 }
2769
2770 result = ac_build_gather_values(&ctx->ac, values, 3);
2771 break;
2772 }
2773 case nir_intrinsic_load_base_vertex: {
2774 result = ctx->abi->load_base_vertex(ctx->abi);
2775 break;
2776 }
2777 case nir_intrinsic_load_local_group_size:
2778 result = ctx->abi->load_local_group_size(ctx->abi);
2779 break;
2780 case nir_intrinsic_load_vertex_id:
2781 result = LLVMBuildAdd(ctx->ac.builder, ctx->abi->vertex_id,
2782 ctx->abi->base_vertex, "");
2783 break;
2784 case nir_intrinsic_load_vertex_id_zero_base: {
2785 result = ctx->abi->vertex_id;
2786 break;
2787 }
2788 case nir_intrinsic_load_local_invocation_id: {
2789 result = ctx->abi->local_invocation_ids;
2790 break;
2791 }
2792 case nir_intrinsic_load_base_instance:
2793 result = ctx->abi->start_instance;
2794 break;
2795 case nir_intrinsic_load_draw_id:
2796 result = ctx->abi->draw_id;
2797 break;
2798 case nir_intrinsic_load_view_index:
2799 result = ctx->abi->view_index;
2800 break;
2801 case nir_intrinsic_load_invocation_id:
2802 if (ctx->stage == MESA_SHADER_TESS_CTRL)
2803 result = ac_unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
2804 else
2805 result = ctx->abi->gs_invocation_id;
2806 break;
2807 case nir_intrinsic_load_primitive_id:
2808 if (ctx->stage == MESA_SHADER_GEOMETRY) {
2809 result = ctx->abi->gs_prim_id;
2810 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2811 result = ctx->abi->tcs_patch_id;
2812 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
2813 result = ctx->abi->tes_patch_id;
2814 } else
2815 fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
2816 break;
2817 case nir_intrinsic_load_sample_id:
2818 result = ac_unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
2819 break;
2820 case nir_intrinsic_load_sample_pos:
2821 result = load_sample_pos(ctx);
2822 break;
2823 case nir_intrinsic_load_sample_mask_in:
2824 result = ctx->abi->load_sample_mask_in(ctx->abi);
2825 break;
2826 case nir_intrinsic_load_frag_coord: {
2827 LLVMValueRef values[4] = {
2828 ctx->abi->frag_pos[0],
2829 ctx->abi->frag_pos[1],
2830 ctx->abi->frag_pos[2],
2831 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
2832 };
2833 result = ac_build_gather_values(&ctx->ac, values, 4);
2834 break;
2835 }
2836 case nir_intrinsic_load_front_face:
2837 result = ctx->abi->front_face;
2838 break;
2839 case nir_intrinsic_load_helper_invocation:
2840 result = visit_load_helper_invocation(ctx);
2841 break;
2842 case nir_intrinsic_load_instance_id:
2843 result = ctx->abi->instance_id;
2844 break;
2845 case nir_intrinsic_load_num_work_groups:
2846 result = ctx->abi->num_work_groups;
2847 break;
2848 case nir_intrinsic_load_local_invocation_index:
2849 result = visit_load_local_invocation_index(ctx);
2850 break;
2851 case nir_intrinsic_load_subgroup_id:
2852 result = visit_load_subgroup_id(ctx);
2853 break;
2854 case nir_intrinsic_load_num_subgroups:
2855 result = visit_load_num_subgroups(ctx);
2856 break;
2857 case nir_intrinsic_first_invocation:
2858 result = visit_first_invocation(ctx);
2859 break;
2860 case nir_intrinsic_load_push_constant:
2861 result = visit_load_push_constant(ctx, instr);
2862 break;
2863 case nir_intrinsic_vulkan_resource_index: {
2864 LLVMValueRef index = get_src(ctx, instr->src[0]);
2865 unsigned desc_set = nir_intrinsic_desc_set(instr);
2866 unsigned binding = nir_intrinsic_binding(instr);
2867
2868 result = ctx->abi->load_resource(ctx->abi, index, desc_set,
2869 binding);
2870 break;
2871 }
2872 case nir_intrinsic_vulkan_resource_reindex:
2873 result = visit_vulkan_resource_reindex(ctx, instr);
2874 break;
2875 case nir_intrinsic_store_ssbo:
2876 visit_store_ssbo(ctx, instr);
2877 break;
2878 case nir_intrinsic_load_ssbo:
2879 result = visit_load_buffer(ctx, instr);
2880 break;
2881 case nir_intrinsic_ssbo_atomic_add:
2882 case nir_intrinsic_ssbo_atomic_imin:
2883 case nir_intrinsic_ssbo_atomic_umin:
2884 case nir_intrinsic_ssbo_atomic_imax:
2885 case nir_intrinsic_ssbo_atomic_umax:
2886 case nir_intrinsic_ssbo_atomic_and:
2887 case nir_intrinsic_ssbo_atomic_or:
2888 case nir_intrinsic_ssbo_atomic_xor:
2889 case nir_intrinsic_ssbo_atomic_exchange:
2890 case nir_intrinsic_ssbo_atomic_comp_swap:
2891 result = visit_atomic_ssbo(ctx, instr);
2892 break;
2893 case nir_intrinsic_load_ubo:
2894 result = visit_load_ubo_buffer(ctx, instr);
2895 break;
2896 case nir_intrinsic_get_buffer_size:
2897 result = visit_get_buffer_size(ctx, instr);
2898 break;
2899 case nir_intrinsic_load_var:
2900 result = visit_load_var(ctx, instr);
2901 break;
2902 case nir_intrinsic_store_var:
2903 visit_store_var(ctx, instr);
2904 break;
2905 case nir_intrinsic_load_shared:
2906 result = visit_load_shared(ctx, instr);
2907 break;
2908 case nir_intrinsic_store_shared:
2909 visit_store_shared(ctx, instr);
2910 break;
2911 case nir_intrinsic_image_var_samples:
2912 result = visit_image_samples(ctx, instr);
2913 break;
2914 case nir_intrinsic_image_var_load:
2915 result = visit_image_load(ctx, instr);
2916 break;
2917 case nir_intrinsic_image_var_store:
2918 visit_image_store(ctx, instr);
2919 break;
2920 case nir_intrinsic_image_var_atomic_add:
2921 case nir_intrinsic_image_var_atomic_min:
2922 case nir_intrinsic_image_var_atomic_max:
2923 case nir_intrinsic_image_var_atomic_and:
2924 case nir_intrinsic_image_var_atomic_or:
2925 case nir_intrinsic_image_var_atomic_xor:
2926 case nir_intrinsic_image_var_atomic_exchange:
2927 case nir_intrinsic_image_var_atomic_comp_swap:
2928 result = visit_image_atomic(ctx, instr);
2929 break;
2930 case nir_intrinsic_image_var_size:
2931 result = visit_image_size(ctx, instr);
2932 break;
2933 case nir_intrinsic_shader_clock:
2934 result = ac_build_shader_clock(&ctx->ac);
2935 break;
2936 case nir_intrinsic_discard:
2937 case nir_intrinsic_discard_if:
2938 emit_discard(ctx, instr);
2939 break;
2940 case nir_intrinsic_memory_barrier:
2941 case nir_intrinsic_group_memory_barrier:
2942 case nir_intrinsic_memory_barrier_atomic_counter:
2943 case nir_intrinsic_memory_barrier_buffer:
2944 case nir_intrinsic_memory_barrier_image:
2945 case nir_intrinsic_memory_barrier_shared:
2946 emit_membar(&ctx->ac, instr);
2947 break;
2948 case nir_intrinsic_barrier:
2949 ac_emit_barrier(&ctx->ac, ctx->stage);
2950 break;
2951 case nir_intrinsic_shared_atomic_add:
2952 case nir_intrinsic_shared_atomic_imin:
2953 case nir_intrinsic_shared_atomic_umin:
2954 case nir_intrinsic_shared_atomic_imax:
2955 case nir_intrinsic_shared_atomic_umax:
2956 case nir_intrinsic_shared_atomic_and:
2957 case nir_intrinsic_shared_atomic_or:
2958 case nir_intrinsic_shared_atomic_xor:
2959 case nir_intrinsic_shared_atomic_exchange:
2960 case nir_intrinsic_shared_atomic_comp_swap: {
2961 LLVMValueRef ptr = get_memory_ptr(ctx, instr->src[0]);
2962 result = visit_var_atomic(ctx, instr, ptr, 1);
2963 break;
2964 }
2965 case nir_intrinsic_var_atomic_add:
2966 case nir_intrinsic_var_atomic_imin:
2967 case nir_intrinsic_var_atomic_umin:
2968 case nir_intrinsic_var_atomic_imax:
2969 case nir_intrinsic_var_atomic_umax:
2970 case nir_intrinsic_var_atomic_and:
2971 case nir_intrinsic_var_atomic_or:
2972 case nir_intrinsic_var_atomic_xor:
2973 case nir_intrinsic_var_atomic_exchange:
2974 case nir_intrinsic_var_atomic_comp_swap: {
2975 LLVMValueRef ptr = build_gep_for_deref(ctx, instr->variables[0]);
2976 result = visit_var_atomic(ctx, instr, ptr, 0);
2977 break;
2978 }
2979 case nir_intrinsic_interp_var_at_centroid:
2980 case nir_intrinsic_interp_var_at_sample:
2981 case nir_intrinsic_interp_var_at_offset:
2982 result = visit_interp(ctx, instr);
2983 break;
2984 case nir_intrinsic_emit_vertex:
2985 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->abi->outputs);
2986 break;
2987 case nir_intrinsic_end_primitive:
2988 ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
2989 break;
2990 case nir_intrinsic_load_tess_coord:
2991 result = ctx->abi->load_tess_coord(ctx->abi);
2992 break;
2993 case nir_intrinsic_load_tess_level_outer:
2994 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
2995 break;
2996 case nir_intrinsic_load_tess_level_inner:
2997 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
2998 break;
2999 case nir_intrinsic_load_patch_vertices_in:
3000 result = ctx->abi->load_patch_vertices_in(ctx->abi);
3001 break;
3002 case nir_intrinsic_vote_all: {
3003 LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
3004 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
3005 break;
3006 }
3007 case nir_intrinsic_vote_any: {
3008 LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
3009 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
3010 break;
3011 }
3012 default:
3013 fprintf(stderr, "Unknown intrinsic: ");
3014 nir_print_instr(&instr->instr, stderr);
3015 fprintf(stderr, "\n");
3016 break;
3017 }
3018 if (result) {
3019 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3020 }
3021 }
3022
3023 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
3024 const nir_deref_var *deref,
3025 enum ac_descriptor_type desc_type,
3026 const nir_tex_instr *tex_instr,
3027 bool image, bool write)
3028 {
3029 LLVMValueRef index = NULL;
3030 unsigned constant_index = 0;
3031 unsigned descriptor_set;
3032 unsigned base_index;
3033 bool bindless = false;
3034
3035 if (!deref) {
3036 assert(tex_instr && !image);
3037 descriptor_set = 0;
3038 base_index = tex_instr->sampler_index;
3039 } else {
3040 const nir_deref *tail = &deref->deref;
3041 while (tail->child) {
3042 const nir_deref_array *child = nir_deref_as_array(tail->child);
3043 unsigned array_size = glsl_get_aoa_size(tail->child->type);
3044
3045 if (!array_size)
3046 array_size = 1;
3047
3048 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3049
3050 if (child->deref_array_type == nir_deref_array_type_indirect) {
3051 LLVMValueRef indirect = get_src(ctx, child->indirect);
3052
3053 indirect = LLVMBuildMul(ctx->ac.builder, indirect,
3054 LLVMConstInt(ctx->ac.i32, array_size, false), "");
3055
3056 if (!index)
3057 index = indirect;
3058 else
3059 index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
3060 }
3061
3062 constant_index += child->base_offset * array_size;
3063
3064 tail = &child->deref;
3065 }
3066 descriptor_set = deref->var->data.descriptor_set;
3067
3068 if (deref->var->data.bindless) {
3069 bindless = deref->var->data.bindless;
3070 base_index = deref->var->data.driver_location;
3071 } else {
3072 base_index = deref->var->data.binding;
3073 }
3074 }
3075
3076 return ctx->abi->load_sampler_desc(ctx->abi,
3077 descriptor_set,
3078 base_index,
3079 constant_index, index,
3080 desc_type, image, write, bindless);
3081 }
3082
3083 static void set_tex_fetch_args(struct ac_llvm_context *ctx,
3084 struct ac_image_args *args,
3085 const nir_tex_instr *instr,
3086 nir_texop op,
3087 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3088 LLVMValueRef *param, unsigned count,
3089 unsigned dmask)
3090 {
3091 unsigned is_rect = 0;
3092 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
3093
3094 if (op == nir_texop_lod)
3095 da = false;
3096 /* Pad to a power-of-two vector size */
3097 while (count < util_next_power_of_two(count))
3098 param[count++] = LLVMGetUndef(ctx->i32);
3099
3100 if (count > 1)
3101 args->addr = ac_build_gather_values(ctx, param, count);
3102 else
3103 args->addr = param[0];
3104
3105 args->resource = res_ptr;
3106 args->sampler = samp_ptr;
3107
3108 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
3109 args->addr = param[0];
3110 return;
3111 }
3112
3113 args->dmask = dmask;
3114 args->unorm = is_rect;
3115 args->da = da;
3116 }
3117
3118 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3119 *
3120 * SI-CI:
3121 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3122 * filtering manually. The driver sets img7 to a mask clearing
3123 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3124 * s_and_b32 samp0, samp0, img7
3125 *
3126 * VI:
3127 * The ANISO_OVERRIDE sampler field enables this fix in TA.
3128 */
3129 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
3130 LLVMValueRef res, LLVMValueRef samp)
3131 {
3132 LLVMBuilderRef builder = ctx->ac.builder;
3133 LLVMValueRef img7, samp0;
3134
3135 if (ctx->ac.chip_class >= VI)
3136 return samp;
3137
3138 img7 = LLVMBuildExtractElement(builder, res,
3139 LLVMConstInt(ctx->ac.i32, 7, 0), "");
3140 samp0 = LLVMBuildExtractElement(builder, samp,
3141 LLVMConstInt(ctx->ac.i32, 0, 0), "");
3142 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3143 return LLVMBuildInsertElement(builder, samp, samp0,
3144 LLVMConstInt(ctx->ac.i32, 0, 0), "");
3145 }
3146
3147 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
3148 nir_tex_instr *instr,
3149 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3150 LLVMValueRef *fmask_ptr)
3151 {
3152 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
3153 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false);
3154 else
3155 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false);
3156 if (samp_ptr) {
3157 if (instr->sampler)
3158 *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false);
3159 else
3160 *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false);
3161 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3162 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3163 }
3164 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3165 instr->op == nir_texop_samples_identical))
3166 *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false);
3167 }
3168
3169 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
3170 LLVMValueRef coord)
3171 {
3172 coord = ac_to_float(ctx, coord);
3173 coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
3174 coord = ac_to_integer(ctx, coord);
3175 return coord;
3176 }
3177
3178 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
3179 {
3180 LLVMValueRef result = NULL;
3181 struct ac_image_args args = { 0 };
3182 unsigned dmask = 0xf;
3183 LLVMValueRef address[16];
3184 LLVMValueRef coords[5];
3185 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
3186 LLVMValueRef bias = NULL, offsets = NULL;
3187 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
3188 LLVMValueRef ddx = NULL, ddy = NULL;
3189 LLVMValueRef derivs[6];
3190 unsigned chan, count = 0;
3191 unsigned const_src = 0, num_deriv_comp = 0;
3192 bool lod_is_zero = false;
3193
3194 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3195
3196 for (unsigned i = 0; i < instr->num_srcs; i++) {
3197 switch (instr->src[i].src_type) {
3198 case nir_tex_src_coord:
3199 coord = get_src(ctx, instr->src[i].src);
3200 break;
3201 case nir_tex_src_projector:
3202 break;
3203 case nir_tex_src_comparator:
3204 comparator = get_src(ctx, instr->src[i].src);
3205 break;
3206 case nir_tex_src_offset:
3207 offsets = get_src(ctx, instr->src[i].src);
3208 const_src = i;
3209 break;
3210 case nir_tex_src_bias:
3211 bias = get_src(ctx, instr->src[i].src);
3212 break;
3213 case nir_tex_src_lod: {
3214 nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
3215
3216 if (val && val->i32[0] == 0)
3217 lod_is_zero = true;
3218 lod = get_src(ctx, instr->src[i].src);
3219 break;
3220 }
3221 case nir_tex_src_ms_index:
3222 sample_index = get_src(ctx, instr->src[i].src);
3223 break;
3224 case nir_tex_src_ms_mcs:
3225 break;
3226 case nir_tex_src_ddx:
3227 ddx = get_src(ctx, instr->src[i].src);
3228 num_deriv_comp = instr->src[i].src.ssa->num_components;
3229 break;
3230 case nir_tex_src_ddy:
3231 ddy = get_src(ctx, instr->src[i].src);
3232 break;
3233 case nir_tex_src_texture_offset:
3234 case nir_tex_src_sampler_offset:
3235 case nir_tex_src_plane:
3236 default:
3237 break;
3238 }
3239 }
3240
3241 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
3242 result = get_buffer_size(ctx, res_ptr, true);
3243 goto write_result;
3244 }
3245
3246 if (instr->op == nir_texop_texture_samples) {
3247 LLVMValueRef res, samples, is_msaa;
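/* Presumed descriptor layout: dword 3 holds TYPE in bits [31:28] (0xe/0xf
 * for the MSAA image types, hence the 0xe mask) and LAST_LEVEL in bits
 * [19:16], which stores log2(samples) for MSAA resources; non-MSAA views
 * report a single sample. */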
3248 res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
3249 samples = LLVMBuildExtractElement(ctx->ac.builder, res,
3250 LLVMConstInt(ctx->ac.i32, 3, false), "");
3251 is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
3252 LLVMConstInt(ctx->ac.i32, 28, false), "");
3253 is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
3254 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
3255 is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
3256 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
3257
3258 samples = LLVMBuildLShr(ctx->ac.builder, samples,
3259 LLVMConstInt(ctx->ac.i32, 16, false), "");
3260 samples = LLVMBuildAnd(ctx->ac.builder, samples,
3261 LLVMConstInt(ctx->ac.i32, 0xf, false), "");
3262 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
3263 samples, "");
3264 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
3265 ctx->ac.i32_1, "");
3266 result = samples;
3267 goto write_result;
3268 }
3269
3270 if (coord)
3271 for (chan = 0; chan < instr->coord_components; chan++)
3272 coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
3273
3274 if (offsets && instr->op != nir_texop_txf) {
3275 LLVMValueRef offset[3], pack;
3276 for (chan = 0; chan < 3; ++chan)
3277 offset[chan] = ctx->ac.i32_0;
3278
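/* Texture offsets are packed as 6-bit fields into a single dword,
 * presumably x in bits [5:0], y in [13:8] and z in [21:16]; the
 * mask-and-shift loop below builds exactly that layout. */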
3279 args.offset = true;
3280 for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
3281 offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
3282 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
3283 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
3284 if (chan)
3285 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
3286 LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
3287 }
3288 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
3289 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
3290 address[count++] = pack;
3291
3292 }
3293 /* Pack LOD bias value */
3294 if (instr->op == nir_texop_txb && bias) {
3295 address[count++] = bias;
3296 }
3297
3298 /* Pack depth comparison value */
3299 if (instr->is_shadow && comparator) {
3300 LLVMValueRef z = ac_to_float(&ctx->ac,
3301 ac_llvm_extract_elem(&ctx->ac, comparator, 0));
3302
3303 /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
3304 * so the depth comparison value isn't clamped for Z16 and
3305 * Z24 anymore. Do it manually here.
3306 *
3307 * It's unnecessary if the original texture format was
3308 * Z32_FLOAT, but we don't know that here.
3309 */
3310 if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
3311 z = ac_build_clamp(&ctx->ac, z);
3312
3313 address[count++] = z;
3314 }
3315
3316 /* Pack derivatives */
3317 if (ddx || ddy) {
3318 int num_src_deriv_channels, num_dest_deriv_channels;
3319 switch (instr->sampler_dim) {
3320 case GLSL_SAMPLER_DIM_3D:
3321 case GLSL_SAMPLER_DIM_CUBE:
3322 num_deriv_comp = 3;
3323 num_src_deriv_channels = 3;
3324 num_dest_deriv_channels = 3;
3325 break;
3326 case GLSL_SAMPLER_DIM_2D:
3327 default:
3328 num_src_deriv_channels = 2;
3329 num_dest_deriv_channels = 2;
3330 num_deriv_comp = 2;
3331 break;
3332 case GLSL_SAMPLER_DIM_1D:
3333 num_src_deriv_channels = 1;
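/* GFX9 addresses 1D textures as 2D, so a zero derivative has to
 * be supplied for the extra channel. */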
3334 if (ctx->ac.chip_class >= GFX9) {
3335 num_dest_deriv_channels = 2;
3336 num_deriv_comp = 2;
3337 } else {
3338 num_dest_deriv_channels = 1;
3339 num_deriv_comp = 1;
3340 }
3341 break;
3342 }
3343
3344 for (unsigned i = 0; i < num_src_deriv_channels; i++) {
3345 derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
3346 derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
3347 }
3348 for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
3349 derivs[i] = ctx->ac.f32_0;
3350 derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
3351 }
3352 }
3353
3354 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
3355 for (chan = 0; chan < instr->coord_components; chan++)
3356 coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
3357 if (instr->coord_components == 3)
3358 coords[3] = LLVMGetUndef(ctx->ac.f32);
3359 ac_prepare_cube_coords(&ctx->ac,
3360 instr->op == nir_texop_txd, instr->is_array,
3361 instr->op == nir_texop_lod, coords, derivs);
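/* ac_prepare_cube_coords projects the 3D derivatives onto the
 * selected cube face, leaving one component fewer per direction. */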
3362 if (num_deriv_comp)
3363 num_deriv_comp--;
3364 }
3365
3366 if (ddx || ddy) {
3367 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
3368 address[count++] = derivs[i];
3369 }
3370
3371 /* Pack texture coordinates */
3372 if (coord) {
3373 address[count++] = coords[0];
3374 if (instr->coord_components > 1) {
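/* Layer indices of array textures are rounded to the nearest
 * integer before addressing; txf coordinates are already
 * integers. */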
3375 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
3376 coords[1] = apply_round_slice(&ctx->ac, coords[1]);
3377 }
3378 address[count++] = coords[1];
3379 }
3380 if (instr->coord_components > 2) {
3381 if ((instr->sampler_dim == GLSL_SAMPLER_DIM_2D ||
3382 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ||
3383 instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS ||
3384 instr->sampler_dim == GLSL_SAMPLER_DIM_SUBPASS_MS) &&
3385 instr->is_array &&
3386 instr->op != nir_texop_txf && instr->op != nir_texop_txf_ms) {
3387 coords[2] = apply_round_slice(&ctx->ac, coords[2]);
3388 }
3389 address[count++] = coords[2];
3390 }
3391
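/* GFX9 stores 1D textures as 2D, so a filler Y coordinate is
 * needed: 0 for integer fetches, 0.5 (the texel-row center) for
 * sampling ops. */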
3392 if (ctx->ac.chip_class >= GFX9) {
3393 LLVMValueRef filler;
3394 if (instr->op == nir_texop_txf)
3395 filler = ctx->ac.i32_0;
3396 else
3397 filler = LLVMConstReal(ctx->ac.f32, 0.5);
3398
3399 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
3400 /* nir_texop_lod is excluded because it does not take a slice
3401 * even for array textures. */
3402 if (instr->is_array && instr->op != nir_texop_lod) {
3403 address[count] = address[count - 1];
3404 address[count - 1] = filler;
3405 count++;
3406 } else
3407 address[count++] = filler;
3408 }
3409 }
3410 }
3411
3412 /* Pack LOD */
3413 if (lod && (instr->op == nir_texop_txl || instr->op == nir_texop_txf) && !lod_is_zero) {
3414 address[count++] = lod;
3415 } else if (instr->op == nir_texop_txf_ms && sample_index) {
3416 address[count++] = sample_index;
3417 } else if (instr->op == nir_texop_txs) {
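/* txs takes only the mip level, so drop everything packed so far. */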
3418 count = 0;
3419 if (lod)
3420 address[count++] = lod;
3421 else
3422 address[count++] = ctx->ac.i32_0;
3423 }
3424
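/* The image intrinsics expect the address vector as i32; float
 * components are bitcast, not converted. */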
3425 for (chan = 0; chan < count; chan++) {
3426 address[chan] = LLVMBuildBitCast(ctx->ac.builder,
3427 address[chan], ctx->ac.i32, "");
3428 }
3429
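/* samples_identical is answered by fetching the pixel's FMASK word:
 * a value of 0 means every sample still points at fragment 0, i.e.
 * all samples are identical. */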
3430 if (instr->op == nir_texop_samples_identical) {
3431 LLVMValueRef txf_address[4];
3432 struct ac_image_args txf_args = { 0 };
3433 unsigned txf_count = count;
3434 memcpy(txf_address, address, sizeof(txf_address));
3435
3436 if (!instr->is_array)
3437 txf_address[2] = ctx->ac.i32_0;
3438 txf_address[3] = ctx->ac.i32_0;
3439
3440 set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
3441 fmask_ptr, NULL,
3442 txf_address, txf_count, 0xf);
3443
3444 result = build_tex_intrinsic(ctx, instr, false, &txf_args);
3445
3446 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
3447 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
3448 goto write_result;
3449 }
3450
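/* For MSAA fetches, remap the logical sample index through FMASK to
 * the physical fragment slot the hardware actually stored. */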
3451 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
3452 instr->op != nir_texop_txs) {
3453 unsigned sample_chan = instr->is_array ? 3 : 2;
3454 address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
3455 address[0],
3456 address[1],
3457 instr->is_array ? address[2] : NULL,
3458 address[sample_chan],
3459 fmask_ptr);
3460 }
3461
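/* txf offsets must be constants and are simply added to the integer
 * texel coordinates instead of being packed into an offset dword. */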
3462 if (offsets && instr->op == nir_texop_txf) {
3463 nir_const_value *const_offset =
3464 nir_src_as_const_value(instr->src[const_src].src);
3465 int num_offsets = instr->src[const_src].src.ssa->num_components;
3466 assert(const_offset);
3467 num_offsets = MIN2(num_offsets, instr->coord_components);
3468 if (num_offsets > 2)
3469 address[2] = LLVMBuildAdd(ctx->ac.builder,
3470 address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
3471 if (num_offsets > 1)
3472 address[1] = LLVMBuildAdd(ctx->ac.builder,
3473 address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
3474 address[0] = LLVMBuildAdd(ctx->ac.builder,
3475 address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
3476
3477 }
3478
3479 /* TODO TG4 support */
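/* gather4 fetches one component from four texels, selected by a
 * single dmask bit; shadow gathers always return the comparison
 * result, so bit 0 is used. */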
3480 if (instr->op == nir_texop_tg4) {
3481 if (instr->is_shadow)
3482 dmask = 1;
3483 else
3484 dmask = 1 << instr->component;
3485 }
3486 set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
3487 res_ptr, samp_ptr, address, count, dmask);
3488
3489 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
3490
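/* resinfo returns the mip count in the W component. */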
3491 if (instr->op == nir_texop_query_levels)
3492 result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
3493 else if (instr->is_shadow && instr->is_new_style_shadow &&
3494 instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
3495 instr->op != nir_texop_tg4)
3496 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
3497 else if (instr->op == nir_texop_txs &&
3498 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
3499 instr->is_array) {
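/* The descriptor stores layer-faces, but GL expects the number of
 * whole cubes in the Z component, so divide by 6. */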
3500 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
3501 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
3502 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
3503 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
3504 result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
3505 } else if (ctx->ac.chip_class >= GFX9 &&
3506 instr->op == nir_texop_txs &&
3507 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
3508 instr->is_array) {
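/* GFX9 stores 1D arrays as 2D, so the layer count comes back in the
 * Z component; move it to Y where a 1D array query expects it. */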
3509 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
3510 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
3511 result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
3512 ctx->ac.i32_1, "");
3513 } else if (instr->dest.ssa.num_components != 4)
3514 result = ac_trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
3515
3516 write_result:
3517 if (result) {
3518 assert(instr->dest.is_ssa);
3519 result = ac_to_integer(&ctx->ac, result);
3520 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3521 }
3522 }
3523
3524
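/* Phis are handled in two passes: visit_phi creates an empty LLVM phi
 * while the blocks are being translated, and phi_post_pass adds the
 * incoming (value, block) pairs once every block and SSA def exists,
 * so forward references resolve correctly.
 */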
3525 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
3526 {
3527 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3528 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
3529
3530 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3531 _mesa_hash_table_insert(ctx->phis, instr, result);
3532 }
3533
3534 static void visit_post_phi(struct ac_nir_context *ctx,
3535 nir_phi_instr *instr,
3536 LLVMValueRef llvm_phi)
3537 {
3538 nir_foreach_phi_src(src, instr) {
3539 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3540 LLVMValueRef llvm_src = get_src(ctx, src->src);
3541
3542 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3543 }
3544 }
3545
3546 static void phi_post_pass(struct ac_nir_context *ctx)
3547 {
3548 struct hash_entry *entry;
3549 hash_table_foreach(ctx->phis, entry) {
3550 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3551 (LLVMValueRef)entry->data);
3552 }
3553 }
3554
3555
3556 static void visit_ssa_undef(struct ac_nir_context *ctx,
3557 const nir_ssa_undef_instr *instr)
3558 {
3559 unsigned num_components = instr->def.num_components;
3560 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
3561 LLVMValueRef undef;
3562
3563 if (num_components == 1) {
3564 undef = LLVMGetUndef(type);
3565 } else {
3566 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
3567 }
3568 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3569 }
3570
3571 static void visit_jump(struct ac_llvm_context *ctx,
3572 const nir_jump_instr *instr)
3573 {
3574 switch (instr->type) {
3575 case nir_jump_break:
3576 ac_build_break(ctx);
3577 break;
3578 case nir_jump_continue:
3579 ac_build_continue(ctx);
3580 break;
3581 default:
3582 fprintf(stderr, "Unknown NIR jump instr: ");
3583 nir_print_instr(&instr->instr, stderr);
3584 fprintf(stderr, "\n");
3585 abort();
3586 }
3587 }
3588
3589 static void visit_cf_list(struct ac_nir_context *ctx,
3590 struct exec_list *list);
3591
3592 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
3593 {
3594 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
3595 nir_foreach_instr(instr, block)
3596 {
3597 switch (instr->type) {
3598 case nir_instr_type_alu:
3599 visit_alu(ctx, nir_instr_as_alu(instr));
3600 break;
3601 case nir_instr_type_load_const:
3602 visit_load_const(ctx, nir_instr_as_load_const(instr));
3603 break;
3604 case nir_instr_type_intrinsic:
3605 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3606 break;
3607 case nir_instr_type_tex:
3608 visit_tex(ctx, nir_instr_as_tex(instr));
3609 break;
3610 case nir_instr_type_phi:
3611 visit_phi(ctx, nir_instr_as_phi(instr));
3612 break;
3613 case nir_instr_type_ssa_undef:
3614 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3615 break;
3616 case nir_instr_type_jump:
3617 visit_jump(&ctx->ac, nir_instr_as_jump(instr));
3618 break;
3619 default:
3620 fprintf(stderr, "Unknown NIR instr type: ");
3621 nir_print_instr(instr, stderr);
3622 fprintf(stderr, "\n");
3623 abort();
3624 }
3625 }
3626
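/* Record the mapping from NIR block to LLVM block so that
 * get_block() can resolve phi predecessors later. */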
3627 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3628 }
3629
3630 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
3631 {
3632 LLVMValueRef value = get_src(ctx, if_stmt->condition);
3633
3634 nir_block *then_block =
3635 (nir_block *) exec_list_get_head(&if_stmt->then_list);
3636
3637 ac_build_uif(&ctx->ac, value, then_block->index);
3638
3639 visit_cf_list(ctx, &if_stmt->then_list);
3640
3641 if (!exec_list_is_empty(&if_stmt->else_list)) {
3642 nir_block *else_block =
3643 (nir_block *) exec_list_get_head(&if_stmt->else_list);
3644
3645 ac_build_else(&ctx->ac, else_block->index);
3646 visit_cf_list(ctx, &if_stmt->else_list);
3647 }
3648
3649 ac_build_endif(&ctx->ac, then_block->index);
3650 }
3651
3652 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
3653 {
3654 nir_block *first_loop_block =
3655 (nir_block *) exec_list_get_head(&loop->body);
3656
3657 ac_build_bgnloop(&ctx->ac, first_loop_block->index);
3658
3659 visit_cf_list(ctx, &loop->body);
3660
3661 ac_build_endloop(&ctx->ac, first_loop_block->index);
3662 }
3663
3664 static void visit_cf_list(struct ac_nir_context *ctx,
3665 struct exec_list *list)
3666 {
3667 foreach_list_typed(nir_cf_node, node, node, list)
3668 {
3669 switch (node->type) {
3670 case nir_cf_node_block:
3671 visit_block(ctx, nir_cf_node_as_block(node));
3672 break;
3673
3674 case nir_cf_node_if:
3675 visit_if(ctx, nir_cf_node_as_if(node));
3676 break;
3677
3678 case nir_cf_node_loop:
3679 visit_loop(ctx, nir_cf_node_as_loop(node));
3680 break;
3681
3682 default:
3683 assert(0);
3684 }
3685 }
3686 }
3687
3688 void
3689 ac_handle_shader_output_decl(struct ac_llvm_context *ctx,
3690 struct ac_shader_abi *abi,
3691 struct nir_shader *nir,
3692 struct nir_variable *variable,
3693 gl_shader_stage stage)
3694 {
3695 unsigned output_loc = variable->data.driver_location / 4;
3696 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3697
3698 /* tess ctrl has its own load/store paths for outputs */
3699 if (stage == MESA_SHADER_TESS_CTRL)
3700 return;
3701
3702 if (stage == MESA_SHADER_VERTEX ||
3703 stage == MESA_SHADER_TESS_EVAL ||
3704 stage == MESA_SHADER_GEOMETRY) {
3705 int idx = variable->data.location + variable->data.index;
3706 if (idx == VARYING_SLOT_CLIP_DIST0) {
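/* Clip and cull distances are packed together starting at
 * CLIP_DIST0; more than four combined floats spill into a
 * second vec4 slot. */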
3707 int length = nir->info.clip_distance_array_size +
3708 nir->info.cull_distance_array_size;
3709
3710 if (length > 4)
3711 attrib_count = 2;
3712 else
3713 attrib_count = 1;
3714 }
3715 }
3716
3717 for (unsigned i = 0; i < attrib_count; ++i) {
3718 for (unsigned chan = 0; chan < 4; chan++) {
3719 abi->outputs[ac_llvm_reg_index_soa(output_loc + i, chan)] =
3720 ac_build_alloca_undef(ctx, ctx->f32, "");
3721 }
3722 }
3723 }
3724
3725 static LLVMTypeRef
3726 glsl_base_to_llvm_type(struct ac_llvm_context *ac,
3727 enum glsl_base_type type)
3728 {
3729 switch (type) {
3730 case GLSL_TYPE_INT:
3731 case GLSL_TYPE_UINT:
3732 case GLSL_TYPE_BOOL:
3733 case GLSL_TYPE_SUBROUTINE:
3734 return ac->i32;
3735 case GLSL_TYPE_FLOAT: /* TODO handle mediump */
3736 return ac->f32;
3737 case GLSL_TYPE_INT64:
3738 case GLSL_TYPE_UINT64:
3739 return ac->i64;
3740 case GLSL_TYPE_DOUBLE:
3741 return ac->f64;
3742 default:
3743 unreachable("unknown GLSL type");
3744 }
3745 }
3746
3747 static LLVMTypeRef
3748 glsl_to_llvm_type(struct ac_llvm_context *ac,
3749 const struct glsl_type *type)
3750 {
3751 if (glsl_type_is_scalar(type)) {
3752 return glsl_base_to_llvm_type(ac, glsl_get_base_type(type));
3753 }
3754
3755 if (glsl_type_is_vector(type)) {
3756 return LLVMVectorType(
3757 glsl_base_to_llvm_type(ac, glsl_get_base_type(type)),
3758 glsl_get_vector_elements(type));
3759 }
3760
3761 if (glsl_type_is_matrix(type)) {
3762 return LLVMArrayType(
3763 glsl_to_llvm_type(ac, glsl_get_column_type(type)),
3764 glsl_get_matrix_columns(type));
3765 }
3766
3767 if (glsl_type_is_array(type)) {
3768 return LLVMArrayType(
3769 glsl_to_llvm_type(ac, glsl_get_array_element(type)),
3770 glsl_get_length(type));
3771 }
3772
3773 assert(glsl_type_is_struct(type));
3774
3775 LLVMTypeRef member_types[glsl_get_length(type)];
3776
3777 for (unsigned i = 0; i < glsl_get_length(type); i++) {
3778 member_types[i] =
3779 glsl_to_llvm_type(ac,
3780 glsl_get_struct_field(type, i));
3781 }
3782
3783 return LLVMStructTypeInContext(ac->context, member_types,
3784 glsl_get_length(type), false);
3785 }
3786
3787 static void
3788 setup_locals(struct ac_nir_context *ctx,
3789 struct nir_function *func)
3790 {
3791 int i, j;
3792 ctx->num_locals = 0;
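/* Lay the locals out as consecutive vec4 slots; each channel is
 * backed by its own f32 alloca, addressed via driver_location. */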
3793 nir_foreach_variable(variable, &func->impl->locals) {
3794 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3795 variable->data.driver_location = ctx->num_locals * 4;
3796 variable->data.location_frac = 0;
3797 ctx->num_locals += attrib_count;
3798 }
3799 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
3800 if (!ctx->locals)
3801 return;
3802
3803 for (i = 0; i < ctx->num_locals; i++) {
3804 for (j = 0; j < 4; j++) {
3805 ctx->locals[i * 4 + j] =
3806 ac_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
3807 }
3808 }
3809 }
3810
3811 static void
3812 setup_shared(struct ac_nir_context *ctx,
3813 struct nir_shader *nir)
3814 {
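/* Shared variables become module globals in the LDS address space. */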
3815 nir_foreach_variable(variable, &nir->shared) {
3816 LLVMValueRef shared =
3817 LLVMAddGlobalInAddressSpace(
3818 ctx->ac.module, glsl_to_llvm_type(&ctx->ac, variable->type),
3819 variable->name ? variable->name : "",
3820 AC_LOCAL_ADDR_SPACE);
3821 _mesa_hash_table_insert(ctx->vars, variable, shared);
3822 }
3823 }
3824
3825 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
3826 struct nir_shader *nir)
3827 {
3828 struct ac_nir_context ctx = {};
3829 struct nir_function *func;
3830
3831 ctx.ac = *ac;
3832 ctx.abi = abi;
3833
3834 ctx.stage = nir->info.stage;
3835
3836 ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
3837
3838 nir_foreach_variable(variable, &nir->outputs)
3839 ac_handle_shader_output_decl(&ctx.ac, ctx.abi, nir, variable,
3840 ctx.stage);
3841
3842 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
3843 _mesa_key_pointer_equal);
3844 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
3845 _mesa_key_pointer_equal);
3846 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
3847 _mesa_key_pointer_equal);
3848
3849 func = (struct nir_function *)exec_list_get_head(&nir->functions);
3850
3851 setup_locals(&ctx, func);
3852
3853 if (nir->info.stage == MESA_SHADER_COMPUTE)
3854 setup_shared(&ctx, nir);
3855
3856 visit_cf_list(&ctx, &func->impl->body);
3857 phi_post_pass(&ctx);
3858
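/* Graphics stages export their outputs through the ABI callback;
 * compute shaders have no outputs to emit. */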
3859 if (nir->info.stage != MESA_SHADER_COMPUTE)
3860 ctx.abi->emit_outputs(ctx.abi, AC_LLVM_MAX_OUTPUTS,
3861 ctx.abi->outputs);
3862
3863 free(ctx.locals);
3864 ralloc_free(ctx.defs);
3865 ralloc_free(ctx.phis);
3866 ralloc_free(ctx.vars);
3867 }
3868
3869 void
3870 ac_lower_indirect_derefs(struct nir_shader *nir, enum chip_class chip_class)
3871 {
3872 /* While it would be nice not to have this flag, we are constrained
3873 * by the reality that LLVM 5.0 doesn't have working VGPR indexing
3874 * on GFX9.
3875 */
3876 bool llvm_has_working_vgpr_indexing = chip_class <= VI;
3877
3878 /* TODO: Indirect indexing of GS inputs is unimplemented.
3879 *
3880 * TCS and TES load inputs directly from LDS or offchip memory, so
3881 * indirect indexing is trivial.
3882 */
3883 nir_variable_mode indirect_mask = 0;
3884 if (nir->info.stage == MESA_SHADER_GEOMETRY ||
3885 (nir->info.stage != MESA_SHADER_TESS_CTRL &&
3886 nir->info.stage != MESA_SHADER_TESS_EVAL &&
3887 !llvm_has_working_vgpr_indexing)) {
3888 indirect_mask |= nir_var_shader_in;
3889 }
3890 if (!llvm_has_working_vgpr_indexing &&
3891 nir->info.stage != MESA_SHADER_TESS_CTRL)
3892 indirect_mask |= nir_var_shader_out;
3893
3894 /* TODO: We shouldn't need to do this; however, LLVM isn't currently
3895 * smart enough to handle indirects without excessive spilling,
3896 * which causes the GPU to hang.
3897 *
3898 * See the following thread for more details of the problem:
3899 * https://lists.freedesktop.org/archives/mesa-dev/2017-July/162106.html
3900 */
3901 indirect_mask |= nir_var_local;
3902
3903 nir_lower_indirect_derefs(nir, indirect_mask);
3904 }