ac: replace old image intrinsics with new ones
[mesa.git] / src / amd / common / ac_llvm_build.c
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
26 #include "ac_llvm_build.h"
27
28 #include <llvm-c/Core.h>
29
30 #include "c11/threads.h"
31
32 #include <assert.h>
33 #include <stdio.h>
34
35 #include "ac_llvm_util.h"
36
37 #include "util/bitscan.h"
38 #include "util/macros.h"
39 #include "sid.h"
40
41 /* Initialize module-independent parts of the context.
42 *
43 * The caller is responsible for initializing ctx::module and ctx::builder.
44 */
45 void
46 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
47 {
48 LLVMValueRef args[1];
49
50 ctx->context = context;
51 ctx->module = NULL;
52 ctx->builder = NULL;
53
54 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
55 ctx->i1 = LLVMInt1TypeInContext(ctx->context);
56 ctx->i8 = LLVMInt8TypeInContext(ctx->context);
57 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
58 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
59 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
60 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
61 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
62
63 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
64 "range", 5);
65
66 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
67 "invariant.load", 14);
68
69 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
70
71 args[0] = LLVMConstReal(ctx->f32, 2.5);
72 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
73
74 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
75 "amdgpu.uniform", 14);
76
77 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
78 }
79
/* Emit a call to the named intrinsic, declaring it on first use.
 *
 * The declaration's parameter types are derived from the actual arguments,
 * so all call sites of a given name must agree on types.
 * \p attrib_mask is a mask of AC_FUNC_ATTR_* flags.
 */
LLVMValueRef
ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
		       LLVMTypeRef return_type, LLVMValueRef *params,
		       unsigned param_count, unsigned attrib_mask)
{
	LLVMValueRef function, call;
	/* LLVM 4.0+ attaches attributes at the call site; older LLVM (and
	 * legacy non-amdgcn intrinsics) put them on the declaration. */
	bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
				  !(attrib_mask & AC_FUNC_ATTR_LEGACY);

	function = LLVMGetNamedFunction(ctx->module, name);
	if (!function) {
		LLVMTypeRef param_types[32], function_type;
		unsigned i;

		assert(param_count <= 32);

		/* Build the function type from the argument values. */
		for (i = 0; i < param_count; ++i) {
			assert(params[i]);
			param_types[i] = LLVMTypeOf(params[i]);
		}
		function_type =
			LLVMFunctionType(return_type, param_types, param_count, 0);
		function = LLVMAddFunction(ctx->module, name, function_type);

		LLVMSetFunctionCallConv(function, LLVMCCallConv);
		LLVMSetLinkage(function, LLVMExternalLinkage);

		if (!set_callsite_attrs)
			ac_add_func_attributes(ctx->context, function, attrib_mask);
	}

	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
	if (set_callsite_attrs)
		ac_add_func_attributes(ctx->context, call, attrib_mask);
	return call;
}
116
117 static LLVMValueRef bitcast_to_float(struct ac_llvm_context *ctx,
118 LLVMValueRef value)
119 {
120 LLVMTypeRef type = LLVMTypeOf(value);
121 LLVMTypeRef new_type;
122
123 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
124 new_type = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
125 else
126 new_type = ctx->f32;
127
128 return LLVMBuildBitCast(ctx->builder, value, new_type, "");
129 }
130
131 /**
132 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
133 * intrinsic names).
134 */
135 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
136 {
137 LLVMTypeRef elem_type = type;
138
139 assert(bufsize >= 8);
140
141 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
142 int ret = snprintf(buf, bufsize, "v%u",
143 LLVMGetVectorSize(type));
144 if (ret < 0) {
145 char *type_name = LLVMPrintTypeToString(type);
146 fprintf(stderr, "Error building type name for: %s\n",
147 type_name);
148 return;
149 }
150 elem_type = LLVMGetElementType(type);
151 buf += ret;
152 bufsize -= ret;
153 }
154 switch (LLVMGetTypeKind(elem_type)) {
155 default: break;
156 case LLVMIntegerTypeKind:
157 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
158 break;
159 case LLVMFloatTypeKind:
160 snprintf(buf, bufsize, "f32");
161 break;
162 case LLVMDoubleTypeKind:
163 snprintf(buf, bufsize, "f64");
164 break;
165 }
166 }
167
168 LLVMValueRef
169 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
170 LLVMValueRef *values,
171 unsigned value_count,
172 unsigned value_stride,
173 bool load)
174 {
175 LLVMBuilderRef builder = ctx->builder;
176 LLVMValueRef vec = NULL;
177 unsigned i;
178
179 if (value_count == 1) {
180 if (load)
181 return LLVMBuildLoad(builder, values[0], "");
182 return values[0];
183 } else if (!value_count)
184 unreachable("value_count is 0");
185
186 for (i = 0; i < value_count; i++) {
187 LLVMValueRef value = values[i * value_stride];
188 if (load)
189 value = LLVMBuildLoad(builder, value, "");
190
191 if (!i)
192 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
193 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
194 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
195 }
196 return vec;
197 }
198
199 LLVMValueRef
200 ac_build_gather_values(struct ac_llvm_context *ctx,
201 LLVMValueRef *values,
202 unsigned value_count)
203 {
204 return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
205 }
206
207 LLVMValueRef
208 ac_emit_fdiv(struct ac_llvm_context *ctx,
209 LLVMValueRef num,
210 LLVMValueRef den)
211 {
212 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
213
214 if (!LLVMIsConstant(ret))
215 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
216 return ret;
217 }
218
219 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
220 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
221 * already multiplied by two. id is the cube face number.
222 */
struct cube_selection_coords {
	LLVMValueRef stc[2]; /* stc[0] = sc, stc[1] = tc */
	LLVMValueRef ma;     /* major-axis value, already multiplied by 2 */
	LLVMValueRef id;     /* cube face number (0..5) as a float */
};
228
/* Compute the cube-map face selection (sc, tc, ma, face id) for the
 * direction vector \p in using the hardware cube instructions.
 */
static void
build_cube_intrinsic(struct ac_llvm_context *ctx,
		     LLVMValueRef in[3],
		     struct cube_selection_coords *out)
{
	LLVMBuilderRef builder = ctx->builder;

	if (HAVE_LLVM >= 0x0309) {
		/* LLVM 3.9+: one scalar intrinsic per result. */
		LLVMTypeRef f32 = ctx->f32;

		out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
	} else {
		/* Older LLVM: a single vector intrinsic. The 4th input lane
		 * is unused, hence undef; the result lanes are unpacked as
		 * <tc, sc, ma, id> below. */
		LLVMValueRef c[4] = {
			in[0],
			in[1],
			in[2],
			LLVMGetUndef(LLVMTypeOf(in[0]))
		};
		LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);

		LLVMValueRef tmp =
			ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
					       LLVMTypeOf(vec), &vec, 1,
					       AC_FUNC_ATTR_READNONE);

		out->stc[1] = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 0, 0), "");
		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 1, 0), "");
		out->ma = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 2, 0), "");
		out->id = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 3, 0), "");
	}
}
271
272 /**
273 * Build a manual selection sequence for cube face sc/tc coordinates and
274 * major axis vector (multiplied by 2 for consistency) for the given
275 * vec3 \p coords, for the face implied by \p selcoords.
276 *
277 * For the major axis, we always adjust the sign to be in the direction of
278 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
279 * the selcoords major axis.
280 */
static void build_cube_select(LLVMBuilderRef builder,
			      const struct cube_selection_coords *selcoords,
			      const LLVMValueRef *coords,
			      LLVMValueRef *out_st,
			      LLVMValueRef *out_ma)
{
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	/* sgn_ma = +1 or -1, matching the sign of the selected major axis. */
	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
		selcoords->ma, LLVMConstReal(f32, 0.0), "");
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	/* Decode the face id: 0,1 = ±X, 2,3 = ±Y, 4,5 = ±Z. */
	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc: Z on ±Z faces, X otherwise; sign per Table 8.27. */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc: Z on ±Y faces (signed by ma), Y otherwise (always -1). */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
		LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma: the axis coordinate itself, times ±2 so that a positive
	 * result means coords points along selcoords->ma (see header comment). */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
	sgn = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
}
327
/* Convert a cube-map direction vector in \p coords_arg (plus optional array
 * index in coords_arg[3]) into the face coordinates expected by the image
 * instructions: coords[0..1] = sc/tc in [0,1]-ish face space, coords[2] =
 * face id (merged with the array layer for cube arrays).
 *
 * If \p is_deriv, \p derivs_arg holds two vec3 derivative vectors on input
 * and receives four 2D derivatives (dsdx, dtdx, dsdy, dtdy) on output.
 * Both arrays are updated in place.
 */
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{

	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	/* invma = 1 / |ma|; used to project sc/tc (and derivatives) onto
	 * the selected face. */
	invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivatives the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
411
412
/* Interpolate the attribute (attr_number, llvm_chan) at the barycentric
 * coordinates (i, j). \p params is the interpolation parameter register
 * (PRIM_MASK). Returns the interpolated f32 value.
 */
LLVMValueRef
ac_build_fs_interp(struct ac_llvm_context *ctx,
		   LLVMValueRef llvm_chan,
		   LLVMValueRef attr_number,
		   LLVMValueRef params,
		   LLVMValueRef i,
		   LLVMValueRef j)
{
	LLVMValueRef args[5];
	LLVMValueRef p1;

	if (HAVE_LLVM < 0x0400) {
		/* Old intrinsic: (i, j) is passed packed as one v2i32. */
		LLVMValueRef ij[2];
		ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
		ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");

		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		args[3] = ac_build_gather_values(ctx, ij, 2);
		return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp",
					      ctx->f32, args, 4,
					      AC_FUNC_ATTR_READNONE);
	}

	/* New intrinsics: two-step interpolation, v_interp_p1 then p2. */
	args[0] = i;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1",
				    ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);

	args[0] = p1;
	args[1] = j;
	args[2] = llvm_chan;
	args[3] = attr_number;
	args[4] = params;

	return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2",
				      ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}
455
456 LLVMValueRef
457 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
458 LLVMValueRef parameter,
459 LLVMValueRef llvm_chan,
460 LLVMValueRef attr_number,
461 LLVMValueRef params)
462 {
463 LLVMValueRef args[4];
464 if (HAVE_LLVM < 0x0400) {
465 args[0] = llvm_chan;
466 args[1] = attr_number;
467 args[2] = params;
468
469 return ac_emit_llvm_intrinsic(ctx,
470 "llvm.SI.fs.constant",
471 ctx->f32, args, 3,
472 AC_FUNC_ATTR_READNONE);
473 }
474
475 args[0] = parameter;
476 args[1] = llvm_chan;
477 args[2] = attr_number;
478 args[3] = params;
479
480 return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov",
481 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
482 }
483
484 LLVMValueRef
485 ac_build_gep0(struct ac_llvm_context *ctx,
486 LLVMValueRef base_ptr,
487 LLVMValueRef index)
488 {
489 LLVMValueRef indices[2] = {
490 LLVMConstInt(ctx->i32, 0, 0),
491 index,
492 };
493 return LLVMBuildGEP(ctx->builder, base_ptr,
494 indices, 2, "");
495 }
496
497 void
498 ac_build_indexed_store(struct ac_llvm_context *ctx,
499 LLVMValueRef base_ptr, LLVMValueRef index,
500 LLVMValueRef value)
501 {
502 LLVMBuildStore(ctx->builder, value,
503 ac_build_gep0(ctx, base_ptr, index));
504 }
505
506 /**
507 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
508 * It's equivalent to doing a load from &base_ptr[index].
509 *
510 * \param base_ptr Where the array starts.
511 * \param index The element index into the array.
512 * \param uniform Whether the base_ptr and index can be assumed to be
513 * dynamically uniform
514 */
515 LLVMValueRef
516 ac_build_indexed_load(struct ac_llvm_context *ctx,
517 LLVMValueRef base_ptr, LLVMValueRef index,
518 bool uniform)
519 {
520 LLVMValueRef pointer;
521
522 pointer = ac_build_gep0(ctx, base_ptr, index);
523 if (uniform)
524 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
525 return LLVMBuildLoad(ctx->builder, pointer, "");
526 }
527
528 /**
529 * Do a load from &base_ptr[index], but also add a flag that it's loading
530 * a constant from a dynamically uniform index.
531 */
532 LLVMValueRef
533 ac_build_indexed_load_const(struct ac_llvm_context *ctx,
534 LLVMValueRef base_ptr, LLVMValueRef index)
535 {
536 LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
537 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
538 return result;
539 }
540
541 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
542 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
543 * or v4i32 (num_channels=3,4).
544 */
void
ac_build_tbuffer_store(struct ac_llvm_context *ctx,
		       LLVMValueRef rsrc,
		       LLVMValueRef vdata,
		       unsigned num_channels,
		       LLVMValueRef vaddr,
		       LLVMValueRef soffset,
		       unsigned inst_offset,
		       unsigned dfmt,
		       unsigned nfmt,
		       unsigned offen,
		       unsigned idxen,
		       unsigned glc,
		       unsigned slc,
		       unsigned tfe)
{
	/* Argument order is fixed by the llvm.SI.tbuffer.store.* intrinsic. */
	LLVMValueRef args[] = {
		rsrc,					/* buffer resource descriptor */
		vdata,					/* data to store */
		LLVMConstInt(ctx->i32, num_channels, 0),
		vaddr,					/* voffset / vindex */
		soffset,				/* scalar offset */
		LLVMConstInt(ctx->i32, inst_offset, 0),	/* immediate offset */
		LLVMConstInt(ctx->i32, dfmt, 0),	/* data format */
		LLVMConstInt(ctx->i32, nfmt, 0),	/* numeric format */
		LLVMConstInt(ctx->i32, offen, 0),
		LLVMConstInt(ctx->i32, idxen, 0),
		LLVMConstInt(ctx->i32, glc, 0),
		LLVMConstInt(ctx->i32, slc, 0),
		LLVMConstInt(ctx->i32, tfe, 0)
	};

	/* The instruction offset field has 12 bits */
	assert(offen || inst_offset < (1 << 12));

	/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
	unsigned func = CLAMP(num_channels, 1, 3) - 1;
	const char *types[] = {"i32", "v2i32", "v4i32"};
	char name[256];
	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);

	ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
			       args, ARRAY_SIZE(args),
			       AC_FUNC_ATTR_LEGACY);
}
590
591 void
592 ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
593 LLVMValueRef rsrc,
594 LLVMValueRef vdata,
595 unsigned num_channels,
596 LLVMValueRef vaddr,
597 LLVMValueRef soffset,
598 unsigned inst_offset)
599 {
600 static unsigned dfmt[] = {
601 V_008F0C_BUF_DATA_FORMAT_32,
602 V_008F0C_BUF_DATA_FORMAT_32_32,
603 V_008F0C_BUF_DATA_FORMAT_32_32_32,
604 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
605 };
606 assert(num_channels >= 1 && num_channels <= 4);
607
608 ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
609 inst_offset, dfmt[num_channels - 1],
610 V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
611 }
612
/* Load 1-4 dwords from a buffer resource.
 *
 * \p vindex, \p voffset, \p soffset may be NULL when unused.
 * Returns f32/v2f32/v4f32 on LLVM 3.9+, i32/v2i32/v4i32 on older LLVM
 * (num_channels 3 is rounded up to 4 by the CLAMP below).
 */
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
		     LLVMValueRef rsrc,
		     int num_channels,
		     LLVMValueRef vindex,
		     LLVMValueRef voffset,
		     LLVMValueRef soffset,
		     unsigned inst_offset,
		     unsigned glc,
		     unsigned slc)
{
	/* Index into the type/name tables: 1 -> 0, 2 -> 1, 3 and 4 -> 2. */
	unsigned func = CLAMP(num_channels, 1, 3) - 1;

	if (HAVE_LLVM >= 0x309) {
		/* llvm.amdgcn.buffer.load.*: (rsrc, vindex, offset, glc, slc).
		 * All byte offsets are folded into the single offset operand. */
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i1, glc, 0),
			LLVMConstInt(ctx->i1, slc, 0)
		};

		LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
				       ctx->v4f32};
		const char *type_names[] = {"f32", "v2f32", "v4f32"};
		char name[256];

		if (voffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
					       "");
		}

		if (soffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
					       "");
		}

		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
			 type_names[func]);

		return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
					      ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
	} else {
		/* Old llvm.SI.buffer.load.dword.*: separate vaddr (i32 or
		 * v2i32 when both vindex and voffset are used), soffset,
		 * immediate offset and addressing-mode flags. */
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
			voffset ? voffset : vindex,
			soffset,
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
			LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
			LLVMConstInt(ctx->i32, glc, 0),
			LLVMConstInt(ctx->i32, slc, 0),
			LLVMConstInt(ctx->i32, 0, 0), // TFE
		};

		LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
				       ctx->v4i32};
		const char *type_names[] = {"i32", "v2i32", "v4i32"};
		const char *arg_type = "i32";
		char name[256];

		if (voffset && vindex) {
			LLVMValueRef vaddr[] = {vindex, voffset};

			arg_type = "v2i32";
			args[1] = ac_build_gather_values(ctx, vaddr, 2);
		}

		snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
			 type_names[func], arg_type);

		return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
					      ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
	}
}
688
689 /**
690 * Set range metadata on an instruction. This can only be used on load and
691 * call instructions. If you know an instruction can only produce the values
692 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
693 * \p lo is the minimum value inclusive.
694 * \p hi is the maximum value exclusive.
695 */
696 static void set_range_metadata(struct ac_llvm_context *ctx,
697 LLVMValueRef value, unsigned lo, unsigned hi)
698 {
699 LLVMValueRef range_md, md_args[2];
700 LLVMTypeRef type = LLVMTypeOf(value);
701 LLVMContextRef context = LLVMGetTypeContext(type);
702
703 md_args[0] = LLVMConstInt(type, lo, false);
704 md_args[1] = LLVMConstInt(type, hi, false);
705 range_md = LLVMMDNodeInContext(context, md_args, 2);
706 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
707 }
708
/* Return the thread's lane id within the wavefront (0..63), with !range
 * metadata attached. */
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx)
{
	LLVMValueRef tid;

	if (HAVE_LLVM < 0x0308) {
		tid = ac_emit_llvm_intrinsic(ctx, "llvm.SI.tid",
					     ctx->i32,
					     NULL, 0, AC_FUNC_ATTR_READNONE);
	} else {
		/* mbcnt.hi(~0, mbcnt.lo(~0, 0)) counts the mask bits below
		 * the current lane, which with an all-ones mask yields the
		 * lane id. */
		LLVMValueRef tid_args[2];
		tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
		tid_args[1] = LLVMConstInt(ctx->i32, 0, false);
		/* tid_args[1] becomes the accumulator fed into mbcnt.hi. */
		tid_args[1] = ac_emit_llvm_intrinsic(ctx,
					"llvm.amdgcn.mbcnt.lo", ctx->i32,
					tid_args, 2, AC_FUNC_ATTR_READNONE);

		tid = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
					     ctx->i32, tid_args,
					     2, AC_FUNC_ATTR_READNONE);
	}
	set_range_metadata(ctx, tid, 0, 64);
	return tid;
}
733
734 /*
735 * SI implements derivatives using the local data store (LDS)
736 * All writes to the LDS happen in all executing threads at
737 * the same time. TID is the Thread ID for the current
738 * thread and is a value between 0 and 63, representing
739 * the thread's position in the wavefront.
740 *
741 * For the pixel shader threads are grouped into quads of four pixels.
742 * The TIDs of the pixels of a quad are:
743 *
744 * +------+------+
745 * |4n + 0|4n + 1|
746 * +------+------+
747 * |4n + 2|4n + 3|
748 * +------+------+
749 *
750 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
751 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
752 * the current pixel's column, and masking with 0xfffffffe yields the TID
753 * of the left pixel of the current pixel's row.
754 *
755 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
756 * adding 2 yields the TID of the pixel below the top pixel.
757 */
/* Compute a screen-space derivative of \p val (see the quad layout comment
 * above): result = value_at(tl_tid + idx) - value_at(tl_tid), where tl_tid
 * is the current thread id masked by \p mask.
 *
 * With ds_bpermute the neighbor values are fetched via lane swizzles;
 * otherwise \p lds must be an i32 LDS array indexed by thread id.
 * \p val is an i32 bit-pattern; the result is returned as f32.
 */
LLVMValueRef
ac_emit_ddxy(struct ac_llvm_context *ctx,
	     bool has_ds_bpermute,
	     uint32_t mask,
	     int idx,
	     LLVMValueRef lds,
	     LLVMValueRef val)
{
	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
	LLVMValueRef result;

	thread_id = ac_get_thread_id(ctx);

	/* TID of the "top-left" reference pixel of this quad. */
	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
			      LLVMConstInt(ctx->i32, mask, false), "");

	/* TID of the neighbor in the derivative direction. */
	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
				LLVMConstInt(ctx->i32, idx, false), "");

	if (has_ds_bpermute) {
		/* ds_bpermute addresses lanes in bytes, hence the *4. */
		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		args[1] = val;
		tl = ac_emit_llvm_intrinsic(ctx,
					    "llvm.amdgcn.ds.bpermute", ctx->i32,
					    args, 2, AC_FUNC_ATTR_READNONE);

		args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		trbl = ac_emit_llvm_intrinsic(ctx,
					      "llvm.amdgcn.ds.bpermute", ctx->i32,
					      args, 2, AC_FUNC_ATTR_READNONE);
	} else {
		/* Fallback: every thread writes its value to LDS, then reads
		 * the two values it needs back. */
		LLVMValueRef store_ptr, load_ptr0, load_ptr1;

		store_ptr = ac_build_gep0(ctx, lds, thread_id);
		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);

		LLVMBuildStore(ctx->builder, val, store_ptr);
		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
	}

	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
	trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
	result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
	return result;
}
807
808 void
809 ac_emit_sendmsg(struct ac_llvm_context *ctx,
810 uint32_t msg,
811 LLVMValueRef wave_id)
812 {
813 LLVMValueRef args[2];
814 const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg";
815 args[0] = LLVMConstInt(ctx->i32, msg, false);
816 args[1] = wave_id;
817 ac_emit_llvm_intrinsic(ctx, intr_name, ctx->voidt,
818 args, 2, 0);
819 }
820
/* Signed "find MSB": index (from LSB) of the first bit differing from the
 * sign bit, or -1 for inputs 0 and -1 (TGSI/NIR IMSB semantics). */
LLVMValueRef
ac_emit_imsb(struct ac_llvm_context *ctx,
	     LLVMValueRef arg,
	     LLVMTypeRef dst_type)
{
	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" :
						       "llvm.amdgcn.sffbh.i32";
	LLVMValueRef msb = ac_emit_llvm_intrinsic(ctx, intr_name,
						  dst_type, &arg, 1,
						  AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but NIR/TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* 0 and -1 have no differing bit; map both to -1. */
	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, LLVMConstInt(ctx->i32, 0, 0), ""),
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, all_ones, ""), "");

	return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
}
846
/* Unsigned "find MSB": index (from LSB) of the highest set bit, or -1
 * for input 0 (TGSI/NIR UMSB semantics). */
LLVMValueRef
ac_emit_umsb(struct ac_llvm_context *ctx,
	     LLVMValueRef arg,
	     LLVMTypeRef dst_type)
{
	LLVMValueRef args[2] = {
		arg,
		/* is_zero_undef = true; zero is handled by the select below. */
		LLVMConstInt(ctx->i1, 1, 0),
	};
	LLVMValueRef msb = ac_emit_llvm_intrinsic(ctx, "llvm.ctlz.i32",
						  dst_type, args, ARRAY_SIZE(args),
						  AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but TGSI/NIR wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* check for zero */
	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
					     LLVMConstInt(ctx->i32, 0, 0), ""),
			       LLVMConstInt(ctx->i32, -1, true), msb, "");
}
871
/* Clamp the f32 \p value to [0, 1]. */
LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	if (HAVE_LLVM >= 0x0500) {
		/* LLVM 5 dropped the AMDGPU clamp intrinsic; use
		 * minnum(maxnum(value, 0), 1) instead. */
		LLVMValueRef max[2] = {
			value,
			LLVMConstReal(ctx->f32, 0),
		};
		LLVMValueRef min[2] = {
			LLVMConstReal(ctx->f32, 1),
		};

		min[1] = ac_emit_llvm_intrinsic(ctx, "llvm.maxnum.f32",
						ctx->f32, max, 2,
						AC_FUNC_ATTR_READNONE);
		return ac_emit_llvm_intrinsic(ctx, "llvm.minnum.f32",
					      ctx->f32, min, 2,
					      AC_FUNC_ATTR_READNONE);
	}

	/* Older LLVM: dedicated clamp(value, lo, hi) intrinsic. */
	const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
						 "llvm.AMDIL.clamp.";
	LLVMValueRef args[3] = {
		value,
		LLVMConstReal(ctx->f32, 0),
		LLVMConstReal(ctx->f32, 1),
	};

	return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
				      AC_FUNC_ATTR_READNONE |
				      AC_FUNC_ATTR_LEGACY);
}
903
/* Emit an export instruction from the fields of \p a. */
void ac_emit_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
{
	LLVMValueRef args[9];

	if (HAVE_LLVM >= 0x0500) {
		/* New llvm.amdgcn.exp.* intrinsics. */
		args[0] = LLVMConstInt(ctx->i32, a->target, 0);
		args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);

		if (a->compr) {
			/* Compressed export: two v2i16 values holding the
			 * four 16-bit channels. */
			LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
			LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);

			args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
						   v2i16, "");
			args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
						   v2i16, "");
			args[4] = LLVMConstInt(ctx->i1, a->done, 0);
			args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);

			ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
					       ctx->voidt, args, 6, 0);
		} else {
			args[2] = a->out[0];
			args[3] = a->out[1];
			args[4] = a->out[2];
			args[5] = a->out[3];
			args[6] = LLVMConstInt(ctx->i1, a->done, 0);
			args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);

			ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.exp.f32",
					       ctx->voidt, args, 8, 0);
		}
		return;
	}

	/* Legacy llvm.SI.export; note the different argument order. */
	args[0] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
	args[1] = LLVMConstInt(ctx->i32, a->valid_mask, 0);
	args[2] = LLVMConstInt(ctx->i32, a->done, 0);
	args[3] = LLVMConstInt(ctx->i32, a->target, 0);
	args[4] = LLVMConstInt(ctx->i32, a->compr, 0);
	memcpy(args + 5, a->out, sizeof(a->out[0]) * 4);

	ac_emit_llvm_intrinsic(ctx, "llvm.SI.export", ctx->voidt, args, 9,
			       AC_FUNC_ATTR_LEGACY);
}
949
950 LLVMValueRef ac_emit_image_opcode(struct ac_llvm_context *ctx,
951 struct ac_image_args *a)
952 {
953 LLVMTypeRef dst_type;
954 LLVMValueRef args[11];
955 unsigned num_args = 0;
956 const char *name;
957 char intr_name[128], type[64];
958
959 if (HAVE_LLVM >= 0x0400) {
960 bool sample = a->opcode == ac_image_sample ||
961 a->opcode == ac_image_gather4 ||
962 a->opcode == ac_image_get_lod;
963
964 if (sample)
965 args[num_args++] = bitcast_to_float(ctx, a->addr);
966 else
967 args[num_args++] = a->addr;
968
969 args[num_args++] = a->resource;
970 if (sample)
971 args[num_args++] = a->sampler;
972 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
973 if (sample)
974 args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
975 args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
976 args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
977 args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
978 args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);
979
980 switch (a->opcode) {
981 case ac_image_sample:
982 name = "llvm.amdgcn.image.sample";
983 break;
984 case ac_image_gather4:
985 name = "llvm.amdgcn.image.gather4";
986 break;
987 case ac_image_load:
988 name = "llvm.amdgcn.image.load";
989 break;
990 case ac_image_load_mip:
991 name = "llvm.amdgcn.image.load.mip";
992 break;
993 case ac_image_get_lod:
994 name = "llvm.amdgcn.image.getlod";
995 break;
996 case ac_image_get_resinfo:
997 name = "llvm.amdgcn.image.getresinfo";
998 break;
999 }
1000
1001 ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
1002 sizeof(type));
1003
1004 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
1005 name,
1006 a->compare ? ".c" : "",
1007 a->bias ? ".b" :
1008 a->lod ? ".l" :
1009 a->deriv ? ".d" :
1010 a->level_zero ? ".lz" : "",
1011 a->offset ? ".o" : "",
1012 type);
1013
1014 LLVMValueRef result =
1015 ac_emit_llvm_intrinsic(ctx, intr_name,
1016 ctx->v4f32, args, num_args,
1017 AC_FUNC_ATTR_READNONE);
1018 if (!sample) {
1019 result = LLVMBuildBitCast(ctx->builder, result,
1020 ctx->v4i32, "");
1021 }
1022 return result;
1023 }
1024
1025 args[num_args++] = a->addr;
1026 args[num_args++] = a->resource;
1027
1028 if (a->opcode == ac_image_load ||
1029 a->opcode == ac_image_load_mip ||
1030 a->opcode == ac_image_get_resinfo) {
1031 dst_type = ctx->v4i32;
1032 } else {
1033 dst_type = ctx->v4f32;
1034 args[num_args++] = a->sampler;
1035 }
1036
1037 args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
1038 args[num_args++] = LLVMConstInt(ctx->i32, a->unorm, 0);
1039 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1040 args[num_args++] = LLVMConstInt(ctx->i32, a->da, 0);
1041 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1042 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1043 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1044 args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1045
1046 switch (a->opcode) {
1047 case ac_image_sample:
1048 name = "llvm.SI.image.sample";
1049 break;
1050 case ac_image_gather4:
1051 name = "llvm.SI.gather4";
1052 break;
1053 case ac_image_load:
1054 name = "llvm.SI.image.load";
1055 break;
1056 case ac_image_load_mip:
1057 name = "llvm.SI.image.load.mip";
1058 break;
1059 case ac_image_get_lod:
1060 name = "llvm.SI.getlod";
1061 break;
1062 case ac_image_get_resinfo:
1063 name = "llvm.SI.getresinfo";
1064 break;
1065 }
1066
1067 ac_build_type_name_for_intr(LLVMTypeOf(a->addr), type, sizeof(type));
1068 snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.%s",
1069 name,
1070 a->compare ? ".c" : "",
1071 a->bias ? ".b" :
1072 a->lod ? ".l" :
1073 a->deriv ? ".d" :
1074 a->level_zero ? ".lz" : "",
1075 a->offset ? ".o" : "",
1076 type);
1077
1078 return ac_emit_llvm_intrinsic(ctx, intr_name,
1079 dst_type, args, num_args,
1080 AC_FUNC_ATTR_READNONE |
1081 AC_FUNC_ATTR_LEGACY);
1082 }