radeonsi: set readnone on reads from read-only memory
[mesa.git] / src/amd/common/ac_llvm_build.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_build.h"

#include <llvm-c/Core.h>

#include "c11/threads.h"

#include <assert.h>
#include <stdio.h>

#include "ac_llvm_util.h"

#include "util/bitscan.h"
#include "util/macros.h"
#include "sid.h"

/* Initialize module-independent parts of the context.
 *
 * The caller is responsible for initializing ctx::module and ctx::builder.
 */
void
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
{
	LLVMValueRef args[1];

	ctx->context = context;
	ctx->module = NULL;
	ctx->builder = NULL;

	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->context);
	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);

	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
						      "range", 5);

	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							       "invariant.load", 14);

	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);

	args[0] = LLVMConstReal(ctx->f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);

	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							"amdgpu.uniform", 14);

	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}
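
/* A minimal usage sketch (editor's illustration, not part of this file;
 * the module name is hypothetical):
 *
 *	struct ac_llvm_context ac;
 *	LLVMContextRef llvm_ctx = LLVMContextCreate();
 *
 *	ac_llvm_context_init(&ac, llvm_ctx);
 *	ac.module = LLVMModuleCreateWithNameInContext("shader", llvm_ctx);
 *	ac.builder = LLVMCreateBuilderInContext(llvm_ctx);
 *
 * Per the comment above, module and builder must be filled in by the caller
 * before any of the emit helpers below are used.
 */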

LLVMValueRef
ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
		       LLVMTypeRef return_type, LLVMValueRef *params,
		       unsigned param_count, unsigned attrib_mask)
{
	LLVMValueRef function, call;
	bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
				  !(attrib_mask & AC_FUNC_ATTR_LEGACY);

	function = LLVMGetNamedFunction(ctx->module, name);
	if (!function) {
		LLVMTypeRef param_types[32], function_type;
		unsigned i;

		assert(param_count <= 32);

		for (i = 0; i < param_count; ++i) {
			assert(params[i]);
			param_types[i] = LLVMTypeOf(params[i]);
		}
		function_type =
			LLVMFunctionType(return_type, param_types, param_count, 0);
		function = LLVMAddFunction(ctx->module, name, function_type);

		LLVMSetFunctionCallConv(function, LLVMCCallConv);
		LLVMSetLinkage(function, LLVMExternalLinkage);

		if (!set_callsite_attrs)
			ac_add_func_attributes(ctx->context, function, attrib_mask);
	}

	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
	if (set_callsite_attrs)
		ac_add_func_attributes(ctx->context, call, attrib_mask);
	return call;
}
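
/* Illustrative call (mirrors the llvm.fabs.f32 use in ac_prepare_cube_coords
 * below):
 *
 *	LLVMValueRef v = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
 *						ctx->f32, &x, 1,
 *						AC_FUNC_ATTR_READNONE);
 *
 * The declaration is added to ctx->module on first use; on LLVM >= 4.0 the
 * attributes are applied to the call site instead of the declaration.
 */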

static LLVMValueRef bitcast_to_float(struct ac_llvm_context *ctx,
				     LLVMValueRef value)
{
	LLVMTypeRef type = LLVMTypeOf(value);
	LLVMTypeRef new_type;

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
		new_type = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
	else
		new_type = ctx->f32;

	return LLVMBuildBitCast(ctx->builder, value, new_type, "");
}

/**
 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
 * intrinsic names).
 */
void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
{
	LLVMTypeRef elem_type = type;

	assert(bufsize >= 8);

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
		int ret = snprintf(buf, bufsize, "v%u",
				   LLVMGetVectorSize(type));
		if (ret < 0) {
			char *type_name = LLVMPrintTypeToString(type);
			fprintf(stderr, "Error building type name for: %s\n",
				type_name);
			return;
		}
		elem_type = LLVMGetElementType(type);
		buf += ret;
		bufsize -= ret;
	}
	switch (LLVMGetTypeKind(elem_type)) {
	default: break;
	case LLVMIntegerTypeKind:
		snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
		break;
	case LLVMFloatTypeKind:
		snprintf(buf, bufsize, "f32");
		break;
	case LLVMDoubleTypeKind:
		snprintf(buf, bufsize, "f64");
		break;
	}
}
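
/* Example outputs (by inspection of the code above): i32 -> "i32",
 * float -> "f32", <4 x i32> -> "v4i32", <2 x float> -> "v2f32". These
 * suffixes select the overload of name-mangled intrinsics such as
 * llvm.amdgcn.buffer.load.* further down.
 */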

LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
				LLVMValueRef *values,
				unsigned value_count,
				unsigned value_stride,
				bool load)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef vec = NULL;
	unsigned i;

	if (value_count == 1) {
		if (load)
			return LLVMBuildLoad(builder, values[0], "");
		return values[0];
	} else if (!value_count)
		unreachable("value_count is 0");

	for (i = 0; i < value_count; i++) {
		LLVMValueRef value = values[i * value_stride];
		if (load)
			value = LLVMBuildLoad(builder, value, "");

		if (!i)
			vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
		vec = LLVMBuildInsertElement(builder, vec, value, index, "");
	}
	return vec;
}

LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
		       LLVMValueRef *values,
		       unsigned value_count)
{
	return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
}

LLVMValueRef
ac_emit_fdiv(struct ac_llvm_context *ctx,
	     LLVMValueRef num,
	     LLVMValueRef den)
{
	LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");

	if (!LLVMIsConstant(ret))
		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
	return ret;
}
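
/* Editor's note: the !fpmath metadata attached above allows the result to be
 * off by up to 2.5 ULP, which is the accuracy the GL spec requires for
 * single-precision division. With it, the backend is free to lower the fdiv
 * to v_rcp_f32 followed by a multiply instead of a precise division sequence
 * (the exact lowering depends on the LLVM version).
 */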

/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 * already multiplied by two. id is the cube face number.
 */
struct cube_selection_coords {
	LLVMValueRef stc[2];
	LLVMValueRef ma;
	LLVMValueRef id;
};

static void
build_cube_intrinsic(struct ac_llvm_context *ctx,
		     LLVMValueRef in[3],
		     struct cube_selection_coords *out)
{
	LLVMBuilderRef builder = ctx->builder;

	if (HAVE_LLVM >= 0x0309) {
		LLVMTypeRef f32 = ctx->f32;

		out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
					f32, in, 3, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef c[4] = {
			in[0],
			in[1],
			in[2],
			LLVMGetUndef(LLVMTypeOf(in[0]))
		};
		LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);

		LLVMValueRef tmp =
			ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
					       LLVMTypeOf(vec), &vec, 1,
					       AC_FUNC_ATTR_READNONE);

		out->stc[1] = LLVMBuildExtractElement(builder, tmp,
					LLVMConstInt(ctx->i32, 0, 0), "");
		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
					LLVMConstInt(ctx->i32, 1, 0), "");
		out->ma = LLVMBuildExtractElement(builder, tmp,
					LLVMConstInt(ctx->i32, 2, 0), "");
		out->id = LLVMBuildExtractElement(builder, tmp,
					LLVMConstInt(ctx->i32, 3, 0), "");
	}
}

/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p coords, for the face implied by \p selcoords.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 * the selcoords major axis.
 */
static void build_cube_select(LLVMBuilderRef builder,
			      const struct cube_selection_coords *selcoords,
			      const LLVMValueRef *coords,
			      LLVMValueRef *out_st,
			      LLVMValueRef *out_ma)
{
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
		selcoords->ma, LLVMConstReal(f32, 0.0), "");
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
		LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
	sgn = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
}

void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{
	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivates the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
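			/* Term-by-term mapping to the code below (editor's
			 * note, illustrative): 1/z * dx/dh corresponds to
			 * deriv_st[i] * invma, and x/z * 1/z * dz/dh to
			 * coords[i] * (deriv_ma * invma), since coords[i]
			 * already holds stc[i] * invma at this point.
			 */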
			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* For cube arrays, coord.z = coord.w (the array index) * 8 + face. */
		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
	}

	memcpy(coords_arg, coords, sizeof(coords));
}


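/* Two-pass attribute interpolation (editor's summary): interp.p1 computes
 * p10 * i + p0 and interp.p2 then computes p20 * j + p1, matching the
 * hardware v_interp_p1_f32/v_interp_p2_f32 pair.
 */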
LLVMValueRef
ac_build_fs_interp(struct ac_llvm_context *ctx,
		   LLVMValueRef llvm_chan,
		   LLVMValueRef attr_number,
		   LLVMValueRef params,
		   LLVMValueRef i,
		   LLVMValueRef j)
{
	LLVMValueRef args[5];
	LLVMValueRef p1;

	if (HAVE_LLVM < 0x0400) {
		LLVMValueRef ij[2];
		ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
		ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");

		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		args[3] = ac_build_gather_values(ctx, ij, 2);
		return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp",
					      ctx->f32, args, 4,
					      AC_FUNC_ATTR_READNONE);
	}

	args[0] = i;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1",
				    ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);

	args[0] = p1;
	args[1] = j;
	args[2] = llvm_chan;
	args[3] = attr_number;
	args[4] = params;

	return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2",
				      ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}

LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
		       LLVMValueRef parameter,
		       LLVMValueRef llvm_chan,
		       LLVMValueRef attr_number,
		       LLVMValueRef params)
{
	LLVMValueRef args[4];
	if (HAVE_LLVM < 0x0400) {
		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;

		return ac_emit_llvm_intrinsic(ctx,
					      "llvm.SI.fs.constant",
					      ctx->f32, args, 3,
					      AC_FUNC_ATTR_READNONE);
	}

	args[0] = parameter;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov",
				      ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
}

LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
	      LLVMValueRef base_ptr,
	      LLVMValueRef index)
{
	LLVMValueRef indices[2] = {
		LLVMConstInt(ctx->i32, 0, 0),
		index,
	};
	return LLVMBuildGEP(ctx->builder, base_ptr,
			    indices, 2, "");
}

void
ac_build_indexed_store(struct ac_llvm_context *ctx,
		       LLVMValueRef base_ptr, LLVMValueRef index,
		       LLVMValueRef value)
{
	LLVMBuildStore(ctx->builder, value,
		       ac_build_gep0(ctx, base_ptr, index));
}

/**
 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
 * It's equivalent to doing a load from &base_ptr[index].
 *
 * \param base_ptr  Where the array starts.
 * \param index     The element index into the array.
 * \param uniform   Whether base_ptr and index can be assumed to be
 *                  dynamically uniform.
 */
LLVMValueRef
ac_build_indexed_load(struct ac_llvm_context *ctx,
		      LLVMValueRef base_ptr, LLVMValueRef index,
		      bool uniform)
{
	LLVMValueRef pointer;

	pointer = ac_build_gep0(ctx, base_ptr, index);
	if (uniform)
		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
	return LLVMBuildLoad(ctx->builder, pointer, "");
}

/**
 * Do a load from &base_ptr[index], but also add a flag that it's loading
 * a constant from a dynamically uniform index.
 */
LLVMValueRef
ac_build_indexed_load_const(struct ac_llvm_context *ctx,
			    LLVMValueRef base_ptr, LLVMValueRef index)
{
	LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
	return result;
}
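
/* Typical use (editor's illustration; desc_array and idx are hypothetical):
 *
 *	LLVMValueRef rsrc = ac_build_indexed_load_const(ctx, desc_array, idx);
 *
 * The invariant.load metadata tells LLVM no store in the shader can change
 * the result, and the amdgpu.uniform metadata asks for the pointer to be
 * treated as uniform so a scalar (SMEM) load can be used.
 */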

/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
 * or v4i32 (num_channels=3,4).
 */
void
ac_build_tbuffer_store(struct ac_llvm_context *ctx,
		       LLVMValueRef rsrc,
		       LLVMValueRef vdata,
		       unsigned num_channels,
		       LLVMValueRef vaddr,
		       LLVMValueRef soffset,
		       unsigned inst_offset,
		       unsigned dfmt,
		       unsigned nfmt,
		       unsigned offen,
		       unsigned idxen,
		       unsigned glc,
		       unsigned slc,
		       unsigned tfe)
{
	LLVMValueRef args[] = {
		rsrc,
		vdata,
		LLVMConstInt(ctx->i32, num_channels, 0),
		vaddr,
		soffset,
		LLVMConstInt(ctx->i32, inst_offset, 0),
		LLVMConstInt(ctx->i32, dfmt, 0),
		LLVMConstInt(ctx->i32, nfmt, 0),
		LLVMConstInt(ctx->i32, offen, 0),
		LLVMConstInt(ctx->i32, idxen, 0),
		LLVMConstInt(ctx->i32, glc, 0),
		LLVMConstInt(ctx->i32, slc, 0),
		LLVMConstInt(ctx->i32, tfe, 0)
	};

	/* The instruction offset field has 12 bits */
	assert(offen || inst_offset < (1 << 12));

	/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
	unsigned func = CLAMP(num_channels, 1, 3) - 1;
	const char *types[] = {"i32", "v2i32", "v4i32"};
	char name[256];
	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);

	ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
			       args, ARRAY_SIZE(args),
			       AC_FUNC_ATTR_LEGACY);
}

void
ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
			      LLVMValueRef rsrc,
			      LLVMValueRef vdata,
			      unsigned num_channels,
			      LLVMValueRef vaddr,
			      LLVMValueRef soffset,
			      unsigned inst_offset)
{
	static unsigned dfmt[] = {
		V_008F0C_BUF_DATA_FORMAT_32,
		V_008F0C_BUF_DATA_FORMAT_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32_32
	};
	assert(num_channels >= 1 && num_channels <= 4);

	ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
			       inst_offset, dfmt[num_channels - 1],
			       V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
}

LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
		     LLVMValueRef rsrc,
		     int num_channels,
		     LLVMValueRef vindex,
		     LLVMValueRef voffset,
		     LLVMValueRef soffset,
		     unsigned inst_offset,
		     unsigned glc,
		     unsigned slc,
		     bool readonly_memory)
{
	unsigned func = CLAMP(num_channels, 1, 3) - 1;

	if (HAVE_LLVM >= 0x0309) {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i1, glc, 0),
			LLVMConstInt(ctx->i1, slc, 0)
		};

		LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
				       ctx->v4f32};
		const char *type_names[] = {"f32", "v2f32", "v4f32"};
		char name[256];

		if (voffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
					       "");
		}

		if (soffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
					       "");
		}
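
		/* Editor's note: readnone (LLVM IntrNoMem) tells LLVM the
		 * load has no visible memory dependency, so it may be CSE'd
		 * and hoisted across stores; readonly (IntrReadMem) only
		 * promises the intrinsic itself doesn't write, so stores
		 * still act as barriers. readnone is therefore only safe
		 * when the buffer is known to be read-only for the shader's
		 * lifetime, which is what readonly_memory asserts.
		 */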

		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
			 type_names[func]);

		return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
					      ARRAY_SIZE(args),
					      /* READNONE means writes can't
					       * affect it, while READONLY means
					       * that writes can affect it. */
					      readonly_memory ?
						      AC_FUNC_ATTR_READNONE :
						      AC_FUNC_ATTR_READONLY);
	} else {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
			voffset ? voffset : vindex,
			soffset,
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), /* offen */
			LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), /* idxen */
			LLVMConstInt(ctx->i32, glc, 0),
			LLVMConstInt(ctx->i32, slc, 0),
			LLVMConstInt(ctx->i32, 0, 0), /* tfe */
		};

		LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
				       ctx->v4i32};
		const char *type_names[] = {"i32", "v2i32", "v4i32"};
		const char *arg_type = "i32";
		char name[256];

		if (voffset && vindex) {
			LLVMValueRef vaddr[] = {vindex, voffset};

			arg_type = "v2i32";
			args[1] = ac_build_gather_values(ctx, vaddr, 2);
		}

		snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
			 type_names[func], arg_type);

		return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
					      ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
	}
}

/**
 * Set range metadata on an instruction. This can only be used on load and
 * call instructions. If you know an instruction can only produce the values
 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
 * \p lo is the minimum value inclusive.
 * \p hi is the maximum value exclusive.
 */
static void set_range_metadata(struct ac_llvm_context *ctx,
			       LLVMValueRef value, unsigned lo, unsigned hi)
{
	LLVMValueRef range_md, md_args[2];
	LLVMTypeRef type = LLVMTypeOf(value);
	LLVMContextRef context = LLVMGetTypeContext(type);

	md_args[0] = LLVMConstInt(type, lo, false);
	md_args[1] = LLVMConstInt(type, hi, false);
	range_md = LLVMMDNodeInContext(context, md_args, 2);
	LLVMSetMetadata(value, ctx->range_md_kind, range_md);
}

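/* Returns the lane id within the wavefront (0..63). mbcnt.lo counts the set
 * mask bits below the current lane in the low 32 lanes and mbcnt.hi adds the
 * high 32, so with an all-ones mask the result is the lane index (editor's
 * summary).
 */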
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx)
{
	LLVMValueRef tid;

	if (HAVE_LLVM < 0x0308) {
		tid = ac_emit_llvm_intrinsic(ctx, "llvm.SI.tid",
					     ctx->i32,
					     NULL, 0, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef tid_args[2];
		tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
		tid_args[1] = LLVMConstInt(ctx->i32, 0, false);
		tid_args[1] = ac_emit_llvm_intrinsic(ctx,
						     "llvm.amdgcn.mbcnt.lo", ctx->i32,
						     tid_args, 2, AC_FUNC_ATTR_READNONE);

		tid = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
					     ctx->i32, tid_args,
					     2, AC_FUNC_ATTR_READNONE);
	}
	set_range_metadata(ctx, tid, 0, 64);
	return tid;
}

/*
 * SI implements derivatives using the local data store (LDS).
 * All writes to the LDS happen in all executing threads at
 * the same time. TID is the Thread ID for the current
 * thread and is a value between 0 and 63, representing
 * the thread's position in the wavefront.
 *
 * For the pixel shader, threads are grouped into quads of four pixels.
 * The TIDs of the pixels of a quad are:
 *
 *  +------+------+
 *  |4n + 0|4n + 1|
 *  +------+------+
 *  |4n + 2|4n + 3|
 *  +------+------+
 *
 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
 * the current pixel's column, and masking with 0xfffffffe yields the TID
 * of the left pixel of the current pixel's row.
 *
 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
 * adding 2 yields the TID of the pixel below the top pixel.
 */
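
/* Worked example (editor's illustration): for tid = 7, the bottom-right
 * pixel of quad n = 1:
 *
 *	7 & 0xfffffffc = 4	top-left of the quad
 *	7 & 0xfffffffd = 5	top pixel of the same column
 *	7 & 0xfffffffe = 6	left pixel of the same row
 *
 * so (mask, idx) pairs like (0xfffffffe, 1) and (0xfffffffd, 2) select the
 * neighbours subtracted in ac_emit_ddxy below.
 */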
LLVMValueRef
ac_emit_ddxy(struct ac_llvm_context *ctx,
	     bool has_ds_bpermute,
	     uint32_t mask,
	     int idx,
	     LLVMValueRef lds,
	     LLVMValueRef val)
{
	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
	LLVMValueRef result;

	thread_id = ac_get_thread_id(ctx);

	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
			      LLVMConstInt(ctx->i32, mask, false), "");

	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
				LLVMConstInt(ctx->i32, idx, false), "");

	if (has_ds_bpermute) {
		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		args[1] = val;
		tl = ac_emit_llvm_intrinsic(ctx,
					    "llvm.amdgcn.ds.bpermute", ctx->i32,
					    args, 2, AC_FUNC_ATTR_READNONE);

		args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		trbl = ac_emit_llvm_intrinsic(ctx,
					      "llvm.amdgcn.ds.bpermute", ctx->i32,
					      args, 2, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef store_ptr, load_ptr0, load_ptr1;

		store_ptr = ac_build_gep0(ctx, lds, thread_id);
		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);

		LLVMBuildStore(ctx->builder, val, store_ptr);
		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
	}

	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
	trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
	result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
	return result;
}

void
ac_emit_sendmsg(struct ac_llvm_context *ctx,
		uint32_t msg,
		LLVMValueRef wave_id)
{
	LLVMValueRef args[2];
	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg";
	args[0] = LLVMConstInt(ctx->i32, msg, false);
	args[1] = wave_id;
	ac_emit_llvm_intrinsic(ctx, intr_name, ctx->voidt,
			       args, 2, 0);
}

LLVMValueRef
ac_emit_imsb(struct ac_llvm_context *ctx,
	     LLVMValueRef arg,
	     LLVMTypeRef dst_type)
{
	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" :
						       "llvm.amdgcn.sffbh.i32";
	LLVMValueRef msb = ac_emit_llvm_intrinsic(ctx, intr_name,
						  dst_type, &arg, 1,
						  AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but NIR/TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* The intrinsic returns -1 for arg == 0 and arg == -1 (no bit differs
	 * from the sign bit), which the subtraction above would turn into 32,
	 * so select -1 explicitly for those inputs. */
	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, LLVMConstInt(ctx->i32, 0, 0), ""),
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, all_ones, ""), "");

	return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
}

LLVMValueRef
ac_emit_umsb(struct ac_llvm_context *ctx,
	     LLVMValueRef arg,
	     LLVMTypeRef dst_type)
{
	LLVMValueRef args[2] = {
		arg,
		LLVMConstInt(ctx->i1, 1, 0), /* is_zero_undef; zero is handled below */
	};
	LLVMValueRef msb = ac_emit_llvm_intrinsic(ctx, "llvm.ctlz.i32",
						  dst_type, args, ARRAY_SIZE(args),
						  AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but TGSI/NIR wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* check for zero */
	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
					     LLVMConstInt(ctx->i32, 0, 0), ""),
			       LLVMConstInt(ctx->i32, -1, true), msb, "");
}

LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	if (HAVE_LLVM >= 0x0500) {
		LLVMValueRef max[2] = {
			value,
			LLVMConstReal(ctx->f32, 0),
		};
		LLVMValueRef min[2] = {
			LLVMConstReal(ctx->f32, 1),
		};

		min[1] = ac_emit_llvm_intrinsic(ctx, "llvm.maxnum.f32",
						ctx->f32, max, 2,
						AC_FUNC_ATTR_READNONE);
		return ac_emit_llvm_intrinsic(ctx, "llvm.minnum.f32",
					      ctx->f32, min, 2,
					      AC_FUNC_ATTR_READNONE);
	}

	const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
						 "llvm.AMDIL.clamp.";
	LLVMValueRef args[3] = {
		value,
		LLVMConstReal(ctx->f32, 0),
		LLVMConstReal(ctx->f32, 1),
	};

	return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
				      AC_FUNC_ATTR_READNONE |
				      AC_FUNC_ATTR_LEGACY);
}

void ac_emit_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
{
	LLVMValueRef args[9];

	if (HAVE_LLVM >= 0x0500) {
		args[0] = LLVMConstInt(ctx->i32, a->target, 0);
		args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);

		if (a->compr) {
			LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
			LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);

			args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
						   v2i16, "");
			args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
						   v2i16, "");
			args[4] = LLVMConstInt(ctx->i1, a->done, 0);
			args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);

			ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
					       ctx->voidt, args, 6, 0);
		} else {
			args[2] = a->out[0];
			args[3] = a->out[1];
			args[4] = a->out[2];
			args[5] = a->out[3];
			args[6] = LLVMConstInt(ctx->i1, a->done, 0);
			args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);

			ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.exp.f32",
					       ctx->voidt, args, 8, 0);
		}
		return;
	}

	args[0] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
	args[1] = LLVMConstInt(ctx->i32, a->valid_mask, 0);
	args[2] = LLVMConstInt(ctx->i32, a->done, 0);
	args[3] = LLVMConstInt(ctx->i32, a->target, 0);
	args[4] = LLVMConstInt(ctx->i32, a->compr, 0);
	memcpy(args + 5, a->out, sizeof(a->out[0]) * 4);

	ac_emit_llvm_intrinsic(ctx, "llvm.SI.export", ctx->voidt, args, 9,
			       AC_FUNC_ATTR_LEGACY);
}

LLVMValueRef ac_emit_image_opcode(struct ac_llvm_context *ctx,
				  struct ac_image_args *a)
{
	LLVMTypeRef dst_type;
	LLVMValueRef args[11];
	unsigned num_args = 0;
	const char *name;
	char intr_name[128], type[64];

	if (HAVE_LLVM >= 0x0400) {
		bool sample = a->opcode == ac_image_sample ||
			      a->opcode == ac_image_gather4 ||
			      a->opcode == ac_image_get_lod;

		if (sample)
			args[num_args++] = bitcast_to_float(ctx, a->addr);
		else
			args[num_args++] = a->addr;

		args[num_args++] = a->resource;
		if (sample)
			args[num_args++] = a->sampler;
		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
		if (sample)
			args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
		args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);

		switch (a->opcode) {
		case ac_image_sample:
			name = "llvm.amdgcn.image.sample";
			break;
		case ac_image_gather4:
			name = "llvm.amdgcn.image.gather4";
			break;
		case ac_image_load:
			name = "llvm.amdgcn.image.load";
			break;
		case ac_image_load_mip:
			name = "llvm.amdgcn.image.load.mip";
			break;
		case ac_image_get_lod:
			name = "llvm.amdgcn.image.getlod";
			break;
		case ac_image_get_resinfo:
			name = "llvm.amdgcn.image.getresinfo";
			break;
		}

		ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
					    sizeof(type));

		snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
			 name,
			 a->compare ? ".c" : "",
			 a->bias ? ".b" :
			 a->lod ? ".l" :
			 a->deriv ? ".d" :
			 a->level_zero ? ".lz" : "",
			 a->offset ? ".o" : "",
			 type);

		LLVMValueRef result =
			ac_emit_llvm_intrinsic(ctx, intr_name,
					       ctx->v4f32, args, num_args,
					       AC_FUNC_ATTR_READNONE);
		if (!sample) {
			result = LLVMBuildBitCast(ctx->builder, result,
						  ctx->v4i32, "");
		}
		return result;
	}

	args[num_args++] = a->addr;
	args[num_args++] = a->resource;

	if (a->opcode == ac_image_load ||
	    a->opcode == ac_image_load_mip ||
	    a->opcode == ac_image_get_resinfo) {
		dst_type = ctx->v4i32;
	} else {
		dst_type = ctx->v4f32;
		args[num_args++] = a->sampler;
	}

	args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
	args[num_args++] = LLVMConstInt(ctx->i32, a->unorm, 0);
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
	args[num_args++] = LLVMConstInt(ctx->i32, a->da, 0);
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */

	switch (a->opcode) {
	case ac_image_sample:
		name = "llvm.SI.image.sample";
		break;
	case ac_image_gather4:
		name = "llvm.SI.gather4";
		break;
	case ac_image_load:
		name = "llvm.SI.image.load";
		break;
	case ac_image_load_mip:
		name = "llvm.SI.image.load.mip";
		break;
	case ac_image_get_lod:
		name = "llvm.SI.getlod";
		break;
	case ac_image_get_resinfo:
		name = "llvm.SI.getresinfo";
		break;
	}

	ac_build_type_name_for_intr(LLVMTypeOf(a->addr), type, sizeof(type));
	snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.%s",
		 name,
		 a->compare ? ".c" : "",
		 a->bias ? ".b" :
		 a->lod ? ".l" :
		 a->deriv ? ".d" :
		 a->level_zero ? ".lz" : "",
		 a->offset ? ".o" : "",
		 type);

	return ac_emit_llvm_intrinsic(ctx, intr_name,
				      dst_type, args, num_args,
				      AC_FUNC_ATTR_READNONE |
				      AC_FUNC_ATTR_LEGACY);
}

LLVMValueRef ac_emit_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
				   LLVMValueRef args[2])
{
	if (HAVE_LLVM >= 0x0500) {
		LLVMTypeRef v2f16 =
			LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
		LLVMValueRef res =
			ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz",
					       v2f16, args, 2,
					       AC_FUNC_ATTR_READNONE);
		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
	}

	return ac_emit_llvm_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
				      AC_FUNC_ATTR_READNONE |
				      AC_FUNC_ATTR_LEGACY);
}