ac: normalize build helper names
[mesa.git] / src / amd / common / ac_llvm_build.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/* based on pieces from si_pipe.c and radeon_llvm_emit.c */
#include "ac_llvm_build.h"

#include <llvm-c/Core.h>

#include "c11/threads.h"

#include <assert.h>
#include <stdio.h>

#include "ac_llvm_util.h"

#include "util/bitscan.h"
#include "util/macros.h"
#include "sid.h"

/* Initialize module-independent parts of the context.
 *
 * The caller is responsible for initializing ctx::module and ctx::builder.
 */
void
ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
{
	LLVMValueRef args[1];

	ctx->context = context;
	ctx->module = NULL;
	ctx->builder = NULL;

	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->context);
	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);

	ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
						      "range", 5);

	ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							       "invariant.load", 14);

	ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);

	args[0] = LLVMConstReal(ctx->f32, 2.5);
	ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);

	ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
							"amdgpu.uniform", 14);

	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}

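/* Declare (on first use) and call the intrinsic \p name. On LLVM >= 4.0 the
 * function attributes are applied at the call site, except for
 * AC_FUNC_ATTR_LEGACY intrinsics; otherwise they are applied to the
 * declaration.
 */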
LLVMValueRef
ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
		   LLVMTypeRef return_type, LLVMValueRef *params,
		   unsigned param_count, unsigned attrib_mask)
{
	LLVMValueRef function, call;
	bool set_callsite_attrs = HAVE_LLVM >= 0x0400 &&
				  !(attrib_mask & AC_FUNC_ATTR_LEGACY);

	function = LLVMGetNamedFunction(ctx->module, name);
	if (!function) {
		LLVMTypeRef param_types[32], function_type;
		unsigned i;

		assert(param_count <= 32);

		for (i = 0; i < param_count; ++i) {
			assert(params[i]);
			param_types[i] = LLVMTypeOf(params[i]);
		}
		function_type =
			LLVMFunctionType(return_type, param_types, param_count, 0);
		function = LLVMAddFunction(ctx->module, name, function_type);

		LLVMSetFunctionCallConv(function, LLVMCCallConv);
		LLVMSetLinkage(function, LLVMExternalLinkage);

		if (!set_callsite_attrs)
			ac_add_func_attributes(ctx->context, function, attrib_mask);
	}

	call = LLVMBuildCall(ctx->builder, function, params, param_count, "");
	if (set_callsite_attrs)
		ac_add_func_attributes(ctx->context, call, attrib_mask);
	return call;
}

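/* Bitcast \p value to f32, or to vNf32 if it is a vector. */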
static LLVMValueRef bitcast_to_float(struct ac_llvm_context *ctx,
				     LLVMValueRef value)
{
	LLVMTypeRef type = LLVMTypeOf(value);
	LLVMTypeRef new_type;

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
		new_type = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
	else
		new_type = ctx->f32;

	return LLVMBuildBitCast(ctx->builder, value, new_type, "");
}

/**
 * Given the scalar or vector \p type, generate the textual type name used
 * to overload intrinsic names (e.g. "v4f32").
 */
void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize)
{
	LLVMTypeRef elem_type = type;

	assert(bufsize >= 8);

	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
		int ret = snprintf(buf, bufsize, "v%u",
				   LLVMGetVectorSize(type));
		if (ret < 0) {
			char *type_name = LLVMPrintTypeToString(type);
			fprintf(stderr, "Error building type name for: %s\n",
				type_name);
			return;
		}
		elem_type = LLVMGetElementType(type);
		buf += ret;
		bufsize -= ret;
	}
	switch (LLVMGetTypeKind(elem_type)) {
	default: break;
	case LLVMIntegerTypeKind:
		snprintf(buf, bufsize, "i%u", LLVMGetIntTypeWidth(elem_type));
		break;
	case LLVMFloatTypeKind:
		snprintf(buf, bufsize, "f32");
		break;
	case LLVMDoubleTypeKind:
		snprintf(buf, bufsize, "f64");
		break;
	}
}

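/* Build a vector from \p value_count elements, taking every
 * \p value_stride-th entry of \p values. If \p load is set, the entries are
 * pointers and are loaded first. A single value is returned unchanged.
 */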
LLVMValueRef
ac_build_gather_values_extended(struct ac_llvm_context *ctx,
				LLVMValueRef *values,
				unsigned value_count,
				unsigned value_stride,
				bool load)
{
	LLVMBuilderRef builder = ctx->builder;
	LLVMValueRef vec = NULL;
	unsigned i;

	if (value_count == 1) {
		if (load)
			return LLVMBuildLoad(builder, values[0], "");
		return values[0];
	} else if (!value_count)
		unreachable("value_count is 0");

	for (i = 0; i < value_count; i++) {
		LLVMValueRef value = values[i * value_stride];
		if (load)
			value = LLVMBuildLoad(builder, value, "");

		if (!i)
			vec = LLVMGetUndef(LLVMVectorType(LLVMTypeOf(value), value_count));
		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
		vec = LLVMBuildInsertElement(builder, vec, value, index, "");
	}
	return vec;
}

LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
		       LLVMValueRef *values,
		       unsigned value_count)
{
	return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
}

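/* Emit a floating-point division. The fpmath metadata allows the result to
 * be off by up to 2.5 ULP, which lets the backend use a faster,
 * reciprocal-based lowering instead of a correctly rounded division.
 */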
LLVMValueRef
ac_build_fdiv(struct ac_llvm_context *ctx,
	      LLVMValueRef num,
	      LLVMValueRef den)
{
	LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");

	if (!LLVMIsConstant(ret))
		LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
	return ret;
}

/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 * already multiplied by two. id is the cube face number.
 */
struct cube_selection_coords {
	LLVMValueRef stc[2];
	LLVMValueRef ma;
	LLVMValueRef id;
};

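/* Compute the cube face selection values (sc, tc, ma, face id) for the
 * direction vector \p in, using the llvm.amdgcn.cube* intrinsics on
 * LLVM >= 3.9 and the legacy llvm.AMDGPU.cube intrinsic otherwise.
 */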
static void
build_cube_intrinsic(struct ac_llvm_context *ctx,
		     LLVMValueRef in[3],
		     struct cube_selection_coords *out)
{
	LLVMBuilderRef builder = ctx->builder;

	if (HAVE_LLVM >= 0x0309) {
		LLVMTypeRef f32 = ctx->f32;

		out->stc[1] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubetc",
						 f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->stc[0] = ac_build_intrinsic(ctx, "llvm.amdgcn.cubesc",
						 f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->ma = ac_build_intrinsic(ctx, "llvm.amdgcn.cubema",
					     f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->id = ac_build_intrinsic(ctx, "llvm.amdgcn.cubeid",
					     f32, in, 3, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef c[4] = {
			in[0],
			in[1],
			in[2],
			LLVMGetUndef(LLVMTypeOf(in[0]))
		};
		LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);

		LLVMValueRef tmp =
			ac_build_intrinsic(ctx, "llvm.AMDGPU.cube",
					   LLVMTypeOf(vec), &vec, 1,
					   AC_FUNC_ATTR_READNONE);

		out->stc[1] = LLVMBuildExtractElement(builder, tmp,
						      LLVMConstInt(ctx->i32, 0, 0), "");
		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
						      LLVMConstInt(ctx->i32, 1, 0), "");
		out->ma = LLVMBuildExtractElement(builder, tmp,
						  LLVMConstInt(ctx->i32, 2, 0), "");
		out->id = LLVMBuildExtractElement(builder, tmp,
						  LLVMConstInt(ctx->i32, 3, 0), "");
	}
}

/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p coords, for the face implied by \p selcoords.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 * the selcoords major axis.
 */
static void build_cube_select(LLVMBuilderRef builder,
			      const struct cube_selection_coords *selcoords,
			      const LLVMValueRef *coords,
			      LLVMValueRef *out_st,
			      LLVMValueRef *out_ma)
{
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
				       selcoords->ma, LLVMConstReal(f32, 0.0), "");
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
				 LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
			       LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
			      LLVMBuildSelect(builder, is_ma_x, sgn_ma,
					      LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
			      LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
			      LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
	sgn = LLVMBuildSelect(builder, is_ma_positive,
			      LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
}

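/* Convert a cube map direction vector in \p coords_arg (and, for
 * textureGrad, its derivatives in \p derivs_arg) into the 2D face
 * coordinates + face index form the hardware expects: sc/tc scaled by
 * 1/|ma| and biased by 1.5, with the face id in the third component.
 */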
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{
	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	invma = ac_build_intrinsic(ctx, "llvm.fabs.f32",
				   ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_build_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivates the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* For cube arrays, coord.z = coord.w (the array index) * 8 + face. */
		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
	}

	memcpy(coords_arg, coords, sizeof(coords));
}

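/* Interpolate channel \p llvm_chan of fragment shader input attribute
 * \p attr_number at the barycentric coordinates (\p i, \p j), via a
 * v_interp_p1/v_interp_p2 sequence on LLVM >= 4.0.
 */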
LLVMValueRef
ac_build_fs_interp(struct ac_llvm_context *ctx,
		   LLVMValueRef llvm_chan,
		   LLVMValueRef attr_number,
		   LLVMValueRef params,
		   LLVMValueRef i,
		   LLVMValueRef j)
{
	LLVMValueRef args[5];
	LLVMValueRef p1;

	if (HAVE_LLVM < 0x0400) {
		LLVMValueRef ij[2];
		ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
		ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");

		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		args[3] = ac_build_gather_values(ctx, ij, 2);
		return ac_build_intrinsic(ctx, "llvm.SI.fs.interp",
					  ctx->f32, args, 4,
					  AC_FUNC_ATTR_READNONE);
	}

	args[0] = i;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	p1 = ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p1",
				ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);

	args[0] = p1;
	args[1] = j;
	args[2] = llvm_chan;
	args[3] = attr_number;
	args[4] = params;

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.p2",
				  ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}

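/* Read a fragment shader input attribute channel without i/j
 * interpolation, e.g. for flat shading.
 */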
LLVMValueRef
ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
		       LLVMValueRef parameter,
		       LLVMValueRef llvm_chan,
		       LLVMValueRef attr_number,
		       LLVMValueRef params)
{
	LLVMValueRef args[4];
	if (HAVE_LLVM < 0x0400) {
		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;

		return ac_build_intrinsic(ctx,
					  "llvm.SI.fs.constant",
					  ctx->f32, args, 3,
					  AC_FUNC_ATTR_READNONE);
	}

	args[0] = parameter;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	return ac_build_intrinsic(ctx, "llvm.amdgcn.interp.mov",
				  ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
}

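/* Build a GEP to &base_ptr[index], i.e. with an extra leading zero index. */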
LLVMValueRef
ac_build_gep0(struct ac_llvm_context *ctx,
	      LLVMValueRef base_ptr,
	      LLVMValueRef index)
{
	LLVMValueRef indices[2] = {
		LLVMConstInt(ctx->i32, 0, 0),
		index,
	};
	return LLVMBuildGEP(ctx->builder, base_ptr,
			    indices, 2, "");
}

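/* Store \p value at &base_ptr[index]. */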
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
		       LLVMValueRef base_ptr, LLVMValueRef index,
		       LLVMValueRef value)
{
	LLVMBuildStore(ctx->builder, value,
		       ac_build_gep0(ctx, base_ptr, index));
}

/**
 * Build an LLVM IR indexed load using LLVMBuildGEP + LLVMBuildLoad.
 * It's equivalent to doing a load from &base_ptr[index].
 *
 * \param base_ptr Where the array starts.
 * \param index The element index into the array.
 * \param uniform Whether base_ptr and index can be assumed to be
 * dynamically uniform.
 */
LLVMValueRef
ac_build_indexed_load(struct ac_llvm_context *ctx,
		      LLVMValueRef base_ptr, LLVMValueRef index,
		      bool uniform)
{
	LLVMValueRef pointer;

	pointer = ac_build_gep0(ctx, base_ptr, index);
	if (uniform)
		LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
	return LLVMBuildLoad(ctx->builder, pointer, "");
}

/**
 * Do a load from &base_ptr[index], but also add a flag that it's loading
 * a constant from a dynamically uniform index.
 */
LLVMValueRef
ac_build_indexed_load_const(struct ac_llvm_context *ctx,
			    LLVMValueRef base_ptr, LLVMValueRef index)
{
	LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
	LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
	return result;
}

/* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
 * or v4i32 (num_channels=3,4).
 */
void
ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
			    LLVMValueRef rsrc,
			    LLVMValueRef vdata,
			    unsigned num_channels,
			    LLVMValueRef voffset,
			    LLVMValueRef soffset,
			    unsigned inst_offset,
			    bool glc,
			    bool slc,
			    bool writeonly_memory,
			    bool has_add_tid)
{
	/* TODO: Fix stores with ADD_TID and remove the "has_add_tid" flag. */
	if (HAVE_LLVM >= 0x0309 && !has_add_tid) {
		/* Split 3-channel stores, because LLVM doesn't support
		 * 3-channel intrinsics. */
		if (num_channels == 3) {
			LLVMValueRef v[3], v01;

			for (int i = 0; i < 3; i++) {
				v[i] = LLVMBuildExtractElement(ctx->builder, vdata,
						LLVMConstInt(ctx->i32, i, 0), "");
			}
			v01 = ac_build_gather_values(ctx, v, 2);

			ac_build_buffer_store_dword(ctx, rsrc, v01, 2, voffset,
						    soffset, inst_offset, glc, slc,
						    writeonly_memory, has_add_tid);
			ac_build_buffer_store_dword(ctx, rsrc, v[2], 1, voffset,
						    soffset, inst_offset + 8,
						    glc, slc,
						    writeonly_memory, has_add_tid);
			return;
		}

		unsigned func = CLAMP(num_channels, 1, 3) - 1;
		static const char *types[] = {"f32", "v2f32", "v4f32"};
		char name[256];
		LLVMValueRef offset = soffset;

		if (inst_offset)
			offset = LLVMBuildAdd(ctx->builder, offset,
					      LLVMConstInt(ctx->i32, inst_offset, 0), "");
		if (voffset)
			offset = LLVMBuildAdd(ctx->builder, offset, voffset, "");

		LLVMValueRef args[] = {
			bitcast_to_float(ctx, vdata),
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			LLVMConstInt(ctx->i32, 0, 0),
			offset,
			LLVMConstInt(ctx->i1, glc, 0),
			LLVMConstInt(ctx->i1, slc, 0),
		};

		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.store.%s",
			 types[func]);

		ac_build_intrinsic(ctx, name, ctx->voidt,
				   args, ARRAY_SIZE(args),
				   writeonly_memory ?
					   AC_FUNC_ATTR_INACCESSIBLE_MEM_ONLY :
					   AC_FUNC_ATTR_WRITEONLY);
		return;
	}

	static unsigned dfmt[] = {
		V_008F0C_BUF_DATA_FORMAT_32,
		V_008F0C_BUF_DATA_FORMAT_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32,
		V_008F0C_BUF_DATA_FORMAT_32_32_32_32
	};
	assert(num_channels >= 1 && num_channels <= 4);

	LLVMValueRef args[] = {
		rsrc,
		vdata,
		LLVMConstInt(ctx->i32, num_channels, 0),
		voffset ? voffset : LLVMGetUndef(ctx->i32),
		soffset,
		LLVMConstInt(ctx->i32, inst_offset, 0),
		LLVMConstInt(ctx->i32, dfmt[num_channels - 1], 0),
		LLVMConstInt(ctx->i32, V_008F0C_BUF_NUM_FORMAT_UINT, 0),
		LLVMConstInt(ctx->i32, voffset != NULL, 0),
		LLVMConstInt(ctx->i32, 0, 0), /* idxen */
		LLVMConstInt(ctx->i32, glc, 0),
		LLVMConstInt(ctx->i32, slc, 0),
		LLVMConstInt(ctx->i32, 0, 0), /* tfe */
	};

	/* The instruction offset field has 12 bits. */
	assert(voffset || inst_offset < (1 << 12));

	/* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
	unsigned func = CLAMP(num_channels, 1, 3) - 1;
	const char *types[] = {"i32", "v2i32", "v4i32"};
	char name[256];
	snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);

	ac_build_intrinsic(ctx, name, ctx->voidt,
			   args, ARRAY_SIZE(args),
			   AC_FUNC_ATTR_LEGACY);
}

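/* Load 1, 2 or 4 dwords from a buffer resource. On LLVM >= 3.9 all three
 * offsets are folded into the single offset operand of
 * llvm.amdgcn.buffer.load; older LLVM uses the llvm.SI.buffer.load.dword
 * intrinsics instead.
 */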
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
		     LLVMValueRef rsrc,
		     int num_channels,
		     LLVMValueRef vindex,
		     LLVMValueRef voffset,
		     LLVMValueRef soffset,
		     unsigned inst_offset,
		     unsigned glc,
		     unsigned slc,
		     bool readonly_memory)
{
	unsigned func = CLAMP(num_channels, 1, 3) - 1;

	if (HAVE_LLVM >= 0x0309) {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i1, glc, 0),
			LLVMConstInt(ctx->i1, slc, 0)
		};

		LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
				       ctx->v4f32};
		const char *type_names[] = {"f32", "v2f32", "v4f32"};
		char name[256];

		if (voffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
					       "");
		}

		if (soffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
					       "");
		}

		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
			 type_names[func]);

		return ac_build_intrinsic(ctx, name, types[func], args,
					  ARRAY_SIZE(args),
					  /* READNONE means writes can't
					   * affect it, while READONLY means
					   * that writes can affect it. */
					  readonly_memory ?
						  AC_FUNC_ATTR_READNONE :
						  AC_FUNC_ATTR_READONLY);
	} else {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
			voffset ? voffset : vindex,
			soffset,
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), /* offen */
			LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), /* idxen */
			LLVMConstInt(ctx->i32, glc, 0),
			LLVMConstInt(ctx->i32, slc, 0),
			LLVMConstInt(ctx->i32, 0, 0), /* tfe */
		};

		LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
				       ctx->v4i32};
		const char *type_names[] = {"i32", "v2i32", "v4i32"};
		const char *arg_type = "i32";
		char name[256];

		if (voffset && vindex) {
			LLVMValueRef vaddr[] = {vindex, voffset};

			arg_type = "v2i32";
			args[1] = ac_build_gather_values(ctx, vaddr, 2);
		}

		snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
			 type_names[func], arg_type);

		return ac_build_intrinsic(ctx, name, types[func], args,
					  ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
	}
}

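/* Load a format-converted vec4 from a buffer resource. */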
LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
					 LLVMValueRef rsrc,
					 LLVMValueRef vindex,
					 LLVMValueRef voffset,
					 bool readonly_memory)
{
	if (HAVE_LLVM >= 0x0309) {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			vindex,
			voffset,
			LLVMConstInt(ctx->i1, 0, 0), /* glc */
			LLVMConstInt(ctx->i1, 0, 0), /* slc */
		};

		return ac_build_intrinsic(ctx,
					  "llvm.amdgcn.buffer.load.format.v4f32",
					  ctx->v4f32, args, ARRAY_SIZE(args),
					  /* READNONE means writes can't
					   * affect it, while READONLY means
					   * that writes can affect it. */
					  readonly_memory ?
						  AC_FUNC_ATTR_READNONE :
						  AC_FUNC_ATTR_READONLY);
	}

	LLVMValueRef args[] = {
		rsrc,
		voffset,
		vindex,
	};
	return ac_build_intrinsic(ctx, "llvm.SI.vs.load.input",
				  ctx->v4f32, args, 3,
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_LEGACY);
}

/**
 * Set range metadata on an instruction. This can only be used on load and
 * call instructions. If you know an instruction can only produce the values
 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
 * \p lo is the minimum value inclusive.
 * \p hi is the maximum value exclusive.
 */
static void set_range_metadata(struct ac_llvm_context *ctx,
			       LLVMValueRef value, unsigned lo, unsigned hi)
{
	LLVMValueRef range_md, md_args[2];
	LLVMTypeRef type = LLVMTypeOf(value);
	LLVMContextRef context = LLVMGetTypeContext(type);

	md_args[0] = LLVMConstInt(type, lo, false);
	md_args[1] = LLVMConstInt(type, hi, false);
	range_md = LLVMMDNodeInContext(context, md_args, 2);
	LLVMSetMetadata(value, ctx->range_md_kind, range_md);
}

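/* Return the lane id (0..63) of the current thread within its wavefront. */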
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx)
{
	LLVMValueRef tid;

	if (HAVE_LLVM < 0x0308) {
		tid = ac_build_intrinsic(ctx, "llvm.SI.tid",
					 ctx->i32,
					 NULL, 0, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef tid_args[2];
		tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
		tid_args[1] = LLVMConstInt(ctx->i32, 0, false);
		tid_args[1] = ac_build_intrinsic(ctx,
						 "llvm.amdgcn.mbcnt.lo", ctx->i32,
						 tid_args, 2, AC_FUNC_ATTR_READNONE);

		tid = ac_build_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
					 ctx->i32, tid_args,
					 2, AC_FUNC_ATTR_READNONE);
	}
	set_range_metadata(ctx, tid, 0, 64);
	return tid;
}

/*
 * SI implements derivatives using the local data store (LDS).
 * All writes to the LDS happen in all executing threads at
 * the same time. TID is the Thread ID for the current
 * thread and is a value between 0 and 63, representing
 * the thread's position in the wavefront.
 *
 * For the pixel shader, threads are grouped into quads of four pixels.
 * The TIDs of the pixels of a quad are:
 *
 *   +------+------+
 *   |4n + 0|4n + 1|
 *   +------+------+
 *   |4n + 2|4n + 3|
 *   +------+------+
 *
 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
 * the current pixel's column, and masking with 0xfffffffe yields the TID
 * of the left pixel of the current pixel's row.
 *
 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
 * adding 2 yields the TID of the pixel below the top pixel.
 */
LLVMValueRef
ac_build_ddxy(struct ac_llvm_context *ctx,
	      bool has_ds_bpermute,
	      uint32_t mask,
	      int idx,
	      LLVMValueRef lds,
	      LLVMValueRef val)
{
	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
	LLVMValueRef result;

	thread_id = ac_get_thread_id(ctx);

	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
			      LLVMConstInt(ctx->i32, mask, false), "");

	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
				LLVMConstInt(ctx->i32, idx, false), "");

	if (has_ds_bpermute) {
		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		args[1] = val;
		tl = ac_build_intrinsic(ctx,
					"llvm.amdgcn.ds.bpermute", ctx->i32,
					args, 2, AC_FUNC_ATTR_READNONE);

		args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		trbl = ac_build_intrinsic(ctx,
					  "llvm.amdgcn.ds.bpermute", ctx->i32,
					  args, 2, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef store_ptr, load_ptr0, load_ptr1;

		store_ptr = ac_build_gep0(ctx, lds, thread_id);
		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);

		LLVMBuildStore(ctx->builder, val, store_ptr);
		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
	}

	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
	trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
	result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
	return result;
}

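/* Emit s_sendmsg with message type \p msg; \p wave_id is the value placed
 * in m0.
 */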
void
ac_build_sendmsg(struct ac_llvm_context *ctx,
		 uint32_t msg,
		 LLVMValueRef wave_id)
{
	LLVMValueRef args[2];
	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg";
	args[0] = LLVMConstInt(ctx->i32, msg, false);
	args[1] = wave_id;
	ac_build_intrinsic(ctx, intr_name, ctx->voidt, args, 2, 0);
}

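/* Find the index, counted from the LSB, of the most significant bit of a
 * signed value, i.e. the first bit that differs from the sign bit; returns
 * -1 for the inputs 0 and -1.
 */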
LLVMValueRef
ac_build_imsb(struct ac_llvm_context *ctx,
	      LLVMValueRef arg,
	      LLVMTypeRef dst_type)
{
	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" :
						       "llvm.amdgcn.sffbh.i32";
	LLVMValueRef msb = ac_build_intrinsic(ctx, intr_name,
					      dst_type, &arg, 1,
					      AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but NIR/TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, LLVMConstInt(ctx->i32, 0, 0), ""),
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, all_ones, ""), "");

	return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
}

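/* Find the index, counted from the LSB, of the most significant set bit of
 * an unsigned value; returns -1 if no bit is set.
 */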
LLVMValueRef
ac_build_umsb(struct ac_llvm_context *ctx,
	      LLVMValueRef arg,
	      LLVMTypeRef dst_type)
{
	LLVMValueRef args[2] = {
		arg,
		LLVMConstInt(ctx->i1, 1, 0),
	};
	LLVMValueRef msb = ac_build_intrinsic(ctx, "llvm.ctlz.i32",
					      dst_type, args, ARRAY_SIZE(args),
					      AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but TGSI/NIR wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* check for zero */
	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
					     LLVMConstInt(ctx->i32, 0, 0), ""),
			       LLVMConstInt(ctx->i32, -1, true), msb, "");
}

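/* Clamp \p value to the range [0, 1]. */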
LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	if (HAVE_LLVM >= 0x0500) {
		LLVMValueRef max[2] = {
			value,
			LLVMConstReal(ctx->f32, 0),
		};
		LLVMValueRef min[2] = {
			LLVMConstReal(ctx->f32, 1),
		};

		min[1] = ac_build_intrinsic(ctx, "llvm.maxnum.f32",
					    ctx->f32, max, 2,
					    AC_FUNC_ATTR_READNONE);
		return ac_build_intrinsic(ctx, "llvm.minnum.f32",
					  ctx->f32, min, 2,
					  AC_FUNC_ATTR_READNONE);
	}

	const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
						 "llvm.AMDIL.clamp.";
	LLVMValueRef args[3] = {
		value,
		LLVMConstReal(ctx->f32, 0),
		LLVMConstReal(ctx->f32, 1),
	};

	return ac_build_intrinsic(ctx, intr, ctx->f32, args, 3,
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_LEGACY);
}

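/* Emit an export instruction as described by \p a, using the compressed
 * (2x16-bit) form when a->compr is set.
 */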
void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a)
{
	LLVMValueRef args[9];

	if (HAVE_LLVM >= 0x0500) {
		args[0] = LLVMConstInt(ctx->i32, a->target, 0);
		args[1] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);

		if (a->compr) {
			LLVMTypeRef i16 = LLVMInt16TypeInContext(ctx->context);
			LLVMTypeRef v2i16 = LLVMVectorType(i16, 2);

			args[2] = LLVMBuildBitCast(ctx->builder, a->out[0],
						   v2i16, "");
			args[3] = LLVMBuildBitCast(ctx->builder, a->out[1],
						   v2i16, "");
			args[4] = LLVMConstInt(ctx->i1, a->done, 0);
			args[5] = LLVMConstInt(ctx->i1, a->valid_mask, 0);

			ac_build_intrinsic(ctx, "llvm.amdgcn.exp.compr.v2i16",
					   ctx->voidt, args, 6, 0);
		} else {
			args[2] = a->out[0];
			args[3] = a->out[1];
			args[4] = a->out[2];
			args[5] = a->out[3];
			args[6] = LLVMConstInt(ctx->i1, a->done, 0);
			args[7] = LLVMConstInt(ctx->i1, a->valid_mask, 0);

			ac_build_intrinsic(ctx, "llvm.amdgcn.exp.f32",
					   ctx->voidt, args, 8, 0);
		}
		return;
	}

	args[0] = LLVMConstInt(ctx->i32, a->enabled_channels, 0);
	args[1] = LLVMConstInt(ctx->i32, a->valid_mask, 0);
	args[2] = LLVMConstInt(ctx->i32, a->done, 0);
	args[3] = LLVMConstInt(ctx->i32, a->target, 0);
	args[4] = LLVMConstInt(ctx->i32, a->compr, 0);
	memcpy(args + 5, a->out, sizeof(a->out[0]) * 4);

	ac_build_intrinsic(ctx, "llvm.SI.export", ctx->voidt, args, 9,
			   AC_FUNC_ATTR_LEGACY);
}

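/* Emit an image sample/gather4/load/getlod/getresinfo intrinsic as
 * described by \p a, assembling the overloaded intrinsic name from the
 * compare/bias/lod/derivative/offset modifiers and the address type.
 */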
LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
				   struct ac_image_args *a)
{
	LLVMTypeRef dst_type;
	LLVMValueRef args[11];
	unsigned num_args = 0;
	const char *name;
	char intr_name[128], type[64];

	if (HAVE_LLVM >= 0x0400) {
		bool sample = a->opcode == ac_image_sample ||
			      a->opcode == ac_image_gather4 ||
			      a->opcode == ac_image_get_lod;

		if (sample)
			args[num_args++] = bitcast_to_float(ctx, a->addr);
		else
			args[num_args++] = a->addr;

		args[num_args++] = a->resource;
		if (sample)
			args[num_args++] = a->sampler;
		args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
		if (sample)
			args[num_args++] = LLVMConstInt(ctx->i1, a->unorm, 0);
		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* glc */
		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* slc */
		args[num_args++] = LLVMConstInt(ctx->i1, 0, 0); /* lwe */
		args[num_args++] = LLVMConstInt(ctx->i1, a->da, 0);

		switch (a->opcode) {
		case ac_image_sample:
			name = "llvm.amdgcn.image.sample";
			break;
		case ac_image_gather4:
			name = "llvm.amdgcn.image.gather4";
			break;
		case ac_image_load:
			name = "llvm.amdgcn.image.load";
			break;
		case ac_image_load_mip:
			name = "llvm.amdgcn.image.load.mip";
			break;
		case ac_image_get_lod:
			name = "llvm.amdgcn.image.getlod";
			break;
		case ac_image_get_resinfo:
			name = "llvm.amdgcn.image.getresinfo";
			break;
		}

		ac_build_type_name_for_intr(LLVMTypeOf(args[0]), type,
					    sizeof(type));

		snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.v4f32.%s.v8i32",
			 name,
			 a->compare ? ".c" : "",
			 a->bias ? ".b" :
			 a->lod ? ".l" :
			 a->deriv ? ".d" :
			 a->level_zero ? ".lz" : "",
			 a->offset ? ".o" : "",
			 type);

		LLVMValueRef result =
			ac_build_intrinsic(ctx, intr_name,
					   ctx->v4f32, args, num_args,
					   AC_FUNC_ATTR_READNONE);
		if (!sample) {
			result = LLVMBuildBitCast(ctx->builder, result,
						  ctx->v4i32, "");
		}
		return result;
	}

	args[num_args++] = a->addr;
	args[num_args++] = a->resource;

	if (a->opcode == ac_image_load ||
	    a->opcode == ac_image_load_mip ||
	    a->opcode == ac_image_get_resinfo) {
		dst_type = ctx->v4i32;
	} else {
		dst_type = ctx->v4f32;
		args[num_args++] = a->sampler;
	}

	args[num_args++] = LLVMConstInt(ctx->i32, a->dmask, 0);
	args[num_args++] = LLVMConstInt(ctx->i32, a->unorm, 0);
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
	args[num_args++] = LLVMConstInt(ctx->i32, a->da, 0);
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
	args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */

	switch (a->opcode) {
	case ac_image_sample:
		name = "llvm.SI.image.sample";
		break;
	case ac_image_gather4:
		name = "llvm.SI.gather4";
		break;
	case ac_image_load:
		name = "llvm.SI.image.load";
		break;
	case ac_image_load_mip:
		name = "llvm.SI.image.load.mip";
		break;
	case ac_image_get_lod:
		name = "llvm.SI.getlod";
		break;
	case ac_image_get_resinfo:
		name = "llvm.SI.getresinfo";
		break;
	}

	ac_build_type_name_for_intr(LLVMTypeOf(a->addr), type, sizeof(type));
	snprintf(intr_name, sizeof(intr_name), "%s%s%s%s.%s",
		 name,
		 a->compare ? ".c" : "",
		 a->bias ? ".b" :
		 a->lod ? ".l" :
		 a->deriv ? ".d" :
		 a->level_zero ? ".lz" : "",
		 a->offset ? ".o" : "",
		 type);

	return ac_build_intrinsic(ctx, intr_name,
				  dst_type, args, num_args,
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_LEGACY);
}

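/* Pack two f32 values into one i32 containing two f16s, rounding toward
 * zero.
 */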
LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
				    LLVMValueRef args[2])
{
	if (HAVE_LLVM >= 0x0500) {
		LLVMTypeRef v2f16 =
			LLVMVectorType(LLVMHalfTypeInContext(ctx->context), 2);
		LLVMValueRef res =
			ac_build_intrinsic(ctx, "llvm.amdgcn.cvt.pkrtz",
					   v2f16, args, 2,
					   AC_FUNC_ATTR_READNONE);
		return LLVMBuildBitCast(ctx->builder, res, ctx->i32, "");
	}

	return ac_build_intrinsic(ctx, "llvm.SI.packf16", ctx->i32, args, 2,
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_LEGACY);
}

/**
 * KILL, AKA discard in GLSL.
 *
 * \param value kill if value < 0.0; if value is NULL, kill unconditionally.
 */
void ac_build_kill(struct ac_llvm_context *ctx, LLVMValueRef value)
{
	if (value) {
		ac_build_intrinsic(ctx, "llvm.AMDGPU.kill", ctx->voidt,
				   &value, 1, AC_FUNC_ATTR_LEGACY);
	} else {
		ac_build_intrinsic(ctx, "llvm.AMDGPU.kilp", ctx->voidt,
				   NULL, 0, AC_FUNC_ATTR_LEGACY);
	}
}

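/* Bitfield extract: return \p width bits of \p input starting at bit
 * \p offset, sign-extended if \p is_signed.
 */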
LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
			  LLVMValueRef offset, LLVMValueRef width,
			  bool is_signed)
{
	LLVMValueRef args[] = {
		input,
		offset,
		width,
	};

	if (HAVE_LLVM >= 0x0500) {
		return ac_build_intrinsic(ctx,
					  is_signed ? "llvm.amdgcn.sbfe.i32" :
						      "llvm.amdgcn.ubfe.i32",
					  ctx->i32, args, 3,
					  AC_FUNC_ATTR_READNONE);
	}

	return ac_build_intrinsic(ctx,
				  is_signed ? "llvm.AMDGPU.bfe.i32" :
					      "llvm.AMDGPU.bfe.u32",
				  ctx->i32, args, 3,
				  AC_FUNC_ATTR_READNONE |
				  AC_FUNC_ATTR_LEGACY);
}