ac/llvm: use min+max instead of AMDGPU.clamp on LLVM 5.0
[mesa.git] / src / amd / common / ac_llvm_build.c
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
26 #include "ac_llvm_build.h"
27
28 #include <llvm-c/Core.h>
29
30 #include "c11/threads.h"
31
32 #include <assert.h>
33 #include <stdio.h>
34
35 #include "ac_llvm_util.h"
36
37 #include "util/bitscan.h"
38 #include "util/macros.h"
39 #include "sid.h"
40
41 /* Initialize module-independent parts of the context.
42 *
43 * The caller is responsible for initializing ctx::module and ctx::builder.
44 */
45 void
46 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
47 {
48 LLVMValueRef args[1];
49
50 ctx->context = context;
51 ctx->module = NULL;
52 ctx->builder = NULL;
53
54 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
55 ctx->i1 = LLVMInt1TypeInContext(ctx->context);
56 ctx->i8 = LLVMInt8TypeInContext(ctx->context);
57 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
58 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
59 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
60 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
61 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
62
63 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
64 "range", 5);
65
66 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
67 "invariant.load", 14);
68
69 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
70
71 args[0] = LLVMConstReal(ctx->f32, 2.5);
72 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
73
74 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
75 "amdgpu.uniform", 14);
76
77 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
78 }
79
80 LLVMValueRef
81 ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
82 LLVMTypeRef return_type, LLVMValueRef *params,
83 unsigned param_count, unsigned attrib_mask)
84 {
85 LLVMValueRef function;
86
87 function = LLVMGetNamedFunction(ctx->module, name);
88 if (!function) {
89 LLVMTypeRef param_types[32], function_type;
90 unsigned i;
91
92 assert(param_count <= 32);
93
94 for (i = 0; i < param_count; ++i) {
95 assert(params[i]);
96 param_types[i] = LLVMTypeOf(params[i]);
97 }
98 function_type =
99 LLVMFunctionType(return_type, param_types, param_count, 0);
100 function = LLVMAddFunction(ctx->module, name, function_type);
101
102 LLVMSetFunctionCallConv(function, LLVMCCallConv);
103 LLVMSetLinkage(function, LLVMExternalLinkage);
104
105 attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
106 while (attrib_mask) {
107 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
108 ac_add_function_attr(function, -1, attr);
109 }
110 }
111 return LLVMBuildCall(ctx->builder, function, params, param_count, "");
112 }
113
114 LLVMValueRef
115 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
116 LLVMValueRef *values,
117 unsigned value_count,
118 unsigned value_stride,
119 bool load)
120 {
121 LLVMBuilderRef builder = ctx->builder;
122 LLVMValueRef vec;
123 unsigned i;
124
125
126 if (value_count == 1) {
127 if (load)
128 return LLVMBuildLoad(builder, values[0], "");
129 return values[0];
130 } else if (!value_count)
131 unreachable("value_count is 0");
132
133 for (i = 0; i < value_count; i++) {
134 LLVMValueRef value = values[i * value_stride];
135 if (load)
136 value = LLVMBuildLoad(builder, value, "");
137
138 if (!i)
139 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
140 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
141 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
142 }
143 return vec;
144 }
145
/* Build a vector from \p value_count consecutive values (stride 1, no
 * loads). Convenience wrapper around ac_build_gather_values_extended.
 */
LLVMValueRef
ac_build_gather_values(struct ac_llvm_context *ctx,
		       LLVMValueRef *values,
		       unsigned value_count)
{
	return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
}
153
154 LLVMValueRef
155 ac_emit_fdiv(struct ac_llvm_context *ctx,
156 LLVMValueRef num,
157 LLVMValueRef den)
158 {
159 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
160
161 if (!LLVMIsConstant(ret))
162 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
163 return ret;
164 }
165
/* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
 * already multiplied by two. id is the cube face number.
 */
struct cube_selection_coords {
	LLVMValueRef stc[2];	/* stc[0] = sc, stc[1] = tc */
	LLVMValueRef ma;	/* major-axis coordinate, pre-multiplied by 2 */
	LLVMValueRef id;	/* cube face number as an f32 value */
};
175
/* Compute the cube face selection values for direction vector \p in.
 *
 * LLVM >= 3.9 exposes one intrinsic per component (cubesc/cubetc/cubema/
 * cubeid); older LLVM only has the combined llvm.AMDGPU.cube, which takes
 * a vec4 and returns all four values packed in a vec4.
 */
static void
build_cube_intrinsic(struct ac_llvm_context *ctx,
		     LLVMValueRef in[3],
		     struct cube_selection_coords *out)
{
	LLVMBuilderRef builder = ctx->builder;

	if (HAVE_LLVM >= 0x0309) {
		LLVMTypeRef f32 = ctx->f32;

		out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
				f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
				f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
				f32, in, 3, AC_FUNC_ATTR_READNONE);
		out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
				f32, in, 3, AC_FUNC_ATTR_READNONE);
	} else {
		/* The combined intrinsic only cares about the first three
		 * components; the fourth lane is undef padding.
		 */
		LLVMValueRef c[4] = {
			in[0],
			in[1],
			in[2],
			LLVMGetUndef(LLVMTypeOf(in[0]))
		};
		LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);

		LLVMValueRef tmp =
			ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
					       LLVMTypeOf(vec), &vec, 1,
					       AC_FUNC_ATTR_READNONE);

		/* Result lanes: 0 = tc, 1 = sc, 2 = ma, 3 = id. */
		out->stc[1] = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 0, 0), "");
		out->stc[0] = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 1, 0), "");
		out->ma = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 2, 0), "");
		out->id = LLVMBuildExtractElement(builder, tmp,
				LLVMConstInt(ctx->i32, 3, 0), "");
	}
}
218
/**
 * Build a manual selection sequence for cube face sc/tc coordinates and
 * major axis vector (multiplied by 2 for consistency) for the given
 * vec3 \p coords, for the face implied by \p selcoords.
 *
 * For the major axis, we always adjust the sign to be in the direction of
 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
 * the selcoords major axis.
 */
static void build_cube_select(LLVMBuilderRef builder,
			      const struct cube_selection_coords *selcoords,
			      const LLVMValueRef *coords,
			      LLVMValueRef *out_st,
			      LLVMValueRef *out_ma)
{
	LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
	LLVMValueRef is_ma_positive;
	LLVMValueRef sgn_ma;
	LLVMValueRef is_ma_z, is_not_ma_z;
	LLVMValueRef is_ma_y;
	LLVMValueRef is_ma_x;
	LLVMValueRef sgn;
	LLVMValueRef tmp;

	is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
		selcoords->ma, LLVMConstReal(f32, 0.0), "");
	/* +1 or -1 depending on the sign of the pre-computed major axis. */
	sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");

	/* Decode the major axis from the face id: id >= 4 is +/-Z,
	 * 2 <= id < 4 is +/-Y, and id < 2 is +/-X.
	 */
	is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
	is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
	is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
		LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
	is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");

	/* Select sc: Z faces use coords.z, X/Y faces use coords.x; the sign
	 * depends on both the face's major axis and the sign of ma.
	 */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
		LLVMBuildSelect(builder, is_ma_x, sgn_ma,
			LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
	out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select tc: Y faces use coords.z, X/Z faces use coords.y. */
	tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
	sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
		LLVMConstReal(f32, -1.0), "");
	out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");

	/* Select ma: pick the coordinate along the face's major axis and
	 * scale by +/-2 so its sign matches selcoords->ma (see the comment
	 * above the function).
	 */
	tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
		LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
	sgn = LLVMBuildSelect(builder, is_ma_positive,
		LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
	*out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
}
274
/* Convert cube map coordinates (and optionally derivatives) from the
 * 3-component direction form into the face-coordinate form consumed by
 * the sampling instructions.
 *
 * On entry coords_arg[0..2] is the direction vector; for cube arrays
 * coords_arg[3] is the array index. On return coords_arg[0..1] are the
 * 2D face coordinates (offset by +1.5) and coords_arg[2] is the face id,
 * combined with the array layer for cube arrays. derivs_arg (2 x vec3 in,
 * 2 x vec2 out) is rewritten in place when is_deriv is set.
 */
void
ac_prepare_cube_coords(struct ac_llvm_context *ctx,
		       bool is_deriv, bool is_array,
		       LLVMValueRef *coords_arg,
		       LLVMValueRef *derivs_arg)
{

	LLVMBuilderRef builder = ctx->builder;
	struct cube_selection_coords selcoords;
	LLVMValueRef coords[3];
	LLVMValueRef invma;

	build_cube_intrinsic(ctx, coords_arg, &selcoords);

	/* invma = 1 / |ma|, used to project onto the selected face plane. */
	invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
			ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
	invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);

	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");

	coords[2] = selcoords.id;

	if (is_deriv && derivs_arg) {
		LLVMValueRef derivs[4];
		int axis;

		/* Convert cube derivatives to 2D derivatives. */
		for (axis = 0; axis < 2; axis++) {
			LLVMValueRef deriv_st[2];
			LLVMValueRef deriv_ma;

			/* Transform the derivative alongside the texture
			 * coordinate. Mathematically, the correct formula is
			 * as follows. Assume we're projecting onto the +Z face
			 * and denote by dx/dh the derivative of the (original)
			 * X texture coordinate with respect to horizontal
			 * window coordinates. The projection onto the +Z face
			 * plane is:
			 *
			 *   f(x,z) = x/z
			 *
			 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
			 *            = 1/z * dx/dh - x/z * 1/z * dz/dh.
			 *
			 * This motivatives the implementation below.
			 *
			 * Whether this actually gives the expected results for
			 * apps that might feed in derivatives obtained via
			 * finite differences is anyone's guess. The OpenGL spec
			 * seems awfully quiet about how textureGrad for cube
			 * maps should be handled.
			 */
			build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
					  deriv_st, &deriv_ma);

			deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");

			for (int i = 0; i < 2; ++i)
				derivs[axis * 2 + i] =
					LLVMBuildFSub(builder,
						LLVMBuildFMul(builder, deriv_st[i], invma, ""),
						LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
		}

		memcpy(derivs_arg, derivs, sizeof(derivs));
	}

	/* Shift the texture coordinate. This must be applied after the
	 * derivative calculation.
	 */
	for (int i = 0; i < 2; ++i)
		coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");

	if (is_array) {
		/* for cube arrays coord.z = coord.w(array_index) * 8 + face */
		/* coords_arg.w component - array_index for cube arrays */
		LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
		coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
	}

	memcpy(coords_arg, coords, sizeof(coords));
}
358
359
/* Emit an interpolated fragment shader input load using the barycentric
 * coordinates (i, j).
 *
 * LLVM >= 4.0 exposes the two-step v_interp_p1/p2 intrinsics; older LLVM
 * only has the combined llvm.SI.fs.interp, which takes i and j packed
 * into a <2 x i32> vector.
 */
LLVMValueRef
ac_build_fs_interp(struct ac_llvm_context *ctx,
		   LLVMValueRef llvm_chan,
		   LLVMValueRef attr_number,
		   LLVMValueRef params,
		   LLVMValueRef i,
		   LLVMValueRef j)
{
	LLVMValueRef args[5];
	LLVMValueRef p1;

	if (HAVE_LLVM < 0x0400) {
		/* Pack the f32 barycentrics bit-for-bit into i32 lanes. */
		LLVMValueRef ij[2];
		ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
		ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");

		args[0] = llvm_chan;
		args[1] = attr_number;
		args[2] = params;
		args[3] = ac_build_gather_values(ctx, ij, 2);
		return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp",
					      ctx->f32, args, 4,
					      AC_FUNC_ATTR_READNONE);
	}

	/* First step: p1 = interp with i. */
	args[0] = i;
	args[1] = llvm_chan;
	args[2] = attr_number;
	args[3] = params;

	p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1",
				    ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);

	/* Second step: combine p1 with j for the final value. */
	args[0] = p1;
	args[1] = j;
	args[2] = llvm_chan;
	args[3] = attr_number;
	args[4] = params;

	return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2",
				      ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
}
402
403 LLVMValueRef
404 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
405 LLVMValueRef parameter,
406 LLVMValueRef llvm_chan,
407 LLVMValueRef attr_number,
408 LLVMValueRef params)
409 {
410 LLVMValueRef args[4];
411 if (HAVE_LLVM < 0x0400) {
412 args[0] = llvm_chan;
413 args[1] = attr_number;
414 args[2] = params;
415
416 return ac_emit_llvm_intrinsic(ctx,
417 "llvm.SI.fs.constant",
418 ctx->f32, args, 3,
419 AC_FUNC_ATTR_READNONE);
420 }
421
422 args[0] = parameter;
423 args[1] = llvm_chan;
424 args[2] = attr_number;
425 args[3] = params;
426
427 return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov",
428 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
429 }
430
431 LLVMValueRef
432 ac_build_gep0(struct ac_llvm_context *ctx,
433 LLVMValueRef base_ptr,
434 LLVMValueRef index)
435 {
436 LLVMValueRef indices[2] = {
437 LLVMConstInt(ctx->i32, 0, 0),
438 index,
439 };
440 return LLVMBuildGEP(ctx->builder, base_ptr,
441 indices, 2, "");
442 }
443
/* Store \p value to &base_ptr[index]. */
void
ac_build_indexed_store(struct ac_llvm_context *ctx,
		       LLVMValueRef base_ptr, LLVMValueRef index,
		       LLVMValueRef value)
{
	LLVMBuildStore(ctx->builder, value,
		       ac_build_gep0(ctx, base_ptr, index));
}
452
453 /**
454 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
455 * It's equivalent to doing a load from &base_ptr[index].
456 *
457 * \param base_ptr Where the array starts.
458 * \param index The element index into the array.
459 * \param uniform Whether the base_ptr and index can be assumed to be
460 * dynamically uniform
461 */
462 LLVMValueRef
463 ac_build_indexed_load(struct ac_llvm_context *ctx,
464 LLVMValueRef base_ptr, LLVMValueRef index,
465 bool uniform)
466 {
467 LLVMValueRef pointer;
468
469 pointer = ac_build_gep0(ctx, base_ptr, index);
470 if (uniform)
471 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
472 return LLVMBuildLoad(ctx->builder, pointer, "");
473 }
474
475 /**
476 * Do a load from &base_ptr[index], but also add a flag that it's loading
477 * a constant from a dynamically uniform index.
478 */
479 LLVMValueRef
480 ac_build_indexed_load_const(struct ac_llvm_context *ctx,
481 LLVMValueRef base_ptr, LLVMValueRef index)
482 {
483 LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
484 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
485 return result;
486 }
487
488 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
489 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
490 * or v4i32 (num_channels=3,4).
491 */
492 void
493 ac_build_tbuffer_store(struct ac_llvm_context *ctx,
494 LLVMValueRef rsrc,
495 LLVMValueRef vdata,
496 unsigned num_channels,
497 LLVMValueRef vaddr,
498 LLVMValueRef soffset,
499 unsigned inst_offset,
500 unsigned dfmt,
501 unsigned nfmt,
502 unsigned offen,
503 unsigned idxen,
504 unsigned glc,
505 unsigned slc,
506 unsigned tfe)
507 {
508 LLVMValueRef args[] = {
509 rsrc,
510 vdata,
511 LLVMConstInt(ctx->i32, num_channels, 0),
512 vaddr,
513 soffset,
514 LLVMConstInt(ctx->i32, inst_offset, 0),
515 LLVMConstInt(ctx->i32, dfmt, 0),
516 LLVMConstInt(ctx->i32, nfmt, 0),
517 LLVMConstInt(ctx->i32, offen, 0),
518 LLVMConstInt(ctx->i32, idxen, 0),
519 LLVMConstInt(ctx->i32, glc, 0),
520 LLVMConstInt(ctx->i32, slc, 0),
521 LLVMConstInt(ctx->i32, tfe, 0)
522 };
523
524 /* The instruction offset field has 12 bits */
525 assert(offen || inst_offset < (1 << 12));
526
527 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
528 unsigned func = CLAMP(num_channels, 1, 3) - 1;
529 const char *types[] = {"i32", "v2i32", "v4i32"};
530 char name[256];
531 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
532
533 ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
534 args, ARRAY_SIZE(args), 0);
535 }
536
537 void
538 ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
539 LLVMValueRef rsrc,
540 LLVMValueRef vdata,
541 unsigned num_channels,
542 LLVMValueRef vaddr,
543 LLVMValueRef soffset,
544 unsigned inst_offset)
545 {
546 static unsigned dfmt[] = {
547 V_008F0C_BUF_DATA_FORMAT_32,
548 V_008F0C_BUF_DATA_FORMAT_32_32,
549 V_008F0C_BUF_DATA_FORMAT_32_32_32,
550 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
551 };
552 assert(num_channels >= 1 && num_channels <= 4);
553
554 ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
555 inst_offset, dfmt[num_channels - 1],
556 V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
557 }
558
/* Load 1, 2, or 4 dwords from a buffer resource (num_channels=3 shares the
 * 4-channel variant).
 *
 * With LLVM >= 3.9 the amdgcn buffer.load intrinsic is used and voffset/
 * soffset are folded into a single offset argument; older LLVM uses the
 * legacy SI intrinsic, which takes them separately together with explicit
 * offen/idxen flags.
 */
LLVMValueRef
ac_build_buffer_load(struct ac_llvm_context *ctx,
		     LLVMValueRef rsrc,
		     int num_channels,
		     LLVMValueRef vindex,
		     LLVMValueRef voffset,
		     LLVMValueRef soffset,
		     unsigned inst_offset,
		     unsigned glc,
		     unsigned slc)
{
	/* Selects the overload suffix/return type: 0 -> scalar, 1 -> vec2,
	 * 2 -> vec4. */
	unsigned func = CLAMP(num_channels, 1, 3) - 1;

	if (HAVE_LLVM >= 0x309) {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
			vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i1, glc, 0),
			LLVMConstInt(ctx->i1, slc, 0)
		};

		LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
				       ctx->v4f32};
		const char *type_names[] = {"f32", "v2f32", "v4f32"};
		char name[256];

		/* args[2] accumulates inst_offset + voffset + soffset. */
		if (voffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
					       "");
		}

		if (soffset) {
			args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
					       "");
		}

		snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
			 type_names[func]);

		return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
					      ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
	} else {
		LLVMValueRef args[] = {
			LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
			voffset ? voffset : vindex,
			soffset,
			LLVMConstInt(ctx->i32, inst_offset, 0),
			LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
			LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
			LLVMConstInt(ctx->i32, glc, 0),
			LLVMConstInt(ctx->i32, slc, 0),
			LLVMConstInt(ctx->i32, 0, 0), // TFE
		};

		LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
				       ctx->v4i32};
		const char *type_names[] = {"i32", "v2i32", "v4i32"};
		const char *arg_type = "i32";
		char name[256];

		/* When both are present the address is a packed
		 * <vindex, voffset> pair and the intrinsic name changes. */
		if (voffset && vindex) {
			LLVMValueRef vaddr[] = {vindex, voffset};

			arg_type = "v2i32";
			args[1] = ac_build_gather_values(ctx, vaddr, 2);
		}

		snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
			 type_names[func], arg_type);

		return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
					      ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
	}
}
634
635 /**
636 * Set range metadata on an instruction. This can only be used on load and
637 * call instructions. If you know an instruction can only produce the values
638 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
639 * \p lo is the minimum value inclusive.
640 * \p hi is the maximum value exclusive.
641 */
642 static void set_range_metadata(struct ac_llvm_context *ctx,
643 LLVMValueRef value, unsigned lo, unsigned hi)
644 {
645 LLVMValueRef range_md, md_args[2];
646 LLVMTypeRef type = LLVMTypeOf(value);
647 LLVMContextRef context = LLVMGetTypeContext(type);
648
649 md_args[0] = LLVMConstInt(type, lo, false);
650 md_args[1] = LLVMConstInt(type, hi, false);
651 range_md = LLVMMDNodeInContext(context, md_args, 2);
652 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
653 }
654
/* Return the thread's position within the wavefront (0..63), annotated
 * with range metadata so LLVM can optimize on it.
 *
 * On LLVM >= 3.8 this is computed via mbcnt.lo/hi with an all-ones mask,
 * i.e. counting all lanes below the current one; older LLVM has a
 * dedicated llvm.SI.tid intrinsic.
 */
LLVMValueRef
ac_get_thread_id(struct ac_llvm_context *ctx)
{
	LLVMValueRef tid;

	if (HAVE_LLVM < 0x0308) {
		tid = ac_emit_llvm_intrinsic(ctx, "llvm.SI.tid",
					     ctx->i32,
					     NULL, 0, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef tid_args[2];
		tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
		tid_args[1] = LLVMConstInt(ctx->i32, 0, false);
		/* mbcnt.lo counts the low 32 lanes; its result seeds
		 * mbcnt.hi, which adds the high 32 lanes. */
		tid_args[1] = ac_emit_llvm_intrinsic(ctx,
					"llvm.amdgcn.mbcnt.lo", ctx->i32,
					tid_args, 2, AC_FUNC_ATTR_READNONE);

		tid = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.mbcnt.hi",
					     ctx->i32, tid_args,
					     2, AC_FUNC_ATTR_READNONE);
	}
	/* Valid thread ids are 0..63 (hi bound is exclusive). */
	set_range_metadata(ctx, tid, 0, 64);
	return tid;
}
679
680 /*
681 * SI implements derivatives using the local data store (LDS)
682 * All writes to the LDS happen in all executing threads at
683 * the same time. TID is the Thread ID for the current
684 * thread and is a value between 0 and 63, representing
685 * the thread's position in the wavefront.
686 *
687 * For the pixel shader threads are grouped into quads of four pixels.
688 * The TIDs of the pixels of a quad are:
689 *
690 * +------+------+
691 * |4n + 0|4n + 1|
692 * +------+------+
693 * |4n + 2|4n + 3|
694 * +------+------+
695 *
696 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
697 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
698 * the current pixel's column, and masking with 0xfffffffe yields the TID
699 * of the left pixel of the current pixel's row.
700 *
701 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
702 * adding 2 yields the TID of the pixel below the top pixel.
703 */
/* Compute a screen-space derivative of \p val (bitcast to f32).
 *
 * \p mask selects the quad anchor tid (see the quad layout comment above),
 * \p idx is added to the anchor to reach the neighboring pixel, and the
 * result is neighbor - anchor. When ds_bpermute is available the values
 * are exchanged directly between lanes; otherwise \p lds provides scratch
 * space that all threads write to and read back from.
 */
LLVMValueRef
ac_emit_ddxy(struct ac_llvm_context *ctx,
	     bool has_ds_bpermute,
	     uint32_t mask,
	     int idx,
	     LLVMValueRef lds,
	     LLVMValueRef val)
{
	LLVMValueRef thread_id, tl, trbl, tl_tid, trbl_tid, args[2];
	LLVMValueRef result;

	thread_id = ac_get_thread_id(ctx);

	/* tid of the anchor pixel (top-left / top / left, per mask). */
	tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
			      LLVMConstInt(ctx->i32, mask, false), "");

	/* tid of the neighbor pixel (to the right or below). */
	trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
				LLVMConstInt(ctx->i32, idx, false), "");

	if (has_ds_bpermute) {
		/* ds_bpermute addresses lanes in bytes, hence the * 4. */
		args[0] = LLVMBuildMul(ctx->builder, tl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		args[1] = val;
		tl = ac_emit_llvm_intrinsic(ctx,
					    "llvm.amdgcn.ds.bpermute", ctx->i32,
					    args, 2, AC_FUNC_ATTR_READNONE);

		args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
				       LLVMConstInt(ctx->i32, 4, false), "");
		trbl = ac_emit_llvm_intrinsic(ctx,
					      "llvm.amdgcn.ds.bpermute", ctx->i32,
					      args, 2, AC_FUNC_ATTR_READNONE);
	} else {
		LLVMValueRef store_ptr, load_ptr0, load_ptr1;

		/* Each thread stores its value at its own tid, then reads
		 * the anchor and neighbor slots back. */
		store_ptr = ac_build_gep0(ctx, lds, thread_id);
		load_ptr0 = ac_build_gep0(ctx, lds, tl_tid);
		load_ptr1 = ac_build_gep0(ctx, lds, trbl_tid);

		LLVMBuildStore(ctx->builder, val, store_ptr);
		tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
		trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
	}

	tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
	trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
	result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
	return result;
}
753
754 void
755 ac_emit_sendmsg(struct ac_llvm_context *ctx,
756 uint32_t msg,
757 LLVMValueRef wave_id)
758 {
759 LLVMValueRef args[2];
760 const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.SI.sendmsg" : "llvm.amdgcn.s.sendmsg";
761 args[0] = LLVMConstInt(ctx->i32, msg, false);
762 args[1] = wave_id;
763 ac_emit_llvm_intrinsic(ctx, intr_name, ctx->voidt,
764 args, 2, 0);
765 }
766
/* Find the most significant bit that differs from the sign bit of a signed
 * i32 (flbit/sffbh semantics), with the result counted from the LSB as
 * NIR/TGSI expect. Returns -1 for inputs 0 and -1, which have no such bit.
 */
LLVMValueRef
ac_emit_imsb(struct ac_llvm_context *ctx,
	     LLVMValueRef arg,
	     LLVMTypeRef dst_type)
{
	const char *intr_name = (HAVE_LLVM < 0x0400) ? "llvm.AMDGPU.flbit.i32" :
						       "llvm.amdgcn.sffbh.i32";
	LLVMValueRef msb = ac_emit_llvm_intrinsic(ctx, intr_name,
						  dst_type, &arg, 1,
						  AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but NIR/TGSI wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* Special cases: arg == 0 and arg == -1 must both yield -1. */
	LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
	LLVMValueRef cond = LLVMBuildOr(ctx->builder,
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, LLVMConstInt(ctx->i32, 0, 0), ""),
					LLVMBuildICmp(ctx->builder, LLVMIntEQ,
						      arg, all_ones, ""), "");

	return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
}
792
/* Find the most significant set bit of an unsigned i32 via llvm.ctlz,
 * with the result counted from the LSB as TGSI/NIR expect.
 * Returns -1 when the input is 0.
 */
LLVMValueRef
ac_emit_umsb(struct ac_llvm_context *ctx,
	     LLVMValueRef arg,
	     LLVMTypeRef dst_type)
{
	LLVMValueRef args[2] = {
		arg,
		/* is_zero_undef = true: the zero case is handled by the
		 * select below. */
		LLVMConstInt(ctx->i1, 1, 0),
	};
	LLVMValueRef msb = ac_emit_llvm_intrinsic(ctx, "llvm.ctlz.i32",
						  dst_type, args, ARRAY_SIZE(args),
						  AC_FUNC_ATTR_READNONE);

	/* The HW returns the last bit index from MSB, but TGSI/NIR wants
	 * the index from LSB. Invert it by doing "31 - msb". */
	msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
			   msb, "");

	/* check for zero */
	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, LLVMIntEQ, arg,
					     LLVMConstInt(ctx->i32, 0, 0), ""),
			       LLVMConstInt(ctx->i32, -1, true), msb, "");
}
817
818 LLVMValueRef ac_emit_clamp(struct ac_llvm_context *ctx, LLVMValueRef value)
819 {
820 if (HAVE_LLVM >= 0x0500) {
821 LLVMValueRef max[2] = {
822 value,
823 LLVMConstReal(ctx->f32, 0),
824 };
825 LLVMValueRef min[2] = {
826 LLVMConstReal(ctx->f32, 1),
827 };
828
829 min[1] = ac_emit_llvm_intrinsic(ctx, "llvm.maxnum.f32",
830 ctx->f32, max, 2,
831 AC_FUNC_ATTR_READNONE);
832 return ac_emit_llvm_intrinsic(ctx, "llvm.minnum.f32",
833 ctx->f32, min, 2,
834 AC_FUNC_ATTR_READNONE);
835 }
836
837 const char *intr = HAVE_LLVM >= 0x0308 ? "llvm.AMDGPU.clamp." :
838 "llvm.AMDIL.clamp.";
839 LLVMValueRef args[3] = {
840 value,
841 LLVMConstReal(ctx->f32, 0),
842 LLVMConstReal(ctx->f32, 1),
843 };
844
845 return ac_emit_llvm_intrinsic(ctx, intr, ctx->f32, args, 3,
846 AC_FUNC_ATTR_READNONE);
847 }