bc309bd81c47d315fd6b7f6bf341862f50fbebd9
[mesa.git] / src / amd / common / ac_llvm_util.c
1 /*
2 * Copyright 2014 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25 /* based on pieces from si_pipe.c and radeon_llvm_emit.c */
26 #include "ac_llvm_util.h"
27
28 #include <llvm-c/Core.h>
29
30 #include "c11/threads.h"
31
32 #include <assert.h>
33 #include <stdio.h>
34
35 #include "util/bitscan.h"
36 #include "util/macros.h"
37
38 #include "sid.h"
39
/* Register the LLVM backend used to generate code for Radeon GPUs.
 *
 * LLVM releases older than 3.7 (HAVE_LLVM < 0x0307) expose the backend
 * under the "R600" name, newer ones under "AMDGPU".
 *
 * This is the callback handed to call_once() by ac_get_llvm_target(), so
 * it is declared with a proper (void) prototype to match the
 * void (*)(void) signature call_once() expects.
 */
static void ac_init_llvm_target(void)
{
#if HAVE_LLVM < 0x0307
	LLVMInitializeR600TargetInfo();
	LLVMInitializeR600Target();
	LLVMInitializeR600TargetMC();
	LLVMInitializeR600AsmPrinter();
#else
	LLVMInitializeAMDGPUTargetInfo();
	LLVMInitializeAMDGPUTarget();
	LLVMInitializeAMDGPUTargetMC();
	LLVMInitializeAMDGPUAsmPrinter();
#endif
}
54
55 static once_flag ac_init_llvm_target_once_flag = ONCE_FLAG_INIT;
56
57 static LLVMTargetRef ac_get_llvm_target(const char *triple)
58 {
59 LLVMTargetRef target = NULL;
60 char *err_message = NULL;
61
62 call_once(&ac_init_llvm_target_once_flag, ac_init_llvm_target);
63
64 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
65 fprintf(stderr, "Cannot find target for triple %s ", triple);
66 if (err_message) {
67 fprintf(stderr, "%s\n", err_message);
68 }
69 LLVMDisposeMessage(err_message);
70 return NULL;
71 }
72 return target;
73 }
74
75 static const char *ac_get_llvm_processor_name(enum radeon_family family)
76 {
77 switch (family) {
78 case CHIP_TAHITI:
79 return "tahiti";
80 case CHIP_PITCAIRN:
81 return "pitcairn";
82 case CHIP_VERDE:
83 return "verde";
84 case CHIP_OLAND:
85 return "oland";
86 case CHIP_HAINAN:
87 return "hainan";
88 case CHIP_BONAIRE:
89 return "bonaire";
90 case CHIP_KABINI:
91 return "kabini";
92 case CHIP_KAVERI:
93 return "kaveri";
94 case CHIP_HAWAII:
95 return "hawaii";
96 case CHIP_MULLINS:
97 return "mullins";
98 case CHIP_TONGA:
99 return "tonga";
100 case CHIP_ICELAND:
101 return "iceland";
102 case CHIP_CARRIZO:
103 return "carrizo";
104 #if HAVE_LLVM <= 0x0307
105 case CHIP_FIJI:
106 return "tonga";
107 case CHIP_STONEY:
108 return "carrizo";
109 #else
110 case CHIP_FIJI:
111 return "fiji";
112 case CHIP_STONEY:
113 return "stoney";
114 #endif
115 #if HAVE_LLVM <= 0x0308
116 case CHIP_POLARIS10:
117 return "tonga";
118 case CHIP_POLARIS11:
119 return "tonga";
120 #else
121 case CHIP_POLARIS10:
122 return "polaris10";
123 case CHIP_POLARIS11:
124 return "polaris11";
125 #endif
126 default:
127 return "";
128 }
129 }
130
131 LLVMTargetMachineRef ac_create_target_machine(enum radeon_family family, bool supports_spill)
132 {
133 assert(family >= CHIP_TAHITI);
134
135 const char *triple = supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--";
136 LLVMTargetRef target = ac_get_llvm_target(triple);
137 LLVMTargetMachineRef tm = LLVMCreateTargetMachine(
138 target,
139 triple,
140 ac_get_llvm_processor_name(family),
141 "+DumpCode,+vgpr-spilling",
142 LLVMCodeGenLevelDefault,
143 LLVMRelocDefault,
144 LLVMCodeModelDefault);
145
146 return tm;
147 }
148
149 /* Initialize module-independent parts of the context.
150 *
151 * The caller is responsible for initializing ctx::module and ctx::builder.
152 */
153 void
154 ac_llvm_context_init(struct ac_llvm_context *ctx, LLVMContextRef context)
155 {
156 LLVMValueRef args[1];
157
158 ctx->context = context;
159 ctx->module = NULL;
160 ctx->builder = NULL;
161
162 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
163 ctx->i1 = LLVMInt1TypeInContext(ctx->context);
164 ctx->i8 = LLVMInt8TypeInContext(ctx->context);
165 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
166 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
167 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
168 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
169 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
170
171 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
172 "invariant.load", 14);
173
174 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
175
176 args[0] = LLVMConstReal(ctx->f32, 2.5);
177 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
178
179 ctx->uniform_md_kind = LLVMGetMDKindIDInContext(ctx->context,
180 "amdgpu.uniform", 14);
181
182 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
183 }
184
185 #if HAVE_LLVM < 0x0400
186 static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
187 {
188 switch (attr) {
189 case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
190 case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
191 case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
192 case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
193 case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
194 case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
195 case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
196 default:
197 fprintf(stderr, "Unhandled function attribute: %x\n", attr);
198 return 0;
199 }
200 }
201
202 #else
203
204 static const char *attr_to_str(enum ac_func_attr attr)
205 {
206 switch (attr) {
207 case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
208 case AC_FUNC_ATTR_BYVAL: return "byval";
209 case AC_FUNC_ATTR_INREG: return "inreg";
210 case AC_FUNC_ATTR_NOALIAS: return "noalias";
211 case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
212 case AC_FUNC_ATTR_READNONE: return "readnone";
213 case AC_FUNC_ATTR_READONLY: return "readonly";
214 default:
215 fprintf(stderr, "Unhandled function attribute: %x\n", attr);
216 return 0;
217 }
218 }
219
220 #endif
221
222 void
223 ac_add_function_attr(LLVMValueRef function,
224 int attr_idx,
225 enum ac_func_attr attr)
226 {
227
228 #if HAVE_LLVM < 0x0400
229 LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
230 if (attr_idx == -1) {
231 LLVMAddFunctionAttr(function, llvm_attr);
232 } else {
233 LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
234 }
235 #else
236 LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
237 const char *attr_name = attr_to_str(attr);
238 unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
239 strlen(attr_name));
240 LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
241 LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
242 #endif
243 }
244
245 LLVMValueRef
246 ac_emit_llvm_intrinsic(struct ac_llvm_context *ctx, const char *name,
247 LLVMTypeRef return_type, LLVMValueRef *params,
248 unsigned param_count, unsigned attrib_mask)
249 {
250 LLVMValueRef function;
251
252 function = LLVMGetNamedFunction(ctx->module, name);
253 if (!function) {
254 LLVMTypeRef param_types[32], function_type;
255 unsigned i;
256
257 assert(param_count <= 32);
258
259 for (i = 0; i < param_count; ++i) {
260 assert(params[i]);
261 param_types[i] = LLVMTypeOf(params[i]);
262 }
263 function_type =
264 LLVMFunctionType(return_type, param_types, param_count, 0);
265 function = LLVMAddFunction(ctx->module, name, function_type);
266
267 LLVMSetFunctionCallConv(function, LLVMCCallConv);
268 LLVMSetLinkage(function, LLVMExternalLinkage);
269
270 attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
271 while (attrib_mask) {
272 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
273 ac_add_function_attr(function, -1, attr);
274 }
275 }
276 return LLVMBuildCall(ctx->builder, function, params, param_count, "");
277 }
278
279 LLVMValueRef
280 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
281 LLVMValueRef *values,
282 unsigned value_count,
283 unsigned value_stride,
284 bool load)
285 {
286 LLVMBuilderRef builder = ctx->builder;
287 LLVMValueRef vec;
288 unsigned i;
289
290
291 if (value_count == 1) {
292 if (load)
293 return LLVMBuildLoad(builder, values[0], "");
294 return values[0];
295 } else if (!value_count)
296 unreachable("value_count is 0");
297
298 for (i = 0; i < value_count; i++) {
299 LLVMValueRef value = values[i * value_stride];
300 if (load)
301 value = LLVMBuildLoad(builder, value, "");
302
303 if (!i)
304 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
305 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
306 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
307 }
308 return vec;
309 }
310
311 LLVMValueRef
312 ac_build_gather_values(struct ac_llvm_context *ctx,
313 LLVMValueRef *values,
314 unsigned value_count)
315 {
316 return ac_build_gather_values_extended(ctx, values, value_count, 1, false);
317 }
318
319 LLVMValueRef
320 ac_emit_fdiv(struct ac_llvm_context *ctx,
321 LLVMValueRef num,
322 LLVMValueRef den)
323 {
324 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
325
326 if (!LLVMIsConstant(ret))
327 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
328 return ret;
329 }
330
331 /* Coordinates for cube map selection. sc, tc, and ma are as in Table 8.27
332 * of the OpenGL 4.5 (Compatibility Profile) specification, except ma is
333 * already multiplied by two. id is the cube face number.
334 */
335 struct cube_selection_coords {
336 LLVMValueRef stc[2];
337 LLVMValueRef ma;
338 LLVMValueRef id;
339 };
340
341 static void
342 build_cube_intrinsic(struct ac_llvm_context *ctx,
343 LLVMValueRef in[3],
344 struct cube_selection_coords *out)
345 {
346 LLVMBuilderRef builder = ctx->builder;
347
348 if (HAVE_LLVM >= 0x0309) {
349 LLVMTypeRef f32 = ctx->f32;
350
351 out->stc[1] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
352 f32, in, 3, AC_FUNC_ATTR_READNONE);
353 out->stc[0] = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
354 f32, in, 3, AC_FUNC_ATTR_READNONE);
355 out->ma = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
356 f32, in, 3, AC_FUNC_ATTR_READNONE);
357 out->id = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
358 f32, in, 3, AC_FUNC_ATTR_READNONE);
359 } else {
360 LLVMValueRef c[4] = {
361 in[0],
362 in[1],
363 in[2],
364 LLVMGetUndef(LLVMTypeOf(in[0]))
365 };
366 LLVMValueRef vec = ac_build_gather_values(ctx, c, 4);
367
368 LLVMValueRef tmp =
369 ac_emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube",
370 LLVMTypeOf(vec), &vec, 1,
371 AC_FUNC_ATTR_READNONE);
372
373 out->stc[1] = LLVMBuildExtractElement(builder, tmp,
374 LLVMConstInt(ctx->i32, 0, 0), "");
375 out->stc[0] = LLVMBuildExtractElement(builder, tmp,
376 LLVMConstInt(ctx->i32, 1, 0), "");
377 out->ma = LLVMBuildExtractElement(builder, tmp,
378 LLVMConstInt(ctx->i32, 2, 0), "");
379 out->id = LLVMBuildExtractElement(builder, tmp,
380 LLVMConstInt(ctx->i32, 3, 0), "");
381 }
382 }
383
384 /**
385 * Build a manual selection sequence for cube face sc/tc coordinates and
386 * major axis vector (multiplied by 2 for consistency) for the given
387 * vec3 \p coords, for the face implied by \p selcoords.
388 *
389 * For the major axis, we always adjust the sign to be in the direction of
390 * selcoords.ma; i.e., a positive out_ma means that coords is pointed towards
391 * the selcoords major axis.
392 */
393 static void build_cube_select(LLVMBuilderRef builder,
394 const struct cube_selection_coords *selcoords,
395 const LLVMValueRef *coords,
396 LLVMValueRef *out_st,
397 LLVMValueRef *out_ma)
398 {
399 LLVMTypeRef f32 = LLVMTypeOf(coords[0]);
400 LLVMValueRef is_ma_positive;
401 LLVMValueRef sgn_ma;
402 LLVMValueRef is_ma_z, is_not_ma_z;
403 LLVMValueRef is_ma_y;
404 LLVMValueRef is_ma_x;
405 LLVMValueRef sgn;
406 LLVMValueRef tmp;
407
408 is_ma_positive = LLVMBuildFCmp(builder, LLVMRealUGE,
409 selcoords->ma, LLVMConstReal(f32, 0.0), "");
410 sgn_ma = LLVMBuildSelect(builder, is_ma_positive,
411 LLVMConstReal(f32, 1.0), LLVMConstReal(f32, -1.0), "");
412
413 is_ma_z = LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 4.0), "");
414 is_not_ma_z = LLVMBuildNot(builder, is_ma_z, "");
415 is_ma_y = LLVMBuildAnd(builder, is_not_ma_z,
416 LLVMBuildFCmp(builder, LLVMRealUGE, selcoords->id, LLVMConstReal(f32, 2.0), ""), "");
417 is_ma_x = LLVMBuildAnd(builder, is_not_ma_z, LLVMBuildNot(builder, is_ma_y, ""), "");
418
419 /* Select sc */
420 tmp = LLVMBuildSelect(builder, is_ma_z, coords[2], coords[0], "");
421 sgn = LLVMBuildSelect(builder, is_ma_y, LLVMConstReal(f32, 1.0),
422 LLVMBuildSelect(builder, is_ma_x, sgn_ma,
423 LLVMBuildFNeg(builder, sgn_ma, ""), ""), "");
424 out_st[0] = LLVMBuildFMul(builder, tmp, sgn, "");
425
426 /* Select tc */
427 tmp = LLVMBuildSelect(builder, is_ma_y, coords[2], coords[1], "");
428 sgn = LLVMBuildSelect(builder, is_ma_y, LLVMBuildFNeg(builder, sgn_ma, ""),
429 LLVMConstReal(f32, -1.0), "");
430 out_st[1] = LLVMBuildFMul(builder, tmp, sgn, "");
431
432 /* Select ma */
433 tmp = LLVMBuildSelect(builder, is_ma_z, coords[2],
434 LLVMBuildSelect(builder, is_ma_y, coords[1], coords[0], ""), "");
435 sgn = LLVMBuildSelect(builder, is_ma_positive,
436 LLVMConstReal(f32, 2.0), LLVMConstReal(f32, -2.0), "");
437 *out_ma = LLVMBuildFMul(builder, tmp, sgn, "");
438 }
439
440 void
441 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
442 bool is_deriv, bool is_array,
443 LLVMValueRef *coords_arg,
444 LLVMValueRef *derivs_arg)
445 {
446
447 LLVMBuilderRef builder = ctx->builder;
448 struct cube_selection_coords selcoords;
449 LLVMValueRef coords[3];
450 LLVMValueRef invma;
451
452 build_cube_intrinsic(ctx, coords_arg, &selcoords);
453
454 invma = ac_emit_llvm_intrinsic(ctx, "llvm.fabs.f32",
455 ctx->f32, &selcoords.ma, 1, AC_FUNC_ATTR_READNONE);
456 invma = ac_emit_fdiv(ctx, LLVMConstReal(ctx->f32, 1.0), invma);
457
458 for (int i = 0; i < 2; ++i)
459 coords[i] = LLVMBuildFMul(builder, selcoords.stc[i], invma, "");
460
461 coords[2] = selcoords.id;
462
463 if (is_deriv && derivs_arg) {
464 LLVMValueRef derivs[4];
465 int axis;
466
467 /* Convert cube derivatives to 2D derivatives. */
468 for (axis = 0; axis < 2; axis++) {
469 LLVMValueRef deriv_st[2];
470 LLVMValueRef deriv_ma;
471
472 /* Transform the derivative alongside the texture
473 * coordinate. Mathematically, the correct formula is
474 * as follows. Assume we're projecting onto the +Z face
475 * and denote by dx/dh the derivative of the (original)
476 * X texture coordinate with respect to horizontal
477 * window coordinates. The projection onto the +Z face
478 * plane is:
479 *
480 * f(x,z) = x/z
481 *
482 * Then df/dh = df/dx * dx/dh + df/dz * dz/dh
483 * = 1/z * dx/dh - x/z * 1/z * dz/dh.
484 *
485 * This motivatives the implementation below.
486 *
487 * Whether this actually gives the expected results for
488 * apps that might feed in derivatives obtained via
489 * finite differences is anyone's guess. The OpenGL spec
490 * seems awfully quiet about how textureGrad for cube
491 * maps should be handled.
492 */
493 build_cube_select(builder, &selcoords, &derivs_arg[axis * 3],
494 deriv_st, &deriv_ma);
495
496 deriv_ma = LLVMBuildFMul(builder, deriv_ma, invma, "");
497
498 for (int i = 0; i < 2; ++i)
499 derivs[axis * 2 + i] =
500 LLVMBuildFSub(builder,
501 LLVMBuildFMul(builder, deriv_st[i], invma, ""),
502 LLVMBuildFMul(builder, deriv_ma, coords[i], ""), "");
503 }
504
505 memcpy(derivs_arg, derivs, sizeof(derivs));
506 }
507
508 /* Shift the texture coordinate. This must be applied after the
509 * derivative calculation.
510 */
511 for (int i = 0; i < 2; ++i)
512 coords[i] = LLVMBuildFAdd(builder, coords[i], LLVMConstReal(ctx->f32, 1.5), "");
513
514 if (is_array) {
515 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
516 /* coords_arg.w component - array_index for cube arrays */
517 LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
518 coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
519 }
520
521 memcpy(coords_arg, coords, sizeof(coords));
522 }
523
524 void
525 ac_dump_module(LLVMModuleRef module)
526 {
527 char *str = LLVMPrintModuleToString(module);
528 fprintf(stderr, "%s", str);
529 LLVMDisposeMessage(str);
530 }
531
532 LLVMValueRef
533 ac_build_fs_interp(struct ac_llvm_context *ctx,
534 LLVMValueRef llvm_chan,
535 LLVMValueRef attr_number,
536 LLVMValueRef params,
537 LLVMValueRef i,
538 LLVMValueRef j)
539 {
540 LLVMValueRef args[5];
541 LLVMValueRef p1;
542
543 if (HAVE_LLVM < 0x0400) {
544 LLVMValueRef ij[2];
545 ij[0] = LLVMBuildBitCast(ctx->builder, i, ctx->i32, "");
546 ij[1] = LLVMBuildBitCast(ctx->builder, j, ctx->i32, "");
547
548 args[0] = llvm_chan;
549 args[1] = attr_number;
550 args[2] = params;
551 args[3] = ac_build_gather_values(ctx, ij, 2);
552 return ac_emit_llvm_intrinsic(ctx, "llvm.SI.fs.interp",
553 ctx->f32, args, 4,
554 AC_FUNC_ATTR_READNONE);
555 }
556
557 args[0] = i;
558 args[1] = llvm_chan;
559 args[2] = attr_number;
560 args[3] = params;
561
562 p1 = ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p1",
563 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
564
565 args[0] = p1;
566 args[1] = j;
567 args[2] = llvm_chan;
568 args[3] = attr_number;
569 args[4] = params;
570
571 return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.p2",
572 ctx->f32, args, 5, AC_FUNC_ATTR_READNONE);
573 }
574
575 LLVMValueRef
576 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
577 LLVMValueRef parameter,
578 LLVMValueRef llvm_chan,
579 LLVMValueRef attr_number,
580 LLVMValueRef params)
581 {
582 LLVMValueRef args[4];
583 if (HAVE_LLVM < 0x0400) {
584 args[0] = llvm_chan;
585 args[1] = attr_number;
586 args[2] = params;
587
588 return ac_emit_llvm_intrinsic(ctx,
589 "llvm.SI.fs.constant",
590 ctx->f32, args, 3,
591 AC_FUNC_ATTR_READNONE);
592 }
593
594 args[0] = parameter;
595 args[1] = llvm_chan;
596 args[2] = attr_number;
597 args[3] = params;
598
599 return ac_emit_llvm_intrinsic(ctx, "llvm.amdgcn.interp.mov",
600 ctx->f32, args, 4, AC_FUNC_ATTR_READNONE);
601 }
602
603 LLVMValueRef
604 ac_build_gep0(struct ac_llvm_context *ctx,
605 LLVMValueRef base_ptr,
606 LLVMValueRef index)
607 {
608 LLVMValueRef indices[2] = {
609 LLVMConstInt(ctx->i32, 0, 0),
610 index,
611 };
612 return LLVMBuildGEP(ctx->builder, base_ptr,
613 indices, 2, "");
614 }
615
616 void
617 ac_build_indexed_store(struct ac_llvm_context *ctx,
618 LLVMValueRef base_ptr, LLVMValueRef index,
619 LLVMValueRef value)
620 {
621 LLVMBuildStore(ctx->builder, value,
622 ac_build_gep0(ctx, base_ptr, index));
623 }
624
625 /**
626 * Build an LLVM bytecode indexed load using LLVMBuildGEP + LLVMBuildLoad.
627 * It's equivalent to doing a load from &base_ptr[index].
628 *
629 * \param base_ptr Where the array starts.
630 * \param index The element index into the array.
631 * \param uniform Whether the base_ptr and index can be assumed to be
632 * dynamically uniform
633 */
634 LLVMValueRef
635 ac_build_indexed_load(struct ac_llvm_context *ctx,
636 LLVMValueRef base_ptr, LLVMValueRef index,
637 bool uniform)
638 {
639 LLVMValueRef pointer;
640
641 pointer = ac_build_gep0(ctx, base_ptr, index);
642 if (uniform)
643 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
644 return LLVMBuildLoad(ctx->builder, pointer, "");
645 }
646
647 /**
648 * Do a load from &base_ptr[index], but also add a flag that it's loading
649 * a constant from a dynamically uniform index.
650 */
651 LLVMValueRef
652 ac_build_indexed_load_const(struct ac_llvm_context *ctx,
653 LLVMValueRef base_ptr, LLVMValueRef index)
654 {
655 LLVMValueRef result = ac_build_indexed_load(ctx, base_ptr, index, true);
656 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
657 return result;
658 }
659
660 /* TBUFFER_STORE_FORMAT_{X,XY,XYZ,XYZW} <- the suffix is selected by num_channels=1..4.
661 * The type of vdata must be one of i32 (num_channels=1), v2i32 (num_channels=2),
662 * or v4i32 (num_channels=3,4).
663 */
664 void
665 ac_build_tbuffer_store(struct ac_llvm_context *ctx,
666 LLVMValueRef rsrc,
667 LLVMValueRef vdata,
668 unsigned num_channels,
669 LLVMValueRef vaddr,
670 LLVMValueRef soffset,
671 unsigned inst_offset,
672 unsigned dfmt,
673 unsigned nfmt,
674 unsigned offen,
675 unsigned idxen,
676 unsigned glc,
677 unsigned slc,
678 unsigned tfe)
679 {
680 LLVMValueRef args[] = {
681 rsrc,
682 vdata,
683 LLVMConstInt(ctx->i32, num_channels, 0),
684 vaddr,
685 soffset,
686 LLVMConstInt(ctx->i32, inst_offset, 0),
687 LLVMConstInt(ctx->i32, dfmt, 0),
688 LLVMConstInt(ctx->i32, nfmt, 0),
689 LLVMConstInt(ctx->i32, offen, 0),
690 LLVMConstInt(ctx->i32, idxen, 0),
691 LLVMConstInt(ctx->i32, glc, 0),
692 LLVMConstInt(ctx->i32, slc, 0),
693 LLVMConstInt(ctx->i32, tfe, 0)
694 };
695
696 /* The instruction offset field has 12 bits */
697 assert(offen || inst_offset < (1 << 12));
698
699 /* The intrinsic is overloaded, we need to add a type suffix for overloading to work. */
700 unsigned func = CLAMP(num_channels, 1, 3) - 1;
701 const char *types[] = {"i32", "v2i32", "v4i32"};
702 char name[256];
703 snprintf(name, sizeof(name), "llvm.SI.tbuffer.store.%s", types[func]);
704
705 ac_emit_llvm_intrinsic(ctx, name, ctx->voidt,
706 args, ARRAY_SIZE(args), 0);
707 }
708
709 void
710 ac_build_tbuffer_store_dwords(struct ac_llvm_context *ctx,
711 LLVMValueRef rsrc,
712 LLVMValueRef vdata,
713 unsigned num_channels,
714 LLVMValueRef vaddr,
715 LLVMValueRef soffset,
716 unsigned inst_offset)
717 {
718 static unsigned dfmt[] = {
719 V_008F0C_BUF_DATA_FORMAT_32,
720 V_008F0C_BUF_DATA_FORMAT_32_32,
721 V_008F0C_BUF_DATA_FORMAT_32_32_32,
722 V_008F0C_BUF_DATA_FORMAT_32_32_32_32
723 };
724 assert(num_channels >= 1 && num_channels <= 4);
725
726 ac_build_tbuffer_store(ctx, rsrc, vdata, num_channels, vaddr, soffset,
727 inst_offset, dfmt[num_channels - 1],
728 V_008F0C_BUF_NUM_FORMAT_UINT, 1, 0, 1, 1, 0);
729 }
730
731 LLVMValueRef
732 ac_build_buffer_load(struct ac_llvm_context *ctx,
733 LLVMValueRef rsrc,
734 int num_channels,
735 LLVMValueRef vindex,
736 LLVMValueRef voffset,
737 LLVMValueRef soffset,
738 unsigned inst_offset,
739 unsigned glc,
740 unsigned slc)
741 {
742 unsigned func = CLAMP(num_channels, 1, 3) - 1;
743
744 if (HAVE_LLVM >= 0x309) {
745 LLVMValueRef args[] = {
746 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v4i32, ""),
747 vindex ? vindex : LLVMConstInt(ctx->i32, 0, 0),
748 LLVMConstInt(ctx->i32, inst_offset, 0),
749 LLVMConstInt(ctx->i1, glc, 0),
750 LLVMConstInt(ctx->i1, slc, 0)
751 };
752
753 LLVMTypeRef types[] = {ctx->f32, LLVMVectorType(ctx->f32, 2),
754 ctx->v4f32};
755 const char *type_names[] = {"f32", "v2f32", "v4f32"};
756 char name[256];
757
758 if (voffset) {
759 args[2] = LLVMBuildAdd(ctx->builder, args[2], voffset,
760 "");
761 }
762
763 if (soffset) {
764 args[2] = LLVMBuildAdd(ctx->builder, args[2], soffset,
765 "");
766 }
767
768 snprintf(name, sizeof(name), "llvm.amdgcn.buffer.load.%s",
769 type_names[func]);
770
771 return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
772 ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
773 } else {
774 LLVMValueRef args[] = {
775 LLVMBuildBitCast(ctx->builder, rsrc, ctx->v16i8, ""),
776 voffset ? voffset : vindex,
777 soffset,
778 LLVMConstInt(ctx->i32, inst_offset, 0),
779 LLVMConstInt(ctx->i32, voffset ? 1 : 0, 0), // offen
780 LLVMConstInt(ctx->i32, vindex ? 1 : 0, 0), //idxen
781 LLVMConstInt(ctx->i32, glc, 0),
782 LLVMConstInt(ctx->i32, slc, 0),
783 LLVMConstInt(ctx->i32, 0, 0), // TFE
784 };
785
786 LLVMTypeRef types[] = {ctx->i32, LLVMVectorType(ctx->i32, 2),
787 ctx->v4i32};
788 const char *type_names[] = {"i32", "v2i32", "v4i32"};
789 const char *arg_type = "i32";
790 char name[256];
791
792 if (voffset && vindex) {
793 LLVMValueRef vaddr[] = {vindex, voffset};
794
795 arg_type = "v2i32";
796 args[1] = ac_build_gather_values(ctx, vaddr, 2);
797 }
798
799 snprintf(name, sizeof(name), "llvm.SI.buffer.load.dword.%s.%s",
800 type_names[func], arg_type);
801
802 return ac_emit_llvm_intrinsic(ctx, name, types[func], args,
803 ARRAY_SIZE(args), AC_FUNC_ATTR_READONLY);
804 }
805 }