7f6139e5cddefb8e312f0c13d31437d5d10dd57e
[mesa.git] / src / amd / common / ac_llvm_build.h
/*
 * Copyright 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
25 #ifndef AC_LLVM_BUILD_H
26 #define AC_LLVM_BUILD_H
27
#include <stdbool.h>
#include <stdint.h>
#include <llvm-c/Core.h>
#include "compiler/nir/nir.h"
#include "amd_family.h"
32
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36
/*
 * Address-space identifiers (AC_ADDR_SPACE_*).
 *
 * Every value is assigned explicitly and the numbering is not contiguous:
 * the value 5 is not defined here.
 * NOTE(review): presumably these mirror the address-space numbers of the
 * LLVM AMDGPU backend - confirm there before changing any value.
 */
enum {
	AC_ADDR_SPACE_FLAT = 0, /* Slower than global. */
	AC_ADDR_SPACE_GLOBAL = 1,
	AC_ADDR_SPACE_GDS = 2,
	AC_ADDR_SPACE_LDS = 3,
	AC_ADDR_SPACE_CONST = 4, /* Global allowing SMEM. */
	AC_ADDR_SPACE_CONST_32BIT = 6, /* same as CONST, but the pointer type has 32 bits */
};
45
/*
 * Wait flags. Each flag is a distinct single bit, so several can be
 * combined with bitwise OR.
 * NOTE(review): presumably these are the values accepted by the
 * `wait_flags` parameter of ac_build_waitcnt() - confirm in its definition.
 */
#define AC_WAIT_LGKM	(1 << 0) /* LDS, GDS, constant, message */
#define AC_WAIT_VLOAD	(1 << 1) /* VMEM load/sample instructions */
#define AC_WAIT_VSTORE	(1 << 2) /* VMEM store instructions */
49
50 struct ac_llvm_flow;
51
52 struct ac_llvm_context {
53 LLVMContextRef context;
54 LLVMModuleRef module;
55 LLVMBuilderRef builder;
56
57 LLVMTypeRef voidt;
58 LLVMTypeRef i1;
59 LLVMTypeRef i8;
60 LLVMTypeRef i16;
61 LLVMTypeRef i32;
62 LLVMTypeRef i64;
63 LLVMTypeRef intptr;
64 LLVMTypeRef f16;
65 LLVMTypeRef f32;
66 LLVMTypeRef f64;
67 LLVMTypeRef v2i16;
68 LLVMTypeRef v2i32;
69 LLVMTypeRef v3i32;
70 LLVMTypeRef v4i32;
71 LLVMTypeRef v2f32;
72 LLVMTypeRef v3f32;
73 LLVMTypeRef v4f32;
74 LLVMTypeRef v8i32;
75
76 LLVMValueRef i8_0;
77 LLVMValueRef i8_1;
78 LLVMValueRef i16_0;
79 LLVMValueRef i16_1;
80 LLVMValueRef i32_0;
81 LLVMValueRef i32_1;
82 LLVMValueRef i64_0;
83 LLVMValueRef i64_1;
84 LLVMValueRef f16_0;
85 LLVMValueRef f16_1;
86 LLVMValueRef f32_0;
87 LLVMValueRef f32_1;
88 LLVMValueRef f64_0;
89 LLVMValueRef f64_1;
90 LLVMValueRef i1true;
91 LLVMValueRef i1false;
92
93 struct ac_llvm_flow *flow;
94 unsigned flow_depth;
95 unsigned flow_depth_max;
96
97 unsigned range_md_kind;
98 unsigned invariant_load_md_kind;
99 unsigned uniform_md_kind;
100 unsigned fpmath_md_kind;
101 LLVMValueRef fpmath_md_2p5_ulp;
102 LLVMValueRef empty_md;
103
104 enum chip_class chip_class;
105 enum radeon_family family;
106
107 LLVMValueRef lds;
108 };
109
/*
 * Context set-up and tear-down entry points.
 * NOTE(review): behaviour inferred from the names only; the definitions are
 * not part of this header.
 */
void
ac_llvm_context_init(struct ac_llvm_context *ctx,
	enum chip_class chip_class, enum radeon_family family);

void
ac_llvm_context_dispose(struct ac_llvm_context *ctx);
116
117 int
118 ac_get_llvm_num_components(LLVMValueRef value);
119
120 int
121 ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
122
123 LLVMValueRef
124 ac_llvm_extract_elem(struct ac_llvm_context *ac,
125 LLVMValueRef value,
126 int index);
127
128 unsigned ac_get_type_size(LLVMTypeRef type);
129
130 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
131 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
132 LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
133 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
134 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
135
136 LLVMValueRef
137 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
138 LLVMTypeRef return_type, LLVMValueRef *params,
139 unsigned param_count, unsigned attrib_mask);
140
141 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);
142
143 LLVMValueRef
144 ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
145 unsigned count_incoming, LLVMValueRef *values,
146 LLVMBasicBlockRef *blocks);
147
148 void ac_build_s_barrier(struct ac_llvm_context *ctx);
149 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
150 LLVMValueRef *pvgpr);
151
152 LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
153
154 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
155 LLVMValueRef ac_get_i1_sgpr_mask(struct ac_llvm_context *ctx,
156 LLVMValueRef value);
157
158 LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
159
160 LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
161
162 LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
163
164 LLVMValueRef
165 ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
166 unsigned value_count, unsigned component);
167
168 LLVMValueRef
169 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
170 LLVMValueRef *values,
171 unsigned value_count,
172 unsigned value_stride,
173 bool load,
174 bool always_vector);
175 LLVMValueRef
176 ac_build_gather_values(struct ac_llvm_context *ctx,
177 LLVMValueRef *values,
178 unsigned value_count);
179 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
180 LLVMValueRef value,
181 unsigned num_channels);
182 LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
183
184 LLVMValueRef
185 ac_build_fdiv(struct ac_llvm_context *ctx,
186 LLVMValueRef num,
187 LLVMValueRef den);
188
189 LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
190 LLVMValueRef num,
191 LLVMValueRef multiplier,
192 LLVMValueRef pre_shift,
193 LLVMValueRef post_shift,
194 LLVMValueRef increment);
195 LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
196 LLVMValueRef num,
197 LLVMValueRef multiplier,
198 LLVMValueRef pre_shift,
199 LLVMValueRef post_shift,
200 LLVMValueRef increment);
201 LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
202 LLVMValueRef num,
203 LLVMValueRef multiplier,
204 LLVMValueRef post_shift);
205
206 void
207 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
208 bool is_deriv, bool is_array, bool is_lod,
209 LLVMValueRef *coords_arg,
210 LLVMValueRef *derivs_arg);
211
212
213 LLVMValueRef
214 ac_build_fs_interp(struct ac_llvm_context *ctx,
215 LLVMValueRef llvm_chan,
216 LLVMValueRef attr_number,
217 LLVMValueRef params,
218 LLVMValueRef i,
219 LLVMValueRef j);
220
221 LLVMValueRef
222 ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
223 LLVMValueRef llvm_chan,
224 LLVMValueRef attr_number,
225 LLVMValueRef params,
226 LLVMValueRef i,
227 LLVMValueRef j);
228
229 LLVMValueRef
230 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
231 LLVMValueRef parameter,
232 LLVMValueRef llvm_chan,
233 LLVMValueRef attr_number,
234 LLVMValueRef params);
235
236 LLVMValueRef
237 ac_build_gep_ptr(struct ac_llvm_context *ctx,
238 LLVMValueRef base_ptr,
239 LLVMValueRef index);
240
241 LLVMValueRef
242 ac_build_gep0(struct ac_llvm_context *ctx,
243 LLVMValueRef base_ptr,
244 LLVMValueRef index);
245 LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
246 LLVMValueRef index);
247
248 void
249 ac_build_indexed_store(struct ac_llvm_context *ctx,
250 LLVMValueRef base_ptr, LLVMValueRef index,
251 LLVMValueRef value);
252
253 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
254 LLVMValueRef index);
255 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
256 LLVMValueRef base_ptr, LLVMValueRef index);
257 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
258 LLVMValueRef base_ptr, LLVMValueRef index);
259 LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
260 LLVMValueRef base_ptr, LLVMValueRef index);
261
262 void
263 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
264 LLVMValueRef rsrc,
265 LLVMValueRef vdata,
266 unsigned num_channels,
267 LLVMValueRef voffset,
268 LLVMValueRef soffset,
269 unsigned inst_offset,
270 unsigned cache_policy,
271 bool swizzle_enable_hint);
272
273 void
274 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
275 LLVMValueRef rsrc,
276 LLVMValueRef data,
277 LLVMValueRef vindex,
278 LLVMValueRef voffset,
279 unsigned num_channels,
280 unsigned cache_policy);
281
282 LLVMValueRef
283 ac_build_buffer_load(struct ac_llvm_context *ctx,
284 LLVMValueRef rsrc,
285 int num_channels,
286 LLVMValueRef vindex,
287 LLVMValueRef voffset,
288 LLVMValueRef soffset,
289 unsigned inst_offset,
290 unsigned cache_policy,
291 bool can_speculate,
292 bool allow_smem);
293
294 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
295 LLVMValueRef rsrc,
296 LLVMValueRef vindex,
297 LLVMValueRef voffset,
298 unsigned num_channels,
299 unsigned cache_policy,
300 bool can_speculate);
301
302 /* load_format that handles the stride & element count better if idxen is
303 * disabled by LLVM. */
304 LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
305 LLVMValueRef rsrc,
306 LLVMValueRef vindex,
307 LLVMValueRef voffset,
308 unsigned num_channels,
309 unsigned cache_policy,
310 bool can_speculate);
311
312 LLVMValueRef
313 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
314 LLVMValueRef rsrc,
315 LLVMValueRef voffset,
316 LLVMValueRef soffset,
317 LLVMValueRef immoffset,
318 unsigned cache_policy);
319
320 LLVMValueRef
321 ac_build_tbuffer_load_byte(struct ac_llvm_context *ctx,
322 LLVMValueRef rsrc,
323 LLVMValueRef voffset,
324 LLVMValueRef soffset,
325 LLVMValueRef immoffset,
326 unsigned cache_policy);
327
328 LLVMValueRef
329 ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
330 LLVMValueRef rsrc,
331 LLVMValueRef vindex,
332 LLVMValueRef voffset,
333 LLVMValueRef soffset,
334 LLVMValueRef immoffset,
335 unsigned num_channels,
336 unsigned dfmt,
337 unsigned nfmt,
338 unsigned cache_policy,
339 bool can_speculate);
340
341 LLVMValueRef
342 ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
343 LLVMValueRef rsrc,
344 LLVMValueRef voffset,
345 LLVMValueRef soffset,
346 LLVMValueRef immoffset,
347 unsigned num_channels,
348 unsigned dfmt,
349 unsigned nfmt,
350 unsigned cache_policy,
351 bool can_speculate);
352
/* For ac_build_fetch_format.
 *
 * Note: FLOAT must be 0 (used for convenience of encoding in radeonsi).
 *
 * Only FLOAT has an explicit value; the remaining enumerators follow
 * sequentially (FIXED = 1 ... SINT = 7), so reordering them changes their
 * values.
 */
enum {
	AC_FETCH_FORMAT_FLOAT = 0,
	AC_FETCH_FORMAT_FIXED,
	AC_FETCH_FORMAT_UNORM,
	AC_FETCH_FORMAT_SNORM,
	AC_FETCH_FORMAT_USCALED,
	AC_FETCH_FORMAT_SSCALED,
	AC_FETCH_FORMAT_UINT,
	AC_FETCH_FORMAT_SINT,
};
367
368 LLVMValueRef
369 ac_build_opencoded_load_format(struct ac_llvm_context *ctx,
370 unsigned log_size,
371 unsigned num_channels,
372 unsigned format,
373 bool reverse,
374 bool known_aligned,
375 LLVMValueRef rsrc,
376 LLVMValueRef vindex,
377 LLVMValueRef voffset,
378 LLVMValueRef soffset,
379 unsigned cache_policy,
380 bool can_speculate);
381
382 void
383 ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
384 LLVMValueRef rsrc,
385 LLVMValueRef vdata,
386 LLVMValueRef voffset,
387 LLVMValueRef soffset,
388 unsigned cache_policy);
389
390 void
391 ac_build_tbuffer_store_byte(struct ac_llvm_context *ctx,
392 LLVMValueRef rsrc,
393 LLVMValueRef vdata,
394 LLVMValueRef voffset,
395 LLVMValueRef soffset,
396 unsigned cache_policy);
397
398 void
399 ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
400 LLVMValueRef rsrc,
401 LLVMValueRef vdata,
402 LLVMValueRef vindex,
403 LLVMValueRef voffset,
404 LLVMValueRef soffset,
405 LLVMValueRef immoffset,
406 unsigned num_channels,
407 unsigned dfmt,
408 unsigned nfmt,
409 unsigned cache_policy);
410
411 void
412 ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
413 LLVMValueRef rsrc,
414 LLVMValueRef vdata,
415 LLVMValueRef voffset,
416 LLVMValueRef soffset,
417 LLVMValueRef immoffset,
418 unsigned num_channels,
419 unsigned dfmt,
420 unsigned nfmt,
421 unsigned cache_policy);
422
423 LLVMValueRef
424 ac_get_thread_id(struct ac_llvm_context *ctx);
425
/*
 * 32-bit thread-id masks: TOP_LEFT clears bits 0 and 1, TOP clears bit 1
 * only, LEFT clears bit 0 only.
 * NOTE(review): presumably intended for the `mask` parameter of
 * ac_build_ddxy() - confirm against its callers.
 */
#define AC_TID_MASK_TOP_LEFT	0xfffffffc
#define AC_TID_MASK_TOP		0xfffffffd
#define AC_TID_MASK_LEFT	0xfffffffe
429
430 LLVMValueRef
431 ac_build_ddxy(struct ac_llvm_context *ctx,
432 uint32_t mask,
433 int idx,
434 LLVMValueRef val);
435
/*
 * Message ids. All three fit in the low four bits.
 */
#define AC_SENDMSG_GS		2
#define AC_SENDMSG_GS_DONE	3
#define AC_SENDMSG_GS_ALLOC_REQ	9

/*
 * GS operation codes, stored from bit 4 upwards so they do not overlap the
 * message ids above.
 * NOTE(review): presumably OR'ed with a message id to form the `msg`
 * argument of ac_build_sendmsg() - confirm against its callers.
 */
#define AC_SENDMSG_GS_OP_NOP		(0 << 4)
#define AC_SENDMSG_GS_OP_CUT		(1 << 4)
#define AC_SENDMSG_GS_OP_EMIT		(2 << 4)
#define AC_SENDMSG_GS_OP_EMIT_CUT	(3 << 4)
444
445 void ac_build_sendmsg(struct ac_llvm_context *ctx,
446 uint32_t msg,
447 LLVMValueRef wave_id);
448
449 LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
450 LLVMValueRef arg,
451 LLVMTypeRef dst_type);
452
453 LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
454 LLVMValueRef arg,
455 LLVMTypeRef dst_type);
456 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
457 LLVMValueRef b);
458 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
459 LLVMValueRef b);
460 LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
461 LLVMValueRef b);
462 LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
463 LLVMValueRef b);
464 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
465 LLVMValueRef ac_build_umax(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
466 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
467
468 struct ac_export_args {
469 LLVMValueRef out[4];
470 unsigned target;
471 unsigned enabled_channels;
472 bool compr;
473 bool done;
474 bool valid_mask;
475 };
476
/* Export builders: one driven by an ac_export_args bundle, one taking no arguments besides the context. */
void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);

void ac_build_export_null(struct ac_llvm_context *ctx);
480
/*
 * Image operation selector, stored in ac_image_args::opcode (a 4-bit
 * field). Values are implicit and run from 0 (ac_image_sample) to 9
 * (ac_image_atomic_cmpswap).
 */
enum ac_image_opcode {
	ac_image_sample,
	ac_image_gather4,
	ac_image_load,
	ac_image_load_mip,
	ac_image_store,
	ac_image_store_mip,
	ac_image_get_lod,
	ac_image_get_resinfo,
	ac_image_atomic,
	ac_image_atomic_cmpswap,
};
493
/*
 * Atomic operation selector, stored in ac_image_args::atomic (a 4-bit
 * field). Values are implicit and run from 0 (ac_atomic_swap) to 9
 * (ac_atomic_xor).
 */
enum ac_atomic_op {
	ac_atomic_swap,
	ac_atomic_add,
	ac_atomic_sub,
	ac_atomic_smin,
	ac_atomic_umin,
	ac_atomic_smax,
	ac_atomic_umax,
	ac_atomic_and,
	ac_atomic_or,
	ac_atomic_xor,
};
506
/*
 * Image dimensionality, stored in ac_image_args::dim (a 3-bit field).
 * Values are implicit and run from 0 (ac_image_1d) to 7
 * (ac_image_2darraymsaa), so the eight enumerators use every 3-bit code.
 */
enum ac_image_dim {
	ac_image_1d,
	ac_image_2d,
	ac_image_3d,
	ac_image_cube, /* includes cube arrays */
	ac_image_1darray,
	ac_image_2darray,
	ac_image_2dmsaa,
	ac_image_2darraymsaa,
};
517
/* These cache policy bits match the definitions used by the LLVM intrinsics.
 *
 * Each enumerator is a distinct single bit, so they can be combined with
 * bitwise OR; the combined mask fits in three bits.
 */
enum ac_image_cache_policy {
	ac_glc = 1 << 0, /* per-CU cache control */
	ac_slc = 1 << 1, /* global L2 cache control */
	ac_dlc = 1 << 2, /* per-shader-array cache control */
};
524
525 struct ac_image_args {
526 enum ac_image_opcode opcode : 4;
527 enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
528 enum ac_image_dim dim : 3;
529 unsigned dmask : 4;
530 unsigned cache_policy : 3;
531 bool unorm : 1;
532 bool level_zero : 1;
533 unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
534
535 LLVMValueRef resource;
536 LLVMValueRef sampler;
537 LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
538 LLVMValueRef offset;
539 LLVMValueRef bias;
540 LLVMValueRef compare;
541 LLVMValueRef derivs[6];
542 LLVMValueRef coords[4];
543 LLVMValueRef lod; // also used by ac_image_get_resinfo
544 };
545
546 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
547 struct ac_image_args *a);
548 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
549 LLVMValueRef args[2]);
550 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
551 LLVMValueRef args[2]);
552 LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
553 LLVMValueRef args[2]);
554 LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
555 LLVMValueRef args[2], unsigned bits, bool hi);
556 LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
557 LLVMValueRef args[2], unsigned bits, bool hi);
558 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
559 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
560 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
561 LLVMValueRef offset, LLVMValueRef width,
562 bool is_signed);
563 LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
564 LLVMValueRef s1, LLVMValueRef s2);
565 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
566 LLVMValueRef s1, LLVMValueRef s2);
567
568 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned wait_flags);
569
570 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
571 unsigned bitsize);
572
573 LLVMValueRef ac_build_fmed3(struct ac_llvm_context *ctx, LLVMValueRef src0,
574 LLVMValueRef src1, LLVMValueRef src2,
575 unsigned bitsize);
576
577 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
578 unsigned bitsize);
579
580 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
581 unsigned bitsize);
582
583 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
584
585 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
586 LLVMValueRef src0);
587
588 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
589 LLVMValueRef main_fn,
590 uint8_t *vs_output_param_offset,
591 uint32_t num_outputs,
592 uint8_t *num_param_exports);
593 void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
594
595 void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
596 LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
597 LLVMValueRef dw_addr);
598 void ac_lds_store(struct ac_llvm_context *ctx,
599 LLVMValueRef dw_addr, LLVMValueRef value);
600
601 LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
602 LLVMTypeRef dst_type,
603 LLVMValueRef src0);
604
605 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
606 LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
607
608 void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
609 void ac_build_break(struct ac_llvm_context *ctx);
610 void ac_build_continue(struct ac_llvm_context *ctx);
611 void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
612 void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
613 void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
614 void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
615 void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
616 int lable_id);
617 void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
618 int lable_id);
619
620 LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
621 const char *name);
622 LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
623 const char *name);
624
625 LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
626 LLVMTypeRef type);
627
628 LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
629 unsigned count);
630
631 LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
632 unsigned rshift, unsigned bitwidth);
633
634 void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
635 LLVMValueRef *addr, bool is_array_tex);
636
637 LLVMValueRef
638 ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
639
640 LLVMValueRef
641 ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
642
643 LLVMValueRef
644 ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
645
646 LLVMValueRef
647 ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
648
649 LLVMValueRef
650 ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
651
652 LLVMValueRef
653 ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
654
655 LLVMValueRef
656 ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
657
658 /**
659 * Common arguments for a scan/reduce operation that accumulates per-wave
660 * values across an entire workgroup, while respecting the order of waves.
661 */
662 struct ac_wg_scan {
663 bool enable_reduce;
664 bool enable_exclusive;
665 bool enable_inclusive;
666 nir_op op;
667 LLVMValueRef src; /* clobbered! */
668 LLVMValueRef result_reduce;
669 LLVMValueRef result_exclusive;
670 LLVMValueRef result_inclusive;
671 LLVMValueRef extra;
672 LLVMValueRef waveidx;
673 LLVMValueRef numwaves; /* only needed for "reduce" operations */
674
675 /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
676 LLVMValueRef scratch;
677 unsigned maxwaves;
678 };
679
/*
 * Workgroup scan builders, all driven by a struct ac_wg_scan. Each of the
 * two families (wavescan, scan) has a `_top` half, a `_bottom` half and a
 * single-call form.
 * NOTE(review): presumably the single-call form is equivalent to `_top`
 * followed by `_bottom`, and the split exists so callers can place code in
 * between - confirm in the definitions.
 */
void
ac_build_wg_wavescan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
void
ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
void
ac_build_wg_wavescan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);

void
ac_build_wg_scan_top(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
void
ac_build_wg_scan_bottom(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
void
ac_build_wg_scan(struct ac_llvm_context *ctx, struct ac_wg_scan *ws);
693
694 LLVMValueRef
695 ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
696 unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
697
698 LLVMValueRef
699 ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
700
701 LLVMValueRef
702 ac_build_frexp_exp(struct ac_llvm_context *ctx, LLVMValueRef src0,
703 unsigned bitsize);
704
705 LLVMValueRef
706 ac_build_frexp_mant(struct ac_llvm_context *ctx, LLVMValueRef src0,
707 unsigned bitsize);
708
709 LLVMValueRef
710 ac_build_ddxy_interp(struct ac_llvm_context *ctx, LLVMValueRef interp_ij);
711
712 LLVMValueRef
713 ac_build_load_helper_invocation(struct ac_llvm_context *ctx);
714
715 LLVMValueRef ac_build_call(struct ac_llvm_context *ctx, LLVMValueRef func,
716 LLVMValueRef *args, unsigned num_args);
717
718 LLVMValueRef ac_build_atomic_rmw(struct ac_llvm_context *ctx, LLVMAtomicRMWBinOp op,
719 LLVMValueRef ptr, LLVMValueRef val,
720 const char *sync_scope);
721
722 LLVMValueRef ac_build_atomic_cmp_xchg(struct ac_llvm_context *ctx, LLVMValueRef ptr,
723 LLVMValueRef cmp, LLVMValueRef val,
724 const char *sync_scope);
725
726 #ifdef __cplusplus
727 }
728 #endif
729
730 #endif