ac: add various int8 definitions
[mesa.git] / src / amd / common / ac_llvm_build.h
1 /*
2 * Copyright 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sub license, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
13 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
14 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
15 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
16 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
17 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
18 * USE OR OTHER DEALINGS IN THE SOFTWARE.
19 *
20 * The above copyright notice and this permission notice (including the
21 * next paragraph) shall be included in all copies or substantial portions
22 * of the Software.
23 *
24 */
25 #ifndef AC_LLVM_BUILD_H
26 #define AC_LLVM_BUILD_H
27
28 #include <stdbool.h>
29 #include <llvm-c/TargetMachine.h>
30 #include "compiler/nir/nir.h"
31 #include "amd_family.h"
32
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36
/*
 * Numeric address-space identifiers.
 * Note that the value 5 is not assigned to any enumerator here.
 */
enum {
	/* Slower than global. */
	AC_ADDR_SPACE_FLAT = 0,
	AC_ADDR_SPACE_GLOBAL = 1,
	AC_ADDR_SPACE_GDS = 2,
	AC_ADDR_SPACE_LDS = 3,
	/* Global allowing SMEM. */
	AC_ADDR_SPACE_CONST = 4,
	/* Same as CONST, but the pointer type has 32 bits. */
	AC_ADDR_SPACE_CONST_32BIT = 6,
};
45
/*
 * Wait-counter masks. Combine these with & instead of |: each mask other
 * than NOOP_WAITCNT equals NOOP_WAITCNT with one group of bits cleared.
 */
#define NOOP_WAITCNT	0xcf7f
/* NOOP_WAITCNT with bits [8:11] cleared. */
#define LGKM_CNT	0xc07f
/* NOOP_WAITCNT with bits [4:6] cleared. */
#define EXP_CNT		0xcf0f
/* On GFX9, vmcnt has 6 bits in [0:3] and [14:15]; both ranges are cleared. */
#define VM_CNT		0x0f70
51
52 struct ac_llvm_flow;
53
54 struct ac_llvm_context {
55 LLVMContextRef context;
56 LLVMModuleRef module;
57 LLVMBuilderRef builder;
58
59 LLVMTypeRef voidt;
60 LLVMTypeRef i1;
61 LLVMTypeRef i8;
62 LLVMTypeRef i16;
63 LLVMTypeRef i32;
64 LLVMTypeRef i64;
65 LLVMTypeRef intptr;
66 LLVMTypeRef f16;
67 LLVMTypeRef f32;
68 LLVMTypeRef f64;
69 LLVMTypeRef v2i16;
70 LLVMTypeRef v2i32;
71 LLVMTypeRef v3i32;
72 LLVMTypeRef v4i32;
73 LLVMTypeRef v2f32;
74 LLVMTypeRef v4f32;
75 LLVMTypeRef v8i32;
76
77 LLVMValueRef i8_0;
78 LLVMValueRef i8_1;
79 LLVMValueRef i16_0;
80 LLVMValueRef i16_1;
81 LLVMValueRef i32_0;
82 LLVMValueRef i32_1;
83 LLVMValueRef i64_0;
84 LLVMValueRef i64_1;
85 LLVMValueRef f32_0;
86 LLVMValueRef f32_1;
87 LLVMValueRef f64_0;
88 LLVMValueRef f64_1;
89 LLVMValueRef i1true;
90 LLVMValueRef i1false;
91
92 struct ac_llvm_flow *flow;
93 unsigned flow_depth;
94 unsigned flow_depth_max;
95
96 unsigned range_md_kind;
97 unsigned invariant_load_md_kind;
98 unsigned uniform_md_kind;
99 unsigned fpmath_md_kind;
100 LLVMValueRef fpmath_md_2p5_ulp;
101 LLVMValueRef empty_md;
102
103 enum chip_class chip_class;
104 enum radeon_family family;
105
106 LLVMValueRef lds;
107 };
108
/*
 * Set up / tear down an ac_llvm_context. Initialization takes the target
 * chip class and family. Both functions are defined outside this header.
 */
void ac_llvm_context_init(struct ac_llvm_context *ctx,
			  enum chip_class chip_class,
			  enum radeon_family family);

void ac_llvm_context_dispose(struct ac_llvm_context *ctx);
115
116 int
117 ac_get_llvm_num_components(LLVMValueRef value);
118
119 int
120 ac_get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type);
121
122 LLVMValueRef
123 ac_llvm_extract_elem(struct ac_llvm_context *ac,
124 LLVMValueRef value,
125 int index);
126
127 unsigned ac_get_type_size(LLVMTypeRef type);
128
129 LLVMTypeRef ac_to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
130 LLVMValueRef ac_to_integer(struct ac_llvm_context *ctx, LLVMValueRef v);
131 LLVMValueRef ac_to_integer_or_pointer(struct ac_llvm_context *ctx, LLVMValueRef v);
132 LLVMTypeRef ac_to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t);
133 LLVMValueRef ac_to_float(struct ac_llvm_context *ctx, LLVMValueRef v);
134
135 LLVMValueRef
136 ac_build_intrinsic(struct ac_llvm_context *ctx, const char *name,
137 LLVMTypeRef return_type, LLVMValueRef *params,
138 unsigned param_count, unsigned attrib_mask);
139
140 void ac_build_type_name_for_intr(LLVMTypeRef type, char *buf, unsigned bufsize);
141
142 LLVMValueRef
143 ac_build_phi(struct ac_llvm_context *ctx, LLVMTypeRef type,
144 unsigned count_incoming, LLVMValueRef *values,
145 LLVMBasicBlockRef *blocks);
146
147 void ac_build_s_barrier(struct ac_llvm_context *ctx);
148 void ac_build_optimization_barrier(struct ac_llvm_context *ctx,
149 LLVMValueRef *pvgpr);
150
151 LLVMValueRef ac_build_shader_clock(struct ac_llvm_context *ctx);
152
153 LLVMValueRef ac_build_ballot(struct ac_llvm_context *ctx, LLVMValueRef value);
154
155 LLVMValueRef ac_build_vote_all(struct ac_llvm_context *ctx, LLVMValueRef value);
156
157 LLVMValueRef ac_build_vote_any(struct ac_llvm_context *ctx, LLVMValueRef value);
158
159 LLVMValueRef ac_build_vote_eq(struct ac_llvm_context *ctx, LLVMValueRef value);
160
161 LLVMValueRef
162 ac_build_varying_gather_values(struct ac_llvm_context *ctx, LLVMValueRef *values,
163 unsigned value_count, unsigned component);
164
165 LLVMValueRef
166 ac_build_gather_values_extended(struct ac_llvm_context *ctx,
167 LLVMValueRef *values,
168 unsigned value_count,
169 unsigned value_stride,
170 bool load,
171 bool always_vector);
172 LLVMValueRef
173 ac_build_gather_values(struct ac_llvm_context *ctx,
174 LLVMValueRef *values,
175 unsigned value_count);
176 LLVMValueRef ac_build_expand_to_vec4(struct ac_llvm_context *ctx,
177 LLVMValueRef value,
178 unsigned num_channels);
179 LLVMValueRef ac_build_round(struct ac_llvm_context *ctx, LLVMValueRef value);
180
181 LLVMValueRef
182 ac_build_fdiv(struct ac_llvm_context *ctx,
183 LLVMValueRef num,
184 LLVMValueRef den);
185
186 LLVMValueRef ac_build_fast_udiv(struct ac_llvm_context *ctx,
187 LLVMValueRef num,
188 LLVMValueRef multiplier,
189 LLVMValueRef pre_shift,
190 LLVMValueRef post_shift,
191 LLVMValueRef increment);
192 LLVMValueRef ac_build_fast_udiv_nuw(struct ac_llvm_context *ctx,
193 LLVMValueRef num,
194 LLVMValueRef multiplier,
195 LLVMValueRef pre_shift,
196 LLVMValueRef post_shift,
197 LLVMValueRef increment);
198 LLVMValueRef ac_build_fast_udiv_u31_d_not_one(struct ac_llvm_context *ctx,
199 LLVMValueRef num,
200 LLVMValueRef multiplier,
201 LLVMValueRef post_shift);
202
203 void
204 ac_prepare_cube_coords(struct ac_llvm_context *ctx,
205 bool is_deriv, bool is_array, bool is_lod,
206 LLVMValueRef *coords_arg,
207 LLVMValueRef *derivs_arg);
208
209
210 LLVMValueRef
211 ac_build_fs_interp(struct ac_llvm_context *ctx,
212 LLVMValueRef llvm_chan,
213 LLVMValueRef attr_number,
214 LLVMValueRef params,
215 LLVMValueRef i,
216 LLVMValueRef j);
217
218 LLVMValueRef
219 ac_build_fs_interp_f16(struct ac_llvm_context *ctx,
220 LLVMValueRef llvm_chan,
221 LLVMValueRef attr_number,
222 LLVMValueRef params,
223 LLVMValueRef i,
224 LLVMValueRef j);
225
226 LLVMValueRef
227 ac_build_fs_interp_mov(struct ac_llvm_context *ctx,
228 LLVMValueRef parameter,
229 LLVMValueRef llvm_chan,
230 LLVMValueRef attr_number,
231 LLVMValueRef params);
232
233 LLVMValueRef
234 ac_build_gep_ptr(struct ac_llvm_context *ctx,
235 LLVMValueRef base_ptr,
236 LLVMValueRef index);
237
238 LLVMValueRef
239 ac_build_gep0(struct ac_llvm_context *ctx,
240 LLVMValueRef base_ptr,
241 LLVMValueRef index);
242 LLVMValueRef ac_build_pointer_add(struct ac_llvm_context *ctx, LLVMValueRef ptr,
243 LLVMValueRef index);
244
245 void
246 ac_build_indexed_store(struct ac_llvm_context *ctx,
247 LLVMValueRef base_ptr, LLVMValueRef index,
248 LLVMValueRef value);
249
250 LLVMValueRef ac_build_load(struct ac_llvm_context *ctx, LLVMValueRef base_ptr,
251 LLVMValueRef index);
252 LLVMValueRef ac_build_load_invariant(struct ac_llvm_context *ctx,
253 LLVMValueRef base_ptr, LLVMValueRef index);
254 LLVMValueRef ac_build_load_to_sgpr(struct ac_llvm_context *ctx,
255 LLVMValueRef base_ptr, LLVMValueRef index);
256 LLVMValueRef ac_build_load_to_sgpr_uint_wraparound(struct ac_llvm_context *ctx,
257 LLVMValueRef base_ptr, LLVMValueRef index);
258
259 void
260 ac_build_buffer_store_dword(struct ac_llvm_context *ctx,
261 LLVMValueRef rsrc,
262 LLVMValueRef vdata,
263 unsigned num_channels,
264 LLVMValueRef voffset,
265 LLVMValueRef soffset,
266 unsigned inst_offset,
267 bool glc,
268 bool slc,
269 bool writeonly_memory,
270 bool swizzle_enable_hint);
271
272 void
273 ac_build_buffer_store_format(struct ac_llvm_context *ctx,
274 LLVMValueRef rsrc,
275 LLVMValueRef data,
276 LLVMValueRef vindex,
277 LLVMValueRef voffset,
278 unsigned num_channels,
279 bool glc,
280 bool writeonly_memory);
281
282 LLVMValueRef
283 ac_build_buffer_load(struct ac_llvm_context *ctx,
284 LLVMValueRef rsrc,
285 int num_channels,
286 LLVMValueRef vindex,
287 LLVMValueRef voffset,
288 LLVMValueRef soffset,
289 unsigned inst_offset,
290 unsigned glc,
291 unsigned slc,
292 bool can_speculate,
293 bool allow_smem);
294
295 LLVMValueRef ac_build_buffer_load_format(struct ac_llvm_context *ctx,
296 LLVMValueRef rsrc,
297 LLVMValueRef vindex,
298 LLVMValueRef voffset,
299 unsigned num_channels,
300 bool glc,
301 bool can_speculate);
302
303 /* load_format that handles the stride & element count better if idxen is
304 * disabled by LLVM. */
305 LLVMValueRef ac_build_buffer_load_format_gfx9_safe(struct ac_llvm_context *ctx,
306 LLVMValueRef rsrc,
307 LLVMValueRef vindex,
308 LLVMValueRef voffset,
309 unsigned num_channels,
310 bool glc,
311 bool can_speculate);
312
313 LLVMValueRef
314 ac_build_tbuffer_load_short(struct ac_llvm_context *ctx,
315 LLVMValueRef rsrc,
316 LLVMValueRef voffset,
317 LLVMValueRef soffset,
318 LLVMValueRef immoffset,
319 bool glc);
320
321 LLVMValueRef
322 ac_build_struct_tbuffer_load(struct ac_llvm_context *ctx,
323 LLVMValueRef rsrc,
324 LLVMValueRef vindex,
325 LLVMValueRef voffset,
326 LLVMValueRef soffset,
327 LLVMValueRef immoffset,
328 unsigned num_channels,
329 unsigned dfmt,
330 unsigned nfmt,
331 bool glc,
332 bool slc,
333 bool can_speculate);
334
335 LLVMValueRef
336 ac_build_raw_tbuffer_load(struct ac_llvm_context *ctx,
337 LLVMValueRef rsrc,
338 LLVMValueRef voffset,
339 LLVMValueRef soffset,
340 LLVMValueRef immoffset,
341 unsigned num_channels,
342 unsigned dfmt,
343 unsigned nfmt,
344 bool glc,
345 bool slc,
346 bool can_speculate);
347
348 void
349 ac_build_tbuffer_store_short(struct ac_llvm_context *ctx,
350 LLVMValueRef rsrc,
351 LLVMValueRef vdata,
352 LLVMValueRef voffset,
353 LLVMValueRef soffset,
354 bool glc,
355 bool writeonly_memory);
356
357 void
358 ac_build_struct_tbuffer_store(struct ac_llvm_context *ctx,
359 LLVMValueRef rsrc,
360 LLVMValueRef vdata,
361 LLVMValueRef vindex,
362 LLVMValueRef voffset,
363 LLVMValueRef soffset,
364 LLVMValueRef immoffset,
365 unsigned num_channels,
366 unsigned dfmt,
367 unsigned nfmt,
368 bool glc,
369 bool slc,
370 bool writeonly_memory);
371
372 void
373 ac_build_raw_tbuffer_store(struct ac_llvm_context *ctx,
374 LLVMValueRef rsrc,
375 LLVMValueRef vdata,
376 LLVMValueRef voffset,
377 LLVMValueRef soffset,
378 LLVMValueRef immoffset,
379 unsigned num_channels,
380 unsigned dfmt,
381 unsigned nfmt,
382 bool glc,
383 bool slc,
384 bool writeonly_memory);
385
386 LLVMValueRef
387 ac_get_thread_id(struct ac_llvm_context *ctx);
388
/*
 * Thread-id masks: all bits set except the low bit(s) noted below.
 * NOTE(review): presumably the `mask` argument of ac_build_ddxy -- confirm.
 */
#define AC_TID_MASK_TOP_LEFT	0xfffffffc /* bits 0 and 1 cleared */
#define AC_TID_MASK_TOP		0xfffffffd /* bit 1 cleared */
#define AC_TID_MASK_LEFT	0xfffffffe /* bit 0 cleared */
392
393 LLVMValueRef
394 ac_build_ddxy(struct ac_llvm_context *ctx,
395 uint32_t mask,
396 int idx,
397 LLVMValueRef val);
398
/* Message ids. */
#define AC_SENDMSG_GS		2
#define AC_SENDMSG_GS_DONE	3

/* GS operation codes, stored starting at bit 4. */
#define AC_SENDMSG_GS_OP_NOP		(0 << 4)
#define AC_SENDMSG_GS_OP_CUT		(1 << 4)
#define AC_SENDMSG_GS_OP_EMIT		(2 << 4)
#define AC_SENDMSG_GS_OP_EMIT_CUT	(3 << 4)
406
407 void ac_build_sendmsg(struct ac_llvm_context *ctx,
408 uint32_t msg,
409 LLVMValueRef wave_id);
410
411 LLVMValueRef ac_build_imsb(struct ac_llvm_context *ctx,
412 LLVMValueRef arg,
413 LLVMTypeRef dst_type);
414
415 LLVMValueRef ac_build_umsb(struct ac_llvm_context *ctx,
416 LLVMValueRef arg,
417 LLVMTypeRef dst_type);
418 LLVMValueRef ac_build_fmin(struct ac_llvm_context *ctx, LLVMValueRef a,
419 LLVMValueRef b);
420 LLVMValueRef ac_build_fmax(struct ac_llvm_context *ctx, LLVMValueRef a,
421 LLVMValueRef b);
422 LLVMValueRef ac_build_imin(struct ac_llvm_context *ctx, LLVMValueRef a,
423 LLVMValueRef b);
424 LLVMValueRef ac_build_imax(struct ac_llvm_context *ctx, LLVMValueRef a,
425 LLVMValueRef b);
426 LLVMValueRef ac_build_umin(struct ac_llvm_context *ctx, LLVMValueRef a, LLVMValueRef b);
427 LLVMValueRef ac_build_clamp(struct ac_llvm_context *ctx, LLVMValueRef value);
428
429 struct ac_export_args {
430 LLVMValueRef out[4];
431 unsigned target;
432 unsigned enabled_channels;
433 bool compr;
434 bool done;
435 bool valid_mask;
436 };
437
438 void ac_build_export(struct ac_llvm_context *ctx, struct ac_export_args *a);
439
440 void ac_build_export_null(struct ac_llvm_context *ctx);
441
/*
 * Image operation selector (see struct ac_image_args::opcode, which stores
 * it in a 4-bit field). Values are spelled out; they match the implicit
 * numbering of the declaration order.
 */
enum ac_image_opcode {
	ac_image_sample = 0,
	ac_image_gather4 = 1,
	ac_image_load = 2,
	ac_image_load_mip = 3,
	ac_image_store = 4,
	ac_image_store_mip = 5,
	ac_image_get_lod = 6,
	ac_image_get_resinfo = 7,
	ac_image_atomic = 8,
	ac_image_atomic_cmpswap = 9,
};
454
/*
 * Atomic operation selector (s/u prefixes distinguish signed and unsigned
 * min/max). Values are spelled out; they match the implicit numbering of
 * the declaration order.
 */
enum ac_atomic_op {
	ac_atomic_swap = 0,
	ac_atomic_add = 1,
	ac_atomic_sub = 2,
	ac_atomic_smin = 3,
	ac_atomic_umin = 4,
	ac_atomic_smax = 5,
	ac_atomic_umax = 6,
	ac_atomic_and = 7,
	ac_atomic_or = 8,
	ac_atomic_xor = 9,
};
467
/*
 * Image dimensionality (stored in a 3-bit field of struct ac_image_args).
 * Values are spelled out; they match the implicit numbering of the
 * declaration order.
 */
enum ac_image_dim {
	ac_image_1d = 0,
	ac_image_2d = 1,
	ac_image_3d = 2,
	ac_image_cube = 3, /* includes cube arrays */
	ac_image_1darray = 4,
	ac_image_2darray = 5,
	ac_image_2dmsaa = 6,
	ac_image_2darraymsaa = 7,
};
478
/*
 * Cache policy flags; one bit each so they can be OR-ed together.
 * These cache policy bits match the definitions used by the LLVM intrinsics.
 */
enum ac_image_cache_policy {
	ac_glc = 1 << 0,
	ac_slc = 1 << 1,
};
484
485 struct ac_image_args {
486 enum ac_image_opcode opcode : 4;
487 enum ac_atomic_op atomic : 4; /* for the ac_image_atomic opcode */
488 enum ac_image_dim dim : 3;
489 unsigned dmask : 4;
490 unsigned cache_policy : 2;
491 bool unorm : 1;
492 bool level_zero : 1;
493 unsigned attributes; /* additional call-site specific AC_FUNC_ATTRs */
494
495 LLVMValueRef resource;
496 LLVMValueRef sampler;
497 LLVMValueRef data[2]; /* data[0] is source data (vector); data[1] is cmp for cmpswap */
498 LLVMValueRef offset;
499 LLVMValueRef bias;
500 LLVMValueRef compare;
501 LLVMValueRef derivs[6];
502 LLVMValueRef coords[4];
503 LLVMValueRef lod; // also used by ac_image_get_resinfo
504 };
505
506 LLVMValueRef ac_build_image_opcode(struct ac_llvm_context *ctx,
507 struct ac_image_args *a);
508 LLVMValueRef ac_build_cvt_pkrtz_f16(struct ac_llvm_context *ctx,
509 LLVMValueRef args[2]);
510 LLVMValueRef ac_build_cvt_pknorm_i16(struct ac_llvm_context *ctx,
511 LLVMValueRef args[2]);
512 LLVMValueRef ac_build_cvt_pknorm_u16(struct ac_llvm_context *ctx,
513 LLVMValueRef args[2]);
514 LLVMValueRef ac_build_cvt_pk_i16(struct ac_llvm_context *ctx,
515 LLVMValueRef args[2], unsigned bits, bool hi);
516 LLVMValueRef ac_build_cvt_pk_u16(struct ac_llvm_context *ctx,
517 LLVMValueRef args[2], unsigned bits, bool hi);
518 LLVMValueRef ac_build_wqm_vote(struct ac_llvm_context *ctx, LLVMValueRef i1);
519 void ac_build_kill_if_false(struct ac_llvm_context *ctx, LLVMValueRef i1);
520 LLVMValueRef ac_build_bfe(struct ac_llvm_context *ctx, LLVMValueRef input,
521 LLVMValueRef offset, LLVMValueRef width,
522 bool is_signed);
523 LLVMValueRef ac_build_imad(struct ac_llvm_context *ctx, LLVMValueRef s0,
524 LLVMValueRef s1, LLVMValueRef s2);
525 LLVMValueRef ac_build_fmad(struct ac_llvm_context *ctx, LLVMValueRef s0,
526 LLVMValueRef s1, LLVMValueRef s2);
527
528 void ac_build_waitcnt(struct ac_llvm_context *ctx, unsigned simm16);
529
530 LLVMValueRef ac_build_fract(struct ac_llvm_context *ctx, LLVMValueRef src0,
531 unsigned bitsize);
532
533 LLVMValueRef ac_build_isign(struct ac_llvm_context *ctx, LLVMValueRef src0,
534 unsigned bitsize);
535
536 LLVMValueRef ac_build_fsign(struct ac_llvm_context *ctx, LLVMValueRef src0,
537 unsigned bitsize);
538
539 LLVMValueRef ac_build_bit_count(struct ac_llvm_context *ctx, LLVMValueRef src0);
540
541 LLVMValueRef ac_build_bitfield_reverse(struct ac_llvm_context *ctx,
542 LLVMValueRef src0);
543
544 void ac_optimize_vs_outputs(struct ac_llvm_context *ac,
545 LLVMValueRef main_fn,
546 uint8_t *vs_output_param_offset,
547 uint32_t num_outputs,
548 uint8_t *num_param_exports);
549 void ac_init_exec_full_mask(struct ac_llvm_context *ctx);
550
551 void ac_declare_lds_as_pointer(struct ac_llvm_context *ac);
552 LLVMValueRef ac_lds_load(struct ac_llvm_context *ctx,
553 LLVMValueRef dw_addr);
554 void ac_lds_store(struct ac_llvm_context *ctx,
555 LLVMValueRef dw_addr, LLVMValueRef value);
556
557 LLVMValueRef ac_find_lsb(struct ac_llvm_context *ctx,
558 LLVMTypeRef dst_type,
559 LLVMValueRef src0);
560
561 LLVMTypeRef ac_array_in_const_addr_space(LLVMTypeRef elem_type);
562 LLVMTypeRef ac_array_in_const32_addr_space(LLVMTypeRef elem_type);
563
564 void ac_build_bgnloop(struct ac_llvm_context *ctx, int lable_id);
565 void ac_build_break(struct ac_llvm_context *ctx);
566 void ac_build_continue(struct ac_llvm_context *ctx);
567 void ac_build_else(struct ac_llvm_context *ctx, int lable_id);
568 void ac_build_endif(struct ac_llvm_context *ctx, int lable_id);
569 void ac_build_endloop(struct ac_llvm_context *ctx, int lable_id);
570 void ac_build_ifcc(struct ac_llvm_context *ctx, LLVMValueRef cond, int label_id);
571 void ac_build_if(struct ac_llvm_context *ctx, LLVMValueRef value,
572 int lable_id);
573 void ac_build_uif(struct ac_llvm_context *ctx, LLVMValueRef value,
574 int lable_id);
575
576 LLVMValueRef ac_build_alloca(struct ac_llvm_context *ac, LLVMTypeRef type,
577 const char *name);
578 LLVMValueRef ac_build_alloca_undef(struct ac_llvm_context *ac, LLVMTypeRef type,
579 const char *name);
580
581 LLVMValueRef ac_cast_ptr(struct ac_llvm_context *ctx, LLVMValueRef ptr,
582 LLVMTypeRef type);
583
584 LLVMValueRef ac_trim_vector(struct ac_llvm_context *ctx, LLVMValueRef value,
585 unsigned count);
586
587 LLVMValueRef ac_unpack_param(struct ac_llvm_context *ctx, LLVMValueRef param,
588 unsigned rshift, unsigned bitwidth);
589
590 void ac_apply_fmask_to_sample(struct ac_llvm_context *ac, LLVMValueRef fmask,
591 LLVMValueRef *addr, bool is_array_tex);
592
593 LLVMValueRef
594 ac_build_ds_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src, unsigned mask);
595
596 LLVMValueRef
597 ac_build_readlane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef lane);
598
599 LLVMValueRef
600 ac_build_writelane(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef value, LLVMValueRef lane);
601
602 LLVMValueRef
603 ac_build_mbcnt(struct ac_llvm_context *ctx, LLVMValueRef mask);
604
605 LLVMValueRef
606 ac_build_inclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
607
608 LLVMValueRef
609 ac_build_exclusive_scan(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op);
610
611 LLVMValueRef
612 ac_build_reduce(struct ac_llvm_context *ctx, LLVMValueRef src, nir_op op, unsigned cluster_size);
613
614 /**
615 * Common arguments for a scan/reduce operation that accumulates per-wave
616 * values across an entire workgroup, while respecting the order of waves.
617 */
618 struct ac_wg_scan {
619 bool enable_reduce;
620 bool enable_exclusive;
621 bool enable_inclusive;
622 nir_op op;
623 LLVMValueRef src; /* clobbered! */
624 LLVMValueRef result_reduce;
625 LLVMValueRef result_exclusive;
626 LLVMValueRef result_inclusive;
627 LLVMValueRef extra;
628 LLVMValueRef waveidx;
629 LLVMValueRef numwaves; /* only needed for "reduce" operations */
630
631 /* T addrspace(LDS) pointer to the same type as value, at least maxwaves entries */
632 LLVMValueRef scratch;
633 unsigned maxwaves;
634 };
635
/*
 * Workgroup-level scan builders operating on a struct ac_wg_scan.
 * Each family comes as a _top / _bottom pair plus an unsuffixed entry
 * point (NOTE(review): presumably top followed by bottom -- confirm).
 */
void ac_build_wg_wavescan_top(struct ac_llvm_context *ctx,
			      struct ac_wg_scan *ws);
void ac_build_wg_wavescan_bottom(struct ac_llvm_context *ctx,
				 struct ac_wg_scan *ws);
void ac_build_wg_wavescan(struct ac_llvm_context *ctx,
			  struct ac_wg_scan *ws);

void ac_build_wg_scan_top(struct ac_llvm_context *ctx,
			  struct ac_wg_scan *ws);
void ac_build_wg_scan_bottom(struct ac_llvm_context *ctx,
			     struct ac_wg_scan *ws);
void ac_build_wg_scan(struct ac_llvm_context *ctx,
		      struct ac_wg_scan *ws);
649
650 LLVMValueRef
651 ac_build_quad_swizzle(struct ac_llvm_context *ctx, LLVMValueRef src,
652 unsigned lane0, unsigned lane1, unsigned lane2, unsigned lane3);
653
654 LLVMValueRef
655 ac_build_shuffle(struct ac_llvm_context *ctx, LLVMValueRef src, LLVMValueRef index);
656
657 #ifdef __cplusplus
658 }
659 #endif
660
661 #endif