ac/nir: assert printfs will fit
mesa.git: src/amd/common/ac_nir_to_llvm.c
/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_nir_to_llvm.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "ac_binary.h"
#include "sid.h"
#include "nir/nir.h"
#include "../vulkan/radv_descriptor_set.h"
#include "util/bitscan.h"
#include <llvm-c/Transforms/Scalar.h>
#include "ac_shader_info.h"
#include "ac_exp_param.h"

enum radeon_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};

#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)

enum desc_type {
	DESC_IMAGE,
	DESC_FMASK,
	DESC_SAMPLER,
	DESC_BUFFER,
};

struct nir_to_llvm_context {
	struct ac_llvm_context ac;
	const struct ac_nir_compiler_options *options;
	struct ac_shader_variant_info *shader_info;
	unsigned max_workgroup_size;
	LLVMContextRef context;
	LLVMModuleRef module;
	LLVMBuilderRef builder;
	LLVMValueRef main_function;

	struct hash_table *defs;
	struct hash_table *phis;

	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
	LLVMValueRef ring_offsets;
	LLVMValueRef push_constants;
	LLVMValueRef num_work_groups;
	LLVMValueRef workgroup_ids;
	LLVMValueRef local_invocation_ids;
	LLVMValueRef tg_size;

	LLVMValueRef vertex_buffers;
	LLVMValueRef base_vertex;
	LLVMValueRef start_instance;
	LLVMValueRef draw_index;
	LLVMValueRef vertex_id;
	LLVMValueRef rel_auto_id;
	LLVMValueRef vs_prim_id;
	LLVMValueRef instance_id;
	LLVMValueRef ls_out_layout;
	LLVMValueRef es2gs_offset;

	LLVMValueRef tcs_offchip_layout;
	LLVMValueRef tcs_out_offsets;
	LLVMValueRef tcs_out_layout;
	LLVMValueRef tcs_in_layout;
	LLVMValueRef oc_lds;
	LLVMValueRef tess_factor_offset;
	LLVMValueRef tcs_patch_id;
	LLVMValueRef tcs_rel_ids;
	LLVMValueRef tes_rel_patch_id;
	LLVMValueRef tes_patch_id;
	LLVMValueRef tes_u;
	LLVMValueRef tes_v;

	LLVMValueRef gsvs_ring_stride;
	LLVMValueRef gsvs_num_entries;
	LLVMValueRef gs2vs_offset;
	LLVMValueRef gs_wave_id;
	LLVMValueRef gs_vtx_offset[6];
	LLVMValueRef gs_prim_id, gs_invocation_id;

	LLVMValueRef esgs_ring;
	LLVMValueRef gsvs_ring;
	LLVMValueRef hs_ring_tess_offchip;
	LLVMValueRef hs_ring_tess_factor;

	LLVMValueRef prim_mask;
	LLVMValueRef sample_pos_offset;
	LLVMValueRef persp_sample, persp_center, persp_centroid;
	LLVMValueRef linear_sample, linear_center, linear_centroid;
	LLVMValueRef front_face;
	LLVMValueRef ancillary;
	LLVMValueRef sample_coverage;
	LLVMValueRef frag_pos[4];

	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	LLVMTypeRef i1;
	LLVMTypeRef i8;
	LLVMTypeRef i16;
	LLVMTypeRef i32;
	LLVMTypeRef i64;
	LLVMTypeRef v2i32;
	LLVMTypeRef v3i32;
	LLVMTypeRef v4i32;
	LLVMTypeRef v8i32;
	LLVMTypeRef f64;
	LLVMTypeRef f32;
	LLVMTypeRef f16;
	LLVMTypeRef v2f32;
	LLVMTypeRef v4f32;
	LLVMTypeRef v16i8;
	LLVMTypeRef voidt;

	LLVMValueRef i1true;
	LLVMValueRef i1false;
	LLVMValueRef i32zero;
	LLVMValueRef i32one;
	LLVMValueRef f32zero;
	LLVMValueRef f32one;
	LLVMValueRef v4f32empty;

	unsigned uniform_md_kind;
	LLVMValueRef empty_md;
	gl_shader_stage stage;

	LLVMValueRef lds;
	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];

	LLVMValueRef shared_memory;
	uint64_t input_mask;
	uint64_t output_mask;
	int num_locals;
	LLVMValueRef *locals;
	uint8_t num_output_clips;
	uint8_t num_output_culls;

	bool has_ds_bpermute;

	bool is_gs_copy_shader;
	LLVMValueRef gs_next_vertex;
	unsigned gs_max_out_vertices;

	unsigned tes_primitive_mode;
	uint64_t tess_outputs_written;
	uint64_t tess_patch_outputs_written;
};

static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
				     const nir_deref_var *deref,
				     enum desc_type desc_type);
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	return (index * 4) + chan;
}
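
/* radeon_llvm_reg_index_soa() addresses the ctx->inputs[]/ctx->outputs[]
 * arrays above, which store four channel slots per varying: slot N,
 * channel C lives at index N * 4 + C.
 */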

static unsigned shader_io_get_unique_index(gl_varying_slot slot)
{
	/* handle patch indices separately */
	if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
		return 0;
	if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
		return 1;
	if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
		return 2 + (slot - VARYING_SLOT_PATCH0);

	if (slot == VARYING_SLOT_POS)
		return 0;
	if (slot == VARYING_SLOT_PSIZ)
		return 1;
	if (slot == VARYING_SLOT_CLIP_DIST0)
		return 2;
	/* 3 is reserved for clip dist as well */
	if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
		return 4 + (slot - VARYING_SLOT_VAR0);
	unreachable("illegal slot in get unique index\n");
}
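
/* For example: VARYING_SLOT_POS maps to 0, VARYING_SLOT_CLIP_DIST0 to 2
 * (with 3 reserved for the second clip/cull distance vec4), and the generic
 * slots VARYING_SLOT_VAR0..VAR31 to 4..35.
 */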

static unsigned llvm_get_type_size(LLVMTypeRef type)
{
	LLVMTypeKind kind = LLVMGetTypeKind(type);

	switch (kind) {
	case LLVMIntegerTypeKind:
		return LLVMGetIntTypeWidth(type) / 8;
	case LLVMFloatTypeKind:
		return 4;
	case LLVMPointerTypeKind:
		return 8;
	case LLVMVectorTypeKind:
		return LLVMGetVectorSize(type) *
		       llvm_get_type_size(LLVMGetElementType(type));
	default:
		assert(0);
		return 0;
	}
}

static void set_llvm_calling_convention(LLVMValueRef func,
					gl_shader_stage stage)
{
	enum radeon_llvm_calling_convention calling_conv;

	switch (stage) {
	case MESA_SHADER_VERTEX:
	case MESA_SHADER_TESS_CTRL:
	case MESA_SHADER_TESS_EVAL:
		calling_conv = RADEON_LLVM_AMDGPU_VS;
		break;
	case MESA_SHADER_GEOMETRY:
		calling_conv = RADEON_LLVM_AMDGPU_GS;
		break;
	case MESA_SHADER_FRAGMENT:
		calling_conv = RADEON_LLVM_AMDGPU_PS;
		break;
	case MESA_SHADER_COMPUTE:
		calling_conv = RADEON_LLVM_AMDGPU_CS;
		break;
	default:
		unreachable("Unhandled shader type");
	}

	LLVMSetFunctionCallConv(func, calling_conv);
}

#define MAX_ARGS 23
struct arg_info {
	LLVMTypeRef types[MAX_ARGS];
	LLVMValueRef *assign[MAX_ARGS];
	unsigned array_params_mask;
	uint8_t count;
	uint8_t user_sgpr_count;
	uint8_t sgpr_count;
	uint8_t num_user_sgprs_used;
	uint8_t num_sgprs_used;
	uint8_t num_vgprs_used;
};

static inline void
add_argument(struct arg_info *info,
	     LLVMTypeRef type, LLVMValueRef *param_ptr)
{
	assert(info->count < MAX_ARGS);
	info->assign[info->count] = param_ptr;
	info->types[info->count] = type;
	info->count++;
}

static inline void
add_sgpr_argument(struct arg_info *info,
		  LLVMTypeRef type, LLVMValueRef *param_ptr)
{
	add_argument(info, type, param_ptr);
	info->num_sgprs_used += llvm_get_type_size(type) / 4;
	info->sgpr_count++;
}

static inline void
add_user_sgpr_argument(struct arg_info *info,
		       LLVMTypeRef type,
		       LLVMValueRef *param_ptr)
{
	add_sgpr_argument(info, type, param_ptr);
	info->num_user_sgprs_used += llvm_get_type_size(type) / 4;
	info->user_sgpr_count++;
}

static inline void
add_vgpr_argument(struct arg_info *info,
		  LLVMTypeRef type,
		  LLVMValueRef *param_ptr)
{
	add_argument(info, type, param_ptr);
	info->num_vgprs_used += llvm_get_type_size(type) / 4;
}

static inline void
add_user_sgpr_array_argument(struct arg_info *info,
			     LLVMTypeRef type,
			     LLVMValueRef *param_ptr)
{
	info->array_params_mask |= (1 << info->count);
	add_user_sgpr_argument(info, type, param_ptr);
}

static void assign_arguments(LLVMValueRef main_function,
			     struct arg_info *info)
{
	unsigned i;
	for (i = 0; i < info->count; i++) {
		if (info->assign[i])
			*info->assign[i] = LLVMGetParam(main_function, i);
	}
}

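/* A sketch of how the helpers above get used: a stage registers its ABI
 * arguments in order, create_llvm_function() below turns them into LLVM
 * parameters, and assign_arguments() copies the parameters back out:
 *
 *	struct arg_info args = {};
 *	add_user_sgpr_argument(&args, ctx->i32, &ctx->base_vertex);
 *	add_vgpr_argument(&args, ctx->i32, &ctx->vertex_id);
 *	func = create_llvm_function(..., &args, ...);
 *	assign_arguments(func, &args);
 */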
static LLVMValueRef
create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
		     LLVMBuilderRef builder, LLVMTypeRef *return_types,
		     unsigned num_return_elems,
		     struct arg_info *args,
		     unsigned max_workgroup_size,
		     bool unsafe_math)
{
	LLVMTypeRef main_function_type, ret_type;
	LLVMBasicBlockRef main_function_body;

	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx, return_types,
						   num_return_elems, true);
	else
		ret_type = LLVMVoidTypeInContext(ctx);

	/* Setup the function */
	main_function_type =
		LLVMFunctionType(ret_type, args->types, args->count, 0);
	LLVMValueRef main_function =
		LLVMAddFunction(module, "main", main_function_type);
	main_function_body =
		LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
	LLVMPositionBuilderAtEnd(builder, main_function_body);

	LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
	for (unsigned i = 0; i < args->sgpr_count; ++i) {
		if (args->array_params_mask & (1 << i)) {
			LLVMValueRef P = LLVMGetParam(main_function, i);
			ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
			ac_add_attr_dereferenceable(P, UINT64_MAX);
		}
		else {
			ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
		}
	}

	if (max_workgroup_size) {
		ac_llvm_add_target_dep_function_attr(main_function,
						     "amdgpu-max-work-group-size",
						     max_workgroup_size);
	}
	if (unsafe_math) {
		/* These were copied from some LLVM test. */
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "less-precise-fpmad",
						   "true");
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "no-infs-fp-math",
						   "true");
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "no-nans-fp-math",
						   "true");
		LLVMAddTargetDependentFunctionAttr(main_function,
						   "unsafe-fp-math",
						   "true");
	}
	return main_function;
}

static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
{
	return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
			       CONST_ADDR_SPACE);
}

static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
					  int idx,
					  LLVMTypeRef type)
{
	LLVMValueRef offset;
	LLVMValueRef ptr;
	int addr_space;

	offset = LLVMConstInt(ctx->i32, idx * 16, false);

	ptr = ctx->shared_memory;
	ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
	addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
	ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
	return ptr;
}

static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (t == ctx->f16 || t == ctx->i16)
		return ctx->i16;
	else if (t == ctx->f32 || t == ctx->i32)
		return ctx->i32;
	else if (t == ctx->f64 || t == ctx->i64)
		return ctx->i64;
	else
		unreachable("Unhandled integer size");
}

static LLVMTypeRef to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
		LLVMTypeRef elem_type = LLVMGetElementType(t);
		return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
				      LLVMGetVectorSize(t));
	}
	return to_integer_type_scalar(ctx, t);
}

static LLVMValueRef to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
{
	LLVMTypeRef type = LLVMTypeOf(v);
	return LLVMBuildBitCast(ctx->builder, v, to_integer_type(ctx, type), "");
}

static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (t == ctx->i16 || t == ctx->f16)
		return ctx->f16;
	else if (t == ctx->i32 || t == ctx->f32)
		return ctx->f32;
	else if (t == ctx->i64 || t == ctx->f64)
		return ctx->f64;
	else
		unreachable("Unhandled float size");
}

static LLVMTypeRef to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
{
	if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
		LLVMTypeRef elem_type = LLVMGetElementType(t);
		return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
				      LLVMGetVectorSize(t));
	}
	return to_float_type_scalar(ctx, t);
}

static LLVMValueRef to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
{
	LLVMTypeRef type = LLVMTypeOf(v);
	return LLVMBuildBitCast(ctx->builder, v, to_float_type(ctx, type), "");
}

static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
{
	if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
		type = LLVMGetElementType(type);

	if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
		return LLVMGetIntTypeWidth(type);

	if (type == ctx->f16)
		return 16;
	if (type == ctx->f32)
		return 32;
	if (type == ctx->f64)
		return 64;

	unreachable("Unhandled type kind in get_elem_bits");
}

static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
				 LLVMValueRef param, unsigned rshift,
				 unsigned bitwidth)
{
	LLVMValueRef value = param;
	if (rshift)
		value = LLVMBuildLShr(ctx->builder, value,
				      LLVMConstInt(ctx->i32, rshift, false), "");

	if (rshift + bitwidth < 32) {
		unsigned mask = (1 << bitwidth) - 1;
		value = LLVMBuildAnd(ctx->builder, value,
				     LLVMConstInt(ctx->i32, mask, false), "");
	}
	return value;
}
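
/* Example: the TCS relative patch ID sits in the low 8 bits of tcs_rel_ids,
 * so get_rel_patch_id() below emits
 *
 *	unpack_param(ctx, ctx->tcs_rel_ids, 0, 8)  ->  tcs_rel_ids & 0xff
 *
 * The shift is skipped when rshift == 0, and the mask is skipped when the
 * field extends up to bit 31.
 */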

static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
{
	switch (ctx->stage) {
	case MESA_SHADER_TESS_CTRL:
		return unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
	case MESA_SHADER_TESS_EVAL:
		return ctx->tes_rel_patch_id;
	default:
		unreachable("Illegal stage");
	}
}

/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2		= get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0		= get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0	= get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2		= get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2	= get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
static LLVMValueRef
get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
{
	if (ctx->stage == MESA_SHADER_VERTEX)
		return unpack_param(ctx, ctx->ls_out_layout, 0, 13);
	else if (ctx->stage == MESA_SHADER_TESS_CTRL)
		return unpack_param(ctx, ctx->tcs_in_layout, 0, 13);
	else {
		assert(0);
		return NULL;
	}
}

static LLVMValueRef
get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
{
	return unpack_param(ctx, ctx->tcs_out_layout, 0, 13);
}

static LLVMValueRef
get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
{
	return LLVMBuildMul(ctx->builder,
			    unpack_param(ctx, ctx->tcs_out_offsets, 0, 16),
			    LLVMConstInt(ctx->i32, 4, false), "");
}

static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
{
	return LLVMBuildMul(ctx->builder,
			    unpack_param(ctx, ctx->tcs_out_offsets, 16, 16),
			    LLVMConstInt(ctx->i32, 4, false), "");
}

static LLVMValueRef
get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

	return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
}

static LLVMValueRef
get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
	LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

	return LLVMBuildAdd(ctx->builder, patch0_offset,
			    LLVMBuildMul(ctx->builder, patch_stride,
					 rel_patch_id, ""),
			    "");
}

static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef patch0_patch_data_offset =
		get_tcs_out_patch0_patch_data_offset(ctx);
	LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
	LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

	return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset,
			    LLVMBuildMul(ctx->builder, patch_stride,
					 rel_patch_id, ""),
			    "");
}
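
/* Putting the helpers above together, a sketch of how callers form an LDS
 * dword address for a per-patch TCS output:
 *
 *	dw_addr = get_tcs_out_current_patch_data_offset(ctx)
 *	          + param * 4 + chan;
 *
 * where param is the unique slot index from shader_io_get_unique_index()
 * and each vec4 slot occupies four dwords; the real callers also fold in
 * the per-vertex index where applicable.
 */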

static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
	ud_info->sgpr_idx = *sgpr_idx;
	ud_info->num_sgprs = num_sgprs;
	ud_info->indirect = false;
	ud_info->indirect_offset = 0;
	*sgpr_idx += num_sgprs;
}

static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
					 int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
{
	set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
}

static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
					   uint32_t indirect_offset)
{
	ud_info->sgpr_idx = sgpr_idx;
	ud_info->num_sgprs = num_sgprs;
	ud_info->indirect = true;
	ud_info->indirect_offset = indirect_offset;
}

static void declare_tess_lds(struct nir_to_llvm_context *ctx)
{
	unsigned lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
	ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32zero,
				     LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
				     "tess_lds");
}
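
/* LDS lives at address 0 in its own address space, so the base pointer is
 * simply inttoptr(0); lds_size above reflects the 64KB a workgroup can
 * address on CIK+ versus 32KB on older chips.
 */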

struct user_sgpr_info {
	bool need_ring_offsets;
	uint8_t sgpr_count;
	bool indirect_all_descriptor_sets;
};

static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
				struct user_sgpr_info *user_sgpr_info)
{
	memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));

	/* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
	if (ctx->stage == MESA_SHADER_GEOMETRY ||
	    ctx->stage == MESA_SHADER_VERTEX ||
	    ctx->stage == MESA_SHADER_TESS_CTRL ||
	    ctx->stage == MESA_SHADER_TESS_EVAL ||
	    ctx->is_gs_copy_shader)
		user_sgpr_info->need_ring_offsets = true;

	if (ctx->stage == MESA_SHADER_FRAGMENT &&
	    ctx->shader_info->info.ps.needs_sample_positions)
		user_sgpr_info->need_ring_offsets = true;

	/* 2 user sgprs will nearly always be allocated for scratch/rings */
	if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
		user_sgpr_info->sgpr_count += 2;
	}

	switch (ctx->stage) {
	case MESA_SHADER_COMPUTE:
		user_sgpr_info->sgpr_count += ctx->shader_info->info.cs.grid_components_used;
		break;
	case MESA_SHADER_FRAGMENT:
		user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
		break;
	case MESA_SHADER_VERTEX:
		if (!ctx->is_gs_copy_shader) {
			user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
			if (ctx->shader_info->info.vs.needs_draw_id) {
				user_sgpr_info->sgpr_count += 3;
			} else {
				user_sgpr_info->sgpr_count += 2;
			}
		}
		if (ctx->options->key.vs.as_ls)
			user_sgpr_info->sgpr_count++;
		break;
	case MESA_SHADER_TESS_CTRL:
		user_sgpr_info->sgpr_count += 4;
		break;
	case MESA_SHADER_TESS_EVAL:
		user_sgpr_info->sgpr_count += 1;
		break;
	case MESA_SHADER_GEOMETRY:
		user_sgpr_info->sgpr_count += 2;
		break;
	default:
		break;
	}

	if (ctx->shader_info->info.needs_push_constants)
		user_sgpr_info->sgpr_count += 2;

	uint32_t remaining_sgprs = 16 - user_sgpr_info->sgpr_count;
	if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
		user_sgpr_info->sgpr_count += 2;
		user_sgpr_info->indirect_all_descriptor_sets = true;
	} else {
		user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
	}
}
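
/* Worked example of the budget above: a vertex shader with vertex buffers,
 * a draw id, push constants and two descriptor sets needs
 * 2 (rings) + 2 (vertex buffers) + 3 (base vertex/start instance/draw id) +
 * 2 (push constants) + 2 * 2 (descriptor sets) = 13 of the 16 available
 * user SGPRs, so each set gets a direct pointer; once the sets no longer
 * fit, they all spill to a single indirect pointer instead.
 */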

static void create_function(struct nir_to_llvm_context *ctx)
{
	unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
	uint8_t user_sgpr_idx;
	struct user_sgpr_info user_sgpr_info;
	struct arg_info args = {};
	LLVMValueRef desc_sets;

	allocate_user_sgprs(ctx, &user_sgpr_info);
	if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
		add_user_sgpr_argument(&args, const_array(ctx->v16i8, 16), &ctx->ring_offsets); /* address of rings */
	}

	/* 1 for each descriptor set */
	if (!user_sgpr_info.indirect_all_descriptor_sets) {
		for (unsigned i = 0; i < num_sets; ++i) {
			if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
				add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]);
			}
		}
	} else
		add_user_sgpr_array_argument(&args, const_array(const_array(ctx->i8, 1024 * 1024), 32), &desc_sets);

	if (ctx->shader_info->info.needs_push_constants) {
		/* 1 for push constants and dynamic descriptors */
		add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants);
	}

	switch (ctx->stage) {
	case MESA_SHADER_COMPUTE:
		if (ctx->shader_info->info.cs.grid_components_used)
			add_user_sgpr_argument(&args, LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used), &ctx->num_work_groups); /* grid size */
		add_sgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->workgroup_ids);
		add_sgpr_argument(&args, ctx->i32, &ctx->tg_size);
		add_vgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->local_invocation_ids);
		break;
	case MESA_SHADER_VERTEX:
		if (!ctx->is_gs_copy_shader) {
			if (ctx->shader_info->info.vs.has_vertex_buffers)
				add_user_sgpr_argument(&args, const_array(ctx->v16i8, 16), &ctx->vertex_buffers); /* vertex buffers */
			add_user_sgpr_argument(&args, ctx->i32, &ctx->base_vertex); // base vertex
			add_user_sgpr_argument(&args, ctx->i32, &ctx->start_instance); // start instance
			if (ctx->shader_info->info.vs.needs_draw_id)
				add_user_sgpr_argument(&args, ctx->i32, &ctx->draw_index); // draw id
		}
		if (ctx->options->key.vs.as_es)
			add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset
		else if (ctx->options->key.vs.as_ls)
			add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout
		add_vgpr_argument(&args, ctx->i32, &ctx->vertex_id); // vertex id
		if (!ctx->is_gs_copy_shader) {
			add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id
			add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id
			add_vgpr_argument(&args, ctx->i32, &ctx->instance_id); // instance id
		}
		break;
	case MESA_SHADER_TESS_CTRL:
		add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
		add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
		add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
		add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
		add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
		add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
		add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
		add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids
		break;
	case MESA_SHADER_TESS_EVAL:
		add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
		if (ctx->options->key.tes.as_es) {
			add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS
			add_sgpr_argument(&args, ctx->i32, NULL); //
			add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset
		} else {
			add_sgpr_argument(&args, ctx->i32, NULL); //
			add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS
		}
		add_vgpr_argument(&args, ctx->f32, &ctx->tes_u); // tes_u
		add_vgpr_argument(&args, ctx->f32, &ctx->tes_v); // tes_v
		add_vgpr_argument(&args, ctx->i32, &ctx->tes_rel_patch_id); // tes rel patch id
		add_vgpr_argument(&args, ctx->i32, &ctx->tes_patch_id); // tes patch id
		break;
	case MESA_SHADER_GEOMETRY:
		add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
		add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entries
		add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset
		add_sgpr_argument(&args, ctx->i32, &ctx->gs_wave_id); // wave id
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx0
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[1]); // vtx1
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_prim_id); // prim id
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[2]);
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[3]);
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[4]);
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[5]);
		add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id);
		break;
	case MESA_SHADER_FRAGMENT:
		if (ctx->shader_info->info.ps.needs_sample_positions)
			add_user_sgpr_argument(&args, ctx->i32, &ctx->sample_pos_offset); /* sample position offset */
		add_sgpr_argument(&args, ctx->i32, &ctx->prim_mask); /* prim mask */
		add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_sample); /* persp sample */
		add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_center); /* persp center */
		add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_centroid); /* persp centroid */
		add_vgpr_argument(&args, ctx->v3i32, NULL); /* persp pull model */
		add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_sample); /* linear sample */
		add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_center); /* linear center */
		add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_centroid); /* linear centroid */
		add_vgpr_argument(&args, ctx->f32, NULL); /* line stipple tex */
		add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[0]); /* pos x float */
		add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[1]); /* pos y float */
		add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[2]); /* pos z float */
		add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[3]); /* pos w float */
		add_vgpr_argument(&args, ctx->i32, &ctx->front_face); /* front face */
		add_vgpr_argument(&args, ctx->i32, &ctx->ancillary); /* ancillary */
		add_vgpr_argument(&args, ctx->i32, &ctx->sample_coverage); /* sample coverage */
		add_vgpr_argument(&args, ctx->i32, NULL); /* fixed pt */
		break;
	default:
		unreachable("Shader stage not implemented");
	}

	ctx->main_function = create_llvm_function(
		ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
		ctx->max_workgroup_size,
		ctx->options->unsafe_math);
	set_llvm_calling_convention(ctx->main_function, ctx->stage);

	ctx->shader_info->num_input_vgprs = 0;
	ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs =
		ctx->options->supports_spill ? 2 : 0;

	ctx->shader_info->num_user_sgprs += args.num_user_sgprs_used;
	ctx->shader_info->num_input_sgprs += args.num_sgprs_used;

	if (ctx->stage != MESA_SHADER_FRAGMENT)
		ctx->shader_info->num_input_vgprs = args.num_vgprs_used;

	assign_arguments(ctx->main_function, &args);

	user_sgpr_idx = 0;

	if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
		set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx, 2);
		if (ctx->options->supports_spill) {
			ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
							       LLVMPointerType(ctx->i8, CONST_ADDR_SPACE),
							       NULL, 0, AC_FUNC_ATTR_READNONE);
			ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
							     const_array(ctx->v16i8, 16), "");
		}
	}

	if (!user_sgpr_info.indirect_all_descriptor_sets) {
		for (unsigned i = 0; i < num_sets; ++i) {
			if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
				set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], &user_sgpr_idx, 2);
			} else
				ctx->descriptor_sets[i] = NULL;
		}
	} else {
		uint32_t desc_sgpr_idx = user_sgpr_idx;
		set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);

		for (unsigned i = 0; i < num_sets; ++i) {
			if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
				set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
				ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
			} else
				ctx->descriptor_sets[i] = NULL;
		}
		ctx->shader_info->need_indirect_descriptor_sets = true;
	}

	if (ctx->shader_info->info.needs_push_constants) {
		set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
	}

	switch (ctx->stage) {
	case MESA_SHADER_COMPUTE:
		if (ctx->shader_info->info.cs.grid_components_used) {
			set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
		}
		break;
	case MESA_SHADER_VERTEX:
		if (!ctx->is_gs_copy_shader) {
			if (ctx->shader_info->info.vs.has_vertex_buffers) {
				set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, &user_sgpr_idx, 2);
			}
			unsigned vs_num = 2;
			if (ctx->shader_info->info.vs.needs_draw_id)
				vs_num++;

			set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, &user_sgpr_idx, vs_num);
		}
		if (ctx->options->key.vs.as_ls) {
			set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
		}
		if (ctx->options->key.vs.as_ls)
			declare_tess_lds(ctx);
		break;
	case MESA_SHADER_TESS_CTRL:
		set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
		declare_tess_lds(ctx);
		break;
	case MESA_SHADER_TESS_EVAL:
		set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
		break;
	case MESA_SHADER_GEOMETRY:
		set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, &user_sgpr_idx, 2);
		break;
	case MESA_SHADER_FRAGMENT:
		if (ctx->shader_info->info.ps.needs_sample_positions) {
			set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1);
		}
		break;
	default:
		unreachable("Shader stage not implemented");
	}
}
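
/* The set_userdata_location*() calls above record which user SGPR carries
 * which piece of driver state; the driver reads these locations back later
 * to know which SGPRs to write when it binds descriptor sets, push
 * constants and the like.
 */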

static void setup_types(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef args[4];

	ctx->voidt = LLVMVoidTypeInContext(ctx->context);
	ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
	ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
	ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
	ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
	ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
	ctx->f32 = LLVMFloatTypeInContext(ctx->context);
	ctx->f16 = LLVMHalfTypeInContext(ctx->context);
	ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
	ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v16i8 = LLVMVectorType(ctx->i8, 16);

	ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
	ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
	ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
	ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
	ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
	ctx->f32one = LLVMConstReal(ctx->f32, 1.0);

	args[0] = ctx->f32zero;
	args[1] = ctx->f32zero;
	args[2] = ctx->f32zero;
	args[3] = ctx->f32one;
	ctx->v4f32empty = LLVMConstVector(args, 4);

	ctx->uniform_md_kind =
		LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
	ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
}

static int get_llvm_num_components(LLVMValueRef value)
{
	LLVMTypeRef type = LLVMTypeOf(value);
	unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
					  ? LLVMGetVectorSize(type)
					  : 1;
	return num_components;
}

static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
				      LLVMValueRef value,
				      int index)
{
	int count = get_llvm_num_components(value);

	assert(index < count);
	if (count == 1)
		return value;

	return LLVMBuildExtractElement(ctx->builder, value,
				       LLVMConstInt(ctx->i32, index, false), "");
}

static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
				LLVMValueRef value, unsigned count)
{
	unsigned num_components = get_llvm_num_components(value);
	if (count == num_components)
		return value;

	LLVMValueRef masks[] = {
		LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
		LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};

	if (count == 1)
		return LLVMBuildExtractElement(ctx->builder, value, masks[0],
					       "");

	LLVMValueRef swizzle = LLVMConstVector(masks, count);
	return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
}

static void
build_store_values_extended(struct nir_to_llvm_context *ctx,
			    LLVMValueRef *values,
			    unsigned value_count,
			    unsigned value_stride,
			    LLVMValueRef vec)
{
	LLVMBuilderRef builder = ctx->builder;
	unsigned i;

	if (value_count == 1) {
		LLVMBuildStore(builder, vec, values[0]);
		return;
	}

	for (i = 0; i < value_count; i++) {
		LLVMValueRef ptr = values[i * value_stride];
		LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
		LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
		LLVMBuildStore(builder, value, ptr);
	}
}

static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
				const nir_ssa_def *def)
{
	LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
	if (def->num_components > 1) {
		type = LLVMVectorType(type, def->num_components);
	}
	return type;
}

static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
{
	assert(src.is_ssa);
	struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
	return (LLVMValueRef)entry->data;
}

static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
				   const struct nir_block *b)
{
	struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
	return (LLVMBasicBlockRef)entry->data;
}

static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
				nir_alu_src src,
				unsigned num_components)
{
	LLVMValueRef value = get_src(ctx, src.src);
	bool need_swizzle = false;

	assert(value);
	LLVMTypeRef type = LLVMTypeOf(value);
	unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
					  ? LLVMGetVectorSize(type)
					  : 1;

	for (unsigned i = 0; i < num_components; ++i) {
		assert(src.swizzle[i] < src_components);
		if (src.swizzle[i] != i)
			need_swizzle = true;
	}

	if (need_swizzle || num_components != src_components) {
		LLVMValueRef masks[] = {
			LLVMConstInt(ctx->i32, src.swizzle[0], false),
			LLVMConstInt(ctx->i32, src.swizzle[1], false),
			LLVMConstInt(ctx->i32, src.swizzle[2], false),
			LLVMConstInt(ctx->i32, src.swizzle[3], false)};

		if (src_components > 1 && num_components == 1) {
			value = LLVMBuildExtractElement(ctx->builder, value,
							masks[0], "");
		} else if (src_components == 1 && num_components > 1) {
			LLVMValueRef values[] = {value, value, value, value};
			value = ac_build_gather_values(&ctx->ac, values, num_components);
		} else {
			LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
			value = LLVMBuildShuffleVector(ctx->builder, value, value,
						       swizzle, "");
		}
	}
	assert(!src.negate);
	assert(!src.abs);
	return value;
}

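/* NIR's 32-bit booleans are all-zeros / all-ones, so the comparison helpers
 * below select between ~0 and 0 instead of returning LLVM's raw i1 result.
 */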
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
				 LLVMIntPredicate pred, LLVMValueRef src0,
				 LLVMValueRef src1)
{
	LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
	return LLVMBuildSelect(ctx->builder, result,
			       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
			       LLVMConstInt(ctx->i32, 0, false), "");
}

static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
				   LLVMRealPredicate pred, LLVMValueRef src0,
				   LLVMValueRef src1)
{
	LLVMValueRef result;
	src0 = to_float(ctx, src0);
	src1 = to_float(ctx, src1);
	result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
	return LLVMBuildSelect(ctx->builder, result,
			       LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
			       LLVMConstInt(ctx->i32, 0, false), "");
}

static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0)
{
	char name[64];
	LLVMValueRef params[] = {
		to_float(ctx, src0),
	};

	MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
						 get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));
	return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0, LLVMValueRef src1)
{
	char name[64];
	LLVMValueRef params[] = {
		to_float(ctx, src0),
		to_float(ctx, src1),
	};

	MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
						 get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));
	return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
					 const char *intrin,
					 LLVMTypeRef result_type,
					 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
	char name[64];
	LLVMValueRef params[] = {
		to_float(ctx, src0),
		to_float(ctx, src1),
		to_float(ctx, src2),
	};

	MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
						 get_elem_bits(ctx, result_type));
	assert(length < sizeof(name));
	return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
			       LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
	LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
				       ctx->i32_0, "");
	return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
}

static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
				  LLVMValueRef src0)
{
	LLVMValueRef params[2] = {
		src0,

		/* The value of 1 means that ffs(x=0) = undef, so LLVM won't
		 * add special code to check for x=0. The reason is that
		 * the LLVM behavior for x=0 is different from what we
		 * need here.
		 *
		 * The hardware already implements the correct behavior.
		 */
		LLVMConstInt(ctx->i1, 1, false),
	};
	return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
				   LLVMValueRef src0)
{
	return ac_build_imsb(ctx, src0, ctx->i32);
}

static LLVMValueRef emit_ufind_msb(struct ac_llvm_context *ctx,
				   LLVMValueRef src0)
{
	return ac_build_umsb(ctx, src0, ctx->i32);
}

static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
				    LLVMIntPredicate pred,
				    LLVMValueRef src0, LLVMValueRef src1)
{
	return LLVMBuildSelect(ctx->builder,
			       LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
			       src0,
			       src1, "");
}

static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
			      LLVMValueRef src0)
{
	return emit_minmax_int(ctx, LLVMIntSGT, src0,
			       LLVMBuildNeg(ctx->builder, src0, ""));
}

static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx,
			       LLVMValueRef src0)
{
	LLVMValueRef cmp, val;

	cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32_0, "");
	val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32_1, src0, "");
	cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32_0, "");
	val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
	return val;
}

static LLVMValueRef emit_isign(struct ac_llvm_context *ctx,
			       LLVMValueRef src0)
{
	LLVMValueRef cmp, val;

	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32_0, "");
	val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32_1, src0, "");
	cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32_0, "");
	val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
	return val;
}

static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx,
				LLVMValueRef src0)
{
	const char *intr = "llvm.floor.f32";
	LLVMValueRef fsrc0 = to_float(ctx, src0);
	LLVMValueRef params[] = {
		fsrc0,
	};
	LLVMValueRef floor = ac_build_intrinsic(ctx, intr,
						ctx->f32, params, 1,
						AC_FUNC_ATTR_READNONE);
	return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
}

static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
				    const char *intrin,
				    LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMTypeRef ret_type;
	LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
	LLVMValueRef res;
	LLVMValueRef params[] = { src0, src1 };
	ret_type = LLVMStructTypeInContext(ctx->context, types,
					   2, true);

	res = ac_build_intrinsic(ctx, intrin, ret_type,
				 params, 2, AC_FUNC_ATTR_READNONE);

	res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
	res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
	return res;
}

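/* b2f exploits the same 0 / ~0 boolean encoding: ANDing the boolean with
 * the bit pattern of 1.0f yields exactly 0.0f or 1.0f without a select.
 */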
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
}

static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	src0 = to_float(ctx, src0);
	return LLVMBuildSExt(ctx->builder,
			     LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""),
			     ctx->i32, "");
}

static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	return LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
}

static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
			     LLVMValueRef src0)
{
	return LLVMBuildSExt(ctx->builder,
			     LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""),
			     ctx->i32, "");
}

static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
			       LLVMValueRef src0)
{
	LLVMValueRef result;
	LLVMValueRef cond;

	src0 = to_float(&ctx->ac, src0);
	result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");

	/* TODO SI/CIK options here */
	if (ctx->options->chip_class >= VI) {
		LLVMValueRef args[2];
		/* Check if the result is a denormal - and flush to 0 if so. */
		args[0] = result;
		args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
		cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
	}

	/* need to convert back up to f32 */
	result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");

	if (ctx->options->chip_class >= VI)
		result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");

	return result;
}

static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
				   LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMValueRef dst64, result;
	src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
	src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");

	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
	dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
	return result;
}

static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
				   LLVMValueRef src0, LLVMValueRef src1)
{
	LLVMValueRef dst64, result;
	src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
	src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

	dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
	dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
	result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
	return result;
}

static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
					  bool is_signed,
					  const LLVMValueRef srcs[3])
{
	LLVMValueRef result;
	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");

	result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
	result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
	return result;
}

static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
					 LLVMValueRef src0, LLVMValueRef src1,
					 LLVMValueRef src2, LLVMValueRef src3)
{
	LLVMValueRef bfi_args[3], result;

	bfi_args[0] = LLVMBuildShl(ctx->builder,
				   LLVMBuildSub(ctx->builder,
						LLVMBuildShl(ctx->builder,
							     ctx->i32_1,
							     src3, ""),
						ctx->i32_1, ""),
				   src2, "");
	bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
	bfi_args[2] = src0;

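	/* bits (src3) == 32 has to be special-cased: the mask above shifts
	 * i32_1 left by src3, which is undefined for a shift of the full
	 * bit width, so that case simply selects the insert value below.
	 */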
	LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");

	/* Calculate:
	 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
	 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
	 */
	result = LLVMBuildXor(ctx->builder, bfi_args[2],
			      LLVMBuildAnd(ctx->builder, bfi_args[0],
					   LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");

	result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
	return result;
}

static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
					LLVMValueRef src0)
{
	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
	int i;
	LLVMValueRef comp[2];

	src0 = to_float(ctx, src0);
	comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
	comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
	for (i = 0; i < 2; i++) {
		comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
		comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
		comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
	}

	comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
	comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");

	return comp[0];
}

static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
					  LLVMValueRef src0)
{
	LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
	LLVMValueRef temps[2], result, val;
	int i;

	for (i = 0; i < 2; i++) {
		val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
		val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
		val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
		temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
	}

	LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
	result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0],
					ctx->i32_0, "");
	result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
					ctx->i32_1, "");
	return result;
}

static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
			      nir_op op,
			      LLVMValueRef src0)
{
	unsigned mask;
	int idx;
	LLVMValueRef result;

	if (!ctx->lds && !ctx->has_ds_bpermute)
		ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
						       LLVMArrayType(ctx->i32, 64),
						       "ddxy_lds", LOCAL_ADDR_SPACE);

	if (op == nir_op_fddx_fine || op == nir_op_fddx)
		mask = AC_TID_MASK_LEFT;
	else if (op == nir_op_fddy_fine || op == nir_op_fddy)
		mask = AC_TID_MASK_TOP;
	else
		mask = AC_TID_MASK_TOP_LEFT;

	/* for DDX we want the next X pixel, for DDY the next Y pixel. */
	if (op == nir_op_fddx_fine ||
	    op == nir_op_fddx_coarse ||
	    op == nir_op_fddx)
		idx = 1;
	else
		idx = 2;

	result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
			       mask, idx, ctx->lds,
			       src0);
	return result;
}
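
/* On chips with ds_bpermute the neighbouring lane's value is fetched with a
 * lane swizzle; on older chips the values are bounced through the 64-dword
 * "ddxy_lds" scratch array allocated above.
 */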
1490
1491 /*
1492 * this takes an I,J coordinate pair,
1493 * and works out the X and Y derivatives.
1494 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
1495 */
1496 static LLVMValueRef emit_ddxy_interp(
1497 struct nir_to_llvm_context *ctx,
1498 LLVMValueRef interp_ij)
1499 {
1500 LLVMValueRef result[4], a;
1501 unsigned i;
1502
1503 for (i = 0; i < 2; i++) {
1504 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1505 LLVMConstInt(ctx->i32, i, false), "");
1506 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1507 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1508 }
1509 return ac_build_gather_values(&ctx->ac, result, 4);
1510 }
1511
1512 static void visit_alu(struct nir_to_llvm_context *ctx, const nir_alu_instr *instr)
1513 {
1514 LLVMValueRef src[4], result = NULL;
1515 unsigned num_components = instr->dest.dest.ssa.num_components;
1516 unsigned src_components;
1517 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
1518
1519 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1520 switch (instr->op) {
1521 case nir_op_vec2:
1522 case nir_op_vec3:
1523 case nir_op_vec4:
1524 src_components = 1;
1525 break;
1526 case nir_op_pack_half_2x16:
1527 src_components = 2;
1528 break;
1529 case nir_op_unpack_half_2x16:
1530 src_components = 1;
1531 break;
1532 default:
1533 src_components = num_components;
1534 break;
1535 }
1536 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1537 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1538
1539 switch (instr->op) {
1540 case nir_op_fmov:
1541 case nir_op_imov:
1542 result = src[0];
1543 break;
1544 case nir_op_fneg:
1545 src[0] = to_float(&ctx->ac, src[0]);
1546 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1547 break;
1548 case nir_op_ineg:
1549 result = LLVMBuildNeg(ctx->builder, src[0], "");
1550 break;
1551 case nir_op_inot:
1552 result = LLVMBuildNot(ctx->builder, src[0], "");
1553 break;
1554 case nir_op_iadd:
1555 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1556 break;
1557 case nir_op_fadd:
1558 src[0] = to_float(&ctx->ac, src[0]);
1559 src[1] = to_float(&ctx->ac, src[1]);
1560 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1561 break;
1562 case nir_op_fsub:
1563 src[0] = to_float(&ctx->ac, src[0]);
1564 src[1] = to_float(&ctx->ac, src[1]);
1565 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1566 break;
1567 case nir_op_isub:
1568 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1569 break;
1570 case nir_op_imul:
1571 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1572 break;
1573 case nir_op_imod:
1574 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1575 break;
1576 case nir_op_umod:
1577 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
1578 break;
1579 case nir_op_fmod:
1580 src[0] = to_float(&ctx->ac, src[0]);
1581 src[1] = to_float(&ctx->ac, src[1]);
1582 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1583 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1584 to_float_type(&ctx->ac, def_type), result);
1585 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1586 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1587 break;
1588 case nir_op_frem:
1589 src[0] = to_float(&ctx->ac, src[0]);
1590 src[1] = to_float(&ctx->ac, src[1]);
1591 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1592 break;
1593 case nir_op_irem:
1594 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1595 break;
1596 case nir_op_idiv:
1597 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1598 break;
1599 case nir_op_udiv:
1600 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1601 break;
1602 case nir_op_fmul:
1603 src[0] = to_float(&ctx->ac, src[0]);
1604 src[1] = to_float(&ctx->ac, src[1]);
1605 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1606 break;
1607 case nir_op_fdiv:
1608 src[0] = to_float(&ctx->ac, src[0]);
1609 src[1] = to_float(&ctx->ac, src[1]);
1610 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1611 break;
1612 case nir_op_frcp:
1613 src[0] = to_float(&ctx->ac, src[0]);
1614 result = ac_build_fdiv(&ctx->ac, ctx->f32one, src[0]);
1615 break;
1616 case nir_op_iand:
1617 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1618 break;
1619 case nir_op_ior:
1620 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1621 break;
1622 case nir_op_ixor:
1623 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1624 break;
1625 case nir_op_ishl:
1626 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1627 break;
1628 case nir_op_ishr:
1629 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1630 break;
1631 case nir_op_ushr:
1632 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1633 break;
1634 case nir_op_ilt:
1635 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1636 break;
1637 case nir_op_ine:
1638 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
1639 break;
1640 case nir_op_ieq:
1641 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
1642 break;
1643 case nir_op_ige:
1644 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
1645 break;
1646 case nir_op_ult:
1647 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
1648 break;
1649 case nir_op_uge:
1650 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
1651 break;
1652 case nir_op_feq:
1653 result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
1654 break;
1655 case nir_op_fne:
1656 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
1657 break;
1658 case nir_op_flt:
1659 result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
1660 break;
1661 case nir_op_fge:
1662 result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
1663 break;
1664 case nir_op_fabs:
1665 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
1666 to_float_type(&ctx->ac, def_type), src[0]);
1667 break;
1668 case nir_op_iabs:
1669 result = emit_iabs(&ctx->ac, src[0]);
1670 break;
1671 case nir_op_imax:
1672 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
1673 break;
1674 case nir_op_imin:
1675 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1676 break;
1677 case nir_op_umax:
1678 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
1679 break;
1680 case nir_op_umin:
1681 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
1682 break;
1683 case nir_op_isign:
1684 result = emit_isign(&ctx->ac, src[0]);
1685 break;
1686 case nir_op_fsign:
1687 src[0] = to_float(&ctx->ac, src[0]);
1688 result = emit_fsign(&ctx->ac, src[0]);
1689 break;
1690 case nir_op_ffloor:
1691 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1692 to_float_type(&ctx->ac, def_type), src[0]);
1693 break;
1694 case nir_op_ftrunc:
1695 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
1696 to_float_type(&ctx->ac, def_type), src[0]);
1697 break;
1698 case nir_op_fceil:
1699 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
1700 to_float_type(&ctx->ac, def_type), src[0]);
1701 break;
1702 case nir_op_fround_even:
1703 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
1704 to_float_type(&ctx->ac, def_type), src[0]);
1705 break;
1706 case nir_op_ffract:
1707 result = emit_ffract(&ctx->ac, src[0]);
1708 break;
1709 case nir_op_fsin:
1710 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
1711 to_float_type(&ctx->ac, def_type), src[0]);
1712 break;
1713 case nir_op_fcos:
1714 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
1715 to_float_type(&ctx->ac, def_type), src[0]);
1716 break;
1717 case nir_op_fsqrt:
1718 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1719 to_float_type(&ctx->ac, def_type), src[0]);
1720 break;
1721 case nir_op_fexp2:
1722 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
1723 to_float_type(&ctx->ac, def_type), src[0]);
1724 break;
1725 case nir_op_flog2:
1726 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
1727 to_float_type(&ctx->ac, def_type), src[0]);
1728 break;
1729 case nir_op_frsq:
1730 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1731 to_float_type(&ctx->ac, def_type), src[0]);
1732 result = ac_build_fdiv(&ctx->ac, ctx->f32one, result);
1733 break;
1734 case nir_op_fpow:
1735 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
1736 to_float_type(&ctx->ac, def_type), src[0], src[1]);
1737 break;
1738 case nir_op_fmax:
1739 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1740 to_float_type(&ctx->ac, def_type), src[0], src[1]);
1741 if (instr->dest.dest.ssa.bit_size == 32)
1742 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1743 to_float_type(&ctx->ac, def_type),
1744 result);
1745 break;
1746 case nir_op_fmin:
1747 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1748 to_float_type(&ctx->ac, def_type), src[0], src[1]);
1749 if (instr->dest.dest.ssa.bit_size == 32)
1750 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1751 to_float_type(&ctx->ac, def_type),
1752 result);
1753 break;
1754 case nir_op_ffma:
1755 result = emit_intrin_3f_param(&ctx->ac, "llvm.fma",
1756 to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
1757 break;
1758 case nir_op_ibitfield_extract:
1759 result = emit_bitfield_extract(&ctx->ac, true, src);
1760 break;
1761 case nir_op_ubitfield_extract:
1762 result = emit_bitfield_extract(&ctx->ac, false, src);
1763 break;
1764 case nir_op_bitfield_insert:
1765 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
1766 break;
1767 case nir_op_bitfield_reverse:
1768 result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1769 break;
1770 case nir_op_bit_count:
1771 result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1772 break;
1773 case nir_op_vec2:
1774 case nir_op_vec3:
1775 case nir_op_vec4:
1776 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1777 src[i] = to_integer(&ctx->ac, src[i]);
1778 result = ac_build_gather_values(&ctx->ac, src, num_components);
1779 break;
1780 case nir_op_f2i32:
1781 case nir_op_f2i64:
1782 src[0] = to_float(&ctx->ac, src[0]);
1783 result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, "");
1784 break;
1785 case nir_op_f2u32:
1786 case nir_op_f2u64:
1787 src[0] = to_float(&ctx->ac, src[0]);
1788 result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, "");
1789 break;
1790 case nir_op_i2f32:
1791 case nir_op_i2f64:
1792 result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
1793 break;
1794 case nir_op_u2f32:
1795 case nir_op_u2f64:
1796 result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
1797 break;
1798 case nir_op_f2f64:
1799 result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
1800 break;
1801 case nir_op_f2f32:
1802 result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(&ctx->ac, def_type), "");
1803 break;
1804 case nir_op_u2u32:
1805 case nir_op_u2u64:
1806 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1807 result = LLVMBuildZExt(ctx->builder, src[0], def_type, "");
1808 else
1809 result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
1810 break;
1811 case nir_op_i2i32:
1812 case nir_op_i2i64:
1813 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1814 result = LLVMBuildSExt(ctx->builder, src[0], def_type, "");
1815 else
1816 result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
1817 break;
1818 case nir_op_bcsel:
1819 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
1820 break;
1821 case nir_op_find_lsb:
1822 result = emit_find_lsb(&ctx->ac, src[0]);
1823 break;
1824 case nir_op_ufind_msb:
1825 result = emit_ufind_msb(&ctx->ac, src[0]);
1826 break;
1827 case nir_op_ifind_msb:
1828 result = emit_ifind_msb(&ctx->ac, src[0]);
1829 break;
1830 case nir_op_uadd_carry:
1831 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1832 break;
1833 case nir_op_usub_borrow:
1834 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1835 break;
1836 case nir_op_b2f:
1837 result = emit_b2f(&ctx->ac, src[0]);
1838 break;
1839 case nir_op_f2b:
1840 result = emit_f2b(&ctx->ac, src[0]);
1841 break;
1842 case nir_op_b2i:
1843 result = emit_b2i(&ctx->ac, src[0]);
1844 break;
1845 case nir_op_i2b:
1846 result = emit_i2b(&ctx->ac, src[0]);
1847 break;
1848 case nir_op_fquantize2f16:
1849 result = emit_f2f16(ctx, src[0]);
1850 break;
1851 case nir_op_umul_high:
1852 result = emit_umul_high(&ctx->ac, src[0], src[1]);
1853 break;
1854 case nir_op_imul_high:
1855 result = emit_imul_high(&ctx->ac, src[0], src[1]);
1856 break;
1857 case nir_op_pack_half_2x16:
1858 result = emit_pack_half_2x16(&ctx->ac, src[0]);
1859 break;
1860 case nir_op_unpack_half_2x16:
1861 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1862 break;
1863 case nir_op_fddx:
1864 case nir_op_fddy:
1865 case nir_op_fddx_fine:
1866 case nir_op_fddy_fine:
1867 case nir_op_fddx_coarse:
1868 case nir_op_fddy_coarse:
1869 result = emit_ddxy(ctx, instr->op, src[0]);
1870 break;
1871 default:
1872 fprintf(stderr, "Unknown NIR alu instr: ");
1873 nir_print_instr(&instr->instr, stderr);
1874 fprintf(stderr, "\n");
1875 abort();
1876 }
1877
1878 if (result) {
1879 assert(instr->dest.dest.is_ssa);
1880 result = to_integer(&ctx->ac, result);
1881 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1882 result);
1883 }
1884 }
1885
1886 static void visit_load_const(struct nir_to_llvm_context *ctx,
1887 const nir_load_const_instr *instr)
1888 {
1889 LLVMValueRef values[4], value = NULL;
1890 LLVMTypeRef element_type =
1891 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1892
1893 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1894 switch (instr->def.bit_size) {
1895 case 32:
1896 values[i] = LLVMConstInt(element_type,
1897 instr->value.u32[i], false);
1898 break;
1899 case 64:
1900 values[i] = LLVMConstInt(element_type,
1901 instr->value.u64[i], false);
1902 break;
1903 default:
1904 fprintf(stderr,
1905 "unsupported nir load_const bit_size: %d\n",
1906 instr->def.bit_size);
1907 abort();
1908 }
1909 }
1910 if (instr->def.num_components > 1) {
1911 value = LLVMConstVector(values, instr->def.num_components);
1912 } else
1913 value = values[0];
1914
1915 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1916 }
1917
1918 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1919 LLVMTypeRef type)
1920 {
1921 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1922 return LLVMBuildBitCast(ctx->builder, ptr,
1923 LLVMPointerType(type, addr_space), "");
1924 }
1925
1926 static LLVMValueRef
1927 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1928 {
1929 LLVMValueRef size =
1930 LLVMBuildExtractElement(ctx->builder, descriptor,
1931 LLVMConstInt(ctx->i32, 2, false), "");
1932
1933 /* VI and newer only */
1934 if (ctx->options->chip_class >= VI && in_elements) {
1935 /* On VI, the descriptor contains the size in bytes,
1936 * but TXQ must return the size in elements.
1937 * The stride is always non-zero for resources using TXQ.
1938 */
1939 LLVMValueRef stride =
1940 LLVMBuildExtractElement(ctx->builder, descriptor,
1941 LLVMConstInt(ctx->i32, 1, false), "");
1942 stride = LLVMBuildLShr(ctx->builder, stride,
1943 LLVMConstInt(ctx->i32, 16, false), "");
1944 stride = LLVMBuildAnd(ctx->builder, stride,
1945 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1946
1947 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1948 }
1949 return size;
1950 }
1951
1952 /**
1953 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1954 * intrinsic names).
1955 */
1956 static void build_int_type_name(
1957 LLVMTypeRef type,
1958 char *buf, unsigned bufsize)
1959 {
1960 assert(bufsize >= 6);
1961
1962 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1963 snprintf(buf, bufsize, "v%ui32",
1964 LLVMGetVectorSize(type));
1965 else
1966 strcpy(buf, "i32");
1967 }
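/* e.g. a v4i32 type yields "v4i32" and a scalar i32 yields "i32";
* callers splice the result into overloaded intrinsic names. */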
1968
1969 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1970 struct ac_image_args *args,
1971 const nir_tex_instr *instr)
1972 {
1973 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1974 LLVMValueRef coord = args->addr;
1975 LLVMValueRef half_texel[2];
1976 LLVMValueRef compare_cube_wa;
1977 LLVMValueRef result;
1978 int c;
1979 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
1980
1981 //TODO Rect
1982 {
1983 struct ac_image_args txq_args = { 0 };
1984
1985 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1986 txq_args.opcode = ac_image_get_resinfo;
1987 txq_args.dmask = 0xf;
1988 txq_args.addr = ctx->i32zero;
1989 txq_args.resource = args->resource;
1990 LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &txq_args);
1991
1992 for (c = 0; c < 2; c++) {
1993 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1994 LLVMConstInt(ctx->i32, c, false), "");
1995 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1996 half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
1997 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1998 LLVMConstReal(ctx->f32, -0.5), "");
1999 }
2000 }
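/* i.e. half_texel[c] = -0.5 / size[c]; the loop below adds this to the
* gather coordinates to shift them back by half a texel. */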
2001
2002 LLVMValueRef orig_coords = args->addr;
2003
2004 for (c = 0; c < 2; c++) {
2005 LLVMValueRef tmp;
2006 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
2007 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
2008 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2009 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
2010 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2011 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
2012 }
2013
2015 /*
2016 * Apparently cube textures have an issue with integer types that the
2017 * workaround above doesn't solve, so if the format is 8_8_8_8 and the
2018 * type is integer, do an alternate workaround: sample with a scaled
2019 * format and convert the result back. This is taken from amdgpu-pro shaders.
2020 */
2021 /* NOTE this produces some ugly code compared to amdgpu-pro,
2022 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
2023 * and then reads them back. -pro generates two selects:
2024 * one s_cmp for the descriptor rewriting and
2025 * one v_cmp for the coordinate and result changes.
2026 */
2027 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2028 LLVMValueRef tmp, tmp2;
2029
2030 /* workaround 8/8/8/8 uint/sint cube gather bug */
2031 /* first detect it then change to a scaled read and f2i */
2032 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, "");
2033 tmp2 = tmp;
2034
2035 /* extract the DATA_FORMAT */
2036 tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false),
2037 LLVMConstInt(ctx->i32, 6, false), false);
2038
2039 /* is the DATA_FORMAT == 8_8_8_8 */
2040 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
2041
2042 if (stype == GLSL_TYPE_UINT)
2043 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
2044 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
2045 LLVMConstInt(ctx->i32, 0x10000000, false), "");
2046 else
2047 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
2048 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
2049 LLVMConstInt(ctx->i32, 0x14000000, false), "");
2050
2051 /* replace the NUM FORMAT in the descriptor */
2052 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
2053 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
2054
2055 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, "");
2056
2057 /* don't modify the coordinates for this case */
2058 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
2059 }
2060 args->addr = coord;
2061 result = ac_build_image_opcode(&ctx->ac, args);
2062
2063 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2064 LLVMValueRef tmp, tmp2;
2065
2066 /* if the cube workaround is in place, f2i the result. */
2067 for (c = 0; c < 4; c++) {
2068 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
2069 if (stype == GLSL_TYPE_UINT)
2070 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
2071 else
2072 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
2073 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2074 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
2075 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
2076 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2077 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
2078 }
2079 }
2080 return result;
2081 }
2082
2083 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
2084 const nir_tex_instr *instr,
2085 bool lod_is_zero,
2086 struct ac_image_args *args)
2087 {
2088 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
2089 return ac_build_buffer_load_format(&ctx->ac,
2090 args->resource,
2091 args->addr,
2092 LLVMConstInt(ctx->i32, 0, false),
2093 true);
2094 }
2095
2096 args->opcode = ac_image_sample;
2097 args->compare = instr->is_shadow;
2098
2099 switch (instr->op) {
2100 case nir_texop_txf:
2101 case nir_texop_txf_ms:
2102 case nir_texop_samples_identical:
2103 args->opcode = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip;
2104 args->compare = false;
2105 args->offset = false;
2106 break;
2107 case nir_texop_txb:
2108 args->bias = true;
2109 break;
2110 case nir_texop_txl:
2111 if (lod_is_zero)
2112 args->level_zero = true;
2113 else
2114 args->lod = true;
2115 break;
2116 case nir_texop_txs:
2117 case nir_texop_query_levels:
2118 args->opcode = ac_image_get_resinfo;
2119 break;
2120 case nir_texop_tex:
2121 if (ctx->stage != MESA_SHADER_FRAGMENT)
2122 args->level_zero = true;
2123 break;
2124 case nir_texop_txd:
2125 args->deriv = true;
2126 break;
2127 case nir_texop_tg4:
2128 args->opcode = ac_image_gather4;
2129 args->level_zero = true;
2130 break;
2131 case nir_texop_lod:
2132 args->opcode = ac_image_get_lod;
2133 args->compare = false;
2134 args->offset = false;
2135 break;
2136 default:
2137 break;
2138 }
2139
2140 if (instr->op == nir_texop_tg4) {
2141 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2142 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
2143 return radv_lower_gather4_integer(ctx, args, instr);
2144 }
2145 }
2146 return ac_build_image_opcode(&ctx->ac, args);
2147 }
2148
2149 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
2150 nir_intrinsic_instr *instr)
2151 {
2152 LLVMValueRef index = get_src(ctx, instr->src[0]);
2153 unsigned desc_set = nir_intrinsic_desc_set(instr);
2154 unsigned binding = nir_intrinsic_binding(instr);
2155 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
2156 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
2157 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
2158 unsigned base_offset = layout->binding[binding].offset;
2159 LLVMValueRef offset, stride;
2160
2161 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2162 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2163 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
2164 layout->binding[binding].dynamic_offset_offset;
2165 desc_ptr = ctx->push_constants;
2166 base_offset = pipeline_layout->push_constant_size + 16 * idx;
2167 stride = LLVMConstInt(ctx->i32, 16, false);
2168 } else
2169 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
2170
2171 offset = LLVMConstInt(ctx->i32, base_offset, false);
2172 index = LLVMBuildMul(ctx->builder, index, stride, "");
2173 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
2174
2175 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
2176 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
2177 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
2178
2179 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
2180 }
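/* e.g. (hypothetical numbers) a binding at base_offset 32 with 16-byte
* descriptors: index 2 loads the v4i32 descriptor at byte
* 32 + 2 * 16 = 64 of the set's descriptor memory. */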
2181
2182 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
2183 nir_intrinsic_instr *instr)
2184 {
2185 LLVMValueRef ptr, addr;
2186
2187 addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
2188 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
2189
2190 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
2191 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
2192
2193 return LLVMBuildLoad(ctx->builder, ptr, "");
2194 }
2195
2196 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
2197 const nir_intrinsic_instr *instr)
2198 {
2199 LLVMValueRef desc = get_src(ctx, instr->src[0]);
2200
2201 return get_buffer_size(ctx, desc, false);
2202 }

2203 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
2204 nir_intrinsic_instr *instr)
2205 {
2206 const char *store_name;
2207 LLVMValueRef src_data = get_src(ctx, instr->src[0]);
2208 LLVMTypeRef data_type = ctx->f32;
2209 int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
2210 int components_32bit = elem_size_mult * instr->num_components;
2211 unsigned writemask = nir_intrinsic_write_mask(instr);
2212 LLVMValueRef base_data, base_offset;
2213 LLVMValueRef params[6];
2214
2215 if (ctx->stage == MESA_SHADER_FRAGMENT)
2216 ctx->shader_info->fs.writes_memory = true;
2217
2218 params[1] = get_src(ctx, instr->src[1]);
2219 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2220 params[4] = ctx->i1false; /* glc */
2221 params[5] = ctx->i1false; /* slc */
2222
2223 if (components_32bit > 1)
2224 data_type = LLVMVectorType(ctx->f32, components_32bit);
2225
2226 base_data = to_float(&ctx->ac, src_data);
2227 base_data = trim_vector(ctx, base_data, instr->num_components);
2228 base_data = LLVMBuildBitCast(ctx->builder, base_data,
2229 data_type, "");
2230 base_offset = get_src(ctx, instr->src[2]); /* voffset */
2231 while (writemask) {
2232 int start, count;
2233 LLVMValueRef data;
2234 LLVMValueRef offset;
2235 LLVMValueRef tmp;
2236 u_bit_scan_consecutive_range(&writemask, &start, &count);
2237
2238 /* Due to an LLVM limitation, split 3-element writes
2239 * into a 2-element and a 1-element write. */
2240 if (count == 3) {
2241 writemask |= 1 << (start + 2);
2242 count = 2;
2243 }
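/* e.g. writemask 0x7 (xyz) stores xy in this pass and re-queues the z
* bit, which a later iteration emits as a single-dword store. */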
2244
2245 start *= elem_size_mult;
2246 count *= elem_size_mult;
2247
2248 if (count > 4) {
2249 writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
2250 count = 4;
2251 }
2252
2253 if (count == 4) {
2254 store_name = "llvm.amdgcn.buffer.store.v4f32";
2255 data = base_data;
2256 } else if (count == 2) {
2257 tmp = LLVMBuildExtractElement(ctx->builder,
2258 base_data, LLVMConstInt(ctx->i32, start, false), "");
2259 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
2260 ctx->i32zero, "");
2261
2262 tmp = LLVMBuildExtractElement(ctx->builder,
2263 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
2264 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
2265 ctx->i32one, "");
2266 store_name = "llvm.amdgcn.buffer.store.v2f32";
2267
2268 } else {
2269 assert(count == 1);
2270 if (get_llvm_num_components(base_data) > 1)
2271 data = LLVMBuildExtractElement(ctx->builder, base_data,
2272 LLVMConstInt(ctx->i32, start, false), "");
2273 else
2274 data = base_data;
2275 store_name = "llvm.amdgcn.buffer.store.f32";
2276 }
2277
2278 offset = base_offset;
2279 if (start != 0) {
2280 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
2281 }
2282 params[0] = data;
2283 params[3] = offset;
2284 ac_build_intrinsic(&ctx->ac, store_name,
2285 ctx->voidt, params, 6, 0);
2286 }
2287 }
2288
2289 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
2290 const nir_intrinsic_instr *instr)
2291 {
2292 const char *name;
2293 LLVMValueRef params[6];
2294 int arg_count = 0;
2295 if (ctx->stage == MESA_SHADER_FRAGMENT)
2296 ctx->shader_info->fs.writes_memory = true;
2297
2298 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2299 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
2300 }
2301 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
2302 params[arg_count++] = get_src(ctx, instr->src[0]);
2303 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2304 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
2305 params[arg_count++] = ctx->i1false; /* slc */
2306
2307 switch (instr->intrinsic) {
2308 case nir_intrinsic_ssbo_atomic_add:
2309 name = "llvm.amdgcn.buffer.atomic.add";
2310 break;
2311 case nir_intrinsic_ssbo_atomic_imin:
2312 name = "llvm.amdgcn.buffer.atomic.smin";
2313 break;
2314 case nir_intrinsic_ssbo_atomic_umin:
2315 name = "llvm.amdgcn.buffer.atomic.umin";
2316 break;
2317 case nir_intrinsic_ssbo_atomic_imax:
2318 name = "llvm.amdgcn.buffer.atomic.smax";
2319 break;
2320 case nir_intrinsic_ssbo_atomic_umax:
2321 name = "llvm.amdgcn.buffer.atomic.umax";
2322 break;
2323 case nir_intrinsic_ssbo_atomic_and:
2324 name = "llvm.amdgcn.buffer.atomic.and";
2325 break;
2326 case nir_intrinsic_ssbo_atomic_or:
2327 name = "llvm.amdgcn.buffer.atomic.or";
2328 break;
2329 case nir_intrinsic_ssbo_atomic_xor:
2330 name = "llvm.amdgcn.buffer.atomic.xor";
2331 break;
2332 case nir_intrinsic_ssbo_atomic_exchange:
2333 name = "llvm.amdgcn.buffer.atomic.swap";
2334 break;
2335 case nir_intrinsic_ssbo_atomic_comp_swap:
2336 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2337 break;
2338 default:
2339 abort();
2340 }
2341
2342 return ac_build_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
2343 }
2344
2345 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
2346 const nir_intrinsic_instr *instr)
2347 {
2348 LLVMValueRef results[2];
2349 int load_components;
2350 int num_components = instr->num_components;
2351 if (instr->dest.ssa.bit_size == 64)
2352 num_components *= 2;
2353
2354 for (int i = 0; i < num_components; i += load_components) {
2355 load_components = MIN2(num_components - i, 4);
2356 const char *load_name;
2357 LLVMTypeRef data_type = ctx->f32;
2358 LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false);
2359 offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, "");
2360
2361 if (load_components == 3)
2362 data_type = LLVMVectorType(ctx->f32, 4);
2363 else if (load_components > 1)
2364 data_type = LLVMVectorType(ctx->f32, load_components);
2365
2366 if (load_components >= 3)
2367 load_name = "llvm.amdgcn.buffer.load.v4f32";
2368 else if (load_components == 2)
2369 load_name = "llvm.amdgcn.buffer.load.v2f32";
2370 else if (load_components == 1)
2371 load_name = "llvm.amdgcn.buffer.load.f32";
2372 else
2373 unreachable("unhandled number of components");
2374
2375 LLVMValueRef params[] = {
2376 get_src(ctx, instr->src[0]),
2377 LLVMConstInt(ctx->i32, 0, false),
2378 offset,
2379 ctx->i1false,
2380 ctx->i1false,
2381 };
2382
2383 results[i / 4] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
2385 }
2386
2387 LLVMValueRef ret = results[0];
2388 if (num_components > 4 || num_components == 3) {
2389 LLVMValueRef masks[] = {
2390 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2391 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2392 LLVMConstInt(ctx->i32, 4, false), LLVMConstInt(ctx->i32, 5, false),
2393 LLVMConstInt(ctx->i32, 6, false), LLVMConstInt(ctx->i32, 7, false)
2394 };
2395
2396 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
2397 ret = LLVMBuildShuffleVector(ctx->builder, results[0],
2398 results[num_components > 4 ? 1 : 0], swizzle, "");
2399 }
2400
2401 return LLVMBuildBitCast(ctx->builder, ret,
2402 get_def_type(ctx, &instr->dest.ssa), "");
2403 }
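/* e.g. a 32-bit vec3 load issues one v4f32 buffer load and shuffles out
* lanes <0,1,2>; a 64-bit vec3 (6 dwords) issues a v4f32 plus a v2f32
* load and shuffles lanes <0..5> across the two results. */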
2404
2405 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
2406 const nir_intrinsic_instr *instr)
2407 {
2408 LLVMValueRef results[8], ret;
2409 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
2410 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2411 int num_components = instr->num_components;
2412
2413 rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
2414
2415 if (instr->dest.ssa.bit_size == 64)
2416 num_components *= 2;
2417
2418 for (unsigned i = 0; i < num_components; ++i) {
2419 LLVMValueRef params[] = {
2420 rsrc,
2421 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2422 offset, "")
2423 };
2424 results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
2425 params, 2,
2426 AC_FUNC_ATTR_READNONE |
2427 AC_FUNC_ATTR_LEGACY);
2428 }
2429
2431 ret = ac_build_gather_values(&ctx->ac, results, num_components);
2432 return LLVMBuildBitCast(ctx->builder, ret,
2433 get_def_type(ctx, &instr->dest.ssa), "");
2434 }
2435
2436 static void
2437 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref,
2438 bool vs_in, unsigned *vertex_index_out,
2439 LLVMValueRef *vertex_index_ref,
2440 unsigned *const_out, LLVMValueRef *indir_out)
2441 {
2442 unsigned const_offset = 0;
2443 nir_deref *tail = &deref->deref;
2444 LLVMValueRef offset = NULL;
2445
2446 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2447 tail = tail->child;
2448 nir_deref_array *deref_array = nir_deref_as_array(tail);
2449 if (vertex_index_out)
2450 *vertex_index_out = deref_array->base_offset;
2451
2452 if (vertex_index_ref) {
2453 LLVMValueRef vtx = LLVMConstInt(ctx->i32, deref_array->base_offset, false);
2454 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
2455 vtx = LLVMBuildAdd(ctx->builder, vtx, get_src(ctx, deref_array->indirect), "");
2456 }
2457 *vertex_index_ref = vtx;
2458 }
2459 }
2460
2461 if (deref->var->data.compact) {
2462 assert(tail->child->deref_type == nir_deref_type_array);
2463 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
2464 nir_deref_array *deref_array = nir_deref_as_array(tail->child);
2465 /* We always lower indirect dereferences for "compact" array vars. */
2466 assert(deref_array->deref_array_type == nir_deref_array_type_direct);
2467
2468 const_offset = deref_array->base_offset;
2469 goto out;
2470 }
2471
2472 while (tail->child != NULL) {
2473 const struct glsl_type *parent_type = tail->type;
2474 tail = tail->child;
2475
2476 if (tail->deref_type == nir_deref_type_array) {
2477 nir_deref_array *deref_array = nir_deref_as_array(tail);
2478 LLVMValueRef index, stride, local_offset;
2479 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2480
2481 const_offset += size * deref_array->base_offset;
2482 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2483 continue;
2484
2485 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2486 index = get_src(ctx, deref_array->indirect);
2487 stride = LLVMConstInt(ctx->i32, size, 0);
2488 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2489
2490 if (offset)
2491 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2492 else
2493 offset = local_offset;
2494 } else if (tail->deref_type == nir_deref_type_struct) {
2495 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2496
2497 for (unsigned i = 0; i < deref_struct->index; i++) {
2498 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2499 const_offset += glsl_count_attribute_slots(ft, vs_in);
2500 }
2501 } else
2502 unreachable("unsupported deref type");
2503
2504 }
2505 out:
2506 if (const_offset && offset)
2507 offset = LLVMBuildAdd(ctx->builder, offset,
2508 LLVMConstInt(ctx->i32, const_offset, 0),
2509 "");
2510
2511 *const_out = const_offset;
2512 *indir_out = offset;
2513 }
2514
2515 static LLVMValueRef
2516 lds_load(struct nir_to_llvm_context *ctx,
2517 LLVMValueRef dw_addr)
2518 {
2519 LLVMValueRef value;
2520 value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
2521 return value;
2522 }
2523
2524 static void
2525 lds_store(struct nir_to_llvm_context *ctx,
2526 LLVMValueRef dw_addr, LLVMValueRef value)
2527 {
2528 value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
2529 ac_build_indexed_store(&ctx->ac, ctx->lds,
2530 dw_addr, value);
2531 }
2532
2533 /* The offchip buffer layout for TCS->TES is
2534 *
2535 * - attribute 0 of patch 0 vertex 0
2536 * - attribute 0 of patch 0 vertex 1
2537 * - attribute 0 of patch 0 vertex 2
2538 * ...
2539 * - attribute 0 of patch 1 vertex 0
2540 * - attribute 0 of patch 1 vertex 1
2541 * ...
2542 * - attribute 1 of patch 0 vertex 0
2543 * - attribute 1 of patch 0 vertex 1
2544 * ...
2545 * - per patch attribute 0 of patch 0
2546 * - per patch attribute 0 of patch 1
2547 * ...
2548 *
2549 * Note that every attribute has 4 components.
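*
* So per-vertex attribute a of vertex v in patch p starts at byte offset
* 16 * (a * num_patches * vertices_per_patch + p * vertices_per_patch + v),
* which is what get_tcs_tes_buffer_address() below computes.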
2550 */
2551 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
2552 LLVMValueRef vertex_index,
2553 LLVMValueRef param_index)
2554 {
2555 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
2556 LLVMValueRef param_stride, constant16;
2557 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
2558
2559 vertices_per_patch = unpack_param(ctx, ctx->tcs_offchip_layout, 9, 6);
2560 num_patches = unpack_param(ctx, ctx->tcs_offchip_layout, 0, 9);
2561 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
2562 num_patches, "");
2563
2564 constant16 = LLVMConstInt(ctx->i32, 16, false);
2565 if (vertex_index) {
2566 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id,
2567 vertices_per_patch, "");
2568
2569 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2570 vertex_index, "");
2571
2572 param_stride = total_vertices;
2573 } else {
2574 base_addr = rel_patch_id;
2575 param_stride = num_patches;
2576 }
2577
2578 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2579 LLVMBuildMul(ctx->builder, param_index,
2580 param_stride, ""), "");
2581
2582 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, "");
2583
2584 if (!vertex_index) {
2585 LLVMValueRef patch_data_offset =
2586 unpack_param(ctx, ctx->tcs_offchip_layout, 16, 16);
2587
2588 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2589 patch_data_offset, "");
2590 }
2591 return base_addr;
2592 }
2593
2594 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx,
2595 unsigned param,
2596 unsigned const_index,
2597 bool is_compact,
2598 LLVMValueRef vertex_index,
2599 LLVMValueRef indir_index)
2600 {
2601 LLVMValueRef param_index;
2602
2603 if (indir_index)
2604 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, param, false),
2605 indir_index, "");
2606 else {
2607 if (const_index && !is_compact)
2608 param += const_index;
2609 param_index = LLVMConstInt(ctx->i32, param, false);
2610 }
2611 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
2612 }
2613
2614 static void
2615 mark_tess_output(struct nir_to_llvm_context *ctx,
2616 bool is_patch, uint32_t param)
2618 {
2619 if (is_patch) {
2620 ctx->tess_patch_outputs_written |= (1ull << param);
2621 } else
2622 ctx->tess_outputs_written |= (1ull << param);
2623 }
2624
2625 static LLVMValueRef
2626 get_dw_address(struct nir_to_llvm_context *ctx,
2627 LLVMValueRef dw_addr,
2628 unsigned param,
2629 unsigned const_index,
2630 bool compact_const_index,
2631 LLVMValueRef vertex_index,
2632 LLVMValueRef stride,
2633 LLVMValueRef indir_index)
2635 {
2637 if (vertex_index) {
2638 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2639 LLVMBuildMul(ctx->builder,
2640 vertex_index,
2641 stride, ""), "");
2642 }
2643
2644 if (indir_index)
2645 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2646 LLVMBuildMul(ctx->builder, indir_index,
2647 LLVMConstInt(ctx->i32, 4, false), ""), "");
2648 else if (const_index && !compact_const_index)
2649 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2650 LLVMConstInt(ctx->i32, const_index, false), "");
2651
2652 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2653 LLVMConstInt(ctx->i32, param * 4, false), "");
2654
2655 if (const_index && compact_const_index)
2656 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2657 LLVMConstInt(ctx->i32, const_index, false), "");
2658 return dw_addr;
2659 }
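/* e.g. (hypothetical values) param 5, vertex 2, a 16-dword vertex
* stride and no indirect index: dw_addr advances by
* 2 * 16 + 5 * 4 = 52 dwords from the patch base. */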
2660
2661 static LLVMValueRef
2662 load_tcs_input(struct nir_to_llvm_context *ctx,
2663 nir_intrinsic_instr *instr)
2664 {
2665 LLVMValueRef dw_addr, stride;
2666 unsigned const_index;
2667 LLVMValueRef vertex_index;
2668 LLVMValueRef indir_index;
2669 unsigned param;
2670 LLVMValueRef value[4], result;
2671 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2672 const bool is_compact = instr->variables[0]->var->data.compact;
2673 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2674 radv_get_deref_offset(ctx, instr->variables[0],
2675 false, NULL, per_vertex ? &vertex_index : NULL,
2676 &const_index, &indir_index);
2677
2678 stride = unpack_param(ctx, ctx->tcs_in_layout, 13, 8);
2679 dw_addr = get_tcs_in_current_patch_offset(ctx);
2680 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2681 indir_index);
2682
2683 for (unsigned i = 0; i < instr->num_components; i++) {
2684 value[i] = lds_load(ctx, dw_addr);
2685 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2686 ctx->i32one, "");
2687 }
2688 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2689 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
2690 return result;
2691 }
2692
2693 static LLVMValueRef
2694 load_tcs_output(struct nir_to_llvm_context *ctx,
2695 nir_intrinsic_instr *instr)
2696 {
2697 LLVMValueRef dw_addr, stride;
2698 LLVMValueRef value[4], result;
2699 LLVMValueRef vertex_index = NULL;
2700 LLVMValueRef indir_index = NULL;
2701 unsigned const_index = 0;
2702 unsigned param;
2703 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2704 const bool is_compact = instr->variables[0]->var->data.compact;
2705 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2706 radv_get_deref_offset(ctx, instr->variables[0],
2707 false, NULL, per_vertex ? &vertex_index : NULL,
2708 &const_index, &indir_index);
2709
2710 if (!instr->variables[0]->var->data.patch) {
2711 stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
2712 dw_addr = get_tcs_out_current_patch_offset(ctx);
2713 } else {
2714 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2715 }
2716
2717 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2718 indir_index);
2719
2720 for (unsigned i = 0; i < instr->num_components; i++) {
2721 value[i] = lds_load(ctx, dw_addr);
2722 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2723 ctx->i32one, "");
2724 }
2725 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2726 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
2727 return result;
2728 }
2729
2730 static void
2731 store_tcs_output(struct nir_to_llvm_context *ctx,
2732 nir_intrinsic_instr *instr,
2733 LLVMValueRef src,
2734 unsigned writemask)
2735 {
2736 LLVMValueRef stride, dw_addr;
2737 LLVMValueRef buf_addr = NULL;
2738 LLVMValueRef vertex_index = NULL;
2739 LLVMValueRef indir_index = NULL;
2740 unsigned const_index = 0;
2741 unsigned param;
2742 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2743 const bool is_compact = instr->variables[0]->var->data.compact;
2744
2745 radv_get_deref_offset(ctx, instr->variables[0],
2746 false, NULL, per_vertex ? &vertex_index : NULL,
2747 &const_index, &indir_index);
2748
2749 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2750 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
2751 is_compact && const_index > 3) {
2752 const_index -= 3;
2753 param++;
2754 }
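/* Compact clip/cull distance arrays span two vec4 slots, so elements
* past the first four land in the next param slot. */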
2755
2756 if (!instr->variables[0]->var->data.patch) {
2757 stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
2758 dw_addr = get_tcs_out_current_patch_offset(ctx);
2759 } else {
2760 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2761 }
2762
2763 mark_tess_output(ctx, instr->variables[0]->var->data.patch, param);
2764
2765 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2766 indir_index);
2767 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
2768 vertex_index, indir_index);
2769
2770 unsigned base = is_compact ? const_index : 0;
2771 for (unsigned chan = 0; chan < 8; chan++) {
2772 bool is_tess_factor = false;
2773 if (!(writemask & (1 << chan)))
2774 continue;
2775 LLVMValueRef value = llvm_extract_elem(ctx, src, chan);
2776
2777 lds_store(ctx, dw_addr, value);
2778
2779 if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
2780 instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
2781 is_tess_factor = true;
2782
2783 if (!is_tess_factor && writemask != 0xF)
2784 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
2785 buf_addr, ctx->oc_lds,
2786 4 * (base + chan), 1, 0, true, false);
2787
2788 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2789 ctx->i32one, "");
2790 }
2791
2792 if (writemask == 0xF) {
2793 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
2794 buf_addr, ctx->oc_lds,
2795 (base * 4), 1, 0, true, false);
2796 }
2797 }
2798
2799 static LLVMValueRef
2800 load_tes_input(struct nir_to_llvm_context *ctx,
2801 const nir_intrinsic_instr *instr)
2802 {
2803 LLVMValueRef buf_addr;
2804 LLVMValueRef result;
2805 LLVMValueRef vertex_index = NULL;
2806 LLVMValueRef indir_index = NULL;
2807 unsigned const_index = 0;
2808 unsigned param;
2809 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2810 const bool is_compact = instr->variables[0]->var->data.compact;
2811
2812 radv_get_deref_offset(ctx, instr->variables[0],
2813 false, NULL, per_vertex ? &vertex_index : NULL,
2814 &const_index, &indir_index);
2815 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2816 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
2817 is_compact && const_index > 3) {
2818 const_index -= 3;
2819 param++;
2820 }
2821 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
2822 is_compact, vertex_index, indir_index);
2823
2824 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
2825 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
2826 result = trim_vector(ctx, result, instr->num_components);
2827 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
2828 return result;
2829 }
2830
2831 static LLVMValueRef
2832 load_gs_input(struct nir_to_llvm_context *ctx,
2833 nir_intrinsic_instr *instr)
2834 {
2835 LLVMValueRef indir_index, vtx_offset;
2836 unsigned const_index;
2837 LLVMValueRef args[9];
2838 unsigned param, vtx_offset_param;
2839 LLVMValueRef value[4], result;
2840 unsigned vertex_index;
2841 radv_get_deref_offset(ctx, instr->variables[0],
2842 false, &vertex_index, NULL,
2843 &const_index, &indir_index);
2844 vtx_offset_param = vertex_index;
2845 assert(vtx_offset_param < 6);
2846 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
2847 LLVMConstInt(ctx->i32, 4, false), "");
2848
2849 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2850 for (unsigned i = 0; i < instr->num_components; i++) {
2852 args[0] = ctx->esgs_ring;
2853 args[1] = vtx_offset;
2854 args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
2855 args[3] = ctx->i32zero;
2856 args[4] = ctx->i32one; /* OFFEN */
2857 args[5] = ctx->i32zero; /* IDXEN */
2858 args[6] = ctx->i32one; /* GLC */
2859 args[7] = ctx->i32zero; /* SLC */
2860 args[8] = ctx->i32zero; /* TFE */
2861
2862 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
2863 ctx->i32, args, 9,
2864 AC_FUNC_ATTR_READONLY |
2865 AC_FUNC_ATTR_LEGACY);
2866 }
2867 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2868
2869 return result;
2870 }
2871
2872 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2873 nir_intrinsic_instr *instr)
2874 {
2875 LLVMValueRef values[8];
2876 int idx = instr->variables[0]->var->data.driver_location;
2877 int ve = instr->dest.ssa.num_components;
2878 LLVMValueRef indir_index;
2879 LLVMValueRef ret;
2880 unsigned const_index;
2881 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
2882 instr->variables[0]->var->data.mode == nir_var_shader_in;
2883 radv_get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
2884 &const_index, &indir_index);
2885
2886 if (instr->dest.ssa.bit_size == 64)
2887 ve *= 2;
2888
2889 switch (instr->variables[0]->var->data.mode) {
2890 case nir_var_shader_in:
2891 if (ctx->stage == MESA_SHADER_TESS_CTRL)
2892 return load_tcs_input(ctx, instr);
2893 if (ctx->stage == MESA_SHADER_TESS_EVAL)
2894 return load_tes_input(ctx, instr);
2895 if (ctx->stage == MESA_SHADER_GEOMETRY) {
2896 return load_gs_input(ctx, instr);
2897 }
2898 for (unsigned chan = 0; chan < ve; chan++) {
2899 if (indir_index) {
2900 unsigned count = glsl_count_attribute_slots(
2901 instr->variables[0]->var->type,
2902 ctx->stage == MESA_SHADER_VERTEX);
2903 count -= chan / 4;
2904 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2905 &ctx->ac, ctx->inputs + idx + chan, count,
2906 4, false);
2907
2908 values[chan] = LLVMBuildExtractElement(ctx->builder,
2909 tmp_vec,
2910 indir_index, "");
2911 } else
2912 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2913 }
2914 break;
2915 case nir_var_local:
2916 for (unsigned chan = 0; chan < ve; chan++) {
2917 if (indir_index) {
2918 unsigned count = glsl_count_attribute_slots(
2919 instr->variables[0]->var->type, false);
2920 count -= chan / 4;
2921 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2922 &ctx->ac, ctx->locals + idx + chan, count,
2923 4, true);
2924
2925 values[chan] = LLVMBuildExtractElement(ctx->builder,
2926 tmp_vec,
2927 indir_index, "");
2928 } else {
2929 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2930 }
2931 }
2932 break;
2933 case nir_var_shader_out:
2934 if (ctx->stage == MESA_SHADER_TESS_CTRL)
2935 return load_tcs_output(ctx, instr);
2936 for (unsigned chan = 0; chan < ve; chan++) {
2937 if (indir_index) {
2938 unsigned count = glsl_count_attribute_slots(
2939 instr->variables[0]->var->type, false);
2940 count -= chan / 4;
2941 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2942 &ctx->ac, ctx->outputs + idx + chan, count,
2943 4, true);
2944
2945 values[chan] = LLVMBuildExtractElement(ctx->builder,
2946 tmp_vec,
2947 indir_index, "");
2948 } else {
2949 values[chan] = LLVMBuildLoad(ctx->builder,
2950 ctx->outputs[idx + chan + const_index * 4],
2951 "");
2952 }
2953 }
2954 break;
2955 case nir_var_shared: {
2956 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2957 LLVMValueRef derived_ptr;
2958
2959 if (indir_index)
2960 indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
2961
2962 for (unsigned chan = 0; chan < ve; chan++) {
2963 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2964 if (indir_index)
2965 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2966 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2967
2968 values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
2969 }
2970 break;
2971 }
2972 default:
2973 unreachable("unhandle variable mode");
2974 }
2975 ret = ac_build_gather_values(&ctx->ac, values, ve);
2976 return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2977 }
2978
2979 static void
2980 visit_store_var(struct nir_to_llvm_context *ctx,
2981 nir_intrinsic_instr *instr)
2982 {
2983 LLVMValueRef temp_ptr, value;
2984 int idx = instr->variables[0]->var->data.driver_location;
2985 LLVMValueRef src = to_float(&ctx->ac, get_src(ctx, instr->src[0]));
2986 int writemask = instr->const_index[0];
2987 LLVMValueRef indir_index;
2988 unsigned const_index;
2989 radv_get_deref_offset(ctx, instr->variables[0], false,
2990 NULL, NULL, &const_index, &indir_index);
2991
2992 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
2993 int old_writemask = writemask;
2994
2995 src = LLVMBuildBitCast(ctx->builder, src,
2996 LLVMVectorType(ctx->f32, get_llvm_num_components(src) * 2),
2997 "");
2998
2999 writemask = 0;
3000 for (unsigned chan = 0; chan < 4; chan++) {
3001 if (old_writemask & (1 << chan))
3002 writemask |= 3u << (2 * chan);
3003 }
3004 }
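/* e.g. a dvec2 store with writemask 0x3 (xy) becomes an f32-vector
* store with writemask 0xf: each 64-bit channel covers two dwords. */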
3005
3006 switch (instr->variables[0]->var->data.mode) {
3007 case nir_var_shader_out:
3009 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3010 store_tcs_output(ctx, instr, src, writemask);
3011 return;
3012 }
3013
3014 for (unsigned chan = 0; chan < 8; chan++) {
3015 int stride = 4;
3016 if (!(writemask & (1 << chan)))
3017 continue;
3018
3019 value = llvm_extract_elem(ctx, src, chan);
3020
3021 if (instr->variables[0]->var->data.compact)
3022 stride = 1;
3023 if (indir_index) {
3024 unsigned count = glsl_count_attribute_slots(
3025 instr->variables[0]->var->type, false);
3026 count -= chan / 4;
3027 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3028 &ctx->ac, ctx->outputs + idx + chan, count,
3029 stride, true);
3030
3031 if (get_llvm_num_components(tmp_vec) > 1) {
3032 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
3033 value, indir_index, "");
3034 } else
3035 tmp_vec = value;
3036 build_store_values_extended(ctx, ctx->outputs + idx + chan,
3037 count, stride, tmp_vec);
3038
3039 } else {
3040 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
3041
3042 LLVMBuildStore(ctx->builder, value, temp_ptr);
3043 }
3044 }
3045 break;
3046 case nir_var_local:
3047 for (unsigned chan = 0; chan < 8; chan++) {
3048 if (!(writemask & (1 << chan)))
3049 continue;
3050
3051 value = llvm_extract_elem(ctx, src, chan);
3052 if (indir_index) {
3053 unsigned count = glsl_count_attribute_slots(
3054 instr->variables[0]->var->type, false);
3055 count -= chan / 4;
3056 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3057 &ctx->ac, ctx->locals + idx + chan, count,
3058 4, true);
3059
3060 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
3061 value, indir_index, "");
3062 build_store_values_extended(ctx, ctx->locals + idx + chan,
3063 count, 4, tmp_vec);
3064 } else {
3065 temp_ptr = ctx->locals[idx + chan + const_index * 4];
3066
3067 LLVMBuildStore(ctx->builder, value, temp_ptr);
3068 }
3069 }
3070 break;
3071 case nir_var_shared: {
3072 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
3073
3074 if (indir_index)
3075 indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
3076
3077 for (unsigned chan = 0; chan < 8; chan++) {
3078 if (!(writemask & (1 << chan)))
3079 continue;
3080 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
3081 LLVMValueRef derived_ptr;
3082
3083 if (indir_index)
3084 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
3085
3086 value = llvm_extract_elem(ctx, src, chan);
3087 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
3088 LLVMBuildStore(ctx->builder,
3089 to_integer(&ctx->ac, value), derived_ptr);
3090 }
3091 break;
3092 }
3093 default:
3094 break;
3095 }
3096 }
3097
3098 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
3099 {
3100 switch (dim) {
3101 case GLSL_SAMPLER_DIM_BUF:
3102 return 1;
3103 case GLSL_SAMPLER_DIM_1D:
3104 return array ? 2 : 1;
3105 case GLSL_SAMPLER_DIM_2D:
3106 return array ? 3 : 2;
3107 case GLSL_SAMPLER_DIM_MS:
3108 return array ? 4 : 3;
3109 case GLSL_SAMPLER_DIM_3D:
3110 case GLSL_SAMPLER_DIM_CUBE:
3111 return 3;
3112 case GLSL_SAMPLER_DIM_RECT:
3113 case GLSL_SAMPLER_DIM_SUBPASS:
3114 return 2;
3115 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3116 return 3;
3117 default:
3118 break;
3119 }
3120 return 0;
3121 }
3122
3125 /* Adjust the sample index according to FMASK.
3126 *
3127 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3128 * which is the identity mapping. Each nibble says which physical sample
3129 * should be fetched to get that sample.
3130 *
3131 * For example, 0x11111100 means there are only 2 samples stored and
3132 * the second sample covers 3/4 of the pixel. When reading samples 0
3133 * and 1, return physical sample 0 (determined by the first two 0s
3134 * in FMASK), otherwise return physical sample 1.
3135 *
3136 * The sample index should be adjusted as follows:
3137 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
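*
* e.g. with the FMASK 0x11111100 above, reading sample 2 gives
* (0x11111100 >> 8) & 0xF = 1, i.e. physical sample 1.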
3138 */
3139 static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx,
3140 LLVMValueRef coord_x, LLVMValueRef coord_y,
3141 LLVMValueRef coord_z,
3142 LLVMValueRef sample_index,
3143 LLVMValueRef fmask_desc_ptr)
3144 {
3145 LLVMValueRef fmask_load_address[4];
3146 LLVMValueRef res;
3147
3148 fmask_load_address[0] = coord_x;
3149 fmask_load_address[1] = coord_y;
3150 if (coord_z) {
3151 fmask_load_address[2] = coord_z;
3152 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
3153 }
3154
3155 struct ac_image_args args = {0};
3156
3157 args.opcode = ac_image_load;
3158 args.da = coord_z != NULL;
3159 args.resource = fmask_desc_ptr;
3160 args.dmask = 0xf;
3161 args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2);
3162
3163 res = ac_build_image_opcode(&ctx->ac, &args);
3164
3165 res = to_integer(&ctx->ac, res);
3166 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3167 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3168
3169 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3170 res,
3171 ctx->i32zero, "");
3172
3173 LLVMValueRef sample_index4 =
3174 LLVMBuildMul(ctx->builder, sample_index, four, "");
3175 LLVMValueRef shifted_fmask =
3176 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3177 LLVMValueRef final_sample =
3178 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3179
3180 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3181 * resource descriptor is 0 (invalid).
3182 */
3183 LLVMValueRef fmask_desc =
3184 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
3185 ctx->v8i32, "");
3186
3187 LLVMValueRef fmask_word1 =
3188 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3189 ctx->i32one, "");
3190
3191 LLVMValueRef word1_is_nonzero =
3192 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3193 fmask_word1, ctx->i32zero, "");
3194
3195 /* Replace the MSAA sample index. */
3196 sample_index =
3197 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3198 final_sample, sample_index, "");
3199 return sample_index;
3200 }
3201
3202 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
3203 const nir_intrinsic_instr *instr)
3204 {
3205 const struct glsl_type *type = instr->variables[0]->var->type;
3206 if (instr->variables[0]->deref.child)
3207 type = instr->variables[0]->deref.child->type;
3208
3209 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
3210 LLVMValueRef coords[4];
3211 LLVMValueRef masks[] = {
3212 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
3213 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
3214 };
3215 LLVMValueRef res;
3216 LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
3217
3218 int count;
3219 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
3220 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
3221 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3222 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
3223 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3224
3225 count = image_type_to_components_count(dim,
3226 glsl_sampler_type_is_array(type));
3227
3228 if (is_ms) {
3229 LLVMValueRef fmask_load_address[3];
3230 int chan;
3231
3232 fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
3233 fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
3234 if (glsl_sampler_type_is_array(type))
3235 fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
3236 else
3237 fmask_load_address[2] = NULL;
3238 if (add_frag_pos) {
3239 for (chan = 0; chan < 2; ++chan)
3240 fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
3241 }
3242 sample_index = adjust_sample_index_using_fmask(ctx,
3243 fmask_load_address[0],
3244 fmask_load_address[1],
3245 fmask_load_address[2],
3246 sample_index,
3247 get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
3248 }
3249 if (count == 1) {
3250 if (instr->src[0].ssa->num_components)
3251 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
3252 else
3253 res = src0;
3254 } else {
3255 int chan;
3256 if (is_ms)
3257 count--;
3258 for (chan = 0; chan < count; ++chan) {
3259 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
3260 }
3261
3262 if (add_frag_pos) {
3263 for (chan = 0; chan < count; ++chan)
3264 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
3265 }
3266 if (is_ms) {
3267 coords[count] = sample_index;
3268 count++;
3269 }
3270
3271 if (count == 3) {
3272 coords[3] = LLVMGetUndef(ctx->i32);
3273 count = 4;
3274 }
3275 res = ac_build_gather_values(&ctx->ac, coords, count);
3276 }
3277 return res;
3278 }
3279
3280 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
3281 const nir_intrinsic_instr *instr)
3282 {
3283 LLVMValueRef params[7];
3284 LLVMValueRef res;
3285 char intrinsic_name[64];
3286 const nir_variable *var = instr->variables[0]->var;
3287 const struct glsl_type *type = var->type;
3288 if (instr->variables[0]->deref.child)
3289 type = instr->variables[0]->deref.child->type;
3290
3291 type = glsl_without_array(type);
3292 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3293 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3294 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
3295 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3296 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
3297 params[3] = ctx->i1false; /* glc */
3298 params[4] = ctx->i1false; /* slc */
3299 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
3300 params, 5, 0);
3301
3302 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
3303 res = to_integer(&ctx->ac, res);
3304 } else {
3305 bool is_da = glsl_sampler_type_is_array(type) ||
3306 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3307 LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
3308 LLVMValueRef glc = ctx->i1false;
3309 LLVMValueRef slc = ctx->i1false;
3310
3311 params[0] = get_image_coords(ctx, instr);
3312 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3313 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
3314 if (HAVE_LLVM <= 0x0309) {
3315 params[3] = ctx->i1false; /* r128 */
3316 params[4] = da;
3317 params[5] = glc;
3318 params[6] = slc;
3319 } else {
3320 LLVMValueRef lwe = ctx->i1false;
3321 params[3] = glc;
3322 params[4] = slc;
3323 params[5] = lwe;
3324 params[6] = da;
3325 }
3326
3327 ac_get_image_intr_name("llvm.amdgcn.image.load",
3328 ctx->v4f32, /* vdata */
3329 LLVMTypeOf(params[0]), /* coords */
3330 LLVMTypeOf(params[1]), /* rsrc */
3331 intrinsic_name, sizeof(intrinsic_name));
3332
3333 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
3334 params, 7, AC_FUNC_ATTR_READONLY);
3335 }
3336 return to_integer(&ctx->ac, res);
3337 }
3338
3339 static void visit_image_store(struct nir_to_llvm_context *ctx,
3340 nir_intrinsic_instr *instr)
3341 {
3342 LLVMValueRef params[8];
3343 char intrinsic_name[64];
3344 const nir_variable *var = instr->variables[0]->var;
3345 const struct glsl_type *type = glsl_without_array(var->type);
3346
3347 if (ctx->stage == MESA_SHADER_FRAGMENT)
3348 ctx->shader_info->fs.writes_memory = true;
3349
3350 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3351 params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
3352 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3353 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
3354 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3355 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
3356 params[4] = ctx->i1false; /* glc */
3357 params[5] = ctx->i1false; /* slc */
3358 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
3359 params, 6, 0);
3360 } else {
3361 bool is_da = glsl_sampler_type_is_array(type) ||
3362 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3363 LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
3364 LLVMValueRef glc = ctx->i1false;
3365 LLVMValueRef slc = ctx->i1false;
3366
3367 params[0] = to_float(&ctx->ac, get_src(ctx, instr->src[2]));
3368 params[1] = get_image_coords(ctx, instr); /* coords */
3369 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3370 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
3371 if (HAVE_LLVM <= 0x0309) {
3372 params[4] = ctx->i1false; /* r128 */
3373 params[5] = da;
3374 params[6] = glc;
3375 params[7] = slc;
3376 } else {
3377 LLVMValueRef lwe = ctx->i1false;
3378 params[4] = glc;
3379 params[5] = slc;
3380 params[6] = lwe;
3381 params[7] = da;
3382 }
3383
3384 ac_get_image_intr_name("llvm.amdgcn.image.store",
3385 LLVMTypeOf(params[0]), /* vdata */
3386 LLVMTypeOf(params[1]), /* coords */
3387 LLVMTypeOf(params[2]), /* rsrc */
3388 intrinsic_name, sizeof(intrinsic_name));
3389
3390 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
3391 params, 8, 0);
3392 }
3393
3394 }
3395
3396 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
3397 const nir_intrinsic_instr *instr)
3398 {
3399 LLVMValueRef params[6];
3400 int param_count = 0;
3401 const nir_variable *var = instr->variables[0]->var;
3402
3403 const char *base_name = "llvm.amdgcn.image.atomic";
3404 const char *atomic_name;
3405 LLVMValueRef coords;
3406 char intrinsic_name[41], coords_type[8];
3407 const struct glsl_type *type = glsl_without_array(var->type);
3408
3409 if (ctx->stage == MESA_SHADER_FRAGMENT)
3410 ctx->shader_info->fs.writes_memory = true;
3411
3412 params[param_count++] = get_src(ctx, instr->src[2]);
3413 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
3414 params[param_count++] = get_src(ctx, instr->src[3]);
3415
3416 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3417 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3418 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
3419 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3420 params[param_count++] = ctx->i32zero; /* voffset */
3421 params[param_count++] = ctx->i1false; /* glc */
3422 params[param_count++] = ctx->i1false; /* slc */
3423 } else {
3424 bool da = glsl_sampler_type_is_array(type) ||
3425 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3426
3427 coords = params[param_count++] = get_image_coords(ctx, instr);
3428 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3429 params[param_count++] = ctx->i1false; /* r128 */
3430 params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */
3431 params[param_count++] = ctx->i1false; /* slc */
3432 }
3433
3434 switch (instr->intrinsic) {
3435 case nir_intrinsic_image_atomic_add:
3436 atomic_name = "add";
3437 break;
3438 case nir_intrinsic_image_atomic_min:
3439 atomic_name = "smin";
3440 break;
3441 case nir_intrinsic_image_atomic_max:
3442 atomic_name = "smax";
3443 break;
3444 case nir_intrinsic_image_atomic_and:
3445 atomic_name = "and";
3446 break;
3447 case nir_intrinsic_image_atomic_or:
3448 atomic_name = "or";
3449 break;
3450 case nir_intrinsic_image_atomic_xor:
3451 atomic_name = "xor";
3452 break;
3453 case nir_intrinsic_image_atomic_exchange:
3454 atomic_name = "swap";
3455 break;
3456 case nir_intrinsic_image_atomic_comp_swap:
3457 atomic_name = "cmpswap";
3458 break;
3459 default:
3460 abort();
3461 }
3462 build_int_type_name(LLVMTypeOf(coords),
3463 coords_type, sizeof(coords_type));
3464
3465 MAYBE_UNUSED const int length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3466 "%s.%s.%s", base_name, atomic_name, coords_type);
3467 assert(length < sizeof(intrinsic_name));
3468 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
3469 }
3470
3471 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
3472 const nir_intrinsic_instr *instr)
3473 {
3474 LLVMValueRef res;
3475 const nir_variable *var = instr->variables[0]->var;
3476 const struct glsl_type *type = instr->variables[0]->var->type;
3477 bool da = glsl_sampler_type_is_array(var->type) ||
3478 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
3479 if (instr->variables[0]->deref.child)
3480 type = instr->variables[0]->deref.child->type;
3481
3482 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
3483 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
3484
3485 struct ac_image_args args = { 0 };
3486
3487 args.da = da;
3488 args.dmask = 0xf;
3489 args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3490 args.opcode = ac_image_get_resinfo;
3491 args.addr = ctx->i32zero;
3492
3493 res = ac_build_image_opcode(&ctx->ac, &args);
3494
3495 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
3496 glsl_sampler_type_is_array(type)) {
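/* resinfo reports layer-faces for cube arrays; divide by 6 so the
 * query returns the number of cubes, as the API expects. */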
3497 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3498 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3499 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
3500 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3501 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
3502 }
3503 return res;
3504 }
3505
3506 #define NOOP_WAITCNT 0xf7f
3507 #define LGKM_CNT 0x07f
3508 #define VM_CNT 0xf70
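
/* Illustrative sketch only (encode_waitcnt is not used by the code
 * below): assuming the SI s_waitcnt simm16 layout of vm_cnt in bits
 * [3:0], exp_cnt in bits [6:4] and lgkm_cnt in bits [11:8], a field of
 * all 1s means "do not wait on that counter".  That is why the masks
 * above compose with '&', e.g. LGKM_CNT & VM_CNT == 0x070 waits for
 * both LGKM and VM traffic. */
MAYBE_UNUSED static unsigned
encode_waitcnt(unsigned vm_cnt, unsigned exp_cnt, unsigned lgkm_cnt)
{
	return (vm_cnt & 0xf) | ((exp_cnt & 0x7) << 4) | ((lgkm_cnt & 0xf) << 8);
}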
3509
3510 static void emit_waitcnt(struct nir_to_llvm_context *ctx,
3511 unsigned simm16)
3512 {
3513 LLVMValueRef args[1] = {
3514 LLVMConstInt(ctx->i32, simm16, false),
3515 };
3516 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
3517 ctx->voidt, args, 1, 0);
3518 }
3519
3520 static void emit_barrier(struct nir_to_llvm_context *ctx)
3521 {
3522 /* SI only (thanks to a hw bug workaround):
3523 * The real barrier instruction isn't needed, because an entire patch
3524 * always fits into a single wave.
3525 */
3526 if (ctx->options->chip_class == SI &&
3527 ctx->stage == MESA_SHADER_TESS_CTRL) {
3528 emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
3529 return;
3530 }
3531 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
3532 ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3533 }
3534
3535 static void emit_discard_if(struct nir_to_llvm_context *ctx,
3536 const nir_intrinsic_instr *instr)
3537 {
3538 LLVMValueRef cond;
3539 ctx->shader_info->fs.can_discard = true;
3540
3541 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3542 get_src(ctx, instr->src[0]),
3543 ctx->i32zero, "");
3544
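/* ac_build_kill discards lanes whose f32 argument is negative, so map
 * a true condition to -1.0 and a false one to +0.0 before calling it. */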
3545 cond = LLVMBuildSelect(ctx->builder, cond,
3546 LLVMConstReal(ctx->f32, -1.0f),
3547 ctx->f32zero, "");
3548 ac_build_kill(&ctx->ac, cond);
3549 }
3550
3551 static LLVMValueRef
3552 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
3553 {
3554 LLVMValueRef result;
3555 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
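/* tg_size carries the wave's index within the threadgroup in bits
 * [11:6], so masking with 0xfc0 yields wave_id * 64 directly; adding
 * the lane id (0..63) produces the flat invocation index.  E.g. lane 5
 * of the second wave: (1 << 6) + 5 = 69. */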
3556 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
3557 LLVMConstInt(ctx->i32, 0xfc0, false), "");
3558
3559 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
3560 }
3561
3562 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
3563 const nir_intrinsic_instr *instr)
3564 {
3565 LLVMValueRef ptr, result;
3566 int idx = instr->variables[0]->var->data.driver_location;
3567 LLVMValueRef src = get_src(ctx, instr->src[0]);
3568 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
3569
3570 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
3571 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
3572 result = LLVMBuildAtomicCmpXchg(ctx->builder,
3573 ptr, src, src1,
3574 LLVMAtomicOrderingSequentiallyConsistent,
3575 LLVMAtomicOrderingSequentiallyConsistent,
3576 false);
3577 } else {
3578 LLVMAtomicRMWBinOp op;
3579 switch (instr->intrinsic) {
3580 case nir_intrinsic_var_atomic_add:
3581 op = LLVMAtomicRMWBinOpAdd;
3582 break;
3583 case nir_intrinsic_var_atomic_umin:
3584 op = LLVMAtomicRMWBinOpUMin;
3585 break;
3586 case nir_intrinsic_var_atomic_umax:
3587 op = LLVMAtomicRMWBinOpUMax;
3588 break;
3589 case nir_intrinsic_var_atomic_imin:
3590 op = LLVMAtomicRMWBinOpMin;
3591 break;
3592 case nir_intrinsic_var_atomic_imax:
3593 op = LLVMAtomicRMWBinOpMax;
3594 break;
3595 case nir_intrinsic_var_atomic_and:
3596 op = LLVMAtomicRMWBinOpAnd;
3597 break;
3598 case nir_intrinsic_var_atomic_or:
3599 op = LLVMAtomicRMWBinOpOr;
3600 break;
3601 case nir_intrinsic_var_atomic_xor:
3602 op = LLVMAtomicRMWBinOpXor;
3603 break;
3604 case nir_intrinsic_var_atomic_exchange:
3605 op = LLVMAtomicRMWBinOpXchg;
3606 break;
3607 default:
3608 return NULL;
3609 }
3610
3611 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(&ctx->ac, src),
3612 LLVMAtomicOrderingSequentiallyConsistent,
3613 false);
3614 }
3615 return result;
3616 }
3617
3618 #define INTERP_CENTER 0
3619 #define INTERP_CENTROID 1
3620 #define INTERP_SAMPLE 2
3621
3622 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
3623 enum glsl_interp_mode interp, unsigned location)
3624 {
3625 switch (interp) {
3626 case INTERP_MODE_FLAT:
3627 default:
3628 return NULL;
3629 case INTERP_MODE_SMOOTH:
3630 case INTERP_MODE_NONE:
3631 if (location == INTERP_CENTER)
3632 return ctx->persp_center;
3633 else if (location == INTERP_CENTROID)
3634 return ctx->persp_centroid;
3635 else if (location == INTERP_SAMPLE)
3636 return ctx->persp_sample;
3637 break;
3638 case INTERP_MODE_NOPERSPECTIVE:
3639 if (location == INTERP_CENTER)
3640 return ctx->linear_center;
3641 else if (location == INTERP_CENTROID)
3642 return ctx->linear_centroid;
3643 else if (location == INTERP_SAMPLE)
3644 return ctx->linear_sample;
3645 break;
3646 }
3647 return NULL;
3648 }
3649
3650 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
3651 LLVMValueRef sample_id)
3652 {
3653 LLVMValueRef result;
3654 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false));
3655
3656 ptr = LLVMBuildBitCast(ctx->builder, ptr,
3657 const_array(ctx->v2f32, 64), "");
3658
3659 sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
3660 result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
3661
3662 return result;
3663 }
3664
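/* With per-sample shading, frag_pos is evaluated at the sample location,
 * so gl_SamplePosition is just its fractional part within the pixel. */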
3665 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
3666 {
3667 LLVMValueRef values[2];
3668
3669 values[0] = emit_ffract(&ctx->ac, ctx->frag_pos[0]);
3670 values[1] = emit_ffract(&ctx->ac, ctx->frag_pos[1]);
3671 return ac_build_gather_values(&ctx->ac, values, 2);
3672 }
3673
3674 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
3675 const nir_intrinsic_instr *instr)
3676 {
3677 LLVMValueRef result[2];
3678 LLVMValueRef interp_param, attr_number;
3679 unsigned location;
3680 unsigned chan;
3681 LLVMValueRef src_c0, src_c1;
3682 LLVMValueRef src0;
3683 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
3684 switch (instr->intrinsic) {
3685 case nir_intrinsic_interp_var_at_centroid:
3686 location = INTERP_CENTROID;
3687 break;
3688 case nir_intrinsic_interp_var_at_sample:
3689 case nir_intrinsic_interp_var_at_offset:
3690 location = INTERP_CENTER;
3691 src0 = get_src(ctx, instr->src[0]);
3692 break;
3693 default:
3694 break;
3695 }
3696
3697 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
3698 src_c0 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
3699 src_c1 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
3700 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
3701 LLVMValueRef sample_position;
3702 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
3703
3704 /* fetch sample ID */
3705 sample_position = load_sample_position(ctx, src0);
3706
3707 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
3708 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
3709 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
3710 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
3711 }
3712 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
3713 attr_number = LLVMConstInt(ctx->i32, input_index, false);
3714
3715 if (location == INTERP_SAMPLE || location == INTERP_CENTER) {
3716 LLVMValueRef ij_out[2];
3717 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
3718
3719 /*
3720 * take the I then J parameters, and the DDX/Y for it, and
3721 * calculate the IJ inputs for the interpolator.
3722 * temp1 = ddx * offset/sample.x + I;
3723 * interp_param.I = ddy * offset/sample.y + temp1;
3724 * temp1 = ddx * offset/sample.x + J;
3725 * interp_param.J = ddy * offset/sample.y + temp1;
3726 */
3727 for (unsigned i = 0; i < 2; i++) {
3728 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
3729 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
3730 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
3731 ddxy_out, ix_ll, "");
3732 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
3733 ddxy_out, iy_ll, "");
3734 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
3735 interp_param, ix_ll, "");
3736 LLVMValueRef temp1, temp2;
3737
3738 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
3739 ctx->f32, "");
3740
3741 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
3742 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
3743
3744 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
3745 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
3746
3747 ij_out[i] = LLVMBuildBitCast(ctx->builder,
3748 temp2, ctx->i32, "");
3749 }
3750 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
3751
3752 }
3753
3754 for (chan = 0; chan < 2; chan++) {
3755 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3756
3757 if (interp_param) {
3758 interp_param = LLVMBuildBitCast(ctx->builder,
3759 interp_param, LLVMVectorType(ctx->f32, 2), "");
3760 LLVMValueRef i = LLVMBuildExtractElement(
3761 ctx->builder, interp_param, ctx->i32zero, "");
3762 LLVMValueRef j = LLVMBuildExtractElement(
3763 ctx->builder, interp_param, ctx->i32one, "");
3764
3765 result[chan] = ac_build_fs_interp(&ctx->ac,
3766 llvm_chan, attr_number,
3767 ctx->prim_mask, i, j);
3768 } else {
3769 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
3770 LLVMConstInt(ctx->i32, 2, false),
3771 llvm_chan, attr_number,
3772 ctx->prim_mask);
3773 }
3774 }
3775 return ac_build_gather_values(&ctx->ac, result, 2);
3776 }
3777
3778 static void
3779 visit_emit_vertex(struct nir_to_llvm_context *ctx,
3780 const nir_intrinsic_instr *instr)
3781 {
3782 LLVMValueRef gs_next_vertex;
3783 LLVMValueRef can_emit, kill;
3784 int idx;
3785
3786 assert(instr->const_index[0] == 0);
3787 /* Write vertex attribute values to GSVS ring */
3788 gs_next_vertex = LLVMBuildLoad(ctx->builder,
3789 ctx->gs_next_vertex,
3790 "");
3791
3792 /* If this thread has already emitted the declared maximum number of
3793 * vertices, kill it: excessive vertex emissions are not supposed to
3794 * have any effect, and GS threads have no externally observable
3795 * effects other than emitting vertices.
3796 */
3797 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
3798 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
3799
3800 kill = LLVMBuildSelect(ctx->builder, can_emit,
3801 LLVMConstReal(ctx->f32, 1.0f),
3802 LLVMConstReal(ctx->f32, -1.0f), "");
3803 ac_build_kill(&ctx->ac, kill);
3804
3805 /* Loop over all enabled outputs and write them to the GSVS ring. */
3806 idx = 0;
3807 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
3808 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
3809 int length = 4;
3810 int slot = idx;
3811 int slot_inc = 1;
3812
3813 if (!(ctx->output_mask & (1ull << i)))
3814 continue;
3815
3816 if (i == VARYING_SLOT_CLIP_DIST0) {
3817 /* pack clip and cull into a single set of slots */
3818 length = ctx->num_output_clips + ctx->num_output_culls;
3819 if (length > 4)
3820 slot_inc = 2;
3821 }
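/* The GSVS ring is component-major: all vertices of component
 * (slot * 4 + j) are contiguous, so this vertex's value lives
 * ((slot * 4 + j) * gs_max_out_vertices + gs_next_vertex) dwords
 * into the ring; the multiply by 4 below converts that to bytes. */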
3822 for (unsigned j = 0; j < length; j++) {
3823 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
3824 out_ptr[j], "");
3825 LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
3826 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
3827 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), "");
3828
3829 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
3830
3831 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
3832 out_val, 1,
3833 voffset, ctx->gs2vs_offset, 0,
3834 1, 1, true, true);
3835 }
3836 idx += slot_inc;
3837 }
3838
3839 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
3840 ctx->i32one, "");
3841 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
3842
3843 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
3844 }
3845
3846 static void
3847 visit_end_primitive(struct nir_to_llvm_context *ctx,
3848 const nir_intrinsic_instr *instr)
3849 {
3850 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
3851 }
3852
3853 static LLVMValueRef
3854 visit_load_tess_coord(struct nir_to_llvm_context *ctx,
3855 const nir_intrinsic_instr *instr)
3856 {
3857 LLVMValueRef coord[4] = {
3858 ctx->tes_u,
3859 ctx->tes_v,
3860 ctx->f32zero,
3861 ctx->f32zero,
3862 };
3863
3864 if (ctx->tes_primitive_mode == GL_TRIANGLES)
3865 coord[2] = LLVMBuildFSub(ctx->builder, ctx->f32one,
3866 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), "");
3867
3868 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components);
3869 return LLVMBuildBitCast(ctx->builder, result,
3870 get_def_type(ctx, &instr->dest.ssa), "");
3871 }
3872
3873 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
3874 nir_intrinsic_instr *instr)
3875 {
3876 LLVMValueRef result = NULL;
3877
3878 switch (instr->intrinsic) {
3879 case nir_intrinsic_load_work_group_id: {
3880 result = ctx->workgroup_ids;
3881 break;
3882 }
3883 case nir_intrinsic_load_base_vertex: {
3884 result = ctx->base_vertex;
3885 break;
3886 }
3887 case nir_intrinsic_load_vertex_id_zero_base: {
3888 result = ctx->vertex_id;
3889 break;
3890 }
3891 case nir_intrinsic_load_local_invocation_id: {
3892 result = ctx->local_invocation_ids;
3893 break;
3894 }
3895 case nir_intrinsic_load_base_instance:
3896 result = ctx->start_instance;
3897 break;
3898 case nir_intrinsic_load_draw_id:
3899 result = ctx->draw_index;
3900 break;
3901 case nir_intrinsic_load_invocation_id:
3902 if (ctx->stage == MESA_SHADER_TESS_CTRL)
3903 result = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
3904 else
3905 result = ctx->gs_invocation_id;
3906 break;
3907 case nir_intrinsic_load_primitive_id:
3908 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3909 ctx->shader_info->gs.uses_prim_id = true;
3910 result = ctx->gs_prim_id;
3911 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3912 ctx->shader_info->tcs.uses_prim_id = true;
3913 result = ctx->tcs_patch_id;
3914 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
3915 ctx->shader_info->tcs.uses_prim_id = true;
3916 result = ctx->tes_patch_id;
3917 } else
3918 fprintf(stderr, "Unknown stage for primitive id intrinsic: %d\n", ctx->stage);
3919 break;
3920 case nir_intrinsic_load_sample_id:
3921 ctx->shader_info->fs.force_persample = true;
3922 result = unpack_param(ctx, ctx->ancillary, 8, 4);
3923 break;
3924 case nir_intrinsic_load_sample_pos:
3925 ctx->shader_info->fs.force_persample = true;
3926 result = load_sample_pos(ctx);
3927 break;
3928 case nir_intrinsic_load_sample_mask_in:
3929 result = ctx->sample_coverage;
3930 break;
3931 case nir_intrinsic_load_front_face:
3932 result = ctx->front_face;
3933 break;
3934 case nir_intrinsic_load_instance_id:
3935 result = ctx->instance_id;
3936 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3937 ctx->shader_info->vs.vgpr_comp_cnt);
3938 break;
3939 case nir_intrinsic_load_num_work_groups:
3940 result = ctx->num_work_groups;
3941 break;
3942 case nir_intrinsic_load_local_invocation_index:
3943 result = visit_load_local_invocation_index(ctx);
3944 break;
3945 case nir_intrinsic_load_push_constant:
3946 result = visit_load_push_constant(ctx, instr);
3947 break;
3948 case nir_intrinsic_vulkan_resource_index:
3949 result = visit_vulkan_resource_index(ctx, instr);
3950 break;
3951 case nir_intrinsic_store_ssbo:
3952 visit_store_ssbo(ctx, instr);
3953 break;
3954 case nir_intrinsic_load_ssbo:
3955 result = visit_load_buffer(ctx, instr);
3956 break;
3957 case nir_intrinsic_ssbo_atomic_add:
3958 case nir_intrinsic_ssbo_atomic_imin:
3959 case nir_intrinsic_ssbo_atomic_umin:
3960 case nir_intrinsic_ssbo_atomic_imax:
3961 case nir_intrinsic_ssbo_atomic_umax:
3962 case nir_intrinsic_ssbo_atomic_and:
3963 case nir_intrinsic_ssbo_atomic_or:
3964 case nir_intrinsic_ssbo_atomic_xor:
3965 case nir_intrinsic_ssbo_atomic_exchange:
3966 case nir_intrinsic_ssbo_atomic_comp_swap:
3967 result = visit_atomic_ssbo(ctx, instr);
3968 break;
3969 case nir_intrinsic_load_ubo:
3970 result = visit_load_ubo_buffer(ctx, instr);
3971 break;
3972 case nir_intrinsic_get_buffer_size:
3973 result = visit_get_buffer_size(ctx, instr);
3974 break;
3975 case nir_intrinsic_load_var:
3976 result = visit_load_var(ctx, instr);
3977 break;
3978 case nir_intrinsic_store_var:
3979 visit_store_var(ctx, instr);
3980 break;
3981 case nir_intrinsic_image_load:
3982 result = visit_image_load(ctx, instr);
3983 break;
3984 case nir_intrinsic_image_store:
3985 visit_image_store(ctx, instr);
3986 break;
3987 case nir_intrinsic_image_atomic_add:
3988 case nir_intrinsic_image_atomic_min:
3989 case nir_intrinsic_image_atomic_max:
3990 case nir_intrinsic_image_atomic_and:
3991 case nir_intrinsic_image_atomic_or:
3992 case nir_intrinsic_image_atomic_xor:
3993 case nir_intrinsic_image_atomic_exchange:
3994 case nir_intrinsic_image_atomic_comp_swap:
3995 result = visit_image_atomic(ctx, instr);
3996 break;
3997 case nir_intrinsic_image_size:
3998 result = visit_image_size(ctx, instr);
3999 break;
4000 case nir_intrinsic_discard:
4001 ctx->shader_info->fs.can_discard = true;
4002 ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
4003 ctx->voidt,
4004 NULL, 0, AC_FUNC_ATTR_LEGACY);
4005 break;
4006 case nir_intrinsic_discard_if:
4007 emit_discard_if(ctx, instr);
4008 break;
4009 case nir_intrinsic_memory_barrier:
4010 emit_waitcnt(ctx, VM_CNT);
4011 break;
4012 case nir_intrinsic_barrier:
4013 emit_barrier(ctx);
4014 break;
4015 case nir_intrinsic_var_atomic_add:
4016 case nir_intrinsic_var_atomic_imin:
4017 case nir_intrinsic_var_atomic_umin:
4018 case nir_intrinsic_var_atomic_imax:
4019 case nir_intrinsic_var_atomic_umax:
4020 case nir_intrinsic_var_atomic_and:
4021 case nir_intrinsic_var_atomic_or:
4022 case nir_intrinsic_var_atomic_xor:
4023 case nir_intrinsic_var_atomic_exchange:
4024 case nir_intrinsic_var_atomic_comp_swap:
4025 result = visit_var_atomic(ctx, instr);
4026 break;
4027 case nir_intrinsic_interp_var_at_centroid:
4028 case nir_intrinsic_interp_var_at_sample:
4029 case nir_intrinsic_interp_var_at_offset:
4030 result = visit_interp(ctx, instr);
4031 break;
4032 case nir_intrinsic_emit_vertex:
4033 visit_emit_vertex(ctx, instr);
4034 break;
4035 case nir_intrinsic_end_primitive:
4036 visit_end_primitive(ctx, instr);
4037 break;
4038 case nir_intrinsic_load_tess_coord:
4039 result = visit_load_tess_coord(ctx, instr);
4040 break;
4041 case nir_intrinsic_load_patch_vertices_in:
4042 result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
4043 break;
4044 default:
4045 fprintf(stderr, "Unknown intrinsic: ");
4046 nir_print_instr(&instr->instr, stderr);
4047 fprintf(stderr, "\n");
4048 break;
4049 }
4050 if (result) {
4051 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4052 }
4053 }
4054
4055 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
4056 const nir_deref_var *deref,
4057 enum desc_type desc_type)
4058 {
4059 unsigned desc_set = deref->var->data.descriptor_set;
4060 LLVMValueRef list = ctx->descriptor_sets[desc_set];
4061 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
4062 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
4063 unsigned offset = binding->offset;
4064 unsigned stride = binding->size;
4065 unsigned type_size;
4066 LLVMBuilderRef builder = ctx->builder;
4067 LLVMTypeRef type;
4068 LLVMValueRef index = NULL;
4069 unsigned constant_index = 0;
4070
4071 assert(deref->var->data.binding < layout->binding_count);
4072
4073 switch (desc_type) {
4074 case DESC_IMAGE:
4075 type = ctx->v8i32;
4076 type_size = 32;
4077 break;
4078 case DESC_FMASK:
4079 type = ctx->v8i32;
4080 offset += 32;
4081 type_size = 32;
4082 break;
4083 case DESC_SAMPLER:
4084 type = ctx->v4i32;
4085 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4086 offset += 64;
4087
4088 type_size = 16;
4089 break;
4090 case DESC_BUFFER:
4091 type = ctx->v4i32;
4092 type_size = 16;
4093 break;
4094 default:
4095 unreachable("invalid desc_type\n");
4096 }
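/* binding->size is the byte stride of one descriptor.  The list pointer
 * is recast below to an array of 'type' (16- or 32-byte elements), so
 * array indices scale by stride / type_size; e.g. for a hypothetical
 * 96-byte combined image+sampler binding, indexing the sampler view
 * advances 96 / 16 = 6 v4i32 elements per array element. */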
4097
4098 if (deref->deref.child) {
4099 const nir_deref_array *child =
4100 (const nir_deref_array *)deref->deref.child;
4101
4102 assert(child->deref_array_type != nir_deref_array_type_wildcard);
4103 offset += child->base_offset * stride;
4104 if (child->deref_array_type == nir_deref_array_type_indirect) {
4105 index = get_src(ctx, child->indirect);
4106 }
4107
4108 constant_index = child->base_offset;
4109 }
4110 if (desc_type == DESC_SAMPLER && binding->immutable_samplers_offset &&
4111 (!index || binding->immutable_samplers_equal)) {
4112 if (binding->immutable_samplers_equal)
4113 constant_index = 0;
4114
4115 const uint32_t *samplers = radv_immutable_samplers(layout, binding);
4116
4117 LLVMValueRef constants[] = {
4118 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 0], 0),
4119 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 1], 0),
4120 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 2], 0),
4121 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 3], 0),
4122 };
4123 return ac_build_gather_values(&ctx->ac, constants, 4);
4124 }
4125
4126 assert(stride % type_size == 0);
4127
4128 if (!index)
4129 index = ctx->i32zero;
4130
4131 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
4132
4133 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
4134 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
4135
4136 return ac_build_indexed_load_const(&ctx->ac, list, index);
4137 }
4138
4139 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
4140 struct ac_image_args *args,
4141 const nir_tex_instr *instr,
4142 nir_texop op,
4143 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
4144 LLVMValueRef *param, unsigned count,
4145 unsigned dmask)
4146 {
4147 unsigned is_rect = 0;
4148 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
4149
4150 if (op == nir_texop_lod)
4151 da = false;
4152 /* Pad to power of two vector */
4153 while (count < util_next_power_of_two(count))
4154 param[count++] = LLVMGetUndef(ctx->i32);
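/* e.g. a 3-dword address is padded to a <4 x i32> with one undef lane */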
4155
4156 if (count > 1)
4157 args->addr = ac_build_gather_values(&ctx->ac, param, count);
4158 else
4159 args->addr = param[0];
4160
4161 args->resource = res_ptr;
4162 args->sampler = samp_ptr;
4163
4164 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
4165 args->addr = param[0];
4166 return;
4167 }
4168
4169 args->dmask = dmask;
4170 args->unorm = is_rect;
4171 args->da = da;
4172 }
4173
4174 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4175 *
4176 * SI-CI:
4177 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4178 * filtering manually. The driver sets img7 to a mask clearing
4179 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4180 * s_and_b32 samp0, samp0, img7
4181 *
4182 * VI:
4183 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4184 */
4185 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
4186 LLVMValueRef res, LLVMValueRef samp)
4187 {
4188 LLVMBuilderRef builder = ctx->builder;
4189 LLVMValueRef img7, samp0;
4190
4191 if (ctx->options->chip_class >= VI)
4192 return samp;
4193
4194 img7 = LLVMBuildExtractElement(builder, res,
4195 LLVMConstInt(ctx->i32, 7, 0), "");
4196 samp0 = LLVMBuildExtractElement(builder, samp,
4197 LLVMConstInt(ctx->i32, 0, 0), "");
4198 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4199 return LLVMBuildInsertElement(builder, samp, samp0,
4200 LLVMConstInt(ctx->i32, 0, 0), "");
4201 }
4202
4203 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
4204 nir_tex_instr *instr,
4205 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4206 LLVMValueRef *fmask_ptr)
4207 {
4208 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4209 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
4210 else
4211 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
4212 if (samp_ptr) {
4213 if (instr->sampler)
4214 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
4215 else
4216 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
4217 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4218 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4219 }
4220 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
4221 instr->op == nir_texop_samples_identical))
4222 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
4223 }
4224
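/* Array layers are selected by rounding the float coordinate to the
 * nearest integer; llvm.rint (round-half-to-even) implements that
 * selection here. */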
4225 static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
4226 LLVMValueRef coord)
4227 {
4228 coord = to_float(&ctx->ac, coord);
4229 coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
4230 coord = to_integer(&ctx->ac, coord);
4231 return coord;
4232 }
4233
4234 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
4235 {
4236 LLVMValueRef result = NULL;
4237 struct ac_image_args args = { 0 };
4238 unsigned dmask = 0xf;
4239 LLVMValueRef address[16];
4240 LLVMValueRef coords[5];
4241 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
4242 LLVMValueRef bias = NULL, offsets = NULL;
4243 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
4244 LLVMValueRef ddx = NULL, ddy = NULL;
4245 LLVMValueRef derivs[6];
4246 unsigned chan, count = 0;
4247 unsigned const_src = 0, num_deriv_comp = 0;
4248 bool lod_is_zero = false;
4249 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
4250
4251 for (unsigned i = 0; i < instr->num_srcs; i++) {
4252 switch (instr->src[i].src_type) {
4253 case nir_tex_src_coord:
4254 coord = get_src(ctx, instr->src[i].src);
4255 break;
4256 case nir_tex_src_projector:
4257 break;
4258 case nir_tex_src_comparator:
4259 comparator = get_src(ctx, instr->src[i].src);
4260 break;
4261 case nir_tex_src_offset:
4262 offsets = get_src(ctx, instr->src[i].src);
4263 const_src = i;
4264 break;
4265 case nir_tex_src_bias:
4266 bias = get_src(ctx, instr->src[i].src);
4267 break;
4268 case nir_tex_src_lod: {
4269 nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
4270
4271 if (val && val->i32[0] == 0)
4272 lod_is_zero = true;
4273 lod = get_src(ctx, instr->src[i].src);
4274 break;
4275 }
4276 case nir_tex_src_ms_index:
4277 sample_index = get_src(ctx, instr->src[i].src);
4278 break;
4279 case nir_tex_src_ms_mcs:
4280 break;
4281 case nir_tex_src_ddx:
4282 ddx = get_src(ctx, instr->src[i].src);
4283 num_deriv_comp = instr->src[i].src.ssa->num_components;
4284 break;
4285 case nir_tex_src_ddy:
4286 ddy = get_src(ctx, instr->src[i].src);
4287 break;
4288 case nir_tex_src_texture_offset:
4289 case nir_tex_src_sampler_offset:
4290 case nir_tex_src_plane:
4291 default:
4292 break;
4293 }
4294 }
4295
4296 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4297 result = get_buffer_size(ctx, res_ptr, true);
4298 goto write_result;
4299 }
4300
4301 if (instr->op == nir_texop_texture_samples) {
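/* Decode the sample count from dword 3 of the image descriptor:
 * TYPE sits in bits [31:28] (the two 2D-MSAA types differ only in
 * bit 0, hence the 0xe mask) and bits [19:16] hold log2(samples)
 * for MSAA resources; non-MSAA views report a count of 1. */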
4302 LLVMValueRef res, samples, is_msaa;
4303 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
4304 samples = LLVMBuildExtractElement(ctx->builder, res,
4305 LLVMConstInt(ctx->i32, 3, false), "");
4306 is_msaa = LLVMBuildLShr(ctx->builder, samples,
4307 LLVMConstInt(ctx->i32, 28, false), "");
4308 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
4309 LLVMConstInt(ctx->i32, 0xe, false), "");
4310 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
4311 LLVMConstInt(ctx->i32, 0xe, false), "");
4312
4313 samples = LLVMBuildLShr(ctx->builder, samples,
4314 LLVMConstInt(ctx->i32, 16, false), "");
4315 samples = LLVMBuildAnd(ctx->builder, samples,
4316 LLVMConstInt(ctx->i32, 0xf, false), "");
4317 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
4318 samples, "");
4319 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
4320 ctx->i32one, "");
4321 result = samples;
4322 goto write_result;
4323 }
4324
4325 if (coord)
4326 for (chan = 0; chan < instr->coord_components; chan++)
4327 coords[chan] = llvm_extract_elem(ctx, coord, chan);
4328
4329 if (offsets && instr->op != nir_texop_txf) {
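/* Pack up to three texel offsets into one dword as 6-bit fields:
 * x in bits [5:0], y in bits [13:8], z in bits [21:16]. */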
4330 LLVMValueRef offset[3], pack;
4331 for (chan = 0; chan < 3; ++chan)
4332 offset[chan] = ctx->i32zero;
4333
4334 args.offset = true;
4335 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
4336 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
4337 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
4338 LLVMConstInt(ctx->i32, 0x3f, false), "");
4339 if (chan)
4340 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
4341 LLVMConstInt(ctx->i32, chan * 8, false), "");
4342 }
4343 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
4344 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
4345 address[count++] = pack;
4346
4347 }
4348 /* pack LOD bias value */
4349 if (instr->op == nir_texop_txb && bias) {
4350 address[count++] = bias;
4351 }
4352
4353 /* Pack depth comparison value */
4354 if (instr->is_shadow && comparator) {
4355 address[count++] = llvm_extract_elem(ctx, comparator, 0);
4356 }
4357
4358 /* pack derivatives */
4359 if (ddx || ddy) {
4360 switch (instr->sampler_dim) {
4361 case GLSL_SAMPLER_DIM_3D:
4362 case GLSL_SAMPLER_DIM_CUBE:
4363 num_deriv_comp = 3;
4364 break;
4365 case GLSL_SAMPLER_DIM_2D:
4366 default:
4367 num_deriv_comp = 2;
4368 break;
4369 case GLSL_SAMPLER_DIM_1D:
4370 num_deriv_comp = 1;
4371 break;
4372 }
4373
4374 for (unsigned i = 0; i < num_deriv_comp; i++) {
4375 derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i));
4376 derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
4377 }
4378 }
4379
4380 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
4381 if (instr->is_array && instr->op != nir_texop_lod)
4382 coords[3] = apply_round_slice(ctx, coords[3]);
4383 for (chan = 0; chan < instr->coord_components; chan++)
4384 coords[chan] = to_float(&ctx->ac, coords[chan]);
4385 if (instr->coord_components == 3)
4386 coords[3] = LLVMGetUndef(ctx->f32);
4387 ac_prepare_cube_coords(&ctx->ac,
4388 instr->op == nir_texop_txd, instr->is_array,
4389 coords, derivs);
4390 if (num_deriv_comp)
4391 num_deriv_comp--;
4392 }
4393
4394 if (ddx || ddy) {
4395 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
4396 address[count++] = derivs[i];
4397 }
4398
4399 /* Pack texture coordinates */
4400 if (coord) {
4401 address[count++] = coords[0];
4402 if (instr->coord_components > 1) {
4403 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
4404 coords[1] = apply_round_slice(ctx, coords[1]);
4405 }
4406 address[count++] = coords[1];
4407 }
4408 if (instr->coord_components > 2) {
4409 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
4410 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
4411 instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
4412 instr->op != nir_texop_txf) {
4413 coords[2] = apply_round_slice(ctx, coords[2]);
4414 }
4415 address[count++] = coords[2];
4416 }
4417 }
4418
4419 /* Pack LOD */
4420 if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) ||
4421 instr->op == nir_texop_txf)) {
4422 address[count++] = lod;
4423 } else if (instr->op == nir_texop_txf_ms && sample_index) {
4424 address[count++] = sample_index;
4425 } else if (instr->op == nir_texop_txs) {
4426 count = 0;
4427 if (lod)
4428 address[count++] = lod;
4429 else
4430 address[count++] = ctx->i32zero;
4431 }
4432
4433 for (chan = 0; chan < count; chan++) {
4434 address[chan] = LLVMBuildBitCast(ctx->builder,
4435 address[chan], ctx->i32, "");
4436 }
4437
4438 if (instr->op == nir_texop_samples_identical) {
4439 LLVMValueRef txf_address[4];
4440 struct ac_image_args txf_args = { 0 };
4441 unsigned txf_count = count;
4442 memcpy(txf_address, address, sizeof(txf_address));
4443
4444 if (!instr->is_array)
4445 txf_address[2] = ctx->i32zero;
4446 txf_address[3] = ctx->i32zero;
4447
4448 set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf,
4449 fmask_ptr, NULL,
4450 txf_address, txf_count, 0xf);
4451
4452 result = build_tex_intrinsic(ctx, instr, false, &txf_args);
4453
4454 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
4455 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->i32zero);
4456 goto write_result;
4457 }
4458
4459 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
4460 instr->op != nir_texop_txs) {
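/* As with image loads, remap the sample index through the FMASK so
 * fetches from compressed MSAA surfaces return the right fragment. */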
4461 unsigned sample_chan = instr->is_array ? 3 : 2;
4462 address[sample_chan] = adjust_sample_index_using_fmask(ctx,
4463 address[0],
4464 address[1],
4465 instr->is_array ? address[2] : NULL,
4466 address[sample_chan],
4467 fmask_ptr);
4468 }
4469
4470 if (offsets && instr->op == nir_texop_txf) {
4471 nir_const_value *const_offset =
4472 nir_src_as_const_value(instr->src[const_src].src);
4473 int num_offsets = instr->src[const_src].src.ssa->num_components;
4474 assert(const_offset);
4475 num_offsets = MIN2(num_offsets, instr->coord_components);
4476 if (num_offsets > 2)
4477 address[2] = LLVMBuildAdd(ctx->builder,
4478 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
4479 if (num_offsets > 1)
4480 address[1] = LLVMBuildAdd(ctx->builder,
4481 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
4482 address[0] = LLVMBuildAdd(ctx->builder,
4483 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
4484
4485 }
4486
4487 /* TODO TG4 support */
4488 if (instr->op == nir_texop_tg4) {
4489 if (instr->is_shadow)
4490 dmask = 1;
4491 else
4492 dmask = 1 << instr->component;
4493 }
4494 set_tex_fetch_args(ctx, &args, instr, instr->op,
4495 res_ptr, samp_ptr, address, count, dmask);
4496
4497 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
4498
4499 if (instr->op == nir_texop_query_levels)
4500 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
4501 else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
4502 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
4503 else if (instr->op == nir_texop_txs &&
4504 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4505 instr->is_array) {
4506 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
4507 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
4508 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
4509 z = LLVMBuildSDiv(ctx->builder, z, six, "");
4510 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
4511 } else if (instr->dest.ssa.num_components != 4)
4512 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
4513
4514 write_result:
4515 if (result) {
4516 assert(instr->dest.is_ssa);
4517 result = to_integer(&ctx->ac, result);
4518 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4519 }
4520 }
4521
4522
4523 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
4524 {
4525 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4526 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
4527
4528 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4529 _mesa_hash_table_insert(ctx->phis, instr, result);
4530 }
4531
4532 static void visit_post_phi(struct nir_to_llvm_context *ctx,
4533 nir_phi_instr *instr,
4534 LLVMValueRef llvm_phi)
4535 {
4536 nir_foreach_phi_src(src, instr) {
4537 LLVMBasicBlockRef block = get_block(ctx, src->pred);
4538 LLVMValueRef llvm_src = get_src(ctx, src->src);
4539
4540 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
4541 }
4542 }
4543
4544 static void phi_post_pass(struct nir_to_llvm_context *ctx)
4545 {
4546 struct hash_entry *entry;
4547 hash_table_foreach(ctx->phis, entry) {
4548 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
4549 (LLVMValueRef)entry->data);
4550 }
4551 }
4552
4553
4554 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
4555 const nir_ssa_undef_instr *instr)
4556 {
4557 unsigned num_components = instr->def.num_components;
4558 LLVMValueRef undef;
4559
4560 if (num_components == 1)
4561 undef = LLVMGetUndef(ctx->i32);
4562 else {
4563 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
4564 }
4565 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
4566 }
4567
4568 static void visit_jump(struct nir_to_llvm_context *ctx,
4569 const nir_jump_instr *instr)
4570 {
4571 switch (instr->type) {
4572 case nir_jump_break:
4573 LLVMBuildBr(ctx->builder, ctx->break_block);
4574 LLVMClearInsertionPosition(ctx->builder);
4575 break;
4576 case nir_jump_continue:
4577 LLVMBuildBr(ctx->builder, ctx->continue_block);
4578 LLVMClearInsertionPosition(ctx->builder);
4579 break;
4580 default:
4581 fprintf(stderr, "Unknown NIR jump instr: ");
4582 nir_print_instr(&instr->instr, stderr);
4583 fprintf(stderr, "\n");
4584 abort();
4585 }
4586 }
4587
4588 static void visit_cf_list(struct nir_to_llvm_context *ctx,
4589 struct exec_list *list);
4590
4591 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
4592 {
4593 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
4594 nir_foreach_instr(instr, block)
4595 {
4596 switch (instr->type) {
4597 case nir_instr_type_alu:
4598 visit_alu(ctx, nir_instr_as_alu(instr));
4599 break;
4600 case nir_instr_type_load_const:
4601 visit_load_const(ctx, nir_instr_as_load_const(instr));
4602 break;
4603 case nir_instr_type_intrinsic:
4604 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4605 break;
4606 case nir_instr_type_tex:
4607 visit_tex(ctx, nir_instr_as_tex(instr));
4608 break;
4609 case nir_instr_type_phi:
4610 visit_phi(ctx, nir_instr_as_phi(instr));
4611 break;
4612 case nir_instr_type_ssa_undef:
4613 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
4614 break;
4615 case nir_instr_type_jump:
4616 visit_jump(ctx, nir_instr_as_jump(instr));
4617 break;
4618 default:
4619 fprintf(stderr, "Unknown NIR instr type: ");
4620 nir_print_instr(instr, stderr);
4621 fprintf(stderr, "\n");
4622 abort();
4623 }
4624 }
4625
4626 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
4627 }
4628
4629 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
4630 {
4631 LLVMValueRef value = get_src(ctx, if_stmt->condition);
4632
4633 LLVMBasicBlockRef merge_block =
4634 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4635 LLVMBasicBlockRef if_block =
4636 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4637 LLVMBasicBlockRef else_block = merge_block;
4638 if (!exec_list_is_empty(&if_stmt->else_list))
4639 else_block = LLVMAppendBasicBlockInContext(
4640 ctx->context, ctx->main_function, "");
4641
4642 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
4643 LLVMConstInt(ctx->i32, 0, false), "");
4644 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
4645
4646 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
4647 visit_cf_list(ctx, &if_stmt->then_list);
4648 if (LLVMGetInsertBlock(ctx->builder))
4649 LLVMBuildBr(ctx->builder, merge_block);
4650
4651 if (!exec_list_is_empty(&if_stmt->else_list)) {
4652 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
4653 visit_cf_list(ctx, &if_stmt->else_list);
4654 if (LLVMGetInsertBlock(ctx->builder))
4655 LLVMBuildBr(ctx->builder, merge_block);
4656 }
4657
4658 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
4659 }
4660
4661 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
4662 {
4663 LLVMBasicBlockRef continue_parent = ctx->continue_block;
4664 LLVMBasicBlockRef break_parent = ctx->break_block;
4665
4666 ctx->continue_block =
4667 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4668 ctx->break_block =
4669 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4670
4671 LLVMBuildBr(ctx->builder, ctx->continue_block);
4672 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
4673 visit_cf_list(ctx, &loop->body);
4674
4675 if (LLVMGetInsertBlock(ctx->builder))
4676 LLVMBuildBr(ctx->builder, ctx->continue_block);
4677 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
4678
4679 ctx->continue_block = continue_parent;
4680 ctx->break_block = break_parent;
4681 }
4682
4683 static void visit_cf_list(struct nir_to_llvm_context *ctx,
4684 struct exec_list *list)
4685 {
4686 foreach_list_typed(nir_cf_node, node, node, list)
4687 {
4688 switch (node->type) {
4689 case nir_cf_node_block:
4690 visit_block(ctx, nir_cf_node_as_block(node));
4691 break;
4692
4693 case nir_cf_node_if:
4694 visit_if(ctx, nir_cf_node_as_if(node));
4695 break;
4696
4697 case nir_cf_node_loop:
4698 visit_loop(ctx, nir_cf_node_as_loop(node));
4699 break;
4700
4701 default:
4702 assert(0);
4703 }
4704 }
4705 }
4706
4707 static void
4708 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
4709 struct nir_variable *variable)
4710 {
4711 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
4712 LLVMValueRef t_offset;
4713 LLVMValueRef t_list;
4714 LLVMValueRef input;
4715 LLVMValueRef buffer_index;
4716 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
4717 int idx = variable->data.location;
4718 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
4719
4720 variable->data.driver_location = idx * 4;
4721
4722 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
4723 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
4724 ctx->start_instance, "");
4725 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
4726 ctx->shader_info->vs.vgpr_comp_cnt);
4727 } else
4728 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
4729 ctx->base_vertex, "");
4730
4731 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
4732 t_offset = LLVMConstInt(ctx->i32, index + i, false);
4733
4734 t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
4735
4736 input = ac_build_buffer_load_format(&ctx->ac, t_list,
4737 buffer_index,
4738 LLVMConstInt(ctx->i32, 0, false),
4739 true);
4740
4741 for (unsigned chan = 0; chan < 4; chan++) {
4742 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
4743 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
4744 to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
4745 input, llvm_chan, ""));
4746 }
4747 }
4748 }
4749
4750 static void interp_fs_input(struct nir_to_llvm_context *ctx,
4751 unsigned attr,
4752 LLVMValueRef interp_param,
4753 LLVMValueRef prim_mask,
4754 LLVMValueRef result[4])
4755 {
4756 LLVMValueRef attr_number;
4757 unsigned chan;
4758 LLVMValueRef i, j;
4759 bool interp = interp_param != NULL;
4760
4761 attr_number = LLVMConstInt(ctx->i32, attr, false);
4762
4763 /* fs.constant returns the param from the middle vertex, so it's not
4764 * really useful for flat shading. It's meant to be used for custom
4765 * interpolation (but the intrinsic can't fetch from the other two
4766 * vertices).
4767 *
4768 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
4769 * to do the right thing. The only reason we use fs.constant is that
4770 * fs.interp cannot be used on integers, because they can be equal
4771 * to NaN.
4772 */
4773 if (interp) {
4774 interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
4775 LLVMVectorType(ctx->f32, 2), "");
4776
4777 i = LLVMBuildExtractElement(ctx->builder, interp_param,
4778 ctx->i32zero, "");
4779 j = LLVMBuildExtractElement(ctx->builder, interp_param,
4780 ctx->i32one, "");
4781 }
4782
4783 for (chan = 0; chan < 4; chan++) {
4784 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
4785
4786 if (interp) {
4787 result[chan] = ac_build_fs_interp(&ctx->ac,
4788 llvm_chan,
4789 attr_number,
4790 prim_mask, i, j);
4791 } else {
4792 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
4793 LLVMConstInt(ctx->i32, 2, false),
4794 llvm_chan,
4795 attr_number,
4796 prim_mask);
4797 }
4798 }
4799 }
4800
4801 static void
4802 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
4803 struct nir_variable *variable)
4804 {
4805 int idx = variable->data.location;
4806 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4807 LLVMValueRef interp;
4808
4809 variable->data.driver_location = idx * 4;
4810 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
4811
4812 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
4813 unsigned interp_type;
4814 if (variable->data.sample) {
4815 interp_type = INTERP_SAMPLE;
4816 ctx->shader_info->fs.force_persample = true;
4817 } else if (variable->data.centroid)
4818 interp_type = INTERP_CENTROID;
4819 else
4820 interp_type = INTERP_CENTER;
4821
4822 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
4823 } else
4824 interp = NULL;
4825
4826 for (unsigned i = 0; i < attrib_count; ++i)
4827 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
4828
4829 }
4830
4831 static void
4832 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
4833 struct nir_variable *variable)
4834 {
4835 switch (ctx->stage) {
4836 case MESA_SHADER_VERTEX:
4837 handle_vs_input_decl(ctx, variable);
4838 break;
4839 case MESA_SHADER_FRAGMENT:
4840 handle_fs_input_decl(ctx, variable);
4841 break;
4842 default:
4843 break;
4844 }
4845
4846 }
4847
4848 static void
4849 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
4850 struct nir_shader *nir)
4851 {
4852 unsigned index = 0;
4853 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
4854 LLVMValueRef interp_param;
4855 LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
4856
4857 if (!(ctx->input_mask & (1ull << i)))
4858 continue;
4859
4860 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
4861 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
4862 interp_param = *inputs;
4863 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
4864 inputs);
4865
4866 if (!interp_param)
4867 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
4868 ++index;
4869 } else if (i == VARYING_SLOT_POS) {
4870 for (int i = 0; i < 3; ++i)
4871 inputs[i] = ctx->frag_pos[i];
4872
4873 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
4874 }
4875 }
4876 ctx->shader_info->fs.num_interp = index;
4877 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
4878 ctx->shader_info->fs.has_pcoord = true;
4879 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID))
4880 ctx->shader_info->fs.prim_id_input = true;
4881 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
4882 ctx->shader_info->fs.layer_input = true;
4883 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
4884 }
4885
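/* Emit allocas in the function's entry block so that LLVM's mem2reg/SROA
 * passes can promote them to SSA values; a temporary builder positioned
 * at the first instruction keeps the current insert point intact. */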
4886 static LLVMValueRef
4887 ac_build_alloca(struct nir_to_llvm_context *ctx,
4888 LLVMTypeRef type,
4889 const char *name)
4890 {
4891 LLVMBuilderRef builder = ctx->builder;
4892 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
4893 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
4894 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
4895 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
4896 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
4897 LLVMValueRef res;
4898
4899 if (first_instr) {
4900 LLVMPositionBuilderBefore(first_builder, first_instr);
4901 } else {
4902 LLVMPositionBuilderAtEnd(first_builder, first_block);
4903 }
4904
4905 res = LLVMBuildAlloca(first_builder, type, name);
4906 LLVMBuildStore(builder, LLVMConstNull(type), res);
4907
4908 LLVMDisposeBuilder(first_builder);
4909
4910 return res;
4911 }
4912
4913 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
4914 LLVMTypeRef type,
4915 const char *name)
4916 {
4917 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
4918 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
4919 return ptr;
4920 }
4921
4922 static void
4923 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
4924 struct nir_variable *variable)
4925 {
4926 int idx = variable->data.location + variable->data.index;
4927 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4928 uint64_t mask_attribs;
4929 variable->data.driver_location = idx * 4;
4930
4931 /* tess ctrl has its own load/store paths for outputs */
4932 if (ctx->stage == MESA_SHADER_TESS_CTRL)
4933 return;
4934
4935 mask_attribs = ((1ull << attrib_count) - 1) << idx;
4936 if (ctx->stage == MESA_SHADER_VERTEX ||
4937 ctx->stage == MESA_SHADER_TESS_EVAL ||
4938 ctx->stage == MESA_SHADER_GEOMETRY) {
4939 if (idx == VARYING_SLOT_CLIP_DIST0) {
4940 int length = ctx->num_output_clips + ctx->num_output_culls;
4941 if (ctx->stage == MESA_SHADER_VERTEX) {
4942 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
4943 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
4944 }
4945 if (ctx->stage == MESA_SHADER_TESS_EVAL) {
4946 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
4947 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
4948 }
4949
4950 if (length > 4)
4951 attrib_count = 2;
4952 else
4953 attrib_count = 1;
4954 mask_attribs = 1ull << idx;
4955 }
4956 }
4957
4958 for (unsigned i = 0; i < attrib_count; ++i) {
4959 for (unsigned chan = 0; chan < 4; chan++) {
4960 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
4961 si_build_alloca_undef(ctx, ctx->f32, "");
4962 }
4963 }
4964 ctx->output_mask |= mask_attribs;
4965 }
4966
4967 static void
4968 setup_locals(struct nir_to_llvm_context *ctx,
4969 struct nir_function *func)
4970 {
4971 int i, j;
4972 ctx->num_locals = 0;
4973 nir_foreach_variable(variable, &func->impl->locals) {
4974 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4975 variable->data.driver_location = ctx->num_locals * 4;
4976 ctx->num_locals += attrib_count;
4977 }
4978 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
4979 if (!ctx->locals)
4980 return;
4981
4982 for (i = 0; i < ctx->num_locals; i++) {
4983 for (j = 0; j < 4; j++) {
4984 ctx->locals[i * 4 + j] =
4985 si_build_alloca_undef(ctx, ctx->f32, "temp");
4986 }
4987 }
4988 }
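
/* Locals use the same struct-of-arrays layout as ctx->outputs: component
 * chan of the i-th vec4 slot lives at ctx->locals[i * 4 + chan]. Access
 * sketch for a hypothetical nir_variable "var":
 *
 *     unsigned slot = var->data.driver_location / 4;
 *     LLVMValueRef x = LLVMBuildLoad(ctx->builder,
 *                                    ctx->locals[slot * 4 + 0], "");
 */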
4989
4990 static LLVMValueRef
4991 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
4992 {
4993 v = to_float(ctx, v);
4994 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
4995 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
4996 }
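
/* emit_float_saturate() computes min(max(v, lo), hi); since llvm.maxnum/
 * llvm.minnum return the non-NaN operand, a NaN input comes out as lo
 * instead of propagating. The SNORM16 export path below uses it to clamp
 * each channel to [-1, 1].
 */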
4997
4998
4999 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
5000 LLVMValueRef src0, LLVMValueRef src1)
5001 {
5002 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
5003 LLVMValueRef comp[2];
5004
5005 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->i32, 65535, 0), "");
5006 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->i32, 65535, 0), "");
5007 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
5008 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
5009 }
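
/* emit_pack_int16() implements (src0 & 0xffff) | (src1 << 16): packing
 * src0 = 0x1234 and src1 = 0xabcd yields 0xabcd1234, with the first value
 * in the low half-word, as the 16-bit compressed export paths (UNORM16/
 * SNORM16/UINT16/SINT16) below expect.
 */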
5010
5011 /* Initialize arguments for the shader export intrinsic */
5012 static void
5013 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
5014 LLVMValueRef *values,
5015 unsigned target,
5016 struct ac_export_args *args)
5017 {
5018 /* Default is 0xf. Adjusted below depending on the format. */
5019 args->enabled_channels = 0xf;
5020
5021 /* Specify whether the EXEC mask represents the valid mask */
5022 args->valid_mask = 0;
5023
5024 /* Specify whether this is the last export */
5025 args->done = 0;
5026
5027 /* Specify the target we are exporting */
5028 args->target = target;
5029
5030 args->compr = false;
5031 args->out[0] = LLVMGetUndef(ctx->f32);
5032 args->out[1] = LLVMGetUndef(ctx->f32);
5033 args->out[2] = LLVMGetUndef(ctx->f32);
5034 args->out[3] = LLVMGetUndef(ctx->f32);
5035
5036 if (!values)
5037 return;
5038
5039 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
5040 LLVMValueRef val[4];
5041 unsigned index = target - V_008DFC_SQ_EXP_MRT;
5042 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
5043 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
5044
5045 switch(col_format) {
5046 case V_028714_SPI_SHADER_ZERO:
5047 args->enabled_channels = 0; /* writemask */
5048 args->target = V_008DFC_SQ_EXP_NULL;
5049 break;
5050
5051 case V_028714_SPI_SHADER_32_R:
5052 args->enabled_channels = 1;
5053 args->out[0] = values[0];
5054 break;
5055
5056 case V_028714_SPI_SHADER_32_GR:
5057 args->enabled_channels = 0x3;
5058 args->out[0] = values[0];
5059 args->out[1] = values[1];
5060 break;
5061
5062 case V_028714_SPI_SHADER_32_AR:
5063 args->enabled_channels = 0x9;
5064 args->out[0] = values[0];
5065 args->out[3] = values[3];
5066 break;
5067
5068 case V_028714_SPI_SHADER_FP16_ABGR:
5069 args->compr = 1;
5070
5071 for (unsigned chan = 0; chan < 2; chan++) {
5072 LLVMValueRef pack_args[2] = {
5073 values[2 * chan],
5074 values[2 * chan + 1]
5075 };
5076 LLVMValueRef packed;
5077
5078 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
5079 args->out[chan] = packed;
5080 }
5081 break;
5082
5083 case V_028714_SPI_SHADER_UNORM16_ABGR:
5084 for (unsigned chan = 0; chan < 4; chan++) {
5085 val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
5086 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5087 LLVMConstReal(ctx->f32, 65535), "");
5088 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5089 LLVMConstReal(ctx->f32, 0.5), "");
5090 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
5091 ctx->i32, "");
5092 }
5093
5094 args->compr = 1;
5095 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5096 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5097 break;
5098
5099 case V_028714_SPI_SHADER_SNORM16_ABGR:
5100 for (unsigned chan = 0; chan < 4; chan++) {
5101 val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1);
5102 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5103 LLVMConstReal(ctx->f32, 32767), "");
5104
5105 /* If positive, add 0.5, else add -0.5. */
5106 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5107 LLVMBuildSelect(ctx->builder,
5108 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
5109 val[chan], ctx->f32zero, ""),
5110 LLVMConstReal(ctx->f32, 0.5),
5111 LLVMConstReal(ctx->f32, -0.5), ""), "");
5112 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
5113 }
5114
5115 args->compr = 1;
5116 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5117 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5118 break;
5119
5120 case V_028714_SPI_SHADER_UINT16_ABGR: {
5121 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
5122
5123 for (unsigned chan = 0; chan < 4; chan++) {
5124 val[chan] = to_integer(&ctx->ac, values[chan]);
5125 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], max);
5126 }
5127
5128 args->compr = 1;
5129 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5130 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5131 break;
5132 }
5133
5134 case V_028714_SPI_SHADER_SINT16_ABGR: {
5135 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
5136 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
5137
5138 /* Clamp. */
5139 for (unsigned chan = 0; chan < 4; chan++) {
5140 val[chan] = to_integer(&ctx->ac, values[chan]);
5141 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], max);
5142 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], min);
5143 }
5144
5145 args->compr = 1;
5146 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5147 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5148 break;
5149 }
5150
5151 default:
5152 case V_028714_SPI_SHADER_32_ABGR:
5153 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5154 break;
5155 }
5156 } else
5157 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5158
5159 for (unsigned i = 0; i < 4; ++i)
5160 args->out[i] = to_float(&ctx->ac, args->out[i]);
5161 }
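
/* Worked example for the V_028714_SPI_SHADER_UNORM16_ABGR case above: a
 * channel value of 0.25 is clamped to [0, 1], scaled to 0.25 * 65535 =
 * 16383.75, biased to 16384.25 and truncated by fptoui to 16384; pairs of
 * such channels are then packed into one 32-bit export dword.
 */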
5162
5163 static void
5164 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
5165 bool export_prim_id,
5166 struct ac_vs_output_info *outinfo)
5167 {
5168 uint32_t param_count = 0;
5169 unsigned target;
5170 unsigned pos_idx, num_pos_exports = 0;
5171 struct ac_export_args args, pos_args[4] = {};
5172 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
5173 int i;
5174
5175 memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
5176 sizeof(outinfo->vs_output_param_offset));
5177
5178 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
5179 LLVMValueRef slots[8];
5180 unsigned j;
5181
5182 if (outinfo->cull_dist_mask)
5183 outinfo->cull_dist_mask <<= ctx->num_output_clips;
5184
5185 i = VARYING_SLOT_CLIP_DIST0;
5186 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
5187 slots[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5188 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5189
5190 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
5191 slots[i] = LLVMGetUndef(ctx->f32);
5192
5193 if (ctx->num_output_clips + ctx->num_output_culls > 4) {
5194 target = V_008DFC_SQ_EXP_POS + 3;
5195 si_llvm_init_export_args(ctx, &slots[4], target, &args);
5196 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5197 &args, sizeof(args));
5198 }
5199
5200 target = V_008DFC_SQ_EXP_POS + 2;
5201 si_llvm_init_export_args(ctx, &slots[0], target, &args);
5202 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5203 &args, sizeof(args));
5204
5205 }
5206
5207 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5208 LLVMValueRef values[4];
5209 if (!(ctx->output_mask & (1ull << i)))
5210 continue;
5211
5212 for (unsigned j = 0; j < 4; j++)
5213 values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5214 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5215
5216 if (i == VARYING_SLOT_POS) {
5217 target = V_008DFC_SQ_EXP_POS;
5218 } else if (i == VARYING_SLOT_CLIP_DIST0) {
5219 continue;
5220 } else if (i == VARYING_SLOT_PSIZ) {
5221 outinfo->writes_pointsize = true;
5222 psize_value = values[0];
5223 continue;
5224 } else if (i == VARYING_SLOT_LAYER) {
5225 outinfo->writes_layer = true;
5226 layer_value = values[0];
5227 target = V_008DFC_SQ_EXP_PARAM + param_count;
5228 outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
5229 param_count++;
5230 } else if (i == VARYING_SLOT_VIEWPORT) {
5231 outinfo->writes_viewport_index = true;
5232 viewport_index_value = values[0];
5233 continue;
5234 } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
5235 target = V_008DFC_SQ_EXP_PARAM + param_count;
5236 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
5237 param_count++;
5238 } else if (i >= VARYING_SLOT_VAR0) {
5239 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
5240 target = V_008DFC_SQ_EXP_PARAM + param_count;
5241 outinfo->vs_output_param_offset[i] = param_count;
5242 param_count++;
5243 }
5244
5245 si_llvm_init_export_args(ctx, values, target, &args);
5246
5247 if (target >= V_008DFC_SQ_EXP_POS &&
5248 target <= (V_008DFC_SQ_EXP_POS + 3)) {
5249 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5250 &args, sizeof(args));
5251 } else {
5252 ac_build_export(&ctx->ac, &args);
5253 }
5254 }
5255
5256 /* We need to add the position output manually if it's missing. */
5257 if (!pos_args[0].out[0]) {
5258 pos_args[0].enabled_channels = 0xf;
5259 pos_args[0].valid_mask = 0;
5260 pos_args[0].done = 0;
5261 pos_args[0].target = V_008DFC_SQ_EXP_POS;
5262 pos_args[0].compr = 0;
5263 pos_args[0].out[0] = ctx->f32zero; /* X */
5264 pos_args[0].out[1] = ctx->f32zero; /* Y */
5265 pos_args[0].out[2] = ctx->f32zero; /* Z */
5266 pos_args[0].out[3] = ctx->f32one; /* W */
5267 }
5268
5269 uint32_t mask = ((outinfo->writes_pointsize == true ? 1 : 0) |
5270 (outinfo->writes_layer == true ? 4 : 0) |
5271 (outinfo->writes_viewport_index == true ? 8 : 0));
5272 if (mask) {
5273 pos_args[1].enabled_channels = mask;
5274 pos_args[1].valid_mask = 0;
5275 pos_args[1].done = 0;
5276 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
5277 pos_args[1].compr = 0;
5278 pos_args[1].out[0] = ctx->f32zero; /* X */
5279 pos_args[1].out[1] = ctx->f32zero; /* Y */
5280 pos_args[1].out[2] = ctx->f32zero; /* Z */
5281 pos_args[1].out[3] = ctx->f32zero; /* W */
5282
5283 if (outinfo->writes_pointsize == true)
5284 pos_args[1].out[0] = psize_value;
5285 if (outinfo->writes_layer == true)
5286 pos_args[1].out[2] = layer_value;
5287 if (outinfo->writes_viewport_index == true)
5288 pos_args[1].out[3] = viewport_index_value;
5289 }
5290 for (i = 0; i < 4; i++) {
5291 if (pos_args[i].out[0])
5292 num_pos_exports++;
5293 }
5294
5295 pos_idx = 0;
5296 for (i = 0; i < 4; i++) {
5297 if (!pos_args[i].out[0])
5298 continue;
5299
5300 /* Specify the target we are exporting */
5301 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
5302 if (pos_idx == num_pos_exports)
5303 pos_args[i].done = 1;
5304 ac_build_export(&ctx->ac, &pos_args[i]);
5305 }
5306
5307
5308 if (export_prim_id) {
5309 LLVMValueRef values[4];
5310 target = V_008DFC_SQ_EXP_PARAM + param_count;
5311 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
5312 param_count++;
5313
5314 values[0] = ctx->vs_prim_id;
5315 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2,
5316 ctx->shader_info->vs.vgpr_comp_cnt);
5317 for (unsigned j = 1; j < 4; j++)
5318 values[j] = ctx->f32zero;
5319 si_llvm_init_export_args(ctx, values, target, &args);
5320 ac_build_export(&ctx->ac, &args);
5321 outinfo->export_prim_id = true;
5322 }
5323
5324 outinfo->pos_exports = num_pos_exports;
5325 outinfo->param_exports = param_count;
5326 }
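
/* Position exports must form a contiguous range starting at
 * V_008DFC_SQ_EXP_POS, and the last of them must carry the DONE bit, which
 * is why the loop above first counts num_pos_exports and then renumbers the
 * surviving pos_args[] targets with pos_idx instead of exporting them at
 * their original slots.
 */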
5327
5328 static void
5329 handle_es_outputs_post(struct nir_to_llvm_context *ctx,
5330 struct ac_es_output_info *outinfo)
5331 {
5332 int j;
5333 uint64_t max_output_written = 0;
5334 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5335 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
5336 int param_index;
5337 int length = 4;
5338
5339 if (!(ctx->output_mask & (1ull << i)))
5340 continue;
5341
5342 if (i == VARYING_SLOT_CLIP_DIST0)
5343 length = ctx->num_output_clips + ctx->num_output_culls;
5344
5345 param_index = shader_io_get_unique_index(i);
5346
5347 max_output_written = MAX2(param_index + (length > 4), max_output_written);
5348
5349 for (j = 0; j < length; j++) {
5350 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
5351 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
5352
5353 ac_build_buffer_store_dword(&ctx->ac,
5354 ctx->esgs_ring,
5355 out_val, 1,
5356 NULL, ctx->es2gs_offset,
5357 (4 * param_index + j) * 4,
5358 1, 1, true, true);
5359 }
5360 }
5361 outinfo->esgs_itemsize = (max_output_written + 1) * 16;
5362 }
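
/* ESGS ring layout sketch: each vec4 slot occupies 16 bytes per vertex, and
 * component j of slot param_index is stored at byte offset
 * (param_index * 4 + j) * 4 from es2gs_offset. If the highest slot written
 * has param_index 5 (and fits in 4 components), esgs_itemsize comes out as
 * (5 + 1) * 16 = 96 bytes.
 */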
5363
5364 static void
5365 handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
5366 {
5367 LLVMValueRef vertex_id = ctx->rel_auto_id;
5368 LLVMValueRef vertex_dw_stride = unpack_param(ctx, ctx->ls_out_layout, 13, 8);
5369 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
5370 vertex_dw_stride, "");
5371
5372 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5373 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
5374 int length = 4;
5375
5376 if (!(ctx->output_mask & (1ull << i)))
5377 continue;
5378
5379 if (i == VARYING_SLOT_CLIP_DIST0)
5380 length = ctx->num_output_clips + ctx->num_output_culls;
5381 int param = shader_io_get_unique_index(i);
5382 mark_tess_output(ctx, false, param);
5383 if (length > 4)
5384 mark_tess_output(ctx, false, param + 1);
5385 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr,
5386 LLVMConstInt(ctx->i32, param * 4, false),
5387 "");
5388 for (unsigned j = 0; j < length; j++) {
5389 lds_store(ctx, dw_addr,
5390 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
5391 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
5392 }
5393 }
5394 }
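
/* LDS addressing sketch for the stores above: component chan of slot param
 * for this vertex lands at dword
 *
 *     rel_auto_id * vertex_dw_stride + param * 4 + chan
 *
 * e.g. vertex 2 with a 16-dword stride, param 1, chan 3 hits dword
 * 2 * 16 + 4 + 3 = 39.
 */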
5395
5396 struct ac_build_if_state
5397 {
5398 struct nir_to_llvm_context *ctx;
5399 LLVMValueRef condition;
5400 LLVMBasicBlockRef entry_block;
5401 LLVMBasicBlockRef true_block;
5402 LLVMBasicBlockRef false_block;
5403 LLVMBasicBlockRef merge_block;
5404 };
5405
5406 static LLVMBasicBlockRef
5407 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name)
5408 {
5409 LLVMBasicBlockRef current_block;
5410 LLVMBasicBlockRef next_block;
5411 LLVMBasicBlockRef new_block;
5412
5413 /* get current basic block */
5414 current_block = LLVMGetInsertBlock(ctx->builder);
5415
5416 /* check if there's another block after this one */
5417 next_block = LLVMGetNextBasicBlock(current_block);
5418 if (next_block) {
5419 /* insert the new block before the next block */
5420 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
5421 }
5422 else {
5423 /* append new block after current block */
5424 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
5425 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
5426 }
5427 return new_block;
5428 }
5429
5430 static void
5431 ac_nir_build_if(struct ac_build_if_state *ifthen,
5432 struct nir_to_llvm_context *ctx,
5433 LLVMValueRef condition)
5434 {
5435 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder);
5436
5437 memset(ifthen, 0, sizeof *ifthen);
5438 ifthen->ctx = ctx;
5439 ifthen->condition = condition;
5440 ifthen->entry_block = block;
5441
5442 /* create endif/merge basic block for the phi functions */
5443 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
5444
5445 /* create/insert true_block before merge_block */
5446 ifthen->true_block =
5447 LLVMInsertBasicBlockInContext(ctx->context,
5448 ifthen->merge_block,
5449 "if-true-block");
5450
5451 /* subsequent code goes into the true block */
5452 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
5453 }
5454
5455 /**
5456 * End a conditional.
5457 */
5458 static void
5459 ac_nir_build_endif(struct ac_build_if_state *ifthen)
5460 {
5461 LLVMBuilderRef builder = ifthen->ctx->builder;
5462
5463 /* Insert branch to the merge block from current block */
5464 LLVMBuildBr(builder, ifthen->merge_block);
5465
5466 /*
5467 * Now patch in the various branch instructions.
5468 */
5469
5470 /* Insert the conditional branch instruction at the end of entry_block */
5471 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
5472 if (ifthen->false_block) {
5473 /* we have an else clause */
5474 LLVMBuildCondBr(builder, ifthen->condition,
5475 ifthen->true_block, ifthen->false_block);
5476 }
5477 else {
5478 /* no else clause */
5479 LLVMBuildCondBr(builder, ifthen->condition,
5480 ifthen->true_block, ifthen->merge_block);
5481 }
5482
5483 /* Resume building code at end of the ifthen->merge_block */
5484 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
5485 }
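
/* Typical usage of the if-builder pair, as in write_tess_factors() below
 * ("cond" is a placeholder i1 value):
 *
 *     struct ac_build_if_state state;
 *     ac_nir_build_if(&state, ctx, cond);
 *     ... emit then-branch code ...
 *     ac_nir_build_endif(&state);
 *
 * Nothing in this file ever sets false_block, so the false edge always
 * branches straight to the merge block.
 */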
5486
5487 static void
5488 write_tess_factors(struct nir_to_llvm_context *ctx)
5489 {
5490 unsigned stride, outer_comps, inner_comps;
5491 struct ac_build_if_state if_ctx, inner_if_ctx;
5492 LLVMValueRef invocation_id = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
5493 LLVMValueRef rel_patch_id = unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
5494 unsigned tess_inner_index, tess_outer_index;
5495 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
5496 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
5497 int i;
5498 emit_barrier(ctx);
5499
5500 switch (ctx->options->key.tcs.primitive_mode) {
5501 case GL_ISOLINES:
5502 stride = 2;
5503 outer_comps = 2;
5504 inner_comps = 0;
5505 break;
5506 case GL_TRIANGLES:
5507 stride = 4;
5508 outer_comps = 3;
5509 inner_comps = 1;
5510 break;
5511 case GL_QUADS:
5512 stride = 6;
5513 outer_comps = 4;
5514 inner_comps = 2;
5515 break;
5516 default:
5517 return;
5518 }
5519
5520 ac_nir_build_if(&if_ctx, ctx,
5521 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
5522 invocation_id, ctx->i32zero, ""));
5523
5524 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
5525 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
5526
5527 mark_tess_output(ctx, true, tess_inner_index);
5528 mark_tess_output(ctx, true, tess_outer_index);
5529 lds_base = get_tcs_out_current_patch_data_offset(ctx);
5530 lds_inner = LLVMBuildAdd(ctx->builder, lds_base,
5531 LLVMConstInt(ctx->i32, tess_inner_index * 4, false), "");
5532 lds_outer = LLVMBuildAdd(ctx->builder, lds_base,
5533 LLVMConstInt(ctx->i32, tess_outer_index * 4, false), "");
5534
5535 for (i = 0; i < 4; i++) {
5536 inner[i] = LLVMGetUndef(ctx->i32);
5537 outer[i] = LLVMGetUndef(ctx->i32);
5538 }
5539
5540 // LINES reversal: the two isoline outer factors are written to the TF buffer in reverse order
5541 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
5542 outer[0] = out[1] = lds_load(ctx, lds_outer);
5543 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
5544 LLVMConstInt(ctx->i32, 1, false), "");
5545 outer[1] = out[0] = lds_load(ctx, lds_outer);
5546 } else {
5547 for (i = 0; i < outer_comps; i++) {
5548 outer[i] = out[i] =
5549 lds_load(ctx, lds_outer);
5550 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
5551 LLVMConstInt(ctx->i32, 1, false), "");
5552 }
5553 for (i = 0; i < inner_comps; i++) {
5554 inner[i] = out[outer_comps+i] =
5555 lds_load(ctx, lds_inner);
5556 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
5557 LLVMConstInt(ctx->i32, 1, false), "");
5558 }
5559 }
5560
5561 /* Convert the outputs to vectors for stores. */
5562 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
5563 vec1 = NULL;
5564
5565 if (stride > 4)
5566 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
5567
5568
5569 buffer = ctx->hs_ring_tess_factor;
5570 tf_base = ctx->tess_factor_offset;
5571 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
5572 LLVMConstInt(ctx->i32, 4 * stride, false), "");
5573
5574 ac_nir_build_if(&inner_if_ctx, ctx,
5575 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
5576 rel_patch_id, ctx->i32zero, ""));
5577
5578 /* Store the dynamic HS control word. */
5579 ac_build_buffer_store_dword(&ctx->ac, buffer,
5580 LLVMConstInt(ctx->i32, 0x80000000, false),
5581 1, ctx->i32zero, tf_base,
5582 0, 1, 0, true, false);
5583 ac_nir_build_endif(&inner_if_ctx);
5584
5585 /* Store the tessellation factors. */
5586 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
5587 MIN2(stride, 4), byteoffset, tf_base,
5588 4, 1, 0, true, false);
5589 if (vec1)
5590 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
5591 stride - 4, byteoffset, tf_base,
5592 20, 1, 0, true, false);
5593
5594 // TODO: store to offchip for TES to read - only if TES reads them
5595 if (1) {
5596 LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
5597 LLVMValueRef tf_inner_offset;
5598 unsigned param_outer, param_inner;
5599
5600 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
5601 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
5602 LLVMConstInt(ctx->i32, param_outer, 0));
5603
5604 outer_vec = ac_build_gather_values(&ctx->ac, outer,
5605 util_next_power_of_two(outer_comps));
5606
5607 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
5608 outer_comps, tf_outer_offset,
5609 ctx->oc_lds, 0, 1, 0, true, false);
5610 if (inner_comps) {
5611 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
5612 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
5613 LLVMConstInt(ctx->i32, param_inner, 0));
5614
5615 inner_vec = inner_comps == 1 ? inner[0] :
5616 ac_build_gather_values(&ctx->ac, inner, inner_comps);
5617 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
5618 inner_comps, tf_inner_offset,
5619 ctx->oc_lds, 0, 1, 0, true, false);
5620 }
5621 }
5622 ac_nir_build_endif(&if_ctx);
5623 }
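
/* TF buffer layout sketch: the first patch (rel_patch_id == 0) writes the
 * 4-byte control word 0x80000000 at tf_base; every patch then writes
 * `stride` dwords at byte offset rel_patch_id * 4 * stride, outer factors
 * first (vec0, at offset 4) and any inner factors after them (vec1, at
 * offset 20 for GL_QUADS, where stride = 6).
 */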
5624
5625 static void
5626 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
5627 {
5628 write_tess_factors(ctx);
5629 }
5630
5631 static bool
5632 si_export_mrt_color(struct nir_to_llvm_context *ctx,
5633 LLVMValueRef *color, unsigned param, bool is_last,
5634 struct ac_export_args *args)
5635 {
5636 /* Export */
5637 si_llvm_init_export_args(ctx, color, param,
5638 args);
5639
5640 if (is_last) {
5641 args->valid_mask = 1; /* whether the EXEC mask is valid */
5642 args->done = 1; /* DONE bit */
5643 } else if (!args->enabled_channels)
5644 return false; /* unnecessary NULL export */
5645
5646 return true;
5647 }
5648
5649 static void
5650 si_export_mrt_z(struct nir_to_llvm_context *ctx,
5651 LLVMValueRef depth, LLVMValueRef stencil,
5652 LLVMValueRef samplemask)
5653 {
5654 struct ac_export_args args;
5655
5656 args.enabled_channels = 0;
5657 args.valid_mask = 1;
5658 args.done = 1;
5659 args.target = V_008DFC_SQ_EXP_MRTZ;
5660 args.compr = false;
5661
5662 args.out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
5663 args.out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
5664 args.out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
5665 args.out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
5666
5667 if (depth) {
5668 args.out[0] = depth;
5669 args.enabled_channels |= 0x1;
5670 }
5671
5672 if (stencil) {
5673 args.out[1] = stencil;
5674 args.enabled_channels |= 0x2;
5675 }
5676
5677 if (samplemask) {
5678 args.out[2] = samplemask;
5679 args.enabled_channels |= 0x4;
5680 }
5681
5682 /* SI (except OLAND) has a bug where it only looks
5683 * at the X writemask component. */
5684 if (ctx->options->chip_class == SI &&
5685 ctx->options->family != CHIP_OLAND)
5686 args.enabled_channels |= 0x1;
5687
5688 ac_build_export(&ctx->ac, &args);
5689 }
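
/* enabled_channels here is a bitmask over {depth, stencil, samplemask} in
 * X/Y/Z: exporting depth plus samplemask only gives 0x1 | 0x4 = 0x5 (with
 * the X bit additionally forced on SI parts other than OLAND, per the
 * workaround above).
 */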
5690
5691 static void
5692 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
5693 {
5694 unsigned index = 0;
5695 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
5696 struct ac_export_args color_args[8];
5697
5698 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5699 LLVMValueRef values[4];
5700
5701 if (!(ctx->output_mask & (1ull << i)))
5702 continue;
5703
5704 if (i == FRAG_RESULT_DEPTH) {
5705 ctx->shader_info->fs.writes_z = true;
5706 depth = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5707 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5708 } else if (i == FRAG_RESULT_STENCIL) {
5709 ctx->shader_info->fs.writes_stencil = true;
5710 stencil = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5711 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5712 } else if (i == FRAG_RESULT_SAMPLE_MASK) {
5713 ctx->shader_info->fs.writes_sample_mask = true;
5714 samplemask = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5715 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5716 } else {
5717 bool last = false;
5718 for (unsigned j = 0; j < 4; j++)
5719 values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5720 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5721
5722 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
5723 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
5724
5725 bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]);
5726 if (ret)
5727 index++;
5728 }
5729 }
5730
5731 for (unsigned i = 0; i < index; i++)
5732 ac_build_export(&ctx->ac, &color_args[i]);
5733 if (depth || stencil || samplemask)
5734 si_export_mrt_z(ctx, depth, stencil, samplemask);
5735 else if (!index) {
5736 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
5737 ac_build_export(&ctx->ac, &color_args[0]);
5738 }
5739
5740 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
5741 }
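
/* The "last" computation above relies on output_mask having no bit set
 * above the current slot: a color export only gets the DONE bit (via
 * si_export_mrt_color()) when it is the final export of the shader, i.e.
 * when no depth/stencil/samplemask export will follow it.
 */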
5742
5743 static void
5744 emit_gs_epilogue(struct nir_to_llvm_context *ctx)
5745 {
5746 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
5747 }
5748
5749 static void
5750 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
5751 {
5752 switch (ctx->stage) {
5753 case MESA_SHADER_VERTEX:
5754 if (ctx->options->key.vs.as_ls)
5755 handle_ls_outputs_post(ctx);
5756 else if (ctx->options->key.vs.as_es)
5757 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
5758 else
5759 handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
5760 &ctx->shader_info->vs.outinfo);
5761 break;
5762 case MESA_SHADER_FRAGMENT:
5763 handle_fs_outputs_post(ctx);
5764 break;
5765 case MESA_SHADER_GEOMETRY:
5766 emit_gs_epilogue(ctx);
5767 break;
5768 case MESA_SHADER_TESS_CTRL:
5769 handle_tcs_outputs_post(ctx);
5770 break;
5771 case MESA_SHADER_TESS_EVAL:
5772 if (ctx->options->key.tes.as_es)
5773 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
5774 else
5775 handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
5776 &ctx->shader_info->tes.outinfo);
5777 break;
5778 default:
5779 break;
5780 }
5781 }
5782
5783 static void
5784 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
5785 struct nir_variable *variable, uint32_t *offset, int idx)
5786 {
5787 unsigned size = glsl_count_attribute_slots(variable->type, false);
5788 variable->data.driver_location = *offset;
5789 *offset += size;
5790 }
5791
5792 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
5793 {
5794 LLVMPassManagerRef passmgr;
5795 /* Create the pass manager */
5796 passmgr = LLVMCreateFunctionPassManagerForModule(
5797 ctx->module);
5798
5799 /* This pass should eliminate all the load and store instructions */
5800 LLVMAddPromoteMemoryToRegisterPass(passmgr);
5801
5802 /* Add some optimization passes */
5803 LLVMAddScalarReplAggregatesPass(passmgr);
5804 LLVMAddLICMPass(passmgr);
5805 LLVMAddAggressiveDCEPass(passmgr);
5806 LLVMAddCFGSimplificationPass(passmgr);
5807 LLVMAddInstructionCombiningPass(passmgr);
5808
5809 /* Run the passes over the main function */
5810 LLVMInitializeFunctionPassManager(passmgr);
5811 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
5812 LLVMFinalizeFunctionPassManager(passmgr);
5813
5814 LLVMDisposeBuilder(ctx->builder);
5815 LLVMDisposePassManager(passmgr);
5816 }
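
/* Pass order note: promote-memory-to-register runs first so the entry-block
 * allocas from ac_build_alloca()/setup_locals() become SSA values before
 * SROA, LICM, DCE, CFG simplification and instcombine clean up the result.
 * The builder is disposed here because no further IR is emitted afterwards.
 */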
5817
5818 static void
5819 ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
5820 {
5821 struct ac_vs_output_info *outinfo;
5822
5823 switch (ctx->stage) {
5824 case MESA_SHADER_FRAGMENT:
5825 case MESA_SHADER_COMPUTE:
5826 case MESA_SHADER_TESS_CTRL:
5827 case MESA_SHADER_GEOMETRY:
5828 return;
5829 case MESA_SHADER_VERTEX:
5830 if (ctx->options->key.vs.as_ls ||
5831 ctx->options->key.vs.as_es)
5832 return;
5833 outinfo = &ctx->shader_info->vs.outinfo;
5834 break;
5835 case MESA_SHADER_TESS_EVAL:
5836 if (ctx->options->key.tes.as_es)
5837 return;
5838 outinfo = &ctx->shader_info->tes.outinfo;
5839 break;
5840 default:
5841 unreachable("Unhandled shader type");
5842 }
5843
5844 ac_optimize_vs_outputs(&ctx->ac,
5845 ctx->main_function,
5846 outinfo->vs_output_param_offset,
5847 VARYING_SLOT_MAX,
5848 &outinfo->param_exports);
5849 }
5850
5851 static void
5852 ac_setup_rings(struct nir_to_llvm_context *ctx)
5853 {
5854 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
5855 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
5856 ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
5857 }
5858
5859 if (ctx->is_gs_copy_shader) {
5860 ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
5861 }
5862 if (ctx->stage == MESA_SHADER_GEOMETRY) {
5863 LLVMValueRef tmp;
5864 ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
5865 ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
5866
5867 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
5868
5869 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->i32, 2, false), "");
5870 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, "");
5871 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
5872 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, "");
5873
5874 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v16i8, "");
5875 }
5876
5877 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
5878 ctx->stage == MESA_SHADER_TESS_EVAL) {
5879 ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
5880 ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
5881 }
5882 }
5883
5884 static unsigned
5885 ac_nir_get_max_workgroup_size(enum chip_class chip_class,
5886 const struct nir_shader *nir)
5887 {
5888 switch (nir->stage) {
5889 case MESA_SHADER_TESS_CTRL:
5890 return chip_class >= CIK ? 128 : 64;
5891 case MESA_SHADER_GEOMETRY:
5892 return 64;
5893 case MESA_SHADER_COMPUTE:
5894 break;
5895 default:
5896 return 0;
5897 }
5898
5899 unsigned max_workgroup_size = nir->info.cs.local_size[0] *
5900 nir->info.cs.local_size[1] *
5901 nir->info.cs.local_size[2];
5902 return max_workgroup_size;
5903 }
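
/* Example: a compute shader declared with local_size = (8, 8, 1) reports a
 * max workgroup size of 8 * 8 * 1 = 64 invocations; stages other than
 * compute, TCS and GS return 0, i.e. no explicit bound is attached.
 */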
5904
5905 static
5906 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
5907 struct nir_shader *nir,
5908 struct ac_shader_variant_info *shader_info,
5909 const struct ac_nir_compiler_options *options)
5910 {
5911 struct nir_to_llvm_context ctx = {0};
5912 struct nir_function *func;
5913 unsigned i;
5914 ctx.options = options;
5915 ctx.shader_info = shader_info;
5916 ctx.context = LLVMContextCreate();
5917 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
5918
5919 ac_llvm_context_init(&ctx.ac, ctx.context);
5920 ctx.ac.module = ctx.module;
5921
5922 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
5923
5924 memset(shader_info, 0, sizeof(*shader_info));
5925
5926 ac_nir_shader_info_pass(nir, options, &shader_info->info);
5927
5928 LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
5929
5930 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
5931 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
5932 LLVMSetDataLayout(ctx.module, data_layout_str);
5933 LLVMDisposeTargetData(data_layout);
5934 LLVMDisposeMessage(data_layout_str);
5935
5936 setup_types(&ctx);
5937
5938 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
5939 ctx.ac.builder = ctx.builder;
5940 ctx.stage = nir->stage;
5941 ctx.max_workgroup_size = ac_nir_get_max_workgroup_size(ctx.options->chip_class, nir);
5942
5943 for (i = 0; i < AC_UD_MAX_SETS; i++)
5944 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
5945 for (i = 0; i < AC_UD_MAX_UD; i++)
5946 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
5947
5948 create_function(&ctx);
5949
5950 if (nir->stage == MESA_SHADER_COMPUTE) {
5951 int num_shared = 0;
5952 nir_foreach_variable(variable, &nir->shared)
5953 num_shared++;
5954 if (num_shared) {
5955 int idx = 0;
5956 uint32_t shared_size = 0;
5957 LLVMValueRef var;
5958 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
5959 nir_foreach_variable(variable, &nir->shared) {
5960 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
5961 idx++;
5962 }
5963
5964 shared_size *= 16;
5965 var = LLVMAddGlobalInAddressSpace(ctx.module,
5966 LLVMArrayType(ctx.i8, shared_size),
5967 "compute_lds",
5968 LOCAL_ADDR_SPACE);
5969 LLVMSetAlignment(var, 4);
5970 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
5971 }
5972 } else if (nir->stage == MESA_SHADER_GEOMETRY) {
5973 ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex");
5974
5975 ctx.gs_max_out_vertices = nir->info.gs.vertices_out;
5976 } else if (nir->stage == MESA_SHADER_TESS_EVAL) {
5977 ctx.tes_primitive_mode = nir->info.tess.primitive_mode;
5978 }
5979
5980 ac_setup_rings(&ctx);
5981
5982 nir_foreach_variable(variable, &nir->inputs)
5983 handle_shader_input_decl(&ctx, variable);
5984
5985 if (nir->stage == MESA_SHADER_FRAGMENT)
5986 handle_fs_inputs_pre(&ctx, nir);
5987
5988 ctx.num_output_clips = nir->info.clip_distance_array_size;
5989 ctx.num_output_culls = nir->info.cull_distance_array_size;
5990
5991 nir_foreach_variable(variable, &nir->outputs)
5992 handle_shader_output_decl(&ctx, variable);
5993
5994 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5995 _mesa_key_pointer_equal);
5996 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5997 _mesa_key_pointer_equal);
5998
5999 func = (struct nir_function *)exec_list_get_head(&nir->functions);
6000
6001 setup_locals(&ctx, func);
6002
6003 visit_cf_list(&ctx, &func->impl->body);
6004 phi_post_pass(&ctx);
6005
6006 handle_shader_outputs_post(&ctx);
6007 LLVMBuildRetVoid(ctx.builder);
6008
6009 ac_llvm_finalize_module(&ctx);
6010
6011 ac_nir_eliminate_const_vs_outputs(&ctx);
6012 free(ctx.locals);
6013 ralloc_free(ctx.defs);
6014 ralloc_free(ctx.phis);
6015
6016 if (nir->stage == MESA_SHADER_GEOMETRY) {
6017 unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
6018 shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
6019 shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
6020 nir->info.gs.vertices_out;
6021 } else if (nir->stage == MESA_SHADER_TESS_CTRL) {
6022 shader_info->tcs.outputs_written = ctx.tess_outputs_written;
6023 shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
6024 } else if (nir->stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
6025 shader_info->vs.outputs_written = ctx.tess_outputs_written;
6026 }
6027
6028 return ctx.module;
6029 }
6030
6031 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
6032 {
6033 unsigned *retval = (unsigned *)context;
6034 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
6035 char *description = LLVMGetDiagInfoDescription(di);
6036
6037 if (severity == LLVMDSError) {
6038 *retval = 1;
6039 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
6040 description);
6041 }
6042
6043 LLVMDisposeMessage(description);
6044 }
6045
6046 static unsigned ac_llvm_compile(LLVMModuleRef M,
6047 struct ac_shader_binary *binary,
6048 LLVMTargetMachineRef tm)
6049 {
6050 unsigned retval = 0;
6051 char *err;
6052 LLVMContextRef llvm_ctx;
6053 LLVMMemoryBufferRef out_buffer;
6054 unsigned buffer_size;
6055 const char *buffer_data;
6056 LLVMBool mem_err;
6057
6058 /* Set up the diagnostic handler */
6059 llvm_ctx = LLVMGetModuleContext(M);
6060
6061 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
6062 &retval);
6063
6064 /* Compile IR */
6065 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
6066 &err, &out_buffer);
6067
6068 /* Process Errors/Warnings */
6069 if (mem_err) {
6070 fprintf(stderr, "%s: %s", __FUNCTION__, err);
6071 free(err);
6072 retval = 1;
6073 goto out;
6074 }
6075
6076 /* Extract shader code */
6077 buffer_size = LLVMGetBufferSize(out_buffer);
6078 buffer_data = LLVMGetBufferStart(out_buffer);
6079
6080 ac_elf_read(buffer_data, buffer_size, binary);
6081
6082 /* Clean up */
6083 LLVMDisposeMemoryBuffer(out_buffer);
6084
6085 out:
6086 return retval;
6087 }
6088
6089 static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
6090 LLVMModuleRef llvm_module,
6091 struct ac_shader_binary *binary,
6092 struct ac_shader_config *config,
6093 struct ac_shader_variant_info *shader_info,
6094 gl_shader_stage stage,
6095 bool dump_shader, bool supports_spill)
6096 {
6097 if (dump_shader)
6098 ac_dump_module(llvm_module);
6099
6100 memset(binary, 0, sizeof(*binary));
6101 int v = ac_llvm_compile(llvm_module, binary, tm);
6102 if (v) {
6103 fprintf(stderr, "compile failed\n");
6104 }
6105
6106 if (dump_shader)
6107 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
6108
6109 ac_shader_binary_read_config(binary, config, 0, supports_spill);
6110
6111 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
6112 LLVMDisposeModule(llvm_module);
6113 LLVMContextDispose(ctx);
6114
6115 if (stage == MESA_SHADER_FRAGMENT) {
6116 shader_info->num_input_vgprs = 0;
6117 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
6118 shader_info->num_input_vgprs += 2;
6119 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
6120 shader_info->num_input_vgprs += 2;
6121 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
6122 shader_info->num_input_vgprs += 2;
6123 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
6124 shader_info->num_input_vgprs += 3;
6125 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
6126 shader_info->num_input_vgprs += 2;
6127 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
6128 shader_info->num_input_vgprs += 2;
6129 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
6130 shader_info->num_input_vgprs += 2;
6131 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
6132 shader_info->num_input_vgprs += 1;
6133 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
6134 shader_info->num_input_vgprs += 1;
6135 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
6136 shader_info->num_input_vgprs += 1;
6137 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
6138 shader_info->num_input_vgprs += 1;
6139 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
6140 shader_info->num_input_vgprs += 1;
6141 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
6142 shader_info->num_input_vgprs += 1;
6143 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
6144 shader_info->num_input_vgprs += 1;
6145 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
6146 shader_info->num_input_vgprs += 1;
6147 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
6148 shader_info->num_input_vgprs += 1;
6149 }
6150 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
6151
6152 /* +3 for scratch wave offset and VCC */
6153 config->num_sgprs = MAX2(config->num_sgprs,
6154 shader_info->num_input_sgprs + 3);
6155 }
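
/* Worked example of the input-VGPR accounting above: a fragment shader with
 * PERSP_CENTER (2 VGPRs), POS_X_FLOAT and POS_Y_FLOAT (1 each) enabled in
 * spi_ps_input_addr counts 2 + 1 + 1 = 4 input VGPRs, and num_vgprs is then
 * raised to at least that value.
 */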
6156
6157 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
6158 struct ac_shader_binary *binary,
6159 struct ac_shader_config *config,
6160 struct ac_shader_variant_info *shader_info,
6161 struct nir_shader *nir,
6162 const struct ac_nir_compiler_options *options,
6163 bool dump_shader)
6164 {
6165
6166 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
6167 options);
6168
6169 ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
6170 switch (nir->stage) {
6171 case MESA_SHADER_COMPUTE:
6172 for (int i = 0; i < 3; ++i)
6173 shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
6174 break;
6175 case MESA_SHADER_FRAGMENT:
6176 shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
6177 break;
6178 case MESA_SHADER_GEOMETRY:
6179 shader_info->gs.vertices_in = nir->info.gs.vertices_in;
6180 shader_info->gs.vertices_out = nir->info.gs.vertices_out;
6181 shader_info->gs.output_prim = nir->info.gs.output_primitive;
6182 shader_info->gs.invocations = nir->info.gs.invocations;
6183 break;
6184 case MESA_SHADER_TESS_EVAL:
6185 shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
6186 shader_info->tes.spacing = nir->info.tess.spacing;
6187 shader_info->tes.ccw = nir->info.tess.ccw;
6188 shader_info->tes.point_mode = nir->info.tess.point_mode;
6189 shader_info->tes.as_es = options->key.tes.as_es;
6190 break;
6191 case MESA_SHADER_TESS_CTRL:
6192 shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
6193 break;
6194 case MESA_SHADER_VERTEX:
6195 shader_info->vs.as_es = options->key.vs.as_es;
6196 shader_info->vs.as_ls = options->key.vs.as_ls;
6197 /* In LS mode we need at least 1; the invocation ID needs 3 (handled elsewhere). */
6198 if (options->key.vs.as_ls)
6199 shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
6200 break;
6201 default:
6202 break;
6203 }
6204 }
6205
6206 static void
6207 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
6208 {
6209 LLVMValueRef args[9];
6210 args[0] = ctx->gsvs_ring;
6211 args[1] = LLVMBuildMul(ctx->builder, ctx->vertex_id, LLVMConstInt(ctx->i32, 4, false), "");
6212 args[3] = ctx->i32zero;
6213 args[4] = ctx->i32one; /* OFFEN */
6214 args[5] = ctx->i32zero; /* IDXEN */
6215 args[6] = ctx->i32one; /* GLC */
6216 args[7] = ctx->i32one; /* SLC */
6217 args[8] = ctx->i32zero; /* TFE */
6218
6219 int idx = 0;
6220
6221 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6222 int length = 4;
6223 int slot = idx;
6224 int slot_inc = 1;
6225 if (!(ctx->output_mask & (1ull << i)))
6226 continue;
6227
6228 if (i == VARYING_SLOT_CLIP_DIST0) {
6229 /* unpack clip and cull from a single set of slots */
6230 length = ctx->num_output_clips + ctx->num_output_culls;
6231 if (length > 4)
6232 slot_inc = 2;
6233 }
6234
6235 for (unsigned j = 0; j < length; j++) {
6236 LLVMValueRef value;
6237 args[2] = LLVMConstInt(ctx->i32,
6238 (slot * 4 + j) *
6239 ctx->gs_max_out_vertices * 16 * 4, false);
6240
6241 value = ac_build_intrinsic(&ctx->ac,
6242 "llvm.SI.buffer.load.dword.i32.i32",
6243 ctx->i32, args, 9,
6244 AC_FUNC_ATTR_READONLY |
6245 AC_FUNC_ATTR_LEGACY);
6246
6247 LLVMBuildStore(ctx->builder,
6248 to_float(&ctx->ac, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
6249 }
6250 idx += slot_inc;
6251 }
6252 handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
6253 }
6254
6255 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
6256 struct nir_shader *geom_shader,
6257 struct ac_shader_binary *binary,
6258 struct ac_shader_config *config,
6259 struct ac_shader_variant_info *shader_info,
6260 const struct ac_nir_compiler_options *options,
6261 bool dump_shader)
6262 {
6263 struct nir_to_llvm_context ctx = {0};
6264 ctx.context = LLVMContextCreate();
6265 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
6266 ctx.options = options;
6267 ctx.shader_info = shader_info;
6268
6269 ac_llvm_context_init(&ctx.ac, ctx.context);
6270 ctx.ac.module = ctx.module;
6271
6272 ctx.is_gs_copy_shader = true;
6273 LLVMSetTarget(ctx.module, "amdgcn--");
6274 setup_types(&ctx);
6275
6276 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
6277 ctx.ac.builder = ctx.builder;
6278 ctx.stage = MESA_SHADER_VERTEX;
6279
6280 create_function(&ctx);
6281
6282 ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
6283 ac_setup_rings(&ctx);
6284
6285 ctx.num_output_clips = geom_shader->info.clip_distance_array_size;
6286 ctx.num_output_culls = geom_shader->info.cull_distance_array_size;
6287
6288 nir_foreach_variable(variable, &geom_shader->outputs)
6289 handle_shader_output_decl(&ctx, variable);
6290
6291 ac_gs_copy_shader_emit(&ctx);
6292
6293 LLVMBuildRetVoid(ctx.builder);
6294
6295 ac_llvm_finalize_module(&ctx);
6296
6297 ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info,
6298 MESA_SHADER_VERTEX,
6299 dump_shader, options->supports_spill);
6300 }