ac/nir: set the noalias attribute on input pointers
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_build.h"
26 #include "ac_llvm_util.h"
27 #include "ac_binary.h"
28 #include "sid.h"
29 #include "nir/nir.h"
30 #include "../vulkan/radv_descriptor_set.h"
31 #include "util/bitscan.h"
32 #include <llvm-c/Transforms/Scalar.h>
33 #include "ac_shader_abi.h"
34 #include "ac_shader_info.h"
35 #include "ac_shader_util.h"
36 #include "ac_exp_param.h"
37
38 enum radeon_llvm_calling_convention {
39 RADEON_LLVM_AMDGPU_VS = 87,
40 RADEON_LLVM_AMDGPU_GS = 88,
41 RADEON_LLVM_AMDGPU_PS = 89,
42 RADEON_LLVM_AMDGPU_CS = 90,
43 RADEON_LLVM_AMDGPU_HS = 93,
44 };
45
46 #define CONST_ADDR_SPACE 2
47 #define LOCAL_ADDR_SPACE 3
48
49 #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
50 #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
51
52 struct nir_to_llvm_context;
53
54 struct ac_nir_context {
55 struct ac_llvm_context ac;
56 struct ac_shader_abi *abi;
57
58 gl_shader_stage stage;
59
60 struct hash_table *defs;
61 struct hash_table *phis;
62 struct hash_table *vars;
63
64 LLVMValueRef main_function;
65 LLVMBasicBlockRef continue_block;
66 LLVMBasicBlockRef break_block;
67
68 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
69
70 int num_locals;
71 LLVMValueRef *locals;
72
73 struct nir_to_llvm_context *nctx; /* TODO get rid of this */
74 };
75
76 struct nir_to_llvm_context {
77 struct ac_llvm_context ac;
78 const struct ac_nir_compiler_options *options;
79 struct ac_shader_variant_info *shader_info;
80 struct ac_shader_abi abi;
81 struct ac_nir_context *nir;
82
83 unsigned max_workgroup_size;
84 LLVMContextRef context;
85 LLVMModuleRef module;
86 LLVMBuilderRef builder;
87 LLVMValueRef main_function;
88
89 struct hash_table *defs;
90 struct hash_table *phis;
91
92 LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
93 LLVMValueRef ring_offsets;
94 LLVMValueRef push_constants;
95 LLVMValueRef view_index;
96 LLVMValueRef num_work_groups;
97 LLVMValueRef workgroup_ids[3];
98 LLVMValueRef local_invocation_ids;
99 LLVMValueRef tg_size;
100
101 LLVMValueRef vertex_buffers;
102 LLVMValueRef rel_auto_id;
103 LLVMValueRef vs_prim_id;
104 LLVMValueRef ls_out_layout;
105 LLVMValueRef es2gs_offset;
106
107 LLVMValueRef tcs_offchip_layout;
108 LLVMValueRef tcs_out_offsets;
109 LLVMValueRef tcs_out_layout;
110 LLVMValueRef tcs_in_layout;
111 LLVMValueRef oc_lds;
112 LLVMValueRef merged_wave_info;
113 LLVMValueRef tess_factor_offset;
114 LLVMValueRef tes_rel_patch_id;
115 LLVMValueRef tes_u;
116 LLVMValueRef tes_v;
117
118 LLVMValueRef gsvs_ring_stride;
119 LLVMValueRef gsvs_num_entries;
120 LLVMValueRef gs2vs_offset;
121 LLVMValueRef gs_wave_id;
122 LLVMValueRef gs_vtx_offset[6];
123
124 LLVMValueRef esgs_ring;
125 LLVMValueRef gsvs_ring;
126 LLVMValueRef hs_ring_tess_offchip;
127 LLVMValueRef hs_ring_tess_factor;
128
129 LLVMValueRef prim_mask;
130 LLVMValueRef sample_pos_offset;
131 LLVMValueRef persp_sample, persp_center, persp_centroid;
132 LLVMValueRef linear_sample, linear_center, linear_centroid;
133
134 gl_shader_stage stage;
135
136 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
137
138 uint64_t input_mask;
139 uint64_t output_mask;
140 uint8_t num_output_clips;
141 uint8_t num_output_culls;
142
143 bool is_gs_copy_shader;
144 LLVMValueRef gs_next_vertex;
145 unsigned gs_max_out_vertices;
146
147 unsigned tes_primitive_mode;
148 uint64_t tess_outputs_written;
149 uint64_t tess_patch_outputs_written;
150
151 uint32_t tcs_patch_outputs_read;
152 uint64_t tcs_outputs_read;
153 };
154
155 static inline struct nir_to_llvm_context *
156 nir_to_llvm_context_from_abi(struct ac_shader_abi *abi)
157 {
158 struct nir_to_llvm_context *ctx = NULL;
159 return container_of(abi, ctx, abi);
160 }
161
162 static LLVMTypeRef
163 nir2llvmtype(struct ac_nir_context *ctx,
164 const struct glsl_type *type)
165 {
166 switch (glsl_get_base_type(glsl_without_array(type))) {
167 case GLSL_TYPE_UINT:
168 case GLSL_TYPE_INT:
169 return ctx->ac.i32;
170 case GLSL_TYPE_UINT64:
171 case GLSL_TYPE_INT64:
172 return ctx->ac.i64;
173 case GLSL_TYPE_DOUBLE:
174 return ctx->ac.f64;
175 case GLSL_TYPE_FLOAT:
176 return ctx->ac.f32;
177 default:
178 assert(!"Unsupported type in nir2llvmtype()");
179 break;
180 }
181 return 0;
182 }
183
184 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
185 const nir_deref_var *deref,
186 enum ac_descriptor_type desc_type,
187 const nir_tex_instr *instr,
188 bool image, bool write);
189
190 static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
191 {
192 return (index * 4) + chan;
193 }
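/* Illustrative example: slot 2, channel 1 (the y component of the third
 * slot) maps to flat index 2 * 4 + 1 = 9 in this SoA layout.
 */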
194
195 static unsigned shader_io_get_unique_index(gl_varying_slot slot)
196 {
197 /* handle patch indices separately */
198 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
199 return 0;
200 if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
201 return 1;
202 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
203 return 2 + (slot - VARYING_SLOT_PATCH0);
204
205 if (slot == VARYING_SLOT_POS)
206 return 0;
207 if (slot == VARYING_SLOT_PSIZ)
208 return 1;
209 if (slot == VARYING_SLOT_CLIP_DIST0)
210 return 2;
211 /* 3 is reserved for clip dist as well */
212 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
213 return 4 + (slot - VARYING_SLOT_VAR0);
214 unreachable("illegal slot in get unique index\n");
215 }
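/* Illustrative mappings produced by the function above:
 *   VARYING_SLOT_TESS_LEVEL_OUTER -> 0   (patch index space)
 *   VARYING_SLOT_POS              -> 0   (per-vertex index space)
 *   VARYING_SLOT_CLIP_DIST0       -> 2   (3 is reserved for the second
 *                                         clip-dist vec4)
 *   VARYING_SLOT_VAR0             -> 4, VAR1 -> 5, ...
 * Patch and per-vertex slots live in separate index spaces, which is why
 * both can start at 0.
 */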
216
217 static void set_llvm_calling_convention(LLVMValueRef func,
218 gl_shader_stage stage)
219 {
220 enum radeon_llvm_calling_convention calling_conv;
221
222 switch (stage) {
223 case MESA_SHADER_VERTEX:
224 case MESA_SHADER_TESS_EVAL:
225 calling_conv = RADEON_LLVM_AMDGPU_VS;
226 break;
227 case MESA_SHADER_GEOMETRY:
228 calling_conv = RADEON_LLVM_AMDGPU_GS;
229 break;
230 case MESA_SHADER_TESS_CTRL:
231 calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
232 break;
233 case MESA_SHADER_FRAGMENT:
234 calling_conv = RADEON_LLVM_AMDGPU_PS;
235 break;
236 case MESA_SHADER_COMPUTE:
237 calling_conv = RADEON_LLVM_AMDGPU_CS;
238 break;
239 default:
240 unreachable("Unhandled shader type");
241 }
242
243 LLVMSetFunctionCallConv(func, calling_conv);
244 }
245
246 #define MAX_ARGS 23
247 struct arg_info {
248 LLVMTypeRef types[MAX_ARGS];
249 LLVMValueRef *assign[MAX_ARGS];
250 unsigned array_params_mask;
251 uint8_t count;
252 uint8_t sgpr_count;
253 uint8_t num_sgprs_used;
254 uint8_t num_vgprs_used;
255 };
256
257 enum ac_arg_regfile {
258 ARG_SGPR,
259 ARG_VGPR,
260 };
261
262 static void
263 add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type,
264 LLVMValueRef *param_ptr)
265 {
266 assert(info->count < MAX_ARGS);
267
268 info->assign[info->count] = param_ptr;
269 info->types[info->count] = type;
270 info->count++;
271
272 if (regfile == ARG_SGPR) {
273 info->num_sgprs_used += ac_get_type_size(type) / 4;
274 info->sgpr_count++;
275 } else {
276 assert(regfile == ARG_VGPR);
277 info->num_vgprs_used += ac_get_type_size(type) / 4;
278 }
279 }
280
281 static inline void
282 add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr)
283 {
284 info->array_params_mask |= (1 << info->count);
285 add_arg(info, ARG_SGPR, type, param_ptr);
286 }
287
288 static void assign_arguments(LLVMValueRef main_function,
289 struct arg_info *info)
290 {
291 unsigned i;
292 for (i = 0; i < info->count; i++) {
293 if (info->assign[i])
294 *info->assign[i] = LLVMGetParam(main_function, i);
295 }
296 }
297
298 static LLVMValueRef
299 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
300 LLVMBuilderRef builder, LLVMTypeRef *return_types,
301 unsigned num_return_elems,
302 struct arg_info *args,
303 unsigned max_workgroup_size,
304 bool unsafe_math)
305 {
306 LLVMTypeRef main_function_type, ret_type;
307 LLVMBasicBlockRef main_function_body;
308
309 if (num_return_elems)
310 ret_type = LLVMStructTypeInContext(ctx, return_types,
311 num_return_elems, true);
312 else
313 ret_type = LLVMVoidTypeInContext(ctx);
314
315 /* Setup the function */
316 main_function_type =
317 LLVMFunctionType(ret_type, args->types, args->count, 0);
318 LLVMValueRef main_function =
319 LLVMAddFunction(module, "main", main_function_type);
320 main_function_body =
321 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
322 LLVMPositionBuilderAtEnd(builder, main_function_body);
323
324 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
325 for (unsigned i = 0; i < args->sgpr_count; ++i) {
326 if (args->array_params_mask & (1 << i)) {
327 LLVMValueRef P = LLVMGetParam(main_function, i);
328 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
329 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
330 ac_add_attr_dereferenceable(P, UINT64_MAX);
331 }
332 else {
333 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
334 }
335 }
336
337 if (max_workgroup_size) {
338 ac_llvm_add_target_dep_function_attr(main_function,
339 "amdgpu-max-work-group-size",
340 max_workgroup_size);
341 }
342 if (unsafe_math) {
343 /* These were copied from some LLVM test. */
344 LLVMAddTargetDependentFunctionAttr(main_function,
345 "less-precise-fpmad",
346 "true");
347 LLVMAddTargetDependentFunctionAttr(main_function,
348 "no-infs-fp-math",
349 "true");
350 LLVMAddTargetDependentFunctionAttr(main_function,
351 "no-nans-fp-math",
352 "true");
353 LLVMAddTargetDependentFunctionAttr(main_function,
354 "unsafe-fp-math",
355 "true");
356 LLVMAddTargetDependentFunctionAttr(main_function,
357 "no-signed-zeros-fp-math",
358 "true");
359 }
360 return main_function;
361 }
362
363 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
364 {
365 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
366 CONST_ADDR_SPACE);
367 }
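/* Minimal usage sketch for the argument helpers above, assuming a
 * hypothetical caller (example_declare_args, desc_set0, push_consts and
 * vertex_id are made-up names for illustration). Array SGPR arguments get
 * byval/noalias/dereferenceable (the noalias attribute being the subject
 * of this change); plain SGPR arguments get inreg.
 */
#if 0
static void example_declare_args(struct nir_to_llvm_context *ctx)
{
	struct arg_info args = {};
	LLVMValueRef desc_set0 = NULL, push_consts = NULL, vertex_id = NULL;

	/* Two pointer-like SGPR arguments, flagged via array_params_mask. */
	add_array_arg(&args, const_array(ctx->ac.i8, 1024 * 1024), &desc_set0);
	add_array_arg(&args, const_array(ctx->ac.i8, 1024 * 1024), &push_consts);
	/* One VGPR argument. */
	add_arg(&args, ARG_VGPR, ctx->ac.i32, &vertex_id);

	LLVMValueRef fn = create_llvm_function(ctx->context, ctx->module,
					       ctx->builder, NULL, 0, &args,
					       0, false);
	/* Copy the LLVM parameters back into the pointed-to variables. */
	assign_arguments(fn, &args);
}
#endif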
368
369 static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
370 {
371 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
372 type = LLVMGetElementType(type);
373
374 if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
375 return LLVMGetIntTypeWidth(type);
376
377 if (type == ctx->f16)
378 return 16;
379 if (type == ctx->f32)
380 return 32;
381 if (type == ctx->f64)
382 return 64;
383
384 unreachable("Unhandled type kind in get_elem_bits");
385 }
386
387 static LLVMValueRef unpack_param(struct ac_llvm_context *ctx,
388 LLVMValueRef param, unsigned rshift,
389 unsigned bitwidth)
390 {
391 LLVMValueRef value = param;
392 if (rshift)
393 value = LLVMBuildLShr(ctx->builder, value,
394 LLVMConstInt(ctx->i32, rshift, false), "");
395
396 if (rshift + bitwidth < 32) {
397 unsigned mask = (1 << bitwidth) - 1;
398 value = LLVMBuildAnd(ctx->builder, value,
399 LLVMConstInt(ctx->i32, mask, false), "");
400 }
401 return value;
402 }
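/* Illustrative usage: unpack_param(ctx, p, 0, 13) extracts bits [12:0] of
 * p, and unpack_param(ctx, p, 16, 16) extracts bits [31:16]. The masking
 * step is skipped when rshift + bitwidth == 32, since the shift has
 * already discarded the high bits.
 */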
403
404 static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
405 {
406 switch (ctx->stage) {
407 case MESA_SHADER_TESS_CTRL:
408 return unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
409 case MESA_SHADER_TESS_EVAL:
410 return ctx->tes_rel_patch_id;
412 default:
413 unreachable("Illegal stage");
414 }
415 }
416
417 /* Tessellation shaders pass outputs to the next shader using LDS.
418 *
419 * LS outputs = TCS inputs
420 * TCS outputs = TES inputs
421 *
422 * The LDS layout is:
423 * - TCS inputs for patch 0
424 * - TCS inputs for patch 1
425 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
426 * - ...
427 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
428 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
429 * - TCS outputs for patch 1
430 * - Per-patch TCS outputs for patch 1
431 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
432 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
433 * - ...
434 *
435 * All three shaders VS(LS), TCS, TES share the same LDS space.
436 */
437 static LLVMValueRef
438 get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
439 {
440 if (ctx->stage == MESA_SHADER_VERTEX)
441 return unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
442 else if (ctx->stage == MESA_SHADER_TESS_CTRL)
443 return unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
444 else {
445 assert(0);
446 return NULL;
447 }
448 }
449
450 static LLVMValueRef
451 get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
452 {
453 return unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
454 }
455
456 static LLVMValueRef
457 get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
458 {
459 return LLVMBuildMul(ctx->builder,
460 unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
461 LLVMConstInt(ctx->ac.i32, 4, false), "");
462 }
463
464 static LLVMValueRef
465 get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
466 {
467 return LLVMBuildMul(ctx->builder,
468 unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
469 LLVMConstInt(ctx->ac.i32, 4, false), "");
470 }
471
472 static LLVMValueRef
473 get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
474 {
475 LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
476 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
477
478 return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
479 }
480
481 static LLVMValueRef
482 get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx)
483 {
484 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
485 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
486 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
487
488 return LLVMBuildAdd(ctx->builder, patch0_offset,
489 LLVMBuildMul(ctx->builder, patch_stride,
490 rel_patch_id, ""),
491 "");
492 }
493
494 static LLVMValueRef
495 get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx)
496 {
497 LLVMValueRef patch0_patch_data_offset =
498 get_tcs_out_patch0_patch_data_offset(ctx);
499 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
500 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
501
502 return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset,
503 LLVMBuildMul(ctx->builder, patch_stride,
504 rel_patch_id, ""),
505 "");
506 }
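/* Worked example: with RelPatchID == 2, get_tcs_in_current_patch_offset()
 * yields 2 * get_tcs_in_patch_stride(), and
 * get_tcs_out_current_patch_offset() yields get_tcs_out_patch0_offset() +
 * 2 * get_tcs_out_patch_stride(), matching the LDS layout sketched above.
 */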
507
508 static void
509 set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs,
510 uint32_t indirect_offset)
511 {
512 ud_info->sgpr_idx = *sgpr_idx;
513 ud_info->num_sgprs = num_sgprs;
514 ud_info->indirect = indirect_offset > 0;
515 ud_info->indirect_offset = indirect_offset;
516 *sgpr_idx += num_sgprs;
517 }
518
519 static void
520 set_loc_shader(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx,
521 uint8_t num_sgprs)
522 {
523 struct ac_userdata_info *ud_info =
524 &ctx->shader_info->user_sgprs_locs.shader_data[idx];
525 assert(ud_info);
526
527 set_loc(ud_info, sgpr_idx, num_sgprs, 0);
528 }
529
530 static void
531 set_loc_desc(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx,
532 uint32_t indirect_offset)
533 {
534 struct ac_userdata_info *ud_info =
535 &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
536 assert(ud_info);
537
538 set_loc(ud_info, sgpr_idx, 2, indirect_offset);
539 }
540
541 struct user_sgpr_info {
542 bool need_ring_offsets;
543 uint8_t sgpr_count;
544 bool indirect_all_descriptor_sets;
545 };
546
547 static bool needs_view_index_sgpr(struct nir_to_llvm_context *ctx,
548 gl_shader_stage stage)
549 {
550 switch (stage) {
551 case MESA_SHADER_VERTEX:
552 if (ctx->shader_info->info.needs_multiview_view_index ||
553 (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
554 return true;
555 break;
556 case MESA_SHADER_TESS_EVAL:
557 if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
558 return true;
559 break;
560 case MESA_SHADER_GEOMETRY:
561 case MESA_SHADER_TESS_CTRL:
562 if (ctx->shader_info->info.needs_multiview_view_index)
563 return true;
564 break;
565 default:
566 break;
567 }
568 return false;
569 }
570
571 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
572 gl_shader_stage stage,
573 bool needs_view_index,
574 struct user_sgpr_info *user_sgpr_info)
575 {
576 memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
577
578 /* until we sort out scratch/global buffers, always assign ring offsets for gs/vs/es */
579 if (stage == MESA_SHADER_GEOMETRY ||
580 stage == MESA_SHADER_VERTEX ||
581 stage == MESA_SHADER_TESS_CTRL ||
582 stage == MESA_SHADER_TESS_EVAL ||
583 ctx->is_gs_copy_shader)
584 user_sgpr_info->need_ring_offsets = true;
585
586 if (stage == MESA_SHADER_FRAGMENT &&
587 ctx->shader_info->info.ps.needs_sample_positions)
588 user_sgpr_info->need_ring_offsets = true;
589
590 /* 2 user sgprs will nearly always be allocated for scratch/rings */
591 if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
592 user_sgpr_info->sgpr_count += 2;
593 }
594
595 /* FIXME: fix the number of user sgprs for merged shaders on GFX9 */
596 switch (stage) {
597 case MESA_SHADER_COMPUTE:
598 if (ctx->shader_info->info.cs.uses_grid_size)
599 user_sgpr_info->sgpr_count += 3;
600 break;
601 case MESA_SHADER_FRAGMENT:
602 user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
603 break;
604 case MESA_SHADER_VERTEX:
605 if (!ctx->is_gs_copy_shader) {
606 user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
607 if (ctx->shader_info->info.vs.needs_draw_id) {
608 user_sgpr_info->sgpr_count += 3;
609 } else {
610 user_sgpr_info->sgpr_count += 2;
611 }
612 }
613 if (ctx->options->key.vs.as_ls)
614 user_sgpr_info->sgpr_count++;
615 break;
616 case MESA_SHADER_TESS_CTRL:
617 user_sgpr_info->sgpr_count += 4;
618 break;
619 case MESA_SHADER_TESS_EVAL:
620 user_sgpr_info->sgpr_count += 1;
621 break;
622 case MESA_SHADER_GEOMETRY:
623 user_sgpr_info->sgpr_count += 2;
624 break;
625 default:
626 break;
627 }
628
629 if (needs_view_index)
630 user_sgpr_info->sgpr_count++;
631
632 if (ctx->shader_info->info.loads_push_constants)
633 user_sgpr_info->sgpr_count += 2;
634
635 uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
636 uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
637
638 if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
639 user_sgpr_info->sgpr_count += 2;
640 user_sgpr_info->indirect_all_descriptor_sets = true;
641 } else {
642 user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
643 }
644 }
645
646 static void
647 declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
648 gl_shader_stage stage,
649 bool has_previous_stage,
650 gl_shader_stage previous_stage,
651 const struct user_sgpr_info *user_sgpr_info,
652 struct arg_info *args,
653 LLVMValueRef *desc_sets)
654 {
655 LLVMTypeRef type = const_array(ctx->ac.i8, 1024 * 1024);
656 unsigned num_sets = ctx->options->layout ?
657 ctx->options->layout->num_sets : 0;
658 unsigned stage_mask = 1 << stage;
659
660 if (has_previous_stage)
661 stage_mask |= 1 << previous_stage;
662
663 /* 1 for each descriptor set */
664 if (!user_sgpr_info->indirect_all_descriptor_sets) {
665 for (unsigned i = 0; i < num_sets; ++i) {
666 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
667 add_array_arg(args, type,
668 &ctx->descriptor_sets[i]);
669 }
670 }
671 } else {
672 add_array_arg(args, const_array(type, 32), desc_sets);
673 }
674
675 if (ctx->shader_info->info.loads_push_constants) {
676 /* 1 for push constants and dynamic descriptors */
677 add_array_arg(args, type, &ctx->push_constants);
678 }
679 }
680
681 static void
682 declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx,
683 gl_shader_stage stage,
684 bool has_previous_stage,
685 gl_shader_stage previous_stage,
686 struct arg_info *args)
687 {
688 if (!ctx->is_gs_copy_shader &&
689 (stage == MESA_SHADER_VERTEX ||
690 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
691 if (ctx->shader_info->info.vs.has_vertex_buffers) {
692 add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
693 &ctx->vertex_buffers);
694 }
695 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
696 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance);
697 if (ctx->shader_info->info.vs.needs_draw_id) {
698 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id);
699 }
700 }
701 }
702
703 static void
704 declare_vs_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
705 {
706 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id);
707 if (!ctx->is_gs_copy_shader) {
708 if (ctx->options->key.vs.as_ls) {
709 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id);
710 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
711 } else {
712 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
713 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id);
714 }
715 add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */
716 }
717 }
718
719 static void
720 declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
721 {
722 add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u);
723 add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v);
724 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id);
725 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id);
726 }
727
728 static void
729 set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage,
730 bool has_previous_stage, gl_shader_stage previous_stage,
731 const struct user_sgpr_info *user_sgpr_info,
732 LLVMValueRef desc_sets, uint8_t *user_sgpr_idx)
733 {
734 unsigned num_sets = ctx->options->layout ?
735 ctx->options->layout->num_sets : 0;
736 unsigned stage_mask = 1 << stage;
737
738 if (has_previous_stage)
739 stage_mask |= 1 << previous_stage;
740
741 if (!user_sgpr_info->indirect_all_descriptor_sets) {
742 for (unsigned i = 0; i < num_sets; ++i) {
743 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
744 set_loc_desc(ctx, i, user_sgpr_idx, 0);
745 } else
746 ctx->descriptor_sets[i] = NULL;
747 }
748 } else {
749 set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
750 user_sgpr_idx, 2);
751
752 for (unsigned i = 0; i < num_sets; ++i) {
753 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
754 set_loc_desc(ctx, i, user_sgpr_idx, i * 8);
755 ctx->descriptor_sets[i] =
756 ac_build_load_to_sgpr(&ctx->ac,
757 desc_sets,
758 LLVMConstInt(ctx->ac.i32, i, false));
759
760 } else
761 ctx->descriptor_sets[i] = NULL;
762 }
763 ctx->shader_info->need_indirect_descriptor_sets = true;
764 }
765
766 if (ctx->shader_info->info.loads_push_constants) {
767 set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
768 }
769 }
770
771 static void
772 set_vs_specific_input_locs(struct nir_to_llvm_context *ctx,
773 gl_shader_stage stage, bool has_previous_stage,
774 gl_shader_stage previous_stage,
775 uint8_t *user_sgpr_idx)
776 {
777 if (!ctx->is_gs_copy_shader &&
778 (stage == MESA_SHADER_VERTEX ||
779 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
780 if (ctx->shader_info->info.vs.has_vertex_buffers) {
781 set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
782 user_sgpr_idx, 2);
783 }
784
785 unsigned vs_num = 2;
786 if (ctx->shader_info->info.vs.needs_draw_id)
787 vs_num++;
788
789 set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
790 user_sgpr_idx, vs_num);
791 }
792 }
793
794 static void create_function(struct nir_to_llvm_context *ctx,
795 gl_shader_stage stage,
796 bool has_previous_stage,
797 gl_shader_stage previous_stage)
798 {
799 uint8_t user_sgpr_idx;
800 struct user_sgpr_info user_sgpr_info;
801 struct arg_info args = {};
802 LLVMValueRef desc_sets;
803 bool needs_view_index = needs_view_index_sgpr(ctx, stage);
804 allocate_user_sgprs(ctx, stage, needs_view_index, &user_sgpr_info);
805
806 if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
807 add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
808 &ctx->ring_offsets);
809 }
810
811 switch (stage) {
812 case MESA_SHADER_COMPUTE:
813 declare_global_input_sgprs(ctx, stage, has_previous_stage,
814 previous_stage, &user_sgpr_info,
815 &args, &desc_sets);
816
817 if (ctx->shader_info->info.cs.uses_grid_size) {
818 add_arg(&args, ARG_SGPR, ctx->ac.v3i32,
819 &ctx->num_work_groups);
820 }
821
822 for (int i = 0; i < 3; i++) {
823 ctx->workgroup_ids[i] = NULL;
824 if (ctx->shader_info->info.cs.uses_block_id[i]) {
825 add_arg(&args, ARG_SGPR, ctx->ac.i32,
826 &ctx->workgroup_ids[i]);
827 }
828 }
829
830 if (ctx->shader_info->info.cs.uses_local_invocation_idx)
831 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size);
832 add_arg(&args, ARG_VGPR, ctx->ac.v3i32,
833 &ctx->local_invocation_ids);
834 break;
835 case MESA_SHADER_VERTEX:
836 declare_global_input_sgprs(ctx, stage, has_previous_stage,
837 previous_stage, &user_sgpr_info,
838 &args, &desc_sets);
839 declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage,
840 previous_stage, &args);
841
842 if (needs_view_index)
843 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);
844 if (ctx->options->key.vs.as_es)
845 add_arg(&args, ARG_SGPR, ctx->ac.i32,
846 &ctx->es2gs_offset);
847 else if (ctx->options->key.vs.as_ls)
848 add_arg(&args, ARG_SGPR, ctx->ac.i32,
849 &ctx->ls_out_layout);
850
851 declare_vs_input_vgprs(ctx, &args);
852 break;
853 case MESA_SHADER_TESS_CTRL:
854 if (has_previous_stage) {
855 // First 6 system regs
856 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
857 add_arg(&args, ARG_SGPR, ctx->ac.i32,
858 &ctx->merged_wave_info);
859 add_arg(&args, ARG_SGPR, ctx->ac.i32,
860 &ctx->tess_factor_offset);
861
862 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
863 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
864 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
865
866 declare_global_input_sgprs(ctx, stage,
867 has_previous_stage,
868 previous_stage,
869 &user_sgpr_info, &args,
870 &desc_sets);
871 declare_vs_specific_input_sgprs(ctx, stage,
872 has_previous_stage,
873 previous_stage, &args);
874
875 add_arg(&args, ARG_SGPR, ctx->ac.i32,
876 &ctx->ls_out_layout);
877
878 add_arg(&args, ARG_SGPR, ctx->ac.i32,
879 &ctx->tcs_offchip_layout);
880 add_arg(&args, ARG_SGPR, ctx->ac.i32,
881 &ctx->tcs_out_offsets);
882 add_arg(&args, ARG_SGPR, ctx->ac.i32,
883 &ctx->tcs_out_layout);
884 add_arg(&args, ARG_SGPR, ctx->ac.i32,
885 &ctx->tcs_in_layout);
886 if (needs_view_index)
887 add_arg(&args, ARG_SGPR, ctx->ac.i32,
888 &ctx->view_index);
889
890 add_arg(&args, ARG_VGPR, ctx->ac.i32,
891 &ctx->abi.tcs_patch_id);
892 add_arg(&args, ARG_VGPR, ctx->ac.i32,
893 &ctx->abi.tcs_rel_ids);
894
895 declare_vs_input_vgprs(ctx, &args);
896 } else {
897 declare_global_input_sgprs(ctx, stage,
898 has_previous_stage,
899 previous_stage,
900 &user_sgpr_info, &args,
901 &desc_sets);
902
903 add_arg(&args, ARG_SGPR, ctx->ac.i32,
904 &ctx->tcs_offchip_layout);
905 add_arg(&args, ARG_SGPR, ctx->ac.i32,
906 &ctx->tcs_out_offsets);
907 add_arg(&args, ARG_SGPR, ctx->ac.i32,
908 &ctx->tcs_out_layout);
909 add_arg(&args, ARG_SGPR, ctx->ac.i32,
910 &ctx->tcs_in_layout);
911 if (needs_view_index)
912 add_arg(&args, ARG_SGPR, ctx->ac.i32,
913 &ctx->view_index);
914
915 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
916 add_arg(&args, ARG_SGPR, ctx->ac.i32,
917 &ctx->tess_factor_offset);
918 add_arg(&args, ARG_VGPR, ctx->ac.i32,
919 &ctx->abi.tcs_patch_id);
920 add_arg(&args, ARG_VGPR, ctx->ac.i32,
921 &ctx->abi.tcs_rel_ids);
922 }
923 break;
924 case MESA_SHADER_TESS_EVAL:
925 declare_global_input_sgprs(ctx, stage, has_previous_stage,
926 previous_stage, &user_sgpr_info,
927 &args, &desc_sets);
928
929 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout);
930 if (needs_view_index)
931 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);
932
933 if (ctx->options->key.tes.as_es) {
934 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
935 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
936 add_arg(&args, ARG_SGPR, ctx->ac.i32,
937 &ctx->es2gs_offset);
938 } else {
939 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
940 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
941 }
942 declare_tes_input_vgprs(ctx, &args);
943 break;
944 case MESA_SHADER_GEOMETRY:
945 if (has_previous_stage) {
946 // First 6 system regs
947 add_arg(&args, ARG_SGPR, ctx->ac.i32,
948 &ctx->gs2vs_offset);
949 add_arg(&args, ARG_SGPR, ctx->ac.i32,
950 &ctx->merged_wave_info);
951 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
952
953 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
954 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
955 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
956
957 declare_global_input_sgprs(ctx, stage,
958 has_previous_stage,
959 previous_stage,
960 &user_sgpr_info, &args,
961 &desc_sets);
962
963 if (previous_stage == MESA_SHADER_TESS_EVAL) {
964 add_arg(&args, ARG_SGPR, ctx->ac.i32,
965 &ctx->tcs_offchip_layout);
966 } else {
967 declare_vs_specific_input_sgprs(ctx, stage,
968 has_previous_stage,
969 previous_stage,
970 &args);
971 }
972
973 add_arg(&args, ARG_SGPR, ctx->ac.i32,
974 &ctx->gsvs_ring_stride);
975 add_arg(&args, ARG_SGPR, ctx->ac.i32,
976 &ctx->gsvs_num_entries);
977 if (needs_view_index)
978 add_arg(&args, ARG_SGPR, ctx->ac.i32,
979 &ctx->view_index);
980
981 add_arg(&args, ARG_VGPR, ctx->ac.i32,
982 &ctx->gs_vtx_offset[0]);
983 add_arg(&args, ARG_VGPR, ctx->ac.i32,
984 &ctx->gs_vtx_offset[2]);
985 add_arg(&args, ARG_VGPR, ctx->ac.i32,
986 &ctx->abi.gs_prim_id);
987 add_arg(&args, ARG_VGPR, ctx->ac.i32,
988 &ctx->abi.gs_invocation_id);
989 add_arg(&args, ARG_VGPR, ctx->ac.i32,
990 &ctx->gs_vtx_offset[4]);
991
992 if (previous_stage == MESA_SHADER_VERTEX) {
993 declare_vs_input_vgprs(ctx, &args);
994 } else {
995 declare_tes_input_vgprs(ctx, &args);
996 }
997 } else {
998 declare_global_input_sgprs(ctx, stage,
999 has_previous_stage,
1000 previous_stage,
1001 &user_sgpr_info, &args,
1002 &desc_sets);
1003
1004 add_arg(&args, ARG_SGPR, ctx->ac.i32,
1005 &ctx->gsvs_ring_stride);
1006 add_arg(&args, ARG_SGPR, ctx->ac.i32,
1007 &ctx->gsvs_num_entries);
1008 if (needs_view_index)
1009 add_arg(&args, ARG_SGPR, ctx->ac.i32,
1010 &ctx->view_index);
1011
1012 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset);
1013 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id);
1014 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1015 &ctx->gs_vtx_offset[0]);
1016 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1017 &ctx->gs_vtx_offset[1]);
1018 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1019 &ctx->abi.gs_prim_id);
1020 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1021 &ctx->gs_vtx_offset[2]);
1022 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1023 &ctx->gs_vtx_offset[3]);
1024 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1025 &ctx->gs_vtx_offset[4]);
1026 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1027 &ctx->gs_vtx_offset[5]);
1028 add_arg(&args, ARG_VGPR, ctx->ac.i32,
1029 &ctx->abi.gs_invocation_id);
1030 }
1031 break;
1032 case MESA_SHADER_FRAGMENT:
1033 declare_global_input_sgprs(ctx, stage, has_previous_stage,
1034 previous_stage, &user_sgpr_info,
1035 &args, &desc_sets);
1036
1037 if (ctx->shader_info->info.ps.needs_sample_positions)
1038 add_arg(&args, ARG_SGPR, ctx->ac.i32,
1039 &ctx->sample_pos_offset);
1040
1041 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->prim_mask);
1042 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample);
1043 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center);
1044 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid);
1045 add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */
1046 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample);
1047 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center);
1048 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid);
1049 add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */
1050 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]);
1051 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]);
1052 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]);
1053 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]);
1054 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face);
1055 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary);
1056 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage);
1057 add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */
1058 break;
1059 default:
1060 unreachable("Shader stage not implemented");
1061 }
1062
1063 ctx->main_function = create_llvm_function(
1064 ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
1065 ctx->max_workgroup_size,
1066 ctx->options->unsafe_math);
1067 set_llvm_calling_convention(ctx->main_function, stage);
1068
1069
1070 ctx->shader_info->num_input_vgprs = 0;
1071 ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
1072
1073 ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
1074
1075 if (ctx->stage != MESA_SHADER_FRAGMENT)
1076 ctx->shader_info->num_input_vgprs = args.num_vgprs_used;
1077
1078 assign_arguments(ctx->main_function, &args);
1079
1080 user_sgpr_idx = 0;
1081
1082 if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
1083 set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
1084 &user_sgpr_idx, 2);
1085 if (ctx->options->supports_spill) {
1086 ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
1087 LLVMPointerType(ctx->ac.i8, CONST_ADDR_SPACE),
1088 NULL, 0, AC_FUNC_ATTR_READNONE);
1089 ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
1090 const_array(ctx->ac.v4i32, 16), "");
1091 }
1092 }
1093
1094 /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front
1095 * (including the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
1096 if (has_previous_stage)
1097 user_sgpr_idx = 0;
1098
1099 set_global_input_locs(ctx, stage, has_previous_stage, previous_stage,
1100 &user_sgpr_info, desc_sets, &user_sgpr_idx);
1101
1102 switch (stage) {
1103 case MESA_SHADER_COMPUTE:
1104 if (ctx->shader_info->info.cs.uses_grid_size) {
1105 set_loc_shader(ctx, AC_UD_CS_GRID_SIZE,
1106 &user_sgpr_idx, 3);
1107 }
1108 break;
1109 case MESA_SHADER_VERTEX:
1110 set_vs_specific_input_locs(ctx, stage, has_previous_stage,
1111 previous_stage, &user_sgpr_idx);
1112 if (ctx->view_index)
1113 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1114 if (ctx->options->key.vs.as_ls) {
1115 set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
1116 &user_sgpr_idx, 1);
1117 }
1118 if (ctx->options->key.vs.as_ls)
1119 ac_declare_lds_as_pointer(&ctx->ac);
1120 break;
1121 case MESA_SHADER_TESS_CTRL:
1122 set_vs_specific_input_locs(ctx, stage, has_previous_stage,
1123 previous_stage, &user_sgpr_idx);
1124 if (has_previous_stage)
1125 set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
1126 &user_sgpr_idx, 1);
1127 set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
1128 if (ctx->view_index)
1129 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1130 ac_declare_lds_as_pointer(&ctx->ac);
1131 break;
1132 case MESA_SHADER_TESS_EVAL:
1133 set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
1134 if (ctx->view_index)
1135 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1136 break;
1137 case MESA_SHADER_GEOMETRY:
1138 if (has_previous_stage) {
1139 if (previous_stage == MESA_SHADER_VERTEX)
1140 set_vs_specific_input_locs(ctx, stage,
1141 has_previous_stage,
1142 previous_stage,
1143 &user_sgpr_idx);
1144 else
1145 set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
1146 &user_sgpr_idx, 1);
1147 }
1148 set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
1149 &user_sgpr_idx, 2);
1150 if (ctx->view_index)
1151 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1152 if (has_previous_stage)
1153 ac_declare_lds_as_pointer(&ctx->ac);
1154 break;
1155 case MESA_SHADER_FRAGMENT:
1156 if (ctx->shader_info->info.ps.needs_sample_positions) {
1157 set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
1158 &user_sgpr_idx, 1);
1159 }
1160 break;
1161 default:
1162 unreachable("Shader stage not implemented");
1163 }
1164
1165 ctx->shader_info->num_user_sgprs = user_sgpr_idx;
1166 }
1167
1168 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
1169 LLVMValueRef value, unsigned count)
1170 {
1171 unsigned num_components = ac_get_llvm_num_components(value);
1172 if (count == num_components)
1173 return value;
1174
1175 LLVMValueRef masks[] = {
1176 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
1177 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
1178
1179 if (count == 1)
1180 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
1181 "");
1182
1183 LLVMValueRef swizzle = LLVMConstVector(masks, count);
1184 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
1185 }
1186
1187 static void
1188 build_store_values_extended(struct ac_llvm_context *ac,
1189 LLVMValueRef *values,
1190 unsigned value_count,
1191 unsigned value_stride,
1192 LLVMValueRef vec)
1193 {
1194 LLVMBuilderRef builder = ac->builder;
1195 unsigned i;
1196
1197 for (i = 0; i < value_count; i++) {
1198 LLVMValueRef ptr = values[i * value_stride];
1199 LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
1200 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
1201 LLVMBuildStore(builder, value, ptr);
1202 }
1203 }
1204
1205 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
1206 const nir_ssa_def *def)
1207 {
1208 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
1209 if (def->num_components > 1) {
1210 type = LLVMVectorType(type, def->num_components);
1211 }
1212 return type;
1213 }
1214
1215 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
1216 {
1217 assert(src.is_ssa);
1218 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
1219 return (LLVMValueRef)entry->data;
1220 }
1221
1222
1223 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
1224 const struct nir_block *b)
1225 {
1226 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
1227 return (LLVMBasicBlockRef)entry->data;
1228 }
1229
1230 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
1231 nir_alu_src src,
1232 unsigned num_components)
1233 {
1234 LLVMValueRef value = get_src(ctx, src.src);
1235 bool need_swizzle = false;
1236
1237 assert(value);
1238 LLVMTypeRef type = LLVMTypeOf(value);
1239 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
1240 ? LLVMGetVectorSize(type)
1241 : 1;
1242
1243 for (unsigned i = 0; i < num_components; ++i) {
1244 assert(src.swizzle[i] < src_components);
1245 if (src.swizzle[i] != i)
1246 need_swizzle = true;
1247 }
1248
1249 if (need_swizzle || num_components != src_components) {
1250 LLVMValueRef masks[] = {
1251 LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
1252 LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
1253 LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
1254 LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
1255
1256 if (src_components > 1 && num_components == 1) {
1257 value = LLVMBuildExtractElement(ctx->ac.builder, value,
1258 masks[0], "");
1259 } else if (src_components == 1 && num_components > 1) {
1260 LLVMValueRef values[] = {value, value, value, value};
1261 value = ac_build_gather_values(&ctx->ac, values, num_components);
1262 } else {
1263 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
1264 value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
1265 swizzle, "");
1266 }
1267 }
1268 assert(!src.negate);
1269 assert(!src.abs);
1270 return value;
1271 }
1272
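/* NIR's 32-bit booleans are 0 for false and 0xFFFFFFFF (~0) for true, so
 * the i1 results of the compares below are converted to that form with a
 * select.
 */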
1273 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
1274 LLVMIntPredicate pred, LLVMValueRef src0,
1275 LLVMValueRef src1)
1276 {
1277 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
1278 return LLVMBuildSelect(ctx->builder, result,
1279 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
1280 ctx->i32_0, "");
1281 }
1282
1283 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
1284 LLVMRealPredicate pred, LLVMValueRef src0,
1285 LLVMValueRef src1)
1286 {
1287 LLVMValueRef result;
1288 src0 = ac_to_float(ctx, src0);
1289 src1 = ac_to_float(ctx, src1);
1290 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
1291 return LLVMBuildSelect(ctx->builder, result,
1292 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
1293 ctx->i32_0, "");
1294 }
1295
1296 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
1297 const char *intrin,
1298 LLVMTypeRef result_type,
1299 LLVMValueRef src0)
1300 {
1301 char name[64];
1302 LLVMValueRef params[] = {
1303 ac_to_float(ctx, src0),
1304 };
1305
1306 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1307 get_elem_bits(ctx, result_type));
1308 assert(length < sizeof(name));
1309 return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
1310 }
1311
1312 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
1313 const char *intrin,
1314 LLVMTypeRef result_type,
1315 LLVMValueRef src0, LLVMValueRef src1)
1316 {
1317 char name[64];
1318 LLVMValueRef params[] = {
1319 ac_to_float(ctx, src0),
1320 ac_to_float(ctx, src1),
1321 };
1322
1323 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1324 get_elem_bits(ctx, result_type));
1325 assert(length < sizeof(name));
1326 return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
1327 }
1328
1329 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
1330 const char *intrin,
1331 LLVMTypeRef result_type,
1332 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
1333 {
1334 char name[64];
1335 LLVMValueRef params[] = {
1336 ac_to_float(ctx, src0),
1337 ac_to_float(ctx, src1),
1338 ac_to_float(ctx, src2),
1339 };
1340
1341 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1342 get_elem_bits(ctx, result_type));
1343 assert(length < sizeof(name));
1344 return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
1345 }
1346
1347 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
1348 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
1349 {
1350 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
1351 ctx->i32_0, "");
1352 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
1353 }
1354
1355 static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
1356 LLVMIntPredicate pred,
1357 LLVMValueRef src0, LLVMValueRef src1)
1358 {
1359 return LLVMBuildSelect(ctx->builder,
1360 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
1361 src0,
1362 src1, "");
1363
1364 }
1365 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
1366 LLVMValueRef src0)
1367 {
1368 return emit_minmax_int(ctx, LLVMIntSGT, src0,
1369 LLVMBuildNeg(ctx->builder, src0, ""));
1370 }
1371
1372 static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx,
1373 LLVMValueRef src0,
1374 unsigned bitsize)
1375 {
1376 LLVMValueRef cmp, val, zero, one;
1377 LLVMTypeRef type;
1378
1379 if (bitsize == 32) {
1380 type = ctx->f32;
1381 zero = ctx->f32_0;
1382 one = ctx->f32_1;
1383 } else {
1384 type = ctx->f64;
1385 zero = ctx->f64_0;
1386 one = ctx->f64_1;
1387 }
1388
1389 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
1390 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
1391 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
1392 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), "");
1393 return val;
1394 }
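/* Illustrative results: emit_fsign maps 3.5 -> 1.0, 0.0 -> 0.0 (the first
 * select keeps src0, the second keeps it), and -2.0 -> -1.0; a NaN input
 * fails both compares and yields -1.0.
 */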
1395
1396 static LLVMValueRef emit_isign(struct ac_llvm_context *ctx,
1397 LLVMValueRef src0, unsigned bitsize)
1398 {
1399 LLVMValueRef cmp, val, zero, one;
1400 LLVMTypeRef type;
1401
1402 if (bitsize == 32) {
1403 type = ctx->i32;
1404 zero = ctx->i32_0;
1405 one = ctx->i32_1;
1406 } else {
1407 type = ctx->i64;
1408 zero = ctx->i64_0;
1409 one = ctx->i64_1;
1410 }
1411
1412 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
1413 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
1414 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
1415 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), "");
1416 return val;
1417 }
1418
1419 static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx,
1420 LLVMValueRef src0)
1421 {
1422 const char *intr = "llvm.floor.f32";
1423 LLVMValueRef fsrc0 = ac_to_float(ctx, src0);
1424 LLVMValueRef params[] = {
1425 fsrc0,
1426 };
1427 LLVMValueRef floor = ac_build_intrinsic(ctx, intr,
1428 ctx->f32, params, 1,
1429 AC_FUNC_ATTR_READNONE);
1430 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
1431 }
1432
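/* The intrinsics used with emit_uint_carry return an { i32, i1 } pair
 * (for example llvm.uadd.with.overflow.i32); only the carry/borrow bit is
 * kept and zero-extended to i32.
 */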
1433 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
1434 const char *intrin,
1435 LLVMValueRef src0, LLVMValueRef src1)
1436 {
1437 LLVMTypeRef ret_type;
1438 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
1439 LLVMValueRef res;
1440 LLVMValueRef params[] = { src0, src1 };
1441 ret_type = LLVMStructTypeInContext(ctx->context, types,
1442 2, true);
1443
1444 res = ac_build_intrinsic(ctx, intrin, ret_type,
1445 params, 2, AC_FUNC_ATTR_READNONE);
1446
1447 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
1448 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
1449 return res;
1450 }
1451
1452 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
1453 LLVMValueRef src0)
1454 {
1455 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
1456 }
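/* With the 0/0xFFFFFFFF boolean convention, ANDing with the bit pattern
 * of 1.0f (0x3F800000) produces the bit pattern of exactly 0.0f or 1.0f.
 */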
1457
1458 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
1459 LLVMValueRef src0)
1460 {
1461 src0 = ac_to_float(ctx, src0);
1462 return LLVMBuildSExt(ctx->builder,
1463 LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""),
1464 ctx->i32, "");
1465 }
1466
1467 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
1468 LLVMValueRef src0,
1469 unsigned bitsize)
1470 {
1471 LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
1472
1473 if (bitsize == 32)
1474 return result;
1475
1476 return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
1477 }
1478
1479 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
1480 LLVMValueRef src0)
1481 {
1482 return LLVMBuildSExt(ctx->builder,
1483 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""),
1484 ctx->i32, "");
1485 }
1486
1487 static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
1488 LLVMValueRef src0)
1489 {
1490 LLVMValueRef result;
1491 LLVMValueRef cond = NULL;
1492
1493 src0 = ac_to_float(&ctx->ac, src0);
1494 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->ac.f16, "");
1495
1496 if (ctx->options->chip_class >= VI) {
1497 LLVMValueRef args[2];
1498 /* Check if the result is a denormal - and flush to 0 if so. */
1499 args[0] = result;
1500 args[1] = LLVMConstInt(ctx->ac.i32, N_SUBNORMAL | P_SUBNORMAL, false);
1501 cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->ac.i1, args, 2, AC_FUNC_ATTR_READNONE);
1502 }
1503
1504 /* need to convert back up to f32 */
1505 result = LLVMBuildFPExt(ctx->builder, result, ctx->ac.f32, "");
1506
1507 if (ctx->options->chip_class >= VI)
1508 result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, "");
1509 else {
1510 /* for SI/CIK */
1511 /* 0x38800000 is the smallest normal half-float value (2^-14) as a
1512 * 32-bit float, so compare the result and flush to 0 if it's smaller.
1513 */
1514 LLVMValueRef temp, cond2;
1515 temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
1516 ctx->ac.f32, result);
1517 cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
1518 LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->ac.i32, 0x38800000, false), ctx->ac.f32, ""),
1519 temp, "");
1520 cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
1521 temp, ctx->ac.f32_0, "");
1522 cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
1523 result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, "");
1524 }
1525 return result;
1526 }
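/* Bit-pattern check: 2^-14 as an IEEE f32 has exponent 127 - 14 = 113 =
 * 0x71 and a zero mantissa, i.e. 0x71 << 23 = 0x38800000, the constant
 * compared against above.
 */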
1527
1528 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
1529 LLVMValueRef src0, LLVMValueRef src1)
1530 {
1531 LLVMValueRef dst64, result;
1532 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
1533 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
1534
1535 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1536 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1537 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1538 return result;
1539 }
1540
1541 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
1542 LLVMValueRef src0, LLVMValueRef src1)
1543 {
1544 LLVMValueRef dst64, result;
1545 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
1546 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
1547
1548 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1549 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1550 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1551 return result;
1552 }
1553
1554 static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
1555 bool is_signed,
1556 const LLVMValueRef srcs[3])
1557 {
1558 LLVMValueRef result;
1559 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1560
1561 result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
1562 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1563 return result;
1564 }
1565
1566 static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
1567 LLVMValueRef src0, LLVMValueRef src1,
1568 LLVMValueRef src2, LLVMValueRef src3)
1569 {
1570 LLVMValueRef bfi_args[3], result;
1571
1572 bfi_args[0] = LLVMBuildShl(ctx->builder,
1573 LLVMBuildSub(ctx->builder,
1574 LLVMBuildShl(ctx->builder,
1575 ctx->i32_1,
1576 src3, ""),
1577 ctx->i32_1, ""),
1578 src2, "");
1579 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1580 bfi_args[2] = src0;
1581
1582 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1583
1584 /* Calculate:
1585 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
1586 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1587 */
1588 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1589 LLVMBuildAnd(ctx->builder, bfi_args[0],
1590 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1591
1592 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1593 return result;
1594 }
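/* Sanity check of the identity with arg0 = 0xF0, arg1 = 0xAA, arg2 = 0x55:
 * (0xF0 & 0xAA) | (~0xF0 & 0x55) = 0xA0 | 0x05 = 0xA5, and
 * 0x55 ^ (0xF0 & (0xAA ^ 0x55)) = 0x55 ^ (0xF0 & 0xFF) = 0x55 ^ 0xF0 = 0xA5.
 */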
1595
1596 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
1597 LLVMValueRef src0)
1598 {
1599 LLVMValueRef comp[2];
1600
1601 src0 = ac_to_float(ctx, src0);
1602 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
1603 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
1604
1605 return ac_build_cvt_pkrtz_f16(ctx, comp);
1606 }
1607
1608 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
1609 LLVMValueRef src0)
1610 {
1611 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1612 LLVMValueRef temps[2], result, val;
1613 int i;
1614
1615 for (i = 0; i < 2; i++) {
1616 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1617 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1618 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1619 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1620 }
1621
1622 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1623 ctx->i32_0, "");
1624 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1625 ctx->i32_1, "");
1626 return result;
1627 }
1628
1629 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
1630 nir_op op,
1631 LLVMValueRef src0)
1632 {
1633 unsigned mask;
1634 int idx;
1635 LLVMValueRef result;
1636
1637 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1638 mask = AC_TID_MASK_LEFT;
1639 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1640 mask = AC_TID_MASK_TOP;
1641 else
1642 mask = AC_TID_MASK_TOP_LEFT;
1643
1644 /* for DDX we want the next X pixel, for DDY the next Y pixel. */
1645 if (op == nir_op_fddx_fine ||
1646 op == nir_op_fddx_coarse ||
1647 op == nir_op_fddx)
1648 idx = 1;
1649 else
1650 idx = 2;
1651
1652 result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
1653 return result;
1654 }
1655
1656 /*
1657 * This takes an I,J coordinate pair and works out
1658 * the X and Y derivatives.
1659 * It returns DDX(I), DDX(J), DDY(I), DDY(J).
1660 */
1661 static LLVMValueRef emit_ddxy_interp(
1662 struct ac_nir_context *ctx,
1663 LLVMValueRef interp_ij)
1664 {
1665 LLVMValueRef result[4], a;
1666 unsigned i;
1667
1668 for (i = 0; i < 2; i++) {
1669 a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
1670 LLVMConstInt(ctx->ac.i32, i, false), "");
1671 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1672 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1673 }
1674 return ac_build_gather_values(&ctx->ac, result, 4);
1675 }
1676
1677 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
1678 {
1679 LLVMValueRef src[4], result = NULL;
1680 unsigned num_components = instr->dest.dest.ssa.num_components;
1681 unsigned src_components;
1682 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
1683
1684 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1685 switch (instr->op) {
1686 case nir_op_vec2:
1687 case nir_op_vec3:
1688 case nir_op_vec4:
1689 src_components = 1;
1690 break;
1691 case nir_op_pack_half_2x16:
1692 src_components = 2;
1693 break;
1694 case nir_op_unpack_half_2x16:
1695 src_components = 1;
1696 break;
1697 default:
1698 src_components = num_components;
1699 break;
1700 }
1701 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1702 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1703
1704 switch (instr->op) {
1705 case nir_op_fmov:
1706 case nir_op_imov:
1707 result = src[0];
1708 break;
1709 case nir_op_fneg:
1710 src[0] = ac_to_float(&ctx->ac, src[0]);
1711 result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
1712 break;
1713 case nir_op_ineg:
1714 result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
1715 break;
1716 case nir_op_inot:
1717 result = LLVMBuildNot(ctx->ac.builder, src[0], "");
1718 break;
1719 case nir_op_iadd:
1720 result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
1721 break;
1722 case nir_op_fadd:
1723 src[0] = ac_to_float(&ctx->ac, src[0]);
1724 src[1] = ac_to_float(&ctx->ac, src[1]);
1725 result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
1726 break;
1727 case nir_op_fsub:
1728 src[0] = ac_to_float(&ctx->ac, src[0]);
1729 src[1] = ac_to_float(&ctx->ac, src[1]);
1730 result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
1731 break;
1732 case nir_op_isub:
1733 result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
1734 break;
1735 case nir_op_imul:
1736 result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
1737 break;
1738 case nir_op_imod:
1739 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
1740 break;
1741 case nir_op_umod:
1742 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
1743 break;
1744 case nir_op_fmod:
1745 src[0] = ac_to_float(&ctx->ac, src[0]);
1746 src[1] = ac_to_float(&ctx->ac, src[1]);
1747 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1748 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1749 ac_to_float_type(&ctx->ac, def_type), result);
1750 result = LLVMBuildFMul(ctx->ac.builder, src[1] , result, "");
1751 result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
1752 break;
1753 case nir_op_frem:
1754 src[0] = ac_to_float(&ctx->ac, src[0]);
1755 src[1] = ac_to_float(&ctx->ac, src[1]);
1756 result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
1757 break;
1758 case nir_op_irem:
1759 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
1760 break;
1761 case nir_op_idiv:
1762 result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
1763 break;
1764 case nir_op_udiv:
1765 result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
1766 break;
1767 case nir_op_fmul:
1768 src[0] = ac_to_float(&ctx->ac, src[0]);
1769 src[1] = ac_to_float(&ctx->ac, src[1]);
1770 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
1771 break;
1772 case nir_op_fdiv:
1773 src[0] = ac_to_float(&ctx->ac, src[0]);
1774 src[1] = ac_to_float(&ctx->ac, src[1]);
1775 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1776 break;
1777 case nir_op_frcp:
1778 src[0] = ac_to_float(&ctx->ac, src[0]);
1779 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
1780 src[0]);
1781 break;
1782 case nir_op_iand:
1783 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
1784 break;
1785 case nir_op_ior:
1786 result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
1787 break;
1788 case nir_op_ixor:
1789 result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
1790 break;
1791 case nir_op_ishl:
1792 result = LLVMBuildShl(ctx->ac.builder, src[0],
1793 LLVMBuildZExt(ctx->ac.builder, src[1],
1794 LLVMTypeOf(src[0]), ""),
1795 "");
1796 break;
1797 case nir_op_ishr:
1798 result = LLVMBuildAShr(ctx->ac.builder, src[0],
1799 LLVMBuildZExt(ctx->ac.builder, src[1],
1800 LLVMTypeOf(src[0]), ""),
1801 "");
1802 break;
1803 case nir_op_ushr:
1804 result = LLVMBuildLShr(ctx->ac.builder, src[0],
1805 LLVMBuildZExt(ctx->ac.builder, src[1],
1806 LLVMTypeOf(src[0]), ""),
1807 "");
1808 break;
1809 case nir_op_ilt:
1810 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1811 break;
1812 case nir_op_ine:
1813 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
1814 break;
1815 case nir_op_ieq:
1816 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
1817 break;
1818 case nir_op_ige:
1819 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
1820 break;
1821 case nir_op_ult:
1822 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
1823 break;
1824 case nir_op_uge:
1825 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
1826 break;
1827 case nir_op_feq:
1828 result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
1829 break;
1830 case nir_op_fne:
1831 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
1832 break;
1833 case nir_op_flt:
1834 result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
1835 break;
1836 case nir_op_fge:
1837 result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
1838 break;
1839 case nir_op_fabs:
1840 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
1841 ac_to_float_type(&ctx->ac, def_type), src[0]);
1842 break;
1843 case nir_op_iabs:
1844 result = emit_iabs(&ctx->ac, src[0]);
1845 break;
1846 case nir_op_imax:
1847 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
1848 break;
1849 case nir_op_imin:
1850 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1851 break;
1852 case nir_op_umax:
1853 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
1854 break;
1855 case nir_op_umin:
1856 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
1857 break;
1858 case nir_op_isign:
1859 result = emit_isign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1860 break;
1861 case nir_op_fsign:
1862 src[0] = ac_to_float(&ctx->ac, src[0]);
1863 result = emit_fsign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1864 break;
1865 case nir_op_ffloor:
1866 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1867 ac_to_float_type(&ctx->ac, def_type), src[0]);
1868 break;
1869 case nir_op_ftrunc:
1870 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
1871 ac_to_float_type(&ctx->ac, def_type), src[0]);
1872 break;
1873 case nir_op_fceil:
1874 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
1875 ac_to_float_type(&ctx->ac, def_type), src[0]);
1876 break;
1877 case nir_op_fround_even:
1878 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
1879 ac_to_float_type(&ctx->ac, def_type), src[0]);
1880 break;
1881 case nir_op_ffract:
1882 result = emit_ffract(&ctx->ac, src[0]);
1883 break;
1884 case nir_op_fsin:
1885 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
1886 ac_to_float_type(&ctx->ac, def_type), src[0]);
1887 break;
1888 case nir_op_fcos:
1889 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
1890 ac_to_float_type(&ctx->ac, def_type), src[0]);
1891 break;
1892 case nir_op_fsqrt:
1893 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1894 ac_to_float_type(&ctx->ac, def_type), src[0]);
1895 break;
1896 case nir_op_fexp2:
1897 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
1898 ac_to_float_type(&ctx->ac, def_type), src[0]);
1899 break;
1900 case nir_op_flog2:
1901 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
1902 ac_to_float_type(&ctx->ac, def_type), src[0]);
1903 break;
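	/* frsq: emitted as 1.0 / sqrt(x). */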
1904 case nir_op_frsq:
1905 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1906 ac_to_float_type(&ctx->ac, def_type), src[0]);
1907 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
1908 result);
1909 break;
1910 case nir_op_fpow:
1911 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
1912 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1913 break;
1914 case nir_op_fmax:
1915 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1916 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1917 if (ctx->ac.chip_class < GFX9 &&
1918 instr->dest.dest.ssa.bit_size == 32) {
1919 /* Only pre-GFX9 chips do not flush denorms. */
1920 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1921 ac_to_float_type(&ctx->ac, def_type),
1922 result);
1923 }
1924 break;
1925 case nir_op_fmin:
1926 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1927 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1928 if (ctx->ac.chip_class < GFX9 &&
1929 instr->dest.dest.ssa.bit_size == 32) {
1930 /* Only pre-GFX9 chips do not flush denorms. */
1931 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1932 ac_to_float_type(&ctx->ac, def_type),
1933 result);
1934 }
1935 break;
1936 case nir_op_ffma:
1937 result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
1938 ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
1939 break;
1940 case nir_op_ibitfield_extract:
1941 result = emit_bitfield_extract(&ctx->ac, true, src);
1942 break;
1943 case nir_op_ubitfield_extract:
1944 result = emit_bitfield_extract(&ctx->ac, false, src);
1945 break;
1946 case nir_op_bitfield_insert:
1947 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
1948 break;
1949 case nir_op_bitfield_reverse:
1950 result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1951 break;
1952 case nir_op_bit_count:
1953 result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1954 break;
1955 case nir_op_vec2:
1956 case nir_op_vec3:
1957 case nir_op_vec4:
1958 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1959 src[i] = ac_to_integer(&ctx->ac, src[i]);
1960 result = ac_build_gather_values(&ctx->ac, src, num_components);
1961 break;
1962 case nir_op_f2i32:
1963 case nir_op_f2i64:
1964 src[0] = ac_to_float(&ctx->ac, src[0]);
1965 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
1966 break;
1967 case nir_op_f2u32:
1968 case nir_op_f2u64:
1969 src[0] = ac_to_float(&ctx->ac, src[0]);
1970 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
1971 break;
1972 case nir_op_i2f32:
1973 case nir_op_i2f64:
1974 src[0] = ac_to_integer(&ctx->ac, src[0]);
1975 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1976 break;
1977 case nir_op_u2f32:
1978 case nir_op_u2f64:
1979 src[0] = ac_to_integer(&ctx->ac, src[0]);
1980 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1981 break;
1982 case nir_op_f2f64:
1983 src[0] = ac_to_float(&ctx->ac, src[0]);
1984 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1985 break;
1986 case nir_op_f2f32:
1987 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1988 break;
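	/* Integer resize: zero-/sign-extend when widening, truncate when
	 * narrowing. */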
1989 case nir_op_u2u32:
1990 case nir_op_u2u64:
1991 src[0] = ac_to_integer(&ctx->ac, src[0]);
1992 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1993 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1994 else
1995 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1996 break;
1997 case nir_op_i2i32:
1998 case nir_op_i2i64:
1999 src[0] = ac_to_integer(&ctx->ac, src[0]);
2000 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
2001 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
2002 else
2003 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
2004 break;
2005 case nir_op_bcsel:
2006 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
2007 break;
2008 case nir_op_find_lsb:
2009 src[0] = ac_to_integer(&ctx->ac, src[0]);
2010 result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
2011 break;
2012 case nir_op_ufind_msb:
2013 src[0] = ac_to_integer(&ctx->ac, src[0]);
2014 result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
2015 break;
2016 case nir_op_ifind_msb:
2017 src[0] = ac_to_integer(&ctx->ac, src[0]);
2018 result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
2019 break;
2020 case nir_op_uadd_carry:
2021 src[0] = ac_to_integer(&ctx->ac, src[0]);
2022 src[1] = ac_to_integer(&ctx->ac, src[1]);
2023 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
2024 break;
2025 case nir_op_usub_borrow:
2026 src[0] = ac_to_integer(&ctx->ac, src[0]);
2027 src[1] = ac_to_integer(&ctx->ac, src[1]);
2028 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
2029 break;
2030 case nir_op_b2f:
2031 result = emit_b2f(&ctx->ac, src[0]);
2032 break;
2033 case nir_op_f2b:
2034 result = emit_f2b(&ctx->ac, src[0]);
2035 break;
2036 case nir_op_b2i:
2037 result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
2038 break;
2039 case nir_op_i2b:
2040 src[0] = ac_to_integer(&ctx->ac, src[0]);
2041 result = emit_i2b(&ctx->ac, src[0]);
2042 break;
2043 case nir_op_fquantize2f16:
2044 result = emit_f2f16(ctx->nctx, src[0]);
2045 break;
2046 case nir_op_umul_high:
2047 src[0] = ac_to_integer(&ctx->ac, src[0]);
2048 src[1] = ac_to_integer(&ctx->ac, src[1]);
2049 result = emit_umul_high(&ctx->ac, src[0], src[1]);
2050 break;
2051 case nir_op_imul_high:
2052 src[0] = ac_to_integer(&ctx->ac, src[0]);
2053 src[1] = ac_to_integer(&ctx->ac, src[1]);
2054 result = emit_imul_high(&ctx->ac, src[0], src[1]);
2055 break;
2056 case nir_op_pack_half_2x16:
2057 result = emit_pack_half_2x16(&ctx->ac, src[0]);
2058 break;
2059 case nir_op_unpack_half_2x16:
2060 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
2061 break;
2062 case nir_op_fddx:
2063 case nir_op_fddy:
2064 case nir_op_fddx_fine:
2065 case nir_op_fddy_fine:
2066 case nir_op_fddx_coarse:
2067 case nir_op_fddy_coarse:
2068 result = emit_ddxy(ctx, instr->op, src[0]);
2069 break;
2070
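	/* 64-bit pack/unpack: bitcast between i64 and <2 x i32> so the two
	 * 32-bit halves can be moved with plain extract/insertelement. */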
2071 case nir_op_unpack_64_2x32_split_x: {
2072 assert(instr->src[0].src.ssa->num_components == 1);
2073 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
2074 ctx->ac.v2i32,
2075 "");
2076 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
2077 ctx->ac.i32_0, "");
2078 break;
2079 }
2080
2081 case nir_op_unpack_64_2x32_split_y: {
2082 assert(instr->src[0].src.ssa->num_components == 1);
2083 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
2084 ctx->ac.v2i32,
2085 "");
2086 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
2087 ctx->ac.i32_1, "");
2088 break;
2089 }
2090
2091 case nir_op_pack_64_2x32_split: {
2092 LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
2093 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
2094 src[0], ctx->ac.i32_0, "");
2095 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
2096 src[1], ctx->ac.i32_1, "");
2097 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
2098 break;
2099 }
2100
2101 default:
2102 fprintf(stderr, "Unknown NIR alu instr: ");
2103 nir_print_instr(&instr->instr, stderr);
2104 fprintf(stderr, "\n");
2105 abort();
2106 }
2107
2108 if (result) {
2109 assert(instr->dest.dest.is_ssa);
2110 result = ac_to_integer(&ctx->ac, result);
2111 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
2112 result);
2113 }
2114 }
2115
2116 static void visit_load_const(struct ac_nir_context *ctx,
2117 const nir_load_const_instr *instr)
2118 {
2119 LLVMValueRef values[4], value = NULL;
2120 LLVMTypeRef element_type =
2121 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
2122
2123 for (unsigned i = 0; i < instr->def.num_components; ++i) {
2124 switch (instr->def.bit_size) {
2125 case 32:
2126 values[i] = LLVMConstInt(element_type,
2127 instr->value.u32[i], false);
2128 break;
2129 case 64:
2130 values[i] = LLVMConstInt(element_type,
2131 instr->value.u64[i], false);
2132 break;
2133 default:
2134 fprintf(stderr,
2135 "unsupported nir load_const bit_size: %d\n",
2136 instr->def.bit_size);
2137 abort();
2138 }
2139 }
2140 if (instr->def.num_components > 1) {
2141 value = LLVMConstVector(values, instr->def.num_components);
2142 } else
2143 value = values[0];
2144
2145 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
2146 }
2147
2148 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
2149 LLVMTypeRef type)
2150 {
2151 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2152 return LLVMBuildBitCast(ctx->builder, ptr,
2153 LLVMPointerType(type, addr_space), "");
2154 }
2155
2156 static LLVMValueRef
2157 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
2158 {
2159 LLVMValueRef size =
2160 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
2161 LLVMConstInt(ctx->ac.i32, 2, false), "");
2162
2163 /* VI only */
2164 if (ctx->ac.chip_class == VI && in_elements) {
2165 /* On VI, the descriptor contains the size in bytes,
2166 * but TXQ must return the size in elements.
2167 * The stride is always non-zero for resources using TXQ.
2168 */
2169 LLVMValueRef stride =
2170 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
2171 ctx->ac.i32_1, "");
2172 stride = LLVMBuildLShr(ctx->ac.builder, stride,
2173 LLVMConstInt(ctx->ac.i32, 16, false), "");
2174 stride = LLVMBuildAnd(ctx->ac.builder, stride,
2175 LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
2176
2177 size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
2178 }
2179 return size;
2180 }
2181
2182 /**
2183 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
2184 * intrinsic names).
2185 */
2186 static void build_int_type_name(
2187 LLVMTypeRef type,
2188 char *buf, unsigned bufsize)
2189 {
2190 assert(bufsize >= 6);
2191
2192 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
2193 snprintf(buf, bufsize, "v%ui32",
2194 LLVMGetVectorSize(type));
2195 else
2196 strcpy(buf, "i32");
2197 }
2198
2199 static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx,
2200 struct ac_image_args *args,
2201 const nir_tex_instr *instr)
2202 {
2203 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2204 LLVMValueRef coord = args->addr;
2205 LLVMValueRef half_texel[2];
2206 LLVMValueRef compare_cube_wa = NULL;
2207 LLVMValueRef result;
2208 int c;
2209 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
2210
2211 /* TODO: Rect */
2212 {
2213 struct ac_image_args txq_args = { 0 };
2214
2215 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
2216 txq_args.opcode = ac_image_get_resinfo;
2217 txq_args.dmask = 0xf;
2218 txq_args.addr = ctx->i32_0;
2219 txq_args.resource = args->resource;
2220 LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
2221
2222 for (c = 0; c < 2; c++) {
2223 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
2224 LLVMConstInt(ctx->i32, c, false), "");
2225 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
2226 half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
2227 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
2228 LLVMConstReal(ctx->f32, -0.5), "");
2229 }
2230 }
2231
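	/* half_texel[c] is now -0.5 / size[c]; adding it below nudges the
	 * gather coordinates by half a texel, presumably so the
	 * integer-format path picks the same 2x2 footprint as the float
	 * path. */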
2232 LLVMValueRef orig_coords = args->addr;
2233
2234 for (c = 0; c < 2; c++) {
2235 LLVMValueRef tmp;
2236 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
2237 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
2238 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2239 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
2240 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2241 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
2242 }
2243
2244
2245 /*
2246 * Apparently cube sampling has an issue with integer types that the
2247 * workaround doesn't solve, so if the format is 8_8_8_8 and the type is
2248 * integer, do an alternate workaround: sample with a scaled type and
2249 * convert the result back. This is taken from the amdgpu-pro shaders.
2250 */
2251 /* NOTE: this produces some ugly code compared to amdgpu-pro:
2252 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select
2253 * and then reads them back. -pro generates two selects:
2254 * one s_cmp for the descriptor rewrite and
2255 * one v_cmp for the coordinate and result changes.
2256 */
2257 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2258 LLVMValueRef tmp, tmp2;
2259
2260 /* Work around the 8/8/8/8 uint/sint cube gather bug: */
2261 /* first detect it, then change to a scaled read and f2i. */
2262 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
2263 tmp2 = tmp;
2264
2265 /* extract the DATA_FORMAT */
2266 tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
2267 LLVMConstInt(ctx->i32, 6, false), false);
2268
2269 /* is the DATA_FORMAT == 8_8_8_8 */
2270 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
2271
2272 if (stype == GLSL_TYPE_UINT)
2273 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
2274 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
2275 LLVMConstInt(ctx->i32, 0x10000000, false), "");
2276 else
2277 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
2278 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
2279 LLVMConstInt(ctx->i32, 0x14000000, false), "");
2280
2281 /* replace the NUM FORMAT in the descriptor */
2282 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
2283 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
2284
2285 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
2286
2287 /* don't modify the coordinates for this case */
2288 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
2289 }
2290 args->addr = coord;
2291 result = ac_build_image_opcode(ctx, args);
2292
2293 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2294 LLVMValueRef tmp, tmp2;
2295
2296 /* if the cube workaround is in place, f2i the result. */
2297 for (c = 0; c < 4; c++) {
2298 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
2299 if (stype == GLSL_TYPE_UINT)
2300 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
2301 else
2302 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
2303 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2304 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
2305 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
2306 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2307 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
2308 }
2309 }
2310 return result;
2311 }
2312
2313 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
2314 const nir_tex_instr *instr,
2315 bool lod_is_zero,
2316 struct ac_image_args *args)
2317 {
2318 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
2319 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
2320
2321 return ac_build_buffer_load_format(&ctx->ac,
2322 args->resource,
2323 args->addr,
2324 ctx->ac.i32_0,
2325 util_last_bit(mask),
2326 true);
2327 }
2328
2329 args->opcode = ac_image_sample;
2330 args->compare = instr->is_shadow;
2331
2332 switch (instr->op) {
2333 case nir_texop_txf:
2334 case nir_texop_txf_ms:
2335 case nir_texop_samples_identical:
2336 args->opcode = lod_is_zero ||
2337 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
2338 ac_image_load : ac_image_load_mip;
2339 args->compare = false;
2340 args->offset = false;
2341 break;
2342 case nir_texop_txb:
2343 args->bias = true;
2344 break;
2345 case nir_texop_txl:
2346 if (lod_is_zero)
2347 args->level_zero = true;
2348 else
2349 args->lod = true;
2350 break;
2351 case nir_texop_txs:
2352 case nir_texop_query_levels:
2353 args->opcode = ac_image_get_resinfo;
2354 break;
2355 case nir_texop_tex:
2356 if (ctx->stage != MESA_SHADER_FRAGMENT)
2357 args->level_zero = true;
2358 break;
2359 case nir_texop_txd:
2360 args->deriv = true;
2361 break;
2362 case nir_texop_tg4:
2363 args->opcode = ac_image_gather4;
2364 args->level_zero = true;
2365 break;
2366 case nir_texop_lod:
2367 args->opcode = ac_image_get_lod;
2368 args->compare = false;
2369 args->offset = false;
2370 break;
2371 default:
2372 break;
2373 }
2374
2375 if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
2376 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2377 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
2378 return radv_lower_gather4_integer(&ctx->ac, args, instr);
2379 }
2380 }
2381 return ac_build_image_opcode(&ctx->ac, args);
2382 }
2383
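/* Resolve vulkan_resource_index to a v4i32 descriptor pointer:
 * addr = set_base + binding_offset + index * binding_stride.
 * Dynamic buffers instead live after the push constants, 16 bytes apart. */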
2384 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
2385 nir_intrinsic_instr *instr)
2386 {
2387 LLVMValueRef index = get_src(ctx->nir, instr->src[0]);
2388 unsigned desc_set = nir_intrinsic_desc_set(instr);
2389 unsigned binding = nir_intrinsic_binding(instr);
2390 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
2391 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
2392 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
2393 unsigned base_offset = layout->binding[binding].offset;
2394 LLVMValueRef offset, stride;
2395
2396 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2397 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2398 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
2399 layout->binding[binding].dynamic_offset_offset;
2400 desc_ptr = ctx->push_constants;
2401 base_offset = pipeline_layout->push_constant_size + 16 * idx;
2402 stride = LLVMConstInt(ctx->ac.i32, 16, false);
2403 } else
2404 stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
2405
2406 offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
2407 index = LLVMBuildMul(ctx->builder, index, stride, "");
2408 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
2409
2410 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
2411 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->ac.v4i32);
2412 LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
2413
2414 return desc_ptr;
2415 }
2416
2417 static LLVMValueRef visit_vulkan_resource_reindex(struct nir_to_llvm_context *ctx,
2418 nir_intrinsic_instr *instr)
2419 {
2420 LLVMValueRef ptr = get_src(ctx->nir, instr->src[0]);
2421 LLVMValueRef index = get_src(ctx->nir, instr->src[1]);
2422
2423 LLVMValueRef result = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2424 LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
2425 return result;
2426 }
2427
2428 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
2429 nir_intrinsic_instr *instr)
2430 {
2431 LLVMValueRef ptr, addr;
2432
2433 addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
2434 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), "");
2435
2436 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
2437 ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
2438
2439 return LLVMBuildLoad(ctx->builder, ptr, "");
2440 }
2441
2442 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
2443 const nir_intrinsic_instr *instr)
2444 {
2445 LLVMValueRef index = get_src(ctx, instr->src[0]);
2446
2447 return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
2448 }
2449
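/* Duplicate each set writemask bit into `multiplier` adjacent bits, for
 * values whose elements were split into multiple 32-bit channels:
 * e.g. widen_mask(0b101, 2) == 0b110011. */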
2450 static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
2451 {
2452 uint32_t new_mask = 0;
2453 for(unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
2454 if (mask & (1u << i))
2455 new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
2456 return new_mask;
2457 }
2458
2459 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
2460 unsigned start, unsigned count)
2461 {
2462 LLVMTypeRef type = LLVMTypeOf(src);
2463
2464 if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
2465 assert(start == 0);
2466 assert(count == 1);
2467 return src;
2468 }
2469
2470 unsigned src_elements = LLVMGetVectorSize(type);
2471 assert(start < src_elements);
2472 assert(start + count <= src_elements);
2473
2474 if (start == 0 && count == src_elements)
2475 return src;
2476
2477 if (count == 1)
2478 return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");
2479
2480 assert(count <= 8);
2481 LLVMValueRef indices[8];
2482 for (unsigned i = 0; i < count; ++i)
2483 indices[i] = LLVMConstInt(ctx->i32, start + i, false);
2484
2485 LLVMValueRef swizzle = LLVMConstVector(indices, count);
2486 return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
2487 }
2488
2489 static void visit_store_ssbo(struct ac_nir_context *ctx,
2490 nir_intrinsic_instr *instr)
2491 {
2492 const char *store_name;
2493 LLVMValueRef src_data = get_src(ctx, instr->src[0]);
2494 LLVMTypeRef data_type = ctx->ac.f32;
2495 int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
2496 int components_32bit = elem_size_mult * instr->num_components;
2497 unsigned writemask = nir_intrinsic_write_mask(instr);
2498 LLVMValueRef base_data, base_offset;
2499 LLVMValueRef params[6];
2500
2501 params[1] = ctx->abi->load_ssbo(ctx->abi,
2502 get_src(ctx, instr->src[1]), true);
2503 params[2] = ctx->ac.i32_0; /* vindex */
2504 params[4] = ctx->ac.i1false; /* glc */
2505 params[5] = ctx->ac.i1false; /* slc */
2506
2507 if (components_32bit > 1)
2508 data_type = LLVMVectorType(ctx->ac.f32, components_32bit);
2509
2510 writemask = widen_mask(writemask, elem_size_mult);
2511
2512 base_data = ac_to_float(&ctx->ac, src_data);
2513 base_data = trim_vector(&ctx->ac, base_data, instr->num_components);
2514 base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
2515 data_type, "");
2516 base_offset = get_src(ctx, instr->src[2]); /* voffset */
2517 while (writemask) {
2518 int start, count;
2519 LLVMValueRef data;
2520 LLVMValueRef offset;
2521
2522 u_bit_scan_consecutive_range(&writemask, &start, &count);
2523
2524 /* Due to an LLVM limitation, split 3-element writes
2525 * into a 2-element and a 1-element write. */
2526 if (count == 3) {
2527 writemask |= 1 << (start + 2);
2528 count = 2;
2529 }
2530
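		/* Likewise cap each store at 4 dwords; the leftover channels
		 * are pushed back into the writemask for the next
		 * iteration. */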
2531 if (count > 4) {
2532 writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
2533 count = 4;
2534 }
2535
2536 if (count == 4) {
2537 store_name = "llvm.amdgcn.buffer.store.v4f32";
2538 } else if (count == 2) {
2539 store_name = "llvm.amdgcn.buffer.store.v2f32";
2540
2541 } else {
2542 assert(count == 1);
2543 store_name = "llvm.amdgcn.buffer.store.f32";
2544 }
2545 data = extract_vector_range(&ctx->ac, base_data, start, count);
2546
2547 offset = base_offset;
2548 if (start != 0) {
2549 offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), "");
2550 }
2551 params[0] = data;
2552 params[3] = offset;
2553 ac_build_intrinsic(&ctx->ac, store_name,
2554 ctx->ac.voidt, params, 6, 0);
2555 }
2556 }
2557
2558 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
2559 const nir_intrinsic_instr *instr)
2560 {
2561 const char *name;
2562 LLVMValueRef params[6];
2563 int arg_count = 0;
2564
2565 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2566 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
2567 }
2568 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
2569 params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
2570 get_src(ctx, instr->src[0]),
2571 true);
2572 params[arg_count++] = ctx->ac.i32_0; /* vindex */
2573 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
2574 params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */
2575
2576 switch (instr->intrinsic) {
2577 case nir_intrinsic_ssbo_atomic_add:
2578 name = "llvm.amdgcn.buffer.atomic.add";
2579 break;
2580 case nir_intrinsic_ssbo_atomic_imin:
2581 name = "llvm.amdgcn.buffer.atomic.smin";
2582 break;
2583 case nir_intrinsic_ssbo_atomic_umin:
2584 name = "llvm.amdgcn.buffer.atomic.umin";
2585 break;
2586 case nir_intrinsic_ssbo_atomic_imax:
2587 name = "llvm.amdgcn.buffer.atomic.smax";
2588 break;
2589 case nir_intrinsic_ssbo_atomic_umax:
2590 name = "llvm.amdgcn.buffer.atomic.umax";
2591 break;
2592 case nir_intrinsic_ssbo_atomic_and:
2593 name = "llvm.amdgcn.buffer.atomic.and";
2594 break;
2595 case nir_intrinsic_ssbo_atomic_or:
2596 name = "llvm.amdgcn.buffer.atomic.or";
2597 break;
2598 case nir_intrinsic_ssbo_atomic_xor:
2599 name = "llvm.amdgcn.buffer.atomic.xor";
2600 break;
2601 case nir_intrinsic_ssbo_atomic_exchange:
2602 name = "llvm.amdgcn.buffer.atomic.swap";
2603 break;
2604 case nir_intrinsic_ssbo_atomic_comp_swap:
2605 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2606 break;
2607 default:
2608 abort();
2609 }
2610
2611 return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0);
2612 }
2613
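/* SSBO loads are issued as 1-, 2- or 4-dword buffer.load intrinsics;
 * 3-component (and >4-dword) results are reassembled with a shuffle. */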
2614 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
2615 const nir_intrinsic_instr *instr)
2616 {
2617 LLVMValueRef results[2];
2618 int load_components;
2619 int num_components = instr->num_components;
2620 if (instr->dest.ssa.bit_size == 64)
2621 num_components *= 2;
2622
2623 for (int i = 0; i < num_components; i += load_components) {
2624 load_components = MIN2(num_components - i, 4);
2625 const char *load_name;
2626 LLVMTypeRef data_type = ctx->ac.f32;
2627 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false);
2628 offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, "");
2629
2630 if (load_components == 3)
2631 data_type = LLVMVectorType(ctx->ac.f32, 4);
2632 else if (load_components > 1)
2633 data_type = LLVMVectorType(ctx->ac.f32, load_components);
2634
2635 if (load_components >= 3)
2636 load_name = "llvm.amdgcn.buffer.load.v4f32";
2637 else if (load_components == 2)
2638 load_name = "llvm.amdgcn.buffer.load.v2f32";
2639 else if (load_components == 1)
2640 load_name = "llvm.amdgcn.buffer.load.f32";
2641 else
2642 unreachable("unhandled number of components");
2643
2644 LLVMValueRef params[] = {
2645 ctx->abi->load_ssbo(ctx->abi,
2646 get_src(ctx, instr->src[0]),
2647 false),
2648 ctx->ac.i32_0,
2649 offset,
2650 ctx->ac.i1false,
2651 ctx->ac.i1false,
2652 };
2653
2654 results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
2655 }
2656
2657 assume(results[0]);
2658 LLVMValueRef ret = results[0];
2659 if (num_components > 4 || num_components == 3) {
2660 LLVMValueRef masks[] = {
2661 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
2662 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
2663 LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false),
2664 LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false)
2665 };
2666
2667 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
2668 ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
2669 results[num_components > 4 ? 1 : 0], swizzle, "");
2670 }
2671
2672 return LLVMBuildBitCast(ctx->ac.builder, ret,
2673 get_def_type(ctx, &instr->dest.ssa), "");
2674 }
2675
2676 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
2677 const nir_intrinsic_instr *instr)
2678 {
2679 LLVMValueRef ret;
2680 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
2681 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2682 int num_components = instr->num_components;
2683
2684 if (ctx->abi->load_ubo)
2685 rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
2686
2687 if (instr->dest.ssa.bit_size == 64)
2688 num_components *= 2;
2689
2690 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
2691 NULL, 0, false, false, true, true);
2692 ret = trim_vector(&ctx->ac, ret, num_components);
2693 return LLVMBuildBitCast(ctx->ac.builder, ret,
2694 get_def_type(ctx, &instr->dest.ssa), "");
2695 }
2696
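/* Walk a NIR deref chain and split the total offset into a constant part
 * (in attribute slots) and an optional indirect LLVM value; for per-vertex
 * I/O the outermost vertex array index is returned separately. */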
2697 static void
2698 get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref,
2699 bool vs_in, unsigned *vertex_index_out,
2700 LLVMValueRef *vertex_index_ref,
2701 unsigned *const_out, LLVMValueRef *indir_out)
2702 {
2703 unsigned const_offset = 0;
2704 nir_deref *tail = &deref->deref;
2705 LLVMValueRef offset = NULL;
2706
2707 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2708 tail = tail->child;
2709 nir_deref_array *deref_array = nir_deref_as_array(tail);
2710 if (vertex_index_out)
2711 *vertex_index_out = deref_array->base_offset;
2712
2713 if (vertex_index_ref) {
2714 LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false);
2715 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
2716 vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), "");
2717 }
2718 *vertex_index_ref = vtx;
2719 }
2720 }
2721
2722 if (deref->var->data.compact) {
2723 assert(tail->child->deref_type == nir_deref_type_array);
2724 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
2725 nir_deref_array *deref_array = nir_deref_as_array(tail->child);
2726 /* We always lower indirect dereferences for "compact" array vars. */
2727 assert(deref_array->deref_array_type == nir_deref_array_type_direct);
2728
2729 const_offset = deref_array->base_offset;
2730 goto out;
2731 }
2732
2733 while (tail->child != NULL) {
2734 const struct glsl_type *parent_type = tail->type;
2735 tail = tail->child;
2736
2737 if (tail->deref_type == nir_deref_type_array) {
2738 nir_deref_array *deref_array = nir_deref_as_array(tail);
2739 LLVMValueRef index, stride, local_offset;
2740 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2741
2742 const_offset += size * deref_array->base_offset;
2743 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2744 continue;
2745
2746 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2747 index = get_src(ctx, deref_array->indirect);
2748 stride = LLVMConstInt(ctx->ac.i32, size, 0);
2749 local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, "");
2750
2751 if (offset)
2752 offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, "");
2753 else
2754 offset = local_offset;
2755 } else if (tail->deref_type == nir_deref_type_struct) {
2756 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2757
2758 for (unsigned i = 0; i < deref_struct->index; i++) {
2759 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2760 const_offset += glsl_count_attribute_slots(ft, vs_in);
2761 }
2762 } else
2763 unreachable("unsupported deref type");
2764
2765 }
2766 out:
2767 if (const_offset && offset)
2768 offset = LLVMBuildAdd(ctx->ac.builder, offset,
2769 LLVMConstInt(ctx->ac.i32, const_offset, 0),
2770 "");
2771
2772 *const_out = const_offset;
2773 *indir_out = offset;
2774 }
2775
2776
2777 /* The offchip buffer layout for TCS->TES is
2778 *
2779 * - attribute 0 of patch 0 vertex 0
2780 * - attribute 0 of patch 0 vertex 1
2781 * - attribute 0 of patch 0 vertex 2
2782 * ...
2783 * - attribute 0 of patch 1 vertex 0
2784 * - attribute 0 of patch 1 vertex 1
2785 * ...
2786 * - attribute 1 of patch 0 vertex 0
2787 * - attribute 1 of patch 0 vertex 1
2788 * ...
2789 * - per patch attribute 0 of patch 0
2790 * - per patch attribute 0 of patch 1
2791 * ...
2792 *
2793 * Note that every attribute has 4 components.
2794 */
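/* In other words (addresses in bytes, one vec4 attribute = 16 bytes):
 *   per-vertex: 16 * (rel_patch_id * vertices_per_patch + vertex_index
 *                     + param_index * vertices_per_patch * num_patches)
 *   per-patch:  16 * (rel_patch_id + param_index * num_patches)
 *               + patch_data_offset
 */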
2795 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
2796 LLVMValueRef vertex_index,
2797 LLVMValueRef param_index)
2798 {
2799 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
2800 LLVMValueRef param_stride, constant16;
2801 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
2802
2803 vertices_per_patch = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 9, 6);
2804 num_patches = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
2805 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
2806 num_patches, "");
2807
2808 constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
2809 if (vertex_index) {
2810 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id,
2811 vertices_per_patch, "");
2812
2813 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2814 vertex_index, "");
2815
2816 param_stride = total_vertices;
2817 } else {
2818 base_addr = rel_patch_id;
2819 param_stride = num_patches;
2820 }
2821
2822 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2823 LLVMBuildMul(ctx->builder, param_index,
2824 param_stride, ""), "");
2825
2826 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, "");
2827
2828 if (!vertex_index) {
2829 LLVMValueRef patch_data_offset =
2830 unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
2831
2832 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2833 patch_data_offset, "");
2834 }
2835 return base_addr;
2836 }
2837
2838 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx,
2839 unsigned param,
2840 unsigned const_index,
2841 bool is_compact,
2842 LLVMValueRef vertex_index,
2843 LLVMValueRef indir_index)
2844 {
2845 LLVMValueRef param_index;
2846
2847 if (indir_index)
2848 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->ac.i32, param, false),
2849 indir_index, "");
2850 else {
2851 if (const_index && !is_compact)
2852 param += const_index;
2853 param_index = LLVMConstInt(ctx->ac.i32, param, false);
2854 }
2855 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
2856 }
2857
2858 static void
2859 mark_tess_output(struct nir_to_llvm_context *ctx,
2860 bool is_patch, uint32_t param)
2861
2862 {
2863 if (is_patch) {
2864 ctx->tess_patch_outputs_written |= (1ull << param);
2865 } else
2866 ctx->tess_outputs_written |= (1ull << param);
2867 }
2868
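/* Compute an LDS dword address for a TCS input/output:
 * dw_addr += vertex_index * stride + param * 4 + const/indirect offset
 * (a compact const_index addresses single components, so it is added
 * after the param offset). */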
2869 static LLVMValueRef
2870 get_dw_address(struct nir_to_llvm_context *ctx,
2871 LLVMValueRef dw_addr,
2872 unsigned param,
2873 unsigned const_index,
2874 bool compact_const_index,
2875 LLVMValueRef vertex_index,
2876 LLVMValueRef stride,
2877 LLVMValueRef indir_index)
2878
2879 {
2880
2881 if (vertex_index) {
2882 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2883 LLVMBuildMul(ctx->builder,
2884 vertex_index,
2885 stride, ""), "");
2886 }
2887
2888 if (indir_index)
2889 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2890 LLVMBuildMul(ctx->builder, indir_index,
2891 LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
2892 else if (const_index && !compact_const_index)
2893 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2894 LLVMConstInt(ctx->ac.i32, const_index, false), "");
2895
2896 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2897 LLVMConstInt(ctx->ac.i32, param * 4, false), "");
2898
2899 if (const_index && compact_const_index)
2900 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2901 LLVMConstInt(ctx->ac.i32, const_index, false), "");
2902 return dw_addr;
2903 }
2904
2905 static LLVMValueRef
2906 load_tcs_varyings(struct ac_shader_abi *abi,
2907 LLVMValueRef vertex_index,
2908 LLVMValueRef indir_index,
2909 unsigned const_index,
2910 unsigned location,
2911 unsigned driver_location,
2912 unsigned component,
2913 unsigned num_components,
2914 bool is_patch,
2915 bool is_compact,
2916 bool load_input)
2917 {
2918 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2919 LLVMValueRef dw_addr, stride;
2920 LLVMValueRef value[4], result;
2921 unsigned param = shader_io_get_unique_index(location);
2922
2923 if (load_input) {
2924 stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
2925 dw_addr = get_tcs_in_current_patch_offset(ctx);
2926 } else {
2927 if (!is_patch) {
2928 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
2929 dw_addr = get_tcs_out_current_patch_offset(ctx);
2930 } else {
2931 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2932 stride = NULL;
2933 }
2934 }
2935
2936 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2937 indir_index);
2938
2939 for (unsigned i = 0; i < num_components + component; i++) {
2940 value[i] = ac_lds_load(&ctx->ac, dw_addr);
2941 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2942 ctx->ac.i32_1, "");
2943 }
2944 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
2945 return result;
2946 }
2947
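/* Store a TCS output: keep a copy in LDS when the TCS reads it back
 * (and always for tess factors), and write it to the offchip buffer for
 * the TES to read. */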
2948 static void
2949 store_tcs_output(struct ac_shader_abi *abi,
2950 LLVMValueRef vertex_index,
2951 LLVMValueRef param_index,
2952 unsigned const_index,
2953 unsigned location,
2954 unsigned driver_location,
2955 LLVMValueRef src,
2956 unsigned component,
2957 bool is_patch,
2958 bool is_compact,
2959 unsigned writemask)
2960 {
2961 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2962 LLVMValueRef dw_addr;
2963 LLVMValueRef stride = NULL;
2964 LLVMValueRef buf_addr = NULL;
2965 unsigned param;
2966 bool store_lds = true;
2967
2968 if (is_patch) {
2969 if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0))))
2970 store_lds = false;
2971 } else {
2972 if (!(ctx->tcs_outputs_read & (1ULL << location)))
2973 store_lds = false;
2974 }
2975
2976 param = shader_io_get_unique_index(location);
2977 if (location == VARYING_SLOT_CLIP_DIST0 &&
2978 is_compact && const_index > 3) {
2979 const_index -= 3;
2980 param++;
2981 }
2982
2983 if (!is_patch) {
2984 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
2985 dw_addr = get_tcs_out_current_patch_offset(ctx);
2986 } else {
2987 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2988 }
2989
2990 mark_tess_output(ctx, is_patch, param);
2991
2992 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2993 param_index);
2994 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
2995 vertex_index, param_index);
2996
2997 bool is_tess_factor = false;
2998 if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
2999 location == VARYING_SLOT_TESS_LEVEL_OUTER)
3000 is_tess_factor = true;
3001
3002 unsigned base = is_compact ? const_index : 0;
3003 for (unsigned chan = 0; chan < 8; chan++) {
3004 if (!(writemask & (1 << chan)))
3005 continue;
3006 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
3007
3008 if (store_lds || is_tess_factor) {
3009 LLVMValueRef dw_addr_chan =
3010 LLVMBuildAdd(ctx->builder, dw_addr,
3011 LLVMConstInt(ctx->ac.i32, chan, false), "");
3012 ac_lds_store(&ctx->ac, dw_addr_chan, value);
3013 }
3014
3015 if (!is_tess_factor && writemask != 0xF)
3016 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
3017 buf_addr, ctx->oc_lds,
3018 4 * (base + chan), 1, 0, true, false);
3019 }
3020
3021 if (writemask == 0xF) {
3022 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
3023 buf_addr, ctx->oc_lds,
3024 (base * 4), 1, 0, true, false);
3025 }
3026 }
3027
3028 static LLVMValueRef
3029 load_tes_input(struct ac_shader_abi *abi,
3030 LLVMValueRef vertex_index,
3031 LLVMValueRef param_index,
3032 unsigned const_index,
3033 unsigned location,
3034 unsigned driver_location,
3035 unsigned component,
3036 unsigned num_components,
3037 bool is_patch,
3038 bool is_compact,
3039 bool load_input)
3040 {
3041 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
3042 LLVMValueRef buf_addr;
3043 LLVMValueRef result;
3044 unsigned param = shader_io_get_unique_index(location);
3045
3046 if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) {
3047 const_index -= 3;
3048 param++;
3049 }
3050
3051 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
3052 is_compact, vertex_index, param_index);
3053
3054 LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false);
3055 buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
3056
3057 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL,
3058 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
3059 result = trim_vector(&ctx->ac, result, num_components);
3060 return result;
3061 }
3062
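/* Read a GS input: on GFX9 the merged ES/GS stage keeps ES outputs in
 * LDS, so use an LDS load; on older chips fetch from the ESGS ring. */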
3063 static LLVMValueRef
3064 load_gs_input(struct ac_shader_abi *abi,
3065 unsigned location,
3066 unsigned driver_location,
3067 unsigned component,
3068 unsigned num_components,
3069 unsigned vertex_index,
3070 unsigned const_index,
3071 LLVMTypeRef type)
3072 {
3073 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
3074 LLVMValueRef vtx_offset;
3075 LLVMValueRef args[9];
3076 unsigned param, vtx_offset_param;
3077 LLVMValueRef value[4], result;
3078
3079 vtx_offset_param = vertex_index;
3080 assert(vtx_offset_param < 6);
3081 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
3082 LLVMConstInt(ctx->ac.i32, 4, false), "");
3083
3084 param = shader_io_get_unique_index(location);
3085
3086 for (unsigned i = component; i < num_components + component; i++) {
3087 if (ctx->ac.chip_class >= GFX9) {
3088 LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
3089 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
3090 LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
3091 value[i] = ac_lds_load(&ctx->ac, dw_addr);
3092 } else {
3093 args[0] = ctx->esgs_ring;
3094 args[1] = vtx_offset;
3095 args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
3096 args[3] = ctx->ac.i32_0;
3097 args[4] = ctx->ac.i32_1; /* OFFEN */
3098 args[5] = ctx->ac.i32_0; /* IDXEN */
3099 args[6] = ctx->ac.i32_1; /* GLC */
3100 args[7] = ctx->ac.i32_0; /* SLC */
3101 args[8] = ctx->ac.i32_0; /* TFE */
3102
3103 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
3104 ctx->ac.i32, args, 9,
3105 AC_FUNC_ATTR_READONLY |
3106 AC_FUNC_ATTR_LEGACY);
3107 }
3108 }
3109 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
3110
3111 return result;
3112 }
3113
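/* Turn a NIR deref chain into an LLVM GEP on the variable's pointer,
 * adding one index per array or struct level. */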
3114 static LLVMValueRef
3115 build_gep_for_deref(struct ac_nir_context *ctx,
3116 nir_deref_var *deref)
3117 {
3118 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
3119 assert(entry->data);
3120 LLVMValueRef val = entry->data;
3121 nir_deref *tail = deref->deref.child;
3122 while (tail != NULL) {
3123 LLVMValueRef offset;
3124 switch (tail->deref_type) {
3125 case nir_deref_type_array: {
3126 nir_deref_array *array = nir_deref_as_array(tail);
3127 offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0);
3128 if (array->deref_array_type ==
3129 nir_deref_array_type_indirect) {
3130 offset = LLVMBuildAdd(ctx->ac.builder, offset,
3131 get_src(ctx,
3132 array->indirect),
3133 "");
3134 }
3135 break;
3136 }
3137 case nir_deref_type_struct: {
3138 nir_deref_struct *deref_struct =
3139 nir_deref_as_struct(tail);
3140 offset = LLVMConstInt(ctx->ac.i32,
3141 deref_struct->index, 0);
3142 break;
3143 }
3144 default:
3145 unreachable("bad deref type");
3146 }
3147 val = ac_build_gep0(&ctx->ac, val, offset);
3148 tail = tail->child;
3149 }
3150 return val;
3151 }
3152
3153 static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
3154 nir_intrinsic_instr *instr,
3155 bool load_inputs)
3156 {
3157 LLVMValueRef result;
3158 LLVMValueRef vertex_index = NULL;
3159 LLVMValueRef indir_index = NULL;
3160 unsigned const_index = 0;
3161 unsigned location = instr->variables[0]->var->data.location;
3162 unsigned driver_location = instr->variables[0]->var->data.driver_location;
3163 const bool is_patch = instr->variables[0]->var->data.patch;
3164 const bool is_compact = instr->variables[0]->var->data.compact;
3165
3166 get_deref_offset(ctx, instr->variables[0],
3167 false, NULL, is_patch ? NULL : &vertex_index,
3168 &const_index, &indir_index);
3169
3170 result = ctx->abi->load_tess_varyings(ctx->abi, vertex_index, indir_index,
3171 const_index, location, driver_location,
3172 instr->variables[0]->var->data.location_frac,
3173 instr->num_components,
3174 is_patch, is_compact, load_inputs);
3175 return LLVMBuildBitCast(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), "");
3176 }
3177
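/* Load a NIR variable. Inputs and outputs live in per-channel LLVM
 * values (gathered into a vector plus a dynamic extract for indirect
 * indexing); locals are loaded from their backing pointers, and shared
 * variables go through an LDS GEP. */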
3178 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
3179 nir_intrinsic_instr *instr)
3180 {
3181 LLVMValueRef values[8];
3182 int idx = instr->variables[0]->var->data.driver_location;
3183 int ve = instr->dest.ssa.num_components;
3184 unsigned comp = instr->variables[0]->var->data.location_frac;
3185 LLVMValueRef indir_index;
3186 LLVMValueRef ret;
3187 unsigned const_index;
3188 unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
3189 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
3190 instr->variables[0]->var->data.mode == nir_var_shader_in;
3191 get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
3192 &const_index, &indir_index);
3193
3194 if (instr->dest.ssa.bit_size == 64)
3195 ve *= 2;
3196
3197 switch (instr->variables[0]->var->data.mode) {
3198 case nir_var_shader_in:
3199 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
3200 ctx->stage == MESA_SHADER_TESS_EVAL) {
3201 return load_tess_varyings(ctx, instr, true);
3202 }
3203
3204 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3205 LLVMValueRef indir_index;
3206 unsigned const_index, vertex_index;
3207 get_deref_offset(ctx, instr->variables[0],
3208 false, &vertex_index, NULL,
3209 &const_index, &indir_index);
3210 return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location,
3211 instr->variables[0]->var->data.driver_location,
3212 instr->variables[0]->var->data.location_frac, ve,
3213 vertex_index, const_index,
3214 nir2llvmtype(ctx, instr->variables[0]->var->type));
3215 }
3216
3217 for (unsigned chan = comp; chan < ve + comp; chan++) {
3218 if (indir_index) {
3219 unsigned count = glsl_count_attribute_slots(
3220 instr->variables[0]->var->type,
3221 ctx->stage == MESA_SHADER_VERTEX);
3222 count -= chan / 4;
3223 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3224 &ctx->ac, ctx->abi->inputs + idx + chan, count,
3225 stride, false, true);
3226
3227 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3228 tmp_vec,
3229 indir_index, "");
3230 } else
3231 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
3232 }
3233 break;
3234 case nir_var_local:
3235 for (unsigned chan = 0; chan < ve; chan++) {
3236 if (indir_index) {
3237 unsigned count = glsl_count_attribute_slots(
3238 instr->variables[0]->var->type, false);
3239 count -= chan / 4;
3240 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3241 &ctx->ac, ctx->locals + idx + chan, count,
3242 stride, true, true);
3243
3244 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3245 tmp_vec,
3246 indir_index, "");
3247 } else {
3248 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
3249 }
3250 }
3251 break;
3252 case nir_var_shared: {
3253 LLVMValueRef address = build_gep_for_deref(ctx,
3254 instr->variables[0]);
3255 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
3256 return LLVMBuildBitCast(ctx->ac.builder, val,
3257 get_def_type(ctx, &instr->dest.ssa),
3258 "");
3259 }
3260 case nir_var_shader_out:
3261 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3262 return load_tess_varyings(ctx, instr, false);
3263 }
3264
3265 for (unsigned chan = comp; chan < ve + comp; chan++) {
3266 if (indir_index) {
3267 unsigned count = glsl_count_attribute_slots(
3268 instr->variables[0]->var->type, false);
3269 count -= chan / 4;
3270 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3271 &ctx->ac, ctx->outputs + idx + chan, count,
3272 stride, true, true);
3273
3274 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3275 tmp_vec,
3276 indir_index, "");
3277 } else {
3278 values[chan] = LLVMBuildLoad(ctx->ac.builder,
3279 ctx->outputs[idx + chan + const_index * stride],
3280 "");
3281 }
3282 }
3283 break;
3284 default:
3285 unreachable("unhandled variable mode");
3286 }
3287 ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
3288 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
3289 }
3290
3291 static void
3292 visit_store_var(struct ac_nir_context *ctx,
3293 nir_intrinsic_instr *instr)
3294 {
3295 LLVMValueRef temp_ptr, value;
3296 int idx = instr->variables[0]->var->data.driver_location;
3297 unsigned comp = instr->variables[0]->var->data.location_frac;
3298 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
3299 int writemask = instr->const_index[0] << comp;
3300 LLVMValueRef indir_index;
3301 unsigned const_index;
3302 get_deref_offset(ctx, instr->variables[0], false,
3303 NULL, NULL, &const_index, &indir_index);
3304
3305 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
3306
3307 src = LLVMBuildBitCast(ctx->ac.builder, src,
3308 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
3309 "");
3310
3311 writemask = widen_mask(writemask, 2);
3312 }
3313
3314 switch (instr->variables[0]->var->data.mode) {
3315 case nir_var_shader_out:
3316
3317 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3318 LLVMValueRef vertex_index = NULL;
3319 LLVMValueRef indir_index = NULL;
3320 unsigned const_index = 0;
3321 const unsigned location = instr->variables[0]->var->data.location;
3322 const unsigned driver_location = instr->variables[0]->var->data.driver_location;
3323 const unsigned comp = instr->variables[0]->var->data.location_frac;
3324 const bool is_patch = instr->variables[0]->var->data.patch;
3325 const bool is_compact = instr->variables[0]->var->data.compact;
3326
3327 get_deref_offset(ctx, instr->variables[0],
3328 false, NULL, is_patch ? NULL : &vertex_index,
3329 &const_index, &indir_index);
3330
3331 ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index,
3332 const_index, location, driver_location,
3333 src, comp, is_patch, is_compact, writemask);
3334 return;
3335 }
3336
3337 for (unsigned chan = 0; chan < 8; chan++) {
3338 int stride = 4;
3339 if (!(writemask & (1 << chan)))
3340 continue;
3341
3342 value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
3343
3344 if (instr->variables[0]->var->data.compact)
3345 stride = 1;
3346 if (indir_index) {
3347 unsigned count = glsl_count_attribute_slots(
3348 instr->variables[0]->var->type, false);
3349 count -= chan / 4;
3350 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3351 &ctx->ac, ctx->outputs + idx + chan, count,
3352 stride, true, true);
3353
3354 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
3355 value, indir_index, "");
3356 build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan,
3357 count, stride, tmp_vec);
3358
3359 } else {
3360 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
3361
3362 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
3363 }
3364 }
3365 break;
3366 case nir_var_local:
3367 for (unsigned chan = 0; chan < 8; chan++) {
3368 if (!(writemask & (1 << chan)))
3369 continue;
3370
3371 value = ac_llvm_extract_elem(&ctx->ac, src, chan);
3372 if (indir_index) {
3373 unsigned count = glsl_count_attribute_slots(
3374 instr->variables[0]->var->type, false);
3375 count -= chan / 4;
3376 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3377 &ctx->ac, ctx->locals + idx + chan, count,
3378 4, true, true);
3379
3380 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
3381 value, indir_index, "");
3382 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
3383 count, 4, tmp_vec);
3384 } else {
3385 temp_ptr = ctx->locals[idx + chan + const_index * 4];
3386
3387 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
3388 }
3389 }
3390 break;
3391 case nir_var_shared: {
3392 int writemask = instr->const_index[0];
3393 LLVMValueRef address = build_gep_for_deref(ctx,
3394 instr->variables[0]);
3395 LLVMValueRef val = get_src(ctx, instr->src[0]);
3396 unsigned components =
3397 glsl_get_vector_elements(
3398 nir_deref_tail(&instr->variables[0]->deref)->type);
3399 if (writemask == (1 << components) - 1) {
3400 val = LLVMBuildBitCast(
3401 ctx->ac.builder, val,
3402 LLVMGetElementType(LLVMTypeOf(address)), "");
3403 LLVMBuildStore(ctx->ac.builder, val, address);
3404 } else {
3405 for (unsigned chan = 0; chan < 4; chan++) {
3406 if (!(writemask & (1 << chan)))
3407 continue;
3408 LLVMValueRef ptr =
3409 LLVMBuildStructGEP(ctx->ac.builder,
3410 address, chan, "");
3411 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
3412 chan);
3413 src = LLVMBuildBitCast(
3414 ctx->ac.builder, src,
3415 LLVMGetElementType(LLVMTypeOf(ptr)), "");
3416 LLVMBuildStore(ctx->ac.builder, src, ptr);
3417 }
3418 }
3419 break;
3420 }
3421 default:
3422 break;
3423 }
3424 }
3425
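/* Number of coordinate components needed to address an image of the given
 * dimensionality, including the layer for arrays and the fragment id for
 * MSAA surfaces. */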
3426 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
3427 {
3428 switch (dim) {
3429 case GLSL_SAMPLER_DIM_BUF:
3430 return 1;
3431 case GLSL_SAMPLER_DIM_1D:
3432 return array ? 2 : 1;
3433 case GLSL_SAMPLER_DIM_2D:
3434 return array ? 3 : 2;
3435 case GLSL_SAMPLER_DIM_MS:
3436 return array ? 4 : 3;
3437 case GLSL_SAMPLER_DIM_3D:
3438 case GLSL_SAMPLER_DIM_CUBE:
3439 return 3;
3440 case GLSL_SAMPLER_DIM_RECT:
3441 case GLSL_SAMPLER_DIM_SUBPASS:
3442 return 2;
3443 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3444 return 3;
3445 default:
3446 break;
3447 }
3448 return 0;
3449 }
3450
3451
3453 /* Adjust the sample index according to FMASK.
3454 *
3455 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3456 * which is the identity mapping. Each nibble says which physical sample
3457 * should be fetched to get that sample.
3458 *
3459 * For example, 0x11111100 means there are only 2 samples stored and
3460 * the second sample covers 3/4 of the pixel. When reading samples 0
3461 * and 1, return physical sample 0 (determined by the first two 0s
3462 * in FMASK), otherwise return physical sample 1.
3463 *
3464 * The sample index should be adjusted as follows:
3465 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3466 */
3467 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
3468 LLVMValueRef coord_x, LLVMValueRef coord_y,
3469 LLVMValueRef coord_z,
3470 LLVMValueRef sample_index,
3471 LLVMValueRef fmask_desc_ptr)
3472 {
3473 LLVMValueRef fmask_load_address[4];
3474 LLVMValueRef res;
3475
3476 fmask_load_address[0] = coord_x;
3477 fmask_load_address[1] = coord_y;
3478 if (coord_z) {
3479 fmask_load_address[2] = coord_z;
3480 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
3481 }
3482
3483 struct ac_image_args args = {0};
3484
3485 args.opcode = ac_image_load;
3486 args.da = coord_z ? true : false;
3487 args.resource = fmask_desc_ptr;
3488 args.dmask = 0xf;
3489 args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
3490
3491 res = ac_build_image_opcode(ctx, &args);
3492
3493 res = ac_to_integer(ctx, res);
3494 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3495 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3496
3497 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3498 res,
3499 ctx->i32_0, "");
3500
3501 LLVMValueRef sample_index4 =
3502 LLVMBuildMul(ctx->builder, sample_index, four, "");
3503 LLVMValueRef shifted_fmask =
3504 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3505 LLVMValueRef final_sample =
3506 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3507
3508 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3509 	 * resource descriptor is 0 (invalid).
3510 */
3511 LLVMValueRef fmask_desc =
3512 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
3513 ctx->v8i32, "");
3514
3515 LLVMValueRef fmask_word1 =
3516 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3517 ctx->i32_1, "");
3518
3519 LLVMValueRef word1_is_nonzero =
3520 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3521 fmask_word1, ctx->i32_0, "");
3522
3523 /* Replace the MSAA sample index. */
3524 sample_index =
3525 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3526 final_sample, sample_index, "");
3527 return sample_index;
3528 }
3529
3530 static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
3531 const nir_intrinsic_instr *instr)
3532 {
3533 const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type);
3534
3535 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
3536 LLVMValueRef coords[4];
3537 LLVMValueRef masks[] = {
3538 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
3539 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
3540 };
3541 LLVMValueRef res;
3542 LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
3543
3544 int count;
3545 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
3546 bool is_array = glsl_sampler_type_is_array(type);
3547 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
3548 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3549 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
3550 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3551 bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
3552 count = image_type_to_components_count(dim, is_array);
3553
3554 if (is_ms) {
3555 LLVMValueRef fmask_load_address[3];
3556 int chan;
3557
3558 fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
3559 fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
3560 if (is_array)
3561 fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
3562 else
3563 fmask_load_address[2] = NULL;
3564 if (add_frag_pos) {
3565 for (chan = 0; chan < 2; ++chan)
3566 fmask_load_address[chan] =
3567 LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan],
3568 LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
3569 ctx->ac.i32, ""), "");
3570 fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
3571 }
3572 sample_index = adjust_sample_index_using_fmask(&ctx->ac,
3573 fmask_load_address[0],
3574 fmask_load_address[1],
3575 fmask_load_address[2],
3576 sample_index,
3577 get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false));
3578 }
3579 if (count == 1 && !gfx9_1d) {
3580 if (instr->src[0].ssa->num_components)
3581 res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
3582 else
3583 res = src0;
3584 } else {
3585 int chan;
3586 if (is_ms)
3587 count--;
3588 for (chan = 0; chan < count; ++chan) {
3589 coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
3590 }
3591 if (add_frag_pos) {
3592 for (chan = 0; chan < 2; ++chan)
3593 coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
3594 ctx->ac.i32, ""), "");
3595 coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
3596 count++;
3597 }
3598
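		/* GFX9 addresses 1D textures as 2D, so insert a zero Y
		 * coordinate (moving the layer up one slot for arrays). */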
3599 if (gfx9_1d) {
3600 			if (is_array)
3601 				coords[2] = coords[1];
3602 			coords[1] = ctx->ac.i32_0;
3605 count++;
3606 }
3607
3608 if (is_ms) {
3609 coords[count] = sample_index;
3610 count++;
3611 }
3612
3613 if (count == 3) {
3614 coords[3] = LLVMGetUndef(ctx->ac.i32);
3615 count = 4;
3616 }
3617 res = ac_build_gather_values(&ctx->ac, coords, count);
3618 }
3619 return res;
3620 }
3621
3622 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
3623 const nir_intrinsic_instr *instr)
3624 {
3625 LLVMValueRef params[7];
3626 LLVMValueRef res;
3627 char intrinsic_name[64];
3628 const nir_variable *var = instr->variables[0]->var;
3629 const struct glsl_type *type = var->type;
3630
3631 	if (instr->variables[0]->deref.child)
3632 type = instr->variables[0]->deref.child->type;
3633
3634 type = glsl_without_array(type);
3635 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3636 params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false);
3637 params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3638 ctx->ac.i32_0, ""); /* vindex */
3639 params[2] = ctx->ac.i32_0; /* voffset */
3640 params[3] = ctx->ac.i1false; /* glc */
3641 params[4] = ctx->ac.i1false; /* slc */
3642 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32,
3643 params, 5, 0);
3644
3645 res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
3646 res = ac_to_integer(&ctx->ac, res);
3647 } else {
3648 bool is_da = glsl_sampler_type_is_array(type) ||
3649 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
3650 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D ||
3651 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS ||
3652 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
3653 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
3654 LLVMValueRef glc = ctx->ac.i1false;
3655 LLVMValueRef slc = ctx->ac.i1false;
3656
3657 params[0] = get_image_coords(ctx, instr);
3658 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
3659 params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
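		/* LLVM <= 3.9 used the old image intrinsic signature with
		 * r128/da/glc/slc; newer LLVM takes glc/slc/lwe/da. */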
3660 if (HAVE_LLVM <= 0x0309) {
3661 params[3] = ctx->ac.i1false; /* r128 */
3662 params[4] = da;
3663 params[5] = glc;
3664 params[6] = slc;
3665 } else {
3666 LLVMValueRef lwe = ctx->ac.i1false;
3667 params[3] = glc;
3668 params[4] = slc;
3669 params[5] = lwe;
3670 params[6] = da;
3671 }
3672
3673 ac_get_image_intr_name("llvm.amdgcn.image.load",
3674 ctx->ac.v4f32, /* vdata */
3675 LLVMTypeOf(params[0]), /* coords */
3676 LLVMTypeOf(params[1]), /* rsrc */
3677 intrinsic_name, sizeof(intrinsic_name));
3678
3679 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32,
3680 params, 7, AC_FUNC_ATTR_READONLY);
3681 }
3682 return ac_to_integer(&ctx->ac, res);
3683 }
3684
3685 static void visit_image_store(struct ac_nir_context *ctx,
3686 nir_intrinsic_instr *instr)
3687 {
3688 LLVMValueRef params[8];
3689 char intrinsic_name[64];
3690 const nir_variable *var = instr->variables[0]->var;
3691 const struct glsl_type *type = glsl_without_array(var->type);
3692 LLVMValueRef glc = ctx->ac.i1false;
3693 bool force_glc = ctx->ac.chip_class == SI;
3694 if (force_glc)
3695 glc = ctx->ac.i1true;
3696
3697 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3698 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
3699 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
3700 params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3701 ctx->ac.i32_0, ""); /* vindex */
3702 params[3] = ctx->ac.i32_0; /* voffset */
3703 params[4] = glc; /* glc */
3704 params[5] = ctx->ac.i1false; /* slc */
3705 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
3706 params, 6, 0);
3707 } else {
3708 bool is_da = glsl_sampler_type_is_array(type) ||
3709 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
3710 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D;
3711 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
3712 LLVMValueRef slc = ctx->ac.i1false;
3713
3714 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2]));
3715 params[1] = get_image_coords(ctx, instr); /* coords */
3716 params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true);
3717 params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
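		/* Same LLVM-version split as in visit_image_load above. */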
3718 if (HAVE_LLVM <= 0x0309) {
3719 params[4] = ctx->ac.i1false; /* r128 */
3720 params[5] = da;
3721 params[6] = glc;
3722 params[7] = slc;
3723 } else {
3724 LLVMValueRef lwe = ctx->ac.i1false;
3725 params[4] = glc;
3726 params[5] = slc;
3727 params[6] = lwe;
3728 params[7] = da;
3729 }
3730
3731 ac_get_image_intr_name("llvm.amdgcn.image.store",
3732 LLVMTypeOf(params[0]), /* vdata */
3733 LLVMTypeOf(params[1]), /* coords */
3734 LLVMTypeOf(params[2]), /* rsrc */
3735 intrinsic_name, sizeof(intrinsic_name));
3736
3737 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt,
3738 params, 8, 0);
3739 }
3741 }
3742
3743 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
3744 const nir_intrinsic_instr *instr)
3745 {
3746 LLVMValueRef params[7];
3747 int param_count = 0;
3748 const nir_variable *var = instr->variables[0]->var;
3749
3750 const char *atomic_name;
3751 char intrinsic_name[41];
3752 const struct glsl_type *type = glsl_without_array(var->type);
3753 MAYBE_UNUSED int length;
3754
3755 bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
3756
3757 switch (instr->intrinsic) {
3758 case nir_intrinsic_image_atomic_add:
3759 atomic_name = "add";
3760 break;
3761 case nir_intrinsic_image_atomic_min:
3762 atomic_name = is_unsigned ? "umin" : "smin";
3763 break;
3764 case nir_intrinsic_image_atomic_max:
3765 atomic_name = is_unsigned ? "umax" : "smax";
3766 break;
3767 case nir_intrinsic_image_atomic_and:
3768 atomic_name = "and";
3769 break;
3770 case nir_intrinsic_image_atomic_or:
3771 atomic_name = "or";
3772 break;
3773 case nir_intrinsic_image_atomic_xor:
3774 atomic_name = "xor";
3775 break;
3776 case nir_intrinsic_image_atomic_exchange:
3777 atomic_name = "swap";
3778 break;
3779 case nir_intrinsic_image_atomic_comp_swap:
3780 atomic_name = "cmpswap";
3781 break;
3782 default:
3783 abort();
3784 }
3785
3786 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
3787 params[param_count++] = get_src(ctx, instr->src[3]);
3788 params[param_count++] = get_src(ctx, instr->src[2]);
3789
3790 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3791 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
3792 NULL, true, true);
3793 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3794 ctx->ac.i32_0, ""); /* vindex */
3795 params[param_count++] = ctx->ac.i32_0; /* voffset */
3796 params[param_count++] = ctx->ac.i1false; /* slc */
3797
3798 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3799 "llvm.amdgcn.buffer.atomic.%s", atomic_name);
3800 } else {
3801 char coords_type[8];
3802
3803 bool da = glsl_sampler_type_is_array(type) ||
3804 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3805
3806 LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr);
3807 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE,
3808 NULL, true, true);
3809 params[param_count++] = ctx->ac.i1false; /* r128 */
3810 params[param_count++] = da ? ctx->ac.i1true : ctx->ac.i1false; /* da */
3811 params[param_count++] = ctx->ac.i1false; /* slc */
3812
3813 build_int_type_name(LLVMTypeOf(coords),
3814 coords_type, sizeof(coords_type));
3815
3816 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3817 "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type);
3818 }
3819
3820 assert(length < sizeof(intrinsic_name));
3821 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
3822 }
3823
3824 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
3825 const nir_intrinsic_instr *instr)
3826 {
3827 LLVMValueRef res;
3828 const nir_variable *var = instr->variables[0]->var;
3829 	const struct glsl_type *type = var->type;
3830 bool da = glsl_sampler_type_is_array(var->type) ||
3831 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE ||
3832 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_3D;
3833 	if (instr->variables[0]->deref.child)
3834 type = instr->variables[0]->deref.child->type;
3835
3836 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
3837 return get_buffer_size(ctx,
3838 get_sampler_desc(ctx, instr->variables[0],
3839 AC_DESC_BUFFER, NULL, true, false), true);
3840
3841 struct ac_image_args args = { 0 };
3842
3843 args.da = da;
3844 args.dmask = 0xf;
3845 args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
3846 args.opcode = ac_image_get_resinfo;
3847 args.addr = ctx->ac.i32_0;
3848
3849 res = ac_build_image_opcode(&ctx->ac, &args);
3850
3851 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
3852
3853 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
3854 glsl_sampler_type_is_array(type)) {
3855 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
3856 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3857 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
3858 res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
3859 }
3860 if (ctx->ac.chip_class >= GFX9 &&
3861 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
3862 glsl_sampler_type_is_array(type)) {
3863 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3864 res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
3865 ctx->ac.i32_1, "");
3867 }
3868 return res;
3869 }
3870
3871 #define NOOP_WAITCNT 0xf7f
3872 #define LGKM_CNT 0x07f
3873 #define VM_CNT 0xf70
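/* s_waitcnt immediate layout, as encoded by the macros above: vmcnt in
 * bits 3:0, expcnt in bits 6:4, lgkmcnt in bits 11:8. A field of all ones
 * means "don't wait on this counter", so ANDing NOOP_WAITCNT with VM_CNT
 * or LGKM_CNT zeroes the corresponding field and forces a wait. */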
3874
3875 static void emit_membar(struct nir_to_llvm_context *ctx,
3876 const nir_intrinsic_instr *instr)
3877 {
3878 unsigned waitcnt = NOOP_WAITCNT;
3879
3880 switch (instr->intrinsic) {
3881 case nir_intrinsic_memory_barrier:
3882 case nir_intrinsic_group_memory_barrier:
3883 waitcnt &= VM_CNT & LGKM_CNT;
3884 break;
3885 case nir_intrinsic_memory_barrier_atomic_counter:
3886 case nir_intrinsic_memory_barrier_buffer:
3887 case nir_intrinsic_memory_barrier_image:
3888 waitcnt &= VM_CNT;
3889 break;
3890 case nir_intrinsic_memory_barrier_shared:
3891 waitcnt &= LGKM_CNT;
3892 break;
3893 default:
3894 break;
3895 }
3896 if (waitcnt != NOOP_WAITCNT)
3897 ac_build_waitcnt(&ctx->ac, waitcnt);
3898 }
3899
3900 static void emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
3901 {
3902 /* SI only (thanks to a hw bug workaround):
3903 	 * The real barrier instruction isn't needed, because an entire patch
3904 * always fits into a single wave.
3905 */
3906 if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
3907 ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
3908 return;
3909 }
3910 ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier",
3911 ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3912 }
3913
3914 static void emit_discard(struct ac_nir_context *ctx,
3915 const nir_intrinsic_instr *instr)
3916 {
3917 LLVMValueRef cond;
3918
3919 if (instr->intrinsic == nir_intrinsic_discard_if) {
3920 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
3921 get_src(ctx, instr->src[0]),
3922 ctx->ac.i32_0, "");
3923 } else {
3924 assert(instr->intrinsic == nir_intrinsic_discard);
3925 cond = LLVMConstInt(ctx->ac.i1, false, 0);
3926 }
3927
3928 ac_build_kill_if_false(&ctx->ac, cond);
3929 }
3930
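/* llvm.amdgcn.ps.live is true for lanes that shade a covered fragment;
 * helper invocations are exactly the complement, hence the NOT. */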
3931 static LLVMValueRef
3932 visit_load_helper_invocation(struct ac_nir_context *ctx)
3933 {
3934 LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
3935 "llvm.amdgcn.ps.live",
3936 ctx->ac.i1, NULL, 0,
3937 AC_FUNC_ATTR_READNONE);
3938 result = LLVMBuildNot(ctx->ac.builder, result, "");
3939 return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
3940 }
3941
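/* tg_size holds the wave id within the workgroup in bits 11:6, so masking
 * with 0xfc0 yields wave_id * 64 (assuming a wave size of 64); adding the
 * lane id gives the flat local invocation index. */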
3942 static LLVMValueRef
3943 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
3944 {
3945 LLVMValueRef result;
3946 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
3947 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
3948 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3949
3950 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
3951 }
3952
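/* Shared-variable atomics operate on LDS and map directly onto LLVM's
 * atomicrmw/cmpxchg instructions on the GEP'd pointer. */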
3953 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
3954 const nir_intrinsic_instr *instr)
3955 {
3956 LLVMValueRef ptr, result;
3957 LLVMValueRef src = get_src(ctx->nir, instr->src[0]);
3958 ptr = build_gep_for_deref(ctx->nir, instr->variables[0]);
3959
3960 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
3961 LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]);
3962 result = LLVMBuildAtomicCmpXchg(ctx->builder,
3963 ptr, src, src1,
3964 LLVMAtomicOrderingSequentiallyConsistent,
3965 LLVMAtomicOrderingSequentiallyConsistent,
3966 false);
3967 } else {
3968 LLVMAtomicRMWBinOp op;
3969 switch (instr->intrinsic) {
3970 case nir_intrinsic_var_atomic_add:
3971 op = LLVMAtomicRMWBinOpAdd;
3972 break;
3973 case nir_intrinsic_var_atomic_umin:
3974 op = LLVMAtomicRMWBinOpUMin;
3975 break;
3976 case nir_intrinsic_var_atomic_umax:
3977 op = LLVMAtomicRMWBinOpUMax;
3978 break;
3979 case nir_intrinsic_var_atomic_imin:
3980 op = LLVMAtomicRMWBinOpMin;
3981 break;
3982 case nir_intrinsic_var_atomic_imax:
3983 op = LLVMAtomicRMWBinOpMax;
3984 break;
3985 case nir_intrinsic_var_atomic_and:
3986 op = LLVMAtomicRMWBinOpAnd;
3987 break;
3988 case nir_intrinsic_var_atomic_or:
3989 op = LLVMAtomicRMWBinOpOr;
3990 break;
3991 case nir_intrinsic_var_atomic_xor:
3992 op = LLVMAtomicRMWBinOpXor;
3993 break;
3994 case nir_intrinsic_var_atomic_exchange:
3995 op = LLVMAtomicRMWBinOpXchg;
3996 break;
3997 default:
3998 return NULL;
3999 }
4000
4001 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, ac_to_integer(&ctx->ac, src),
4002 LLVMAtomicOrderingSequentiallyConsistent,
4003 false);
4004 }
4005 return result;
4006 }
4007
4008 #define INTERP_CENTER 0
4009 #define INTERP_CENTROID 1
4010 #define INTERP_SAMPLE 2
4011
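/* Map an interpolation mode and location to the preloaded barycentric
 * (i, j) pair; flat inputs return NULL and are handled with
 * fs_interp_mov (P0) by the caller. */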
4012 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
4013 enum glsl_interp_mode interp, unsigned location)
4014 {
4015 switch (interp) {
4016 case INTERP_MODE_FLAT:
4017 default:
4018 return NULL;
4019 case INTERP_MODE_SMOOTH:
4020 case INTERP_MODE_NONE:
4021 if (location == INTERP_CENTER)
4022 return ctx->persp_center;
4023 else if (location == INTERP_CENTROID)
4024 return ctx->persp_centroid;
4025 else if (location == INTERP_SAMPLE)
4026 return ctx->persp_sample;
4027 break;
4028 case INTERP_MODE_NOPERSPECTIVE:
4029 if (location == INTERP_CENTER)
4030 return ctx->linear_center;
4031 else if (location == INTERP_CENTROID)
4032 return ctx->linear_centroid;
4033 else if (location == INTERP_SAMPLE)
4034 return ctx->linear_sample;
4035 break;
4036 }
4037 return NULL;
4038 }
4039
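/* The driver uploads sample positions as vec2s into the
 * RING_PS_SAMPLE_POSITIONS slot of the ring-offsets buffer;
 * sample_pos_offset selects the table matching the bound sample count. */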
4040 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
4041 LLVMValueRef sample_id)
4042 {
4043 LLVMValueRef result;
4044 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false));
4045
4046 ptr = LLVMBuildBitCast(ctx->builder, ptr,
4047 const_array(ctx->ac.v2f32, 64), "");
4048
4049 sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
4050 result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
4051
4052 return result;
4053 }
4054
4055 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
4056 {
4057 LLVMValueRef values[2];
4058
4059 values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
4060 values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
4061 return ac_build_gather_values(&ctx->ac, values, 2);
4062 }
4063
4064 static LLVMValueRef load_sample_mask_in(struct ac_nir_context *ctx)
4065 {
4066 	uint8_t log2_ps_iter_samples =
		ctx->nctx->shader_info->info.ps.force_persample ?
		ctx->nctx->options->key.fs.log2_num_samples :
		ctx->nctx->options->key.fs.log2_ps_iter_samples;
4067
4068 /* The bit pattern matches that used by fixed function fragment
4069 * processing. */
4070 static const uint16_t ps_iter_masks[] = {
4071 0xffff, /* not used */
4072 0x5555,
4073 0x1111,
4074 0x0101,
4075 0x0001,
4076 };
4077 assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks));
4078
4079 uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples];
4080
4081 LLVMValueRef result, sample_id;
4082 sample_id = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
4083 sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, "");
4084 result = LLVMBuildAnd(ctx->ac.builder, sample_id, ctx->abi->sample_coverage, "");
4085 return result;
4086 }
4087
4088 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
4089 const nir_intrinsic_instr *instr)
4090 {
4091 LLVMValueRef result[4];
4092 LLVMValueRef interp_param, attr_number;
4093 unsigned location;
4094 unsigned chan;
4095 LLVMValueRef src_c0 = NULL;
4096 LLVMValueRef src_c1 = NULL;
4097 LLVMValueRef src0 = NULL;
4098 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
4099 switch (instr->intrinsic) {
4100 case nir_intrinsic_interp_var_at_centroid:
4101 location = INTERP_CENTROID;
4102 break;
4103 case nir_intrinsic_interp_var_at_sample:
4104 case nir_intrinsic_interp_var_at_offset:
4105 location = INTERP_CENTER;
4106 src0 = get_src(ctx->nir, instr->src[0]);
4107 break;
4108 default:
4109 break;
4110 }
4111
4112 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
4113 src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_0, ""));
4114 src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_1, ""));
4115 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
4116 LLVMValueRef sample_position;
4117 LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
4118
4119 /* fetch sample ID */
4120 sample_position = load_sample_position(ctx, src0);
4121
4122 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_0, "");
4123 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
4124 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_1, "");
4125 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
4126 }
4127 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
4128 attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
4129
4130 if (location == INTERP_CENTER) {
4131 LLVMValueRef ij_out[2];
4132 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param);
4133
4134 /*
4135 * take the I then J parameters, and the DDX/Y for it, and
4136 * calculate the IJ inputs for the interpolator.
4137 * temp1 = ddx * offset/sample.x + I;
4138 * interp_param.I = ddy * offset/sample.y + temp1;
4139 * temp1 = ddx * offset/sample.x + J;
4140 * interp_param.J = ddy * offset/sample.y + temp1;
4141 */
4142 for (unsigned i = 0; i < 2; i++) {
4143 LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
4144 LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
4145 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
4146 ddxy_out, ix_ll, "");
4147 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
4148 ddxy_out, iy_ll, "");
4149 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
4150 interp_param, ix_ll, "");
4151 LLVMValueRef temp1, temp2;
4152
4153 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
4154 ctx->ac.f32, "");
4155
4156 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
4157 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
4158
4159 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
4160 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
4161
4162 ij_out[i] = LLVMBuildBitCast(ctx->builder,
4163 temp2, ctx->ac.i32, "");
4164 }
4165 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
4167 }
4168
4169 for (chan = 0; chan < 4; chan++) {
4170 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
4171
4172 if (interp_param) {
4173 interp_param = LLVMBuildBitCast(ctx->builder,
4174 interp_param, ctx->ac.v2f32, "");
4175 LLVMValueRef i = LLVMBuildExtractElement(
4176 ctx->builder, interp_param, ctx->ac.i32_0, "");
4177 LLVMValueRef j = LLVMBuildExtractElement(
4178 ctx->builder, interp_param, ctx->ac.i32_1, "");
4179
4180 result[chan] = ac_build_fs_interp(&ctx->ac,
4181 llvm_chan, attr_number,
4182 ctx->prim_mask, i, j);
4183 } else {
4184 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
4185 LLVMConstInt(ctx->ac.i32, 2, false),
4186 llvm_chan, attr_number,
4187 ctx->prim_mask);
4188 }
4189 }
4190 return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
4191 instr->variables[0]->var->data.location_frac);
4192 }
4193
4194 static void
4195 visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
4196 {
4197 LLVMValueRef gs_next_vertex;
4198 LLVMValueRef can_emit;
4199 int idx;
4200 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4201
4202 assert(stream == 0);
4203
4204 /* Write vertex attribute values to GSVS ring */
4205 gs_next_vertex = LLVMBuildLoad(ctx->builder,
4206 ctx->gs_next_vertex,
4207 "");
4208
4209 /* If this thread has already emitted the declared maximum number of
4210 * vertices, kill it: excessive vertex emissions are not supposed to
4211 * have any effect, and GS threads have no externally observable
4212 * effects other than emitting vertices.
4213 */
4214 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
4215 LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
4216 ac_build_kill_if_false(&ctx->ac, can_emit);
4217
4218 	/* Loop over all declared outputs and copy them to the GSVS ring. */
4219 idx = 0;
4220 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4221 LLVMValueRef *out_ptr = &addrs[i * 4];
4222 int length = 4;
4223 int slot = idx;
4224 int slot_inc = 1;
4225
4226 if (!(ctx->output_mask & (1ull << i)))
4227 continue;
4228
4229 if (i == VARYING_SLOT_CLIP_DIST0) {
4230 /* pack clip and cull into a single set of slots */
4231 length = ctx->num_output_clips + ctx->num_output_culls;
4232 if (length > 4)
4233 slot_inc = 2;
4234 }
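		/* GSVS ring layout: each output component is stored with a
		 * stride of gs_max_out_vertices dwords, so the dword index is
		 * (slot * 4 + component) * max_vertices + current_vertex,
		 * scaled by 4 below to get a byte offset. */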
4235 for (unsigned j = 0; j < length; j++) {
4236 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
4237 out_ptr[j], "");
4238 LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
4239 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
4240 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
4241
4242 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
4243
4244 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
4245 out_val, 1,
4246 voffset, ctx->gs2vs_offset, 0,
4247 1, 1, true, true);
4248 }
4249 idx += slot_inc;
4250 }
4251
4252 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
4253 ctx->ac.i32_1, "");
4254 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
4255
4256 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
4257 }
4258
4259 static void
4260 visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
4261 {
4262 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4263 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id);
4264 }
4265
4266 static LLVMValueRef
4267 load_tess_coord(struct ac_shader_abi *abi, LLVMTypeRef type,
4268 unsigned num_components)
4269 {
4270 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4271
4272 LLVMValueRef coord[4] = {
4273 ctx->tes_u,
4274 ctx->tes_v,
4275 ctx->ac.f32_0,
4276 ctx->ac.f32_0,
4277 };
4278
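	/* For the triangle domain the third barycentric coordinate is
	 * 1 - u - v; quads and isolines only use u and v. */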
4279 if (ctx->tes_primitive_mode == GL_TRIANGLES)
4280 coord[2] = LLVMBuildFSub(ctx->builder, ctx->ac.f32_1,
4281 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), "");
4282
4283 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, num_components);
4284 return LLVMBuildBitCast(ctx->builder, result, type, "");
4285 }
4286
4287 static LLVMValueRef
4288 load_patch_vertices_in(struct ac_shader_abi *abi)
4289 {
4290 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4291 return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false);
4292 }
4293
4294 static void visit_intrinsic(struct ac_nir_context *ctx,
4295 nir_intrinsic_instr *instr)
4296 {
4297 LLVMValueRef result = NULL;
4298
4299 switch (instr->intrinsic) {
4300 case nir_intrinsic_ballot:
4301 result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
4302 break;
4303 case nir_intrinsic_read_invocation:
4304 case nir_intrinsic_read_first_invocation: {
4305 LLVMValueRef args[2];
4306
4307 /* Value */
4308 args[0] = get_src(ctx, instr->src[0]);
4309
4310 unsigned num_args;
4311 const char *intr_name;
4312 if (instr->intrinsic == nir_intrinsic_read_invocation) {
4313 num_args = 2;
4314 intr_name = "llvm.amdgcn.readlane";
4315
4316 /* Invocation */
4317 args[1] = get_src(ctx, instr->src[1]);
4318 } else {
4319 num_args = 1;
4320 intr_name = "llvm.amdgcn.readfirstlane";
4321 }
4322
4323 /* We currently have no other way to prevent LLVM from lifting the icmp
4324 * calls to a dominating basic block.
4325 */
4326 ac_build_optimization_barrier(&ctx->ac, &args[0]);
4327
4328 result = ac_build_intrinsic(&ctx->ac, intr_name,
4329 ctx->ac.i32, args, num_args,
4330 AC_FUNC_ATTR_READNONE |
4331 AC_FUNC_ATTR_CONVERGENT);
4332 break;
4333 }
4334 case nir_intrinsic_load_subgroup_invocation:
4335 result = ac_get_thread_id(&ctx->ac);
4336 break;
4337 case nir_intrinsic_load_work_group_id: {
4338 LLVMValueRef values[3];
4339
4340 for (int i = 0; i < 3; i++) {
4341 values[i] = ctx->nctx->workgroup_ids[i] ?
4342 ctx->nctx->workgroup_ids[i] : ctx->ac.i32_0;
4343 }
4344
4345 result = ac_build_gather_values(&ctx->ac, values, 3);
4346 break;
4347 }
4348 case nir_intrinsic_load_base_vertex: {
4349 result = ctx->abi->base_vertex;
4350 break;
4351 }
4352 case nir_intrinsic_load_vertex_id_zero_base: {
4353 result = ctx->abi->vertex_id;
4354 break;
4355 }
4356 case nir_intrinsic_load_local_invocation_id: {
4357 result = ctx->nctx->local_invocation_ids;
4358 break;
4359 }
4360 case nir_intrinsic_load_base_instance:
4361 result = ctx->abi->start_instance;
4362 break;
4363 case nir_intrinsic_load_draw_id:
4364 result = ctx->abi->draw_id;
4365 break;
4366 case nir_intrinsic_load_view_index:
4367 result = ctx->nctx->view_index ? ctx->nctx->view_index : ctx->ac.i32_0;
4368 break;
4369 case nir_intrinsic_load_invocation_id:
4370 if (ctx->stage == MESA_SHADER_TESS_CTRL)
4371 result = unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
4372 else
4373 result = ctx->abi->gs_invocation_id;
4374 break;
4375 case nir_intrinsic_load_primitive_id:
4376 if (ctx->stage == MESA_SHADER_GEOMETRY) {
4377 result = ctx->abi->gs_prim_id;
4378 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
4379 result = ctx->abi->tcs_patch_id;
4380 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
4381 result = ctx->abi->tes_patch_id;
4382 } else
4383 		fprintf(stderr, "Unknown primitive id intrinsic: %d\n", ctx->stage);
4384 break;
4385 case nir_intrinsic_load_sample_id:
4386 result = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
4387 break;
4388 case nir_intrinsic_load_sample_pos:
4389 result = load_sample_pos(ctx);
4390 break;
4391 case nir_intrinsic_load_sample_mask_in:
4392 if (ctx->nctx)
4393 result = load_sample_mask_in(ctx);
4394 else
4395 result = ctx->abi->sample_coverage;
4396 break;
4397 case nir_intrinsic_load_frag_coord: {
4398 LLVMValueRef values[4] = {
4399 ctx->abi->frag_pos[0],
4400 ctx->abi->frag_pos[1],
4401 ctx->abi->frag_pos[2],
4402 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
4403 };
4404 result = ac_build_gather_values(&ctx->ac, values, 4);
4405 break;
4406 }
4407 case nir_intrinsic_load_front_face:
4408 result = ctx->abi->front_face;
4409 break;
4410 case nir_intrinsic_load_helper_invocation:
4411 result = visit_load_helper_invocation(ctx);
4412 break;
4413 case nir_intrinsic_load_instance_id:
4414 result = ctx->abi->instance_id;
4415 break;
4416 case nir_intrinsic_load_num_work_groups:
4417 result = ctx->nctx->num_work_groups;
4418 break;
4419 case nir_intrinsic_load_local_invocation_index:
4420 result = visit_load_local_invocation_index(ctx->nctx);
4421 break;
4422 case nir_intrinsic_load_push_constant:
4423 result = visit_load_push_constant(ctx->nctx, instr);
4424 break;
4425 case nir_intrinsic_vulkan_resource_index:
4426 result = visit_vulkan_resource_index(ctx->nctx, instr);
4427 break;
4428 case nir_intrinsic_vulkan_resource_reindex:
4429 result = visit_vulkan_resource_reindex(ctx->nctx, instr);
4430 break;
4431 case nir_intrinsic_store_ssbo:
4432 visit_store_ssbo(ctx, instr);
4433 break;
4434 case nir_intrinsic_load_ssbo:
4435 result = visit_load_buffer(ctx, instr);
4436 break;
4437 case nir_intrinsic_ssbo_atomic_add:
4438 case nir_intrinsic_ssbo_atomic_imin:
4439 case nir_intrinsic_ssbo_atomic_umin:
4440 case nir_intrinsic_ssbo_atomic_imax:
4441 case nir_intrinsic_ssbo_atomic_umax:
4442 case nir_intrinsic_ssbo_atomic_and:
4443 case nir_intrinsic_ssbo_atomic_or:
4444 case nir_intrinsic_ssbo_atomic_xor:
4445 case nir_intrinsic_ssbo_atomic_exchange:
4446 case nir_intrinsic_ssbo_atomic_comp_swap:
4447 result = visit_atomic_ssbo(ctx, instr);
4448 break;
4449 case nir_intrinsic_load_ubo:
4450 result = visit_load_ubo_buffer(ctx, instr);
4451 break;
4452 case nir_intrinsic_get_buffer_size:
4453 result = visit_get_buffer_size(ctx, instr);
4454 break;
4455 case nir_intrinsic_load_var:
4456 result = visit_load_var(ctx, instr);
4457 break;
4458 case nir_intrinsic_store_var:
4459 visit_store_var(ctx, instr);
4460 break;
4461 case nir_intrinsic_image_load:
4462 result = visit_image_load(ctx, instr);
4463 break;
4464 case nir_intrinsic_image_store:
4465 visit_image_store(ctx, instr);
4466 break;
4467 case nir_intrinsic_image_atomic_add:
4468 case nir_intrinsic_image_atomic_min:
4469 case nir_intrinsic_image_atomic_max:
4470 case nir_intrinsic_image_atomic_and:
4471 case nir_intrinsic_image_atomic_or:
4472 case nir_intrinsic_image_atomic_xor:
4473 case nir_intrinsic_image_atomic_exchange:
4474 case nir_intrinsic_image_atomic_comp_swap:
4475 result = visit_image_atomic(ctx, instr);
4476 break;
4477 case nir_intrinsic_image_size:
4478 result = visit_image_size(ctx, instr);
4479 break;
4480 case nir_intrinsic_discard:
4481 case nir_intrinsic_discard_if:
4482 emit_discard(ctx, instr);
4483 break;
4484 case nir_intrinsic_memory_barrier:
4485 case nir_intrinsic_group_memory_barrier:
4486 case nir_intrinsic_memory_barrier_atomic_counter:
4487 case nir_intrinsic_memory_barrier_buffer:
4488 case nir_intrinsic_memory_barrier_image:
4489 case nir_intrinsic_memory_barrier_shared:
4490 emit_membar(ctx->nctx, instr);
4491 break;
4492 case nir_intrinsic_barrier:
4493 emit_barrier(&ctx->ac, ctx->stage);
4494 break;
4495 case nir_intrinsic_var_atomic_add:
4496 case nir_intrinsic_var_atomic_imin:
4497 case nir_intrinsic_var_atomic_umin:
4498 case nir_intrinsic_var_atomic_imax:
4499 case nir_intrinsic_var_atomic_umax:
4500 case nir_intrinsic_var_atomic_and:
4501 case nir_intrinsic_var_atomic_or:
4502 case nir_intrinsic_var_atomic_xor:
4503 case nir_intrinsic_var_atomic_exchange:
4504 case nir_intrinsic_var_atomic_comp_swap:
4505 result = visit_var_atomic(ctx->nctx, instr);
4506 break;
4507 case nir_intrinsic_interp_var_at_centroid:
4508 case nir_intrinsic_interp_var_at_sample:
4509 case nir_intrinsic_interp_var_at_offset:
4510 result = visit_interp(ctx->nctx, instr);
4511 break;
4512 case nir_intrinsic_emit_vertex:
4513 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->outputs);
4514 break;
4515 case nir_intrinsic_end_primitive:
4516 ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
4517 break;
4518 case nir_intrinsic_load_tess_coord: {
4519 LLVMTypeRef type = ctx->nctx ?
4520 get_def_type(ctx->nctx->nir, &instr->dest.ssa) :
4521 NULL;
4522 result = ctx->abi->load_tess_coord(ctx->abi, type, instr->num_components);
4523 break;
4524 }
4525 case nir_intrinsic_load_tess_level_outer:
4526 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
4527 break;
4528 case nir_intrinsic_load_tess_level_inner:
4529 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
4530 break;
4531 case nir_intrinsic_load_patch_vertices_in:
4532 result = ctx->abi->load_patch_vertices_in(ctx->abi);
4533 break;
4534 case nir_intrinsic_vote_all: {
4535 LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
4536 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4537 break;
4538 }
4539 case nir_intrinsic_vote_any: {
4540 LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
4541 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4542 break;
4543 }
4544 case nir_intrinsic_vote_eq: {
4545 LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, get_src(ctx, instr->src[0]));
4546 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4547 break;
4548 }
4549 default:
4550 fprintf(stderr, "Unknown intrinsic: ");
4551 nir_print_instr(&instr->instr, stderr);
4552 fprintf(stderr, "\n");
4553 break;
4554 }
4555 if (result) {
4556 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4557 }
4558 }
4559
4560 static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
4561 LLVMValueRef buffer_ptr, bool write)
4562 {
4563 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4564
4565 return LLVMBuildLoad(ctx->builder, buffer_ptr, "");
4566 }
4567
4568 static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
4569 {
4570 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4571
4572 return LLVMBuildLoad(ctx->builder, buffer_ptr, "");
4573 }
4574
4575 static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
4576 unsigned descriptor_set,
4577 unsigned base_index,
4578 unsigned constant_index,
4579 LLVMValueRef index,
4580 enum ac_descriptor_type desc_type,
4581 bool image, bool write)
4582 {
4583 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4584 LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
4585 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout;
4586 struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
4587 unsigned offset = binding->offset;
4588 unsigned stride = binding->size;
4589 unsigned type_size;
4590 LLVMBuilderRef builder = ctx->builder;
4591 LLVMTypeRef type;
4592
4593 assert(base_index < layout->binding_count);
4594
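	/* The offsets below rely on radv's descriptor layout: a 32-byte
	 * image descriptor, FMASK at +32, and (for combined image+sampler
	 * bindings) the 16-byte sampler at +64. */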
4595 switch (desc_type) {
4596 case AC_DESC_IMAGE:
4597 type = ctx->ac.v8i32;
4598 type_size = 32;
4599 break;
4600 case AC_DESC_FMASK:
4601 type = ctx->ac.v8i32;
4602 offset += 32;
4603 type_size = 32;
4604 break;
4605 case AC_DESC_SAMPLER:
4606 type = ctx->ac.v4i32;
4607 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4608 offset += 64;
4609
4610 type_size = 16;
4611 break;
4612 case AC_DESC_BUFFER:
4613 type = ctx->ac.v4i32;
4614 type_size = 16;
4615 break;
4616 default:
4617 unreachable("invalid desc_type\n");
4618 }
4619
4620 offset += constant_index * stride;
4621
4622 if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
4623 (!index || binding->immutable_samplers_equal)) {
4624 if (binding->immutable_samplers_equal)
4625 constant_index = 0;
4626
4627 const uint32_t *samplers = radv_immutable_samplers(layout, binding);
4628
4629 LLVMValueRef constants[] = {
4630 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
4631 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
4632 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
4633 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
4634 };
4635 return ac_build_gather_values(&ctx->ac, constants, 4);
4636 }
4637
4638 assert(stride % type_size == 0);
4639
4640 if (!index)
4641 index = ctx->ac.i32_0;
4642
4643 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
4644
4645 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
4646 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
4647
4648 return ac_build_load_to_sgpr(&ctx->ac, list, index);
4649 }
4650
4651 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
4652 const nir_deref_var *deref,
4653 enum ac_descriptor_type desc_type,
4654 const nir_tex_instr *tex_instr,
4655 bool image, bool write)
4656 {
4657 LLVMValueRef index = NULL;
4658 unsigned constant_index = 0;
4659 unsigned descriptor_set;
4660 unsigned base_index;
4661
4662 if (!deref) {
4663 assert(tex_instr && !image);
4664 descriptor_set = 0;
4665 base_index = tex_instr->sampler_index;
4666 } else {
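		/* Walk the deref chain, flattening array-of-array indices
		 * into a single constant offset plus an optional dynamic
		 * index. */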
4667 const nir_deref *tail = &deref->deref;
4668 while (tail->child) {
4669 const nir_deref_array *child = nir_deref_as_array(tail->child);
4670 unsigned array_size = glsl_get_aoa_size(tail->child->type);
4671
4672 if (!array_size)
4673 array_size = 1;
4674
4675 assert(child->deref_array_type != nir_deref_array_type_wildcard);
4676
4677 if (child->deref_array_type == nir_deref_array_type_indirect) {
4678 LLVMValueRef indirect = get_src(ctx, child->indirect);
4679
4680 indirect = LLVMBuildMul(ctx->ac.builder, indirect,
4681 LLVMConstInt(ctx->ac.i32, array_size, false), "");
4682
4683 if (!index)
4684 index = indirect;
4685 else
4686 index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
4687 }
4688
4689 constant_index += child->base_offset * array_size;
4690
4691 tail = &child->deref;
4692 }
4693 descriptor_set = deref->var->data.descriptor_set;
4694 base_index = deref->var->data.binding;
4695 }
4696
4697 return ctx->abi->load_sampler_desc(ctx->abi,
4698 descriptor_set,
4699 base_index,
4700 constant_index, index,
4701 desc_type, image, write);
4702 }
4703
4704 static void set_tex_fetch_args(struct ac_llvm_context *ctx,
4705 struct ac_image_args *args,
4706 const nir_tex_instr *instr,
4707 nir_texop op,
4708 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
4709 LLVMValueRef *param, unsigned count,
4710 unsigned dmask)
4711 {
4712 unsigned is_rect = 0;
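	/* Vulkan has no rectangle textures, so coordinates are always
	 * normalized here and is_rect stays 0. */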
4713 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
4714
4715 if (op == nir_texop_lod)
4716 da = false;
4717 /* Pad to power of two vector */
4718 while (count < util_next_power_of_two(count))
4719 param[count++] = LLVMGetUndef(ctx->i32);
4720
4721 if (count > 1)
4722 args->addr = ac_build_gather_values(ctx, param, count);
4723 else
4724 args->addr = param[0];
4725
4726 args->resource = res_ptr;
4727 args->sampler = samp_ptr;
4728
4729 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
4730 args->addr = param[0];
4731 return;
4732 }
4733
4734 args->dmask = dmask;
4735 args->unorm = is_rect;
4736 args->da = da;
4737 }
4738
4739 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4740 *
4741 * SI-CI:
4742 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4743 * filtering manually. The driver sets img7 to a mask clearing
4744 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4745 * s_and_b32 samp0, samp0, img7
4746 *
4747 * VI:
4748 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4749 */
4750 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
4751 LLVMValueRef res, LLVMValueRef samp)
4752 {
4753 LLVMBuilderRef builder = ctx->ac.builder;
4754 LLVMValueRef img7, samp0;
4755
4756 if (ctx->ac.chip_class >= VI)
4757 return samp;
4758
4759 img7 = LLVMBuildExtractElement(builder, res,
4760 LLVMConstInt(ctx->ac.i32, 7, 0), "");
4761 samp0 = LLVMBuildExtractElement(builder, samp,
4762 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4763 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4764 return LLVMBuildInsertElement(builder, samp, samp0,
4765 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4766 }
4767
4768 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
4769 nir_tex_instr *instr,
4770 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4771 LLVMValueRef *fmask_ptr)
4772 {
4773 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4774 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false);
4775 else
4776 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false);
4777 if (samp_ptr) {
4778 if (instr->sampler)
4779 *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false);
4780 else
4781 *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false);
4782 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4783 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4784 }
4785 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
4786 instr->op == nir_texop_samples_identical))
4787 *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false);
4788 }
4789
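/* Array layer coordinates must be rounded to the nearest integer before
 * sampling, per the GL/Vulkan rules for array textures. */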
4790 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
4791 LLVMValueRef coord)
4792 {
4793 coord = ac_to_float(ctx, coord);
4794 coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
4795 coord = ac_to_integer(ctx, coord);
4796 return coord;
4797 }
4798
4799 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4800 {
4801 LLVMValueRef result = NULL;
4802 struct ac_image_args args = { 0 };
4803 unsigned dmask = 0xf;
4804 LLVMValueRef address[16];
4805 LLVMValueRef coords[5];
4806 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
4807 LLVMValueRef bias = NULL, offsets = NULL;
4808 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
4809 LLVMValueRef ddx = NULL, ddy = NULL;
4810 LLVMValueRef derivs[6];
4811 unsigned chan, count = 0;
4812 unsigned const_src = 0, num_deriv_comp = 0;
4813 bool lod_is_zero = false;
4814
4815 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
4816
4817 for (unsigned i = 0; i < instr->num_srcs; i++) {
4818 switch (instr->src[i].src_type) {
4819 case nir_tex_src_coord:
4820 coord = get_src(ctx, instr->src[i].src);
4821 break;
4822 case nir_tex_src_projector:
4823 break;
4824 case nir_tex_src_comparator:
4825 comparator = get_src(ctx, instr->src[i].src);
4826 break;
4827 case nir_tex_src_offset:
4828 offsets = get_src(ctx, instr->src[i].src);
4829 const_src = i;
4830 break;
4831 case nir_tex_src_bias:
4832 bias = get_src(ctx, instr->src[i].src);
4833 break;
4834 case nir_tex_src_lod: {
4835 nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
4836
4837 if (val && val->i32[0] == 0)
4838 lod_is_zero = true;
4839 lod = get_src(ctx, instr->src[i].src);
4840 break;
4841 }
4842 case nir_tex_src_ms_index:
4843 sample_index = get_src(ctx, instr->src[i].src);
4844 break;
4845 case nir_tex_src_ms_mcs:
4846 break;
4847 case nir_tex_src_ddx:
4848 ddx = get_src(ctx, instr->src[i].src);
4849 num_deriv_comp = instr->src[i].src.ssa->num_components;
4850 break;
4851 case nir_tex_src_ddy:
4852 ddy = get_src(ctx, instr->src[i].src);
4853 break;
4854 case nir_tex_src_texture_offset:
4855 case nir_tex_src_sampler_offset:
4856 case nir_tex_src_plane:
4857 default:
4858 break;
4859 }
4860 }
4861
4862 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4863 result = get_buffer_size(ctx, res_ptr, true);
4864 goto write_result;
4865 }
4866
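	/* Read the sample count out of the resource descriptor: dword 3
	 * holds LAST_LEVEL (log2 of the sample count for MSAA) in bits
	 * 19:16 and TYPE in bits 31:28, where types 0xe/0xf are the MSAA
	 * surfaces. */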
4867 if (instr->op == nir_texop_texture_samples) {
4868 LLVMValueRef res, samples, is_msaa;
4869 res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
4870 samples = LLVMBuildExtractElement(ctx->ac.builder, res,
4871 LLVMConstInt(ctx->ac.i32, 3, false), "");
4872 is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
4873 LLVMConstInt(ctx->ac.i32, 28, false), "");
4874 is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
4875 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4876 is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4877 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4878
4879 samples = LLVMBuildLShr(ctx->ac.builder, samples,
4880 LLVMConstInt(ctx->ac.i32, 16, false), "");
4881 samples = LLVMBuildAnd(ctx->ac.builder, samples,
4882 LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4883 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
4884 samples, "");
4885 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
4886 ctx->ac.i32_1, "");
4887 result = samples;
4888 goto write_result;
4889 }
4890
4891 if (coord)
4892 for (chan = 0; chan < instr->coord_components; chan++)
4893 coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4894
4895 if (offsets && instr->op != nir_texop_txf) {
4896 LLVMValueRef offset[3], pack;
4897 for (chan = 0; chan < 3; ++chan)
4898 offset[chan] = ctx->ac.i32_0;
4899
4900 args.offset = true;
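		/* Pack each 6-bit texel offset into its own byte lane of a
		 * single dword, as the hardware expects. */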
4901 for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
4902 offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
4903 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
4904 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4905 if (chan)
4906 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4907 LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4908 }
4909 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4910 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4911 address[count++] = pack;
4913 }
4914 /* pack LOD bias value */
4915 if (instr->op == nir_texop_txb && bias) {
4916 address[count++] = bias;
4917 }
4918
4919 /* Pack depth comparison value */
4920 if (instr->is_shadow && comparator) {
4921 LLVMValueRef z = ac_to_float(&ctx->ac,
4922 ac_llvm_extract_elem(&ctx->ac, comparator, 0));
4923
4924 /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
4925 * so the depth comparison value isn't clamped for Z16 and
4926 * Z24 anymore. Do it manually here.
4927 *
4928 * It's unnecessary if the original texture format was
4929 * Z32_FLOAT, but we don't know that here.
4930 */
4931 if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
4932 z = ac_build_clamp(&ctx->ac, z);
4933
4934 address[count++] = z;
4935 }
4936
4937 /* pack derivatives */
4938 if (ddx || ddy) {
4939 int num_src_deriv_channels, num_dest_deriv_channels;
4940 switch (instr->sampler_dim) {
4941 case GLSL_SAMPLER_DIM_3D:
4942 case GLSL_SAMPLER_DIM_CUBE:
4943 num_deriv_comp = 3;
4944 num_src_deriv_channels = 3;
4945 num_dest_deriv_channels = 3;
4946 break;
4947 case GLSL_SAMPLER_DIM_2D:
4948 default:
4949 num_src_deriv_channels = 2;
4950 num_dest_deriv_channels = 2;
4951 num_deriv_comp = 2;
4952 break;
4953 case GLSL_SAMPLER_DIM_1D:
4954 num_src_deriv_channels = 1;
4955 if (ctx->ac.chip_class >= GFX9) {
4956 num_dest_deriv_channels = 2;
4957 num_deriv_comp = 2;
4958 } else {
4959 num_dest_deriv_channels = 1;
4960 num_deriv_comp = 1;
4961 }
4962 break;
4963 }
4964
4965 for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4966 derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
4967 derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
4968 }
4969 for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4970 derivs[i] = ctx->ac.f32_0;
4971 derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
4972 }
4973 }
4974
4975 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
4976 for (chan = 0; chan < instr->coord_components; chan++)
4977 coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
4978 if (instr->coord_components == 3)
4979 coords[3] = LLVMGetUndef(ctx->ac.f32);
4980 ac_prepare_cube_coords(&ctx->ac,
4981 instr->op == nir_texop_txd, instr->is_array,
4982 instr->op == nir_texop_lod, coords, derivs);
4983 if (num_deriv_comp)
4984 num_deriv_comp--;
4985 }
4986
4987 if (ddx || ddy) {
4988 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
4989 address[count++] = derivs[i];
4990 }
4991
4992 /* Pack texture coordinates */
4993 if (coord) {
4994 address[count++] = coords[0];
4995 if (instr->coord_components > 1) {
4996 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
4997 coords[1] = apply_round_slice(&ctx->ac, coords[1]);
4998 }
4999 address[count++] = coords[1];
5000 }
5001 if (instr->coord_components > 2) {
5002 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
5003 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
5004 instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
5005 instr->op != nir_texop_txf) {
5006 coords[2] = apply_round_slice(&ctx->ac, coords[2]);
5007 }
5008 address[count++] = coords[2];
5009 }
5010
5011 if (ctx->ac.chip_class >= GFX9) {
5012 LLVMValueRef filler;
5013 if (instr->op == nir_texop_txf)
5014 filler = ctx->ac.i32_0;
5015 else
5016 filler = LLVMConstReal(ctx->ac.f32, 0.5);
5017
5018 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
5019 			/* nir_texop_lod is excluded because it does not take
5020 			 * a layer coordinate, even for array textures. */
5021 			if (instr->is_array && instr->op != nir_texop_lod) {
5022 address[count] = address[count - 1];
5023 address[count - 1] = filler;
5024 count++;
5025 } else
5026 address[count++] = filler;
5027 }
5028 }
5029 }
5030
5031 /* Pack LOD */
5032 if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) ||
5033 instr->op == nir_texop_txf)) {
5034 address[count++] = lod;
5035 } else if (instr->op == nir_texop_txf_ms && sample_index) {
5036 address[count++] = sample_index;
5037 	} else if (instr->op == nir_texop_txs) {
5038 count = 0;
5039 if (lod)
5040 address[count++] = lod;
5041 else
5042 address[count++] = ctx->ac.i32_0;
5043 }
5044
5045 for (chan = 0; chan < count; chan++) {
5046 address[chan] = LLVMBuildBitCast(ctx->ac.builder,
5047 address[chan], ctx->ac.i32, "");
5048 }
5049
5050 if (instr->op == nir_texop_samples_identical) {
5051 LLVMValueRef txf_address[4];
5052 struct ac_image_args txf_args = { 0 };
5053 unsigned txf_count = count;
5054 memcpy(txf_address, address, sizeof(txf_address));
5055
5056 if (!instr->is_array)
5057 txf_address[2] = ctx->ac.i32_0;
5058 txf_address[3] = ctx->ac.i32_0;
5059
5060 set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
5061 fmask_ptr, NULL,
5062 txf_address, txf_count, 0xf);
5063
5064 result = build_tex_intrinsic(ctx, instr, false, &txf_args);
5065
5066 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
5067 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
5068 goto write_result;
5069 }
5070
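	/* For multisampled images the sample operand is a logical sample
	 * index; translate it through the FMASK, which maps each logical
	 * sample to the physical fragment slot holding its data. */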
5071 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
5072 instr->op != nir_texop_txs) {
5073 unsigned sample_chan = instr->is_array ? 3 : 2;
5074 address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
5075 address[0],
5076 address[1],
5077 instr->is_array ? address[2] : NULL,
5078 address[sample_chan],
5079 fmask_ptr);
5080 }
5081
5082 if (offsets && instr->op == nir_texop_txf) {
5083 nir_const_value *const_offset =
5084 nir_src_as_const_value(instr->src[const_src].src);
5085 int num_offsets = instr->src[const_src].src.ssa->num_components;
5086 assert(const_offset);
5087 num_offsets = MIN2(num_offsets, instr->coord_components);
5088 if (num_offsets > 2)
5089 address[2] = LLVMBuildAdd(ctx->ac.builder,
5090 address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
5091 if (num_offsets > 1)
5092 address[1] = LLVMBuildAdd(ctx->ac.builder,
5093 address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
5094 address[0] = LLVMBuildAdd(ctx->ac.builder,
5095 address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
5096
5097 }
5098
5099 /* TODO TG4 support */
5100 if (instr->op == nir_texop_tg4) {
5101 if (instr->is_shadow)
5102 dmask = 1;
5103 else
5104 dmask = 1 << instr->component;
5105 }
5106 set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
5107 res_ptr, samp_ptr, address, count, dmask);
5108
5109 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
5110
5111 if (instr->op == nir_texop_query_levels)
5112 result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
5113 else if (instr->is_shadow && instr->is_new_style_shadow &&
5114 instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
5115 instr->op != nir_texop_tg4)
5116 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
5117 else if (instr->op == nir_texop_txs &&
5118 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
5119 instr->is_array) {
5120 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
5121 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
5122 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
5123 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
5124 result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
5125 } else if (ctx->ac.chip_class >= GFX9 &&
5126 instr->op == nir_texop_txs &&
5127 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
5128 instr->is_array) {
5129 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
5130 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
5131 result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
5132 ctx->ac.i32_1, "");
5133 } else if (instr->dest.ssa.num_components != 4)
5134 result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
5135
5136 write_result:
5137 if (result) {
5138 assert(instr->dest.is_ssa);
5139 result = ac_to_integer(&ctx->ac, result);
5140 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
5141 }
5142 }
5143
5144
5145 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
5146 {
5147 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
5148 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
5149
5150 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
5151 _mesa_hash_table_insert(ctx->phis, instr, result);
5152 }
5153
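/* Phis are translated in two passes: visit_phi creates the LLVM phi node up
 * front, and the incoming (value, block) pairs are only filled in here, once
 * every predecessor block has been emitted. */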
5154 static void visit_post_phi(struct ac_nir_context *ctx,
5155 nir_phi_instr *instr,
5156 LLVMValueRef llvm_phi)
5157 {
5158 nir_foreach_phi_src(src, instr) {
5159 LLVMBasicBlockRef block = get_block(ctx, src->pred);
5160 LLVMValueRef llvm_src = get_src(ctx, src->src);
5161
5162 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
5163 }
5164 }
5165
5166 static void phi_post_pass(struct ac_nir_context *ctx)
5167 {
5168 struct hash_entry *entry;
5169 hash_table_foreach(ctx->phis, entry) {
5170 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
5171 (LLVMValueRef)entry->data);
5172 }
5173 }
5174
5175
5176 static void visit_ssa_undef(struct ac_nir_context *ctx,
5177 const nir_ssa_undef_instr *instr)
5178 {
5179 unsigned num_components = instr->def.num_components;
5180 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
5181 LLVMValueRef undef;
5182
5183 if (num_components == 1) {
5184 undef = LLVMGetUndef(type);
5185 } else {
5186 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
5187 }
5188 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
5189 }
5190
5191 static void visit_jump(struct ac_nir_context *ctx,
5192 const nir_jump_instr *instr)
5193 {
5194 switch (instr->type) {
5195 case nir_jump_break:
5196 LLVMBuildBr(ctx->ac.builder, ctx->break_block);
5197 LLVMClearInsertionPosition(ctx->ac.builder);
5198 break;
5199 case nir_jump_continue:
5200 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5201 LLVMClearInsertionPosition(ctx->ac.builder);
5202 break;
5203 default:
5204 fprintf(stderr, "Unknown NIR jump instr: ");
5205 nir_print_instr(&instr->instr, stderr);
5206 fprintf(stderr, "\n");
5207 abort();
5208 }
5209 }
5210
5211 static void visit_cf_list(struct ac_nir_context *ctx,
5212 struct exec_list *list);
5213
5214 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
5215 {
5216 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
5217 nir_foreach_instr(instr, block)
5218 {
5219 switch (instr->type) {
5220 case nir_instr_type_alu:
5221 visit_alu(ctx, nir_instr_as_alu(instr));
5222 break;
5223 case nir_instr_type_load_const:
5224 visit_load_const(ctx, nir_instr_as_load_const(instr));
5225 break;
5226 case nir_instr_type_intrinsic:
5227 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5228 break;
5229 case nir_instr_type_tex:
5230 visit_tex(ctx, nir_instr_as_tex(instr));
5231 break;
5232 case nir_instr_type_phi:
5233 visit_phi(ctx, nir_instr_as_phi(instr));
5234 break;
5235 case nir_instr_type_ssa_undef:
5236 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
5237 break;
5238 case nir_instr_type_jump:
5239 visit_jump(ctx, nir_instr_as_jump(instr));
5240 break;
5241 default:
5242 fprintf(stderr, "Unknown NIR instr type: ");
5243 nir_print_instr(instr, stderr);
5244 fprintf(stderr, "\n");
5245 abort();
5246 }
5247 }
5248
5249 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
5250 }
5251
5252 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
5253 {
5254 LLVMValueRef value = get_src(ctx, if_stmt->condition);
5255
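	/* NIR booleans are 32-bit integers at this point, so the condition is
	 * lowered to a compare against zero rather than used as an i1. */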
5256 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
5257 LLVMBasicBlockRef merge_block =
5258 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5259 LLVMBasicBlockRef if_block =
5260 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5261 LLVMBasicBlockRef else_block = merge_block;
5262 if (!exec_list_is_empty(&if_stmt->else_list))
5263 else_block = LLVMAppendBasicBlockInContext(
5264 ctx->ac.context, fn, "");
5265
5266 LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
5267 ctx->ac.i32_0, "");
5268 LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
5269
5270 LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
5271 visit_cf_list(ctx, &if_stmt->then_list);
5272 if (LLVMGetInsertBlock(ctx->ac.builder))
5273 LLVMBuildBr(ctx->ac.builder, merge_block);
5274
5275 if (!exec_list_is_empty(&if_stmt->else_list)) {
5276 LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
5277 visit_cf_list(ctx, &if_stmt->else_list);
5278 if (LLVMGetInsertBlock(ctx->ac.builder))
5279 LLVMBuildBr(ctx->ac.builder, merge_block);
5280 }
5281
5282 LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
5283 }
5284
5285 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
5286 {
5287 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
5288 LLVMBasicBlockRef continue_parent = ctx->continue_block;
5289 LLVMBasicBlockRef break_parent = ctx->break_block;
5290
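	/* The continue block doubles as the loop header: the loop is entered
	 * by branching to it, "continue" jumps back to it, and "break" jumps
	 * to the break block, where code building resumes after the loop. */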
5291 ctx->continue_block =
5292 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5293 ctx->break_block =
5294 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5295
5296 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5297 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
5298 visit_cf_list(ctx, &loop->body);
5299
5300 if (LLVMGetInsertBlock(ctx->ac.builder))
5301 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5302 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
5303
5304 ctx->continue_block = continue_parent;
5305 ctx->break_block = break_parent;
5306 }
5307
5308 static void visit_cf_list(struct ac_nir_context *ctx,
5309 struct exec_list *list)
5310 {
5311 foreach_list_typed(nir_cf_node, node, node, list)
5312 {
5313 switch (node->type) {
5314 case nir_cf_node_block:
5315 visit_block(ctx, nir_cf_node_as_block(node));
5316 break;
5317
5318 case nir_cf_node_if:
5319 visit_if(ctx, nir_cf_node_as_if(node));
5320 break;
5321
5322 case nir_cf_node_loop:
5323 visit_loop(ctx, nir_cf_node_as_loop(node));
5324 break;
5325
5326 default:
5327 assert(0);
5328 }
5329 }
5330 }
5331
5332 static void
5333 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
5334 struct nir_variable *variable)
5335 {
5336 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
5337 LLVMValueRef t_offset;
5338 LLVMValueRef t_list;
5339 LLVMValueRef input;
5340 LLVMValueRef buffer_index;
5341 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
5342 int idx = variable->data.location;
5343 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
5344
5345 variable->data.driver_location = idx * 4;
5346
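	/* Fetch one vec4 per occupied attribute slot. The buffer index is
	 * instance_id + start_instance for per-instance attributes and
	 * vertex_id + base_vertex otherwise; vgpr_comp_cnt is raised so the
	 * HW actually supplies the extra system-value VGPRs. */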
5347 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
5348 if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
5349 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
5350 ctx->abi.start_instance, "");
5351 if (ctx->options->key.vs.as_ls) {
5352 ctx->shader_info->vs.vgpr_comp_cnt =
5353 MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
5354 } else {
5355 ctx->shader_info->vs.vgpr_comp_cnt =
5356 MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
5357 }
5358 } else
5359 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
5360 ctx->abi.base_vertex, "");
5361 t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
5362
5363 t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
5364
5365 input = ac_build_buffer_load_format(&ctx->ac, t_list,
5366 buffer_index,
5367 ctx->ac.i32_0,
5368 4, true);
5369
5370 for (unsigned chan = 0; chan < 4; chan++) {
5371 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
5372 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
5373 ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
5374 input, llvm_chan, ""));
5375 }
5376 }
5377 }
5378
5379 static void interp_fs_input(struct nir_to_llvm_context *ctx,
5380 unsigned attr,
5381 LLVMValueRef interp_param,
5382 LLVMValueRef prim_mask,
5383 LLVMValueRef result[4])
5384 {
5385 LLVMValueRef attr_number;
5386 unsigned chan;
5387 LLVMValueRef i, j;
5388 bool interp = interp_param != NULL;
5389
5390 attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
5391
5392 /* fs.constant returns the param from the middle vertex, so it's not
5393 * really useful for flat shading. It's meant to be used for custom
5394 * interpolation (but the intrinsic can't fetch from the other two
5395 * vertices).
5396 *
5397 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
5398 * to do the right thing. The only reason we use fs.constant is that
5399 * fs.interp cannot be used on integers, because they can be equal
5400 * to NaN.
5401 */
5402 if (interp) {
5403 interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
5404 ctx->ac.v2f32, "");
5405
5406 i = LLVMBuildExtractElement(ctx->builder, interp_param,
5407 ctx->ac.i32_0, "");
5408 j = LLVMBuildExtractElement(ctx->builder, interp_param,
5409 ctx->ac.i32_1, "");
5410 }
5411
5412 for (chan = 0; chan < 4; chan++) {
5413 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
5414
5415 if (interp) {
5416 result[chan] = ac_build_fs_interp(&ctx->ac,
5417 llvm_chan,
5418 attr_number,
5419 prim_mask, i, j);
5420 } else {
5421 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
5422 LLVMConstInt(ctx->ac.i32, 2, false),
5423 llvm_chan,
5424 attr_number,
5425 prim_mask);
5426 }
5427 }
5428 }
5429
5430 static void
5431 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
5432 struct nir_variable *variable)
5433 {
5434 int idx = variable->data.location;
5435 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5436 LLVMValueRef interp;
5437
5438 variable->data.driver_location = idx * 4;
5439 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
5440
5441 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
5442 unsigned interp_type;
5443 if (variable->data.sample) {
5444 interp_type = INTERP_SAMPLE;
5445 ctx->shader_info->info.ps.force_persample = true;
5446 } else if (variable->data.centroid)
5447 interp_type = INTERP_CENTROID;
5448 else
5449 interp_type = INTERP_CENTER;
5450
5451 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
5452 } else
5453 interp = NULL;
5454
5455 for (unsigned i = 0; i < attrib_count; ++i)
5456 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
5457
5458 }
5459
5460 static void
5461 handle_vs_inputs(struct nir_to_llvm_context *ctx,
5462 struct nir_shader *nir) {
5463 nir_foreach_variable(variable, &nir->inputs)
5464 handle_vs_input_decl(ctx, variable);
5465 }
5466
5467 static void
5468 prepare_interp_optimize(struct nir_to_llvm_context *ctx,
5469 struct nir_shader *nir)
5470 {
5471 if (!ctx->options->key.fs.multisample)
5472 return;
5473
5474 bool uses_center = false;
5475 bool uses_centroid = false;
5476 nir_foreach_variable(variable, &nir->inputs) {
5477 if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
5478 variable->data.sample)
5479 continue;
5480
5481 if (variable->data.centroid)
5482 uses_centroid = true;
5483 else
5484 uses_center = true;
5485 }
5486
5487 if (uses_center && uses_centroid) {
5488 LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->prim_mask, ctx->ac.i32_0, "");
5489 ctx->persp_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->persp_center, ctx->persp_centroid, "");
5490 ctx->linear_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->linear_center, ctx->linear_centroid, "");
5491 }
5492 }
5493
5494 static void
5495 handle_fs_inputs(struct nir_to_llvm_context *ctx,
5496 struct nir_shader *nir)
5497 {
5498 prepare_interp_optimize(ctx, nir);
5499
5500 nir_foreach_variable(variable, &nir->inputs)
5501 handle_fs_input_decl(ctx, variable);
5502
5503 unsigned index = 0;
5504
5505 if (ctx->shader_info->info.ps.uses_input_attachments ||
5506 ctx->shader_info->info.needs_multiview_view_index)
5507 ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
5508
5509 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
5510 LLVMValueRef interp_param;
5511 LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
5512
5513 if (!(ctx->input_mask & (1ull << i)))
5514 continue;
5515
5516 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
5517 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
5518 interp_param = *inputs;
5519 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
5520 inputs);
5521
5522 if (!interp_param)
5523 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
5524 ++index;
5525 } else if (i == VARYING_SLOT_POS) {
5526 for (int j = 0; j < 3; ++j)
5527 inputs[j] = ctx->abi.frag_pos[j];
5528
5529 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
5530 ctx->abi.frag_pos[3]);
5531 }
5532 }
5533 ctx->shader_info->fs.num_interp = index;
5534 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
5535 ctx->shader_info->fs.has_pcoord = true;
5536 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID))
5537 ctx->shader_info->fs.prim_id_input = true;
5538 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
5539 ctx->shader_info->fs.layer_input = true;
5540 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
5541
5542 if (ctx->shader_info->info.needs_multiview_view_index)
5543 ctx->view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
5544 }
5545
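/* Allocas are emitted at the top of the function's entry block so that
 * LLVM's mem2reg pass (run from ac_llvm_finalize_module) can promote them to
 * SSA registers. */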
5546 static LLVMValueRef
5547 ac_build_alloca(struct ac_llvm_context *ac,
5548 LLVMTypeRef type,
5549 const char *name)
5550 {
5551 LLVMBuilderRef builder = ac->builder;
5552 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
5553 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
5554 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
5555 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
5556 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
5557 LLVMValueRef res;
5558
5559 if (first_instr) {
5560 LLVMPositionBuilderBefore(first_builder, first_instr);
5561 } else {
5562 LLVMPositionBuilderAtEnd(first_builder, first_block);
5563 }
5564
5565 res = LLVMBuildAlloca(first_builder, type, name);
5566 LLVMBuildStore(builder, LLVMConstNull(type), res);
5567
5568 LLVMDisposeBuilder(first_builder);
5569
5570 return res;
5571 }
5572
5573 static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac,
5574 LLVMTypeRef type,
5575 const char *name)
5576 {
5577 LLVMValueRef ptr = ac_build_alloca(ac, type, name);
5578 LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr);
5579 return ptr;
5580 }
5581
5582 static void
5583 scan_shader_output_decl(struct nir_to_llvm_context *ctx,
5584 struct nir_variable *variable,
5585 struct nir_shader *shader,
5586 gl_shader_stage stage)
5587 {
5588 int idx = variable->data.location + variable->data.index;
5589 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5590 uint64_t mask_attribs;
5591
5592 variable->data.driver_location = idx * 4;
5593
5594 /* tess ctrl has its own load/store paths for outputs */
5595 if (stage == MESA_SHADER_TESS_CTRL)
5596 return;
5597
5598 mask_attribs = ((1ull << attrib_count) - 1) << idx;
5599 if (stage == MESA_SHADER_VERTEX ||
5600 stage == MESA_SHADER_TESS_EVAL ||
5601 stage == MESA_SHADER_GEOMETRY) {
5602 if (idx == VARYING_SLOT_CLIP_DIST0) {
5603 int length = shader->info.clip_distance_array_size +
5604 shader->info.cull_distance_array_size;
5605 if (stage == MESA_SHADER_VERTEX) {
5606 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
5607 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
5608 }
5609 if (stage == MESA_SHADER_TESS_EVAL) {
5610 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
5611 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
5612 }
5613
5614 if (length > 4)
5615 attrib_count = 2;
5616 else
5617 attrib_count = 1;
5618 mask_attribs = 1ull << idx;
5619 }
5620 }
5621
5622 ctx->output_mask |= mask_attribs;
5623 }
5624
5625 static void
5626 handle_shader_output_decl(struct ac_nir_context *ctx,
5627 struct nir_shader *nir,
5628 struct nir_variable *variable)
5629 {
5630 unsigned output_loc = variable->data.driver_location / 4;
5631 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5632
5633 /* tess ctrl has its own load/store paths for outputs */
5634 if (ctx->stage == MESA_SHADER_TESS_CTRL)
5635 return;
5636
5637 if (ctx->stage == MESA_SHADER_VERTEX ||
5638 ctx->stage == MESA_SHADER_TESS_EVAL ||
5639 ctx->stage == MESA_SHADER_GEOMETRY) {
5640 int idx = variable->data.location + variable->data.index;
5641 if (idx == VARYING_SLOT_CLIP_DIST0) {
5642 int length = nir->info.clip_distance_array_size +
5643 nir->info.cull_distance_array_size;
5644
5645 if (length > 4)
5646 attrib_count = 2;
5647 else
5648 attrib_count = 1;
5649 }
5650 }
5651
5652 for (unsigned i = 0; i < attrib_count; ++i) {
5653 for (unsigned chan = 0; chan < 4; chan++) {
5654 ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] =
5655 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5656 }
5657 }
5658 }
5659
5660 static LLVMTypeRef
5661 glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
5662 enum glsl_base_type type)
5663 {
5664 switch (type) {
5665 case GLSL_TYPE_INT:
5666 case GLSL_TYPE_UINT:
5667 case GLSL_TYPE_BOOL:
5668 case GLSL_TYPE_SUBROUTINE:
5669 return ctx->ac.i32;
5670 case GLSL_TYPE_FLOAT: /* TODO handle mediump */
5671 return ctx->ac.f32;
5672 case GLSL_TYPE_INT64:
5673 case GLSL_TYPE_UINT64:
5674 return ctx->ac.i64;
5675 case GLSL_TYPE_DOUBLE:
5676 return ctx->ac.f64;
5677 default:
5678 unreachable("unknown GLSL type");
5679 }
5680 }
5681
5682 static LLVMTypeRef
5683 glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
5684 const struct glsl_type *type)
5685 {
5686 if (glsl_type_is_scalar(type)) {
5687 return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
5688 }
5689
5690 if (glsl_type_is_vector(type)) {
5691 return LLVMVectorType(
5692 glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
5693 glsl_get_vector_elements(type));
5694 }
5695
5696 if (glsl_type_is_matrix(type)) {
5697 return LLVMArrayType(
5698 glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
5699 glsl_get_matrix_columns(type));
5700 }
5701
5702 if (glsl_type_is_array(type)) {
5703 return LLVMArrayType(
5704 glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
5705 glsl_get_length(type));
5706 }
5707
5708 assert(glsl_type_is_struct(type));
5709
5710 LLVMTypeRef member_types[glsl_get_length(type)];
5711
5712 for (unsigned i = 0; i < glsl_get_length(type); i++) {
5713 member_types[i] =
5714 glsl_to_llvm_type(ctx,
5715 glsl_get_struct_field(type, i));
5716 }
5717
5718 return LLVMStructTypeInContext(ctx->context, member_types,
5719 glsl_get_length(type), false);
5720 }
5721
5722 static void
5723 setup_locals(struct ac_nir_context *ctx,
5724 struct nir_function *func)
5725 {
5726 int i, j;
5727 ctx->num_locals = 0;
5728 nir_foreach_variable(variable, &func->impl->locals) {
5729 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5730 variable->data.driver_location = ctx->num_locals * 4;
5731 variable->data.location_frac = 0;
5732 ctx->num_locals += attrib_count;
5733 }
5734 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
5735 if (!ctx->locals)
5736 return;
5737
5738 for (i = 0; i < ctx->num_locals; i++) {
5739 for (j = 0; j < 4; j++) {
5740 ctx->locals[i * 4 + j] =
5741 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
5742 }
5743 }
5744 }
5745
5746 static void
5747 setup_shared(struct ac_nir_context *ctx,
5748 struct nir_shader *nir)
5749 {
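	/* Compute shared variables become module-level globals in the LDS
	 * address space; loads and stores to them then address on-chip
	 * shared memory. */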
5750 nir_foreach_variable(variable, &nir->shared) {
5751 LLVMValueRef shared =
5752 LLVMAddGlobalInAddressSpace(
5753 ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type),
5754 variable->name ? variable->name : "",
5755 LOCAL_ADDR_SPACE);
5756 _mesa_hash_table_insert(ctx->vars, variable, shared);
5757 }
5758 }
5759
5760 static LLVMValueRef
5761 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
5762 {
5763 v = ac_to_float(ctx, v);
5764 v = emit_intrin_2f_param(ctx, "llvm.maxnum", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
5765 return emit_intrin_2f_param(ctx, "llvm.minnum", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
5766 }
5767
5768
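/* Pack two 16-bit values into one dword: src0 in bits [15:0], src1 in bits
 * [31:16]. Used by the 16-bit color export paths below. */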
5769 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
5770 LLVMValueRef src0, LLVMValueRef src1)
5771 {
5772 LLVMValueRef const16 = LLVMConstInt(ctx->ac.i32, 16, false);
5773 LLVMValueRef comp[2];
5774
5775 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->ac.i32, 65535, 0), "");
5776 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->ac.i32, 65535, 0), "");
5777 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
5778 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
5779 }
5780
5781 /* Initialize arguments for the shader export intrinsic */
5782 static void
5783 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
5784 LLVMValueRef *values,
5785 unsigned target,
5786 struct ac_export_args *args)
5787 {
5788 /* Default is 0xf. Adjusted below depending on the format. */
5789 args->enabled_channels = 0xf;
5790
5791 /* Specify whether the EXEC mask represents the valid mask */
5792 args->valid_mask = 0;
5793
5794 /* Specify whether this is the last export */
5795 args->done = 0;
5796
5797 /* Specify the target we are exporting */
5798 args->target = target;
5799
5800 args->compr = false;
5801 args->out[0] = LLVMGetUndef(ctx->ac.f32);
5802 args->out[1] = LLVMGetUndef(ctx->ac.f32);
5803 args->out[2] = LLVMGetUndef(ctx->ac.f32);
5804 args->out[3] = LLVMGetUndef(ctx->ac.f32);
5805
5806 if (!values)
5807 return;
5808
5809 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
5810 LLVMValueRef val[4];
5811 unsigned index = target - V_008DFC_SQ_EXP_MRT;
5812 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
5813 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
5814 bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;
5815
5816 switch (col_format) {
5817 case V_028714_SPI_SHADER_ZERO:
5818 args->enabled_channels = 0; /* writemask */
5819 args->target = V_008DFC_SQ_EXP_NULL;
5820 break;
5821
5822 case V_028714_SPI_SHADER_32_R:
5823 args->enabled_channels = 1;
5824 args->out[0] = values[0];
5825 break;
5826
5827 case V_028714_SPI_SHADER_32_GR:
5828 args->enabled_channels = 0x3;
5829 args->out[0] = values[0];
5830 args->out[1] = values[1];
5831 break;
5832
5833 case V_028714_SPI_SHADER_32_AR:
5834 args->enabled_channels = 0x9;
5835 args->out[0] = values[0];
5836 args->out[3] = values[3];
5837 break;
5838
5839 case V_028714_SPI_SHADER_FP16_ABGR:
5840 args->compr = 1;
5841
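			/* cvt_pkrtz converts a pair of f32 to two packed f16
			 * in one dword, rounding toward zero, so all four
			 * channels fit in out[0] and out[1]. */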
5842 for (unsigned chan = 0; chan < 2; chan++) {
5843 LLVMValueRef pack_args[2] = {
5844 values[2 * chan],
5845 values[2 * chan + 1]
5846 };
5847 LLVMValueRef packed;
5848
5849 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
5850 args->out[chan] = packed;
5851 }
5852 break;
5853
5854 case V_028714_SPI_SHADER_UNORM16_ABGR:
5855 for (unsigned chan = 0; chan < 4; chan++) {
5856 val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
5857 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5858 LLVMConstReal(ctx->ac.f32, 65535), "");
5859 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5860 LLVMConstReal(ctx->ac.f32, 0.5), "");
5861 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
5862 ctx->ac.i32, "");
5863 }
5864
5865 args->compr = 1;
5866 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5867 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5868 break;
5869
5870 case V_028714_SPI_SHADER_SNORM16_ABGR:
5871 for (unsigned chan = 0; chan < 4; chan++) {
5872 val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1);
5873 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5874 LLVMConstReal(ctx->ac.f32, 32767), "");
5875
5876 /* If positive, add 0.5, else add -0.5. */
5877 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5878 LLVMBuildSelect(ctx->builder,
5879 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
5880 val[chan], ctx->ac.f32_0, ""),
5881 LLVMConstReal(ctx->ac.f32, 0.5),
5882 LLVMConstReal(ctx->ac.f32, -0.5), ""), "");
5883 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->ac.i32, "");
5884 }
5885
5886 args->compr = 1;
5887 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5888 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5889 break;
5890
5891 case V_028714_SPI_SHADER_UINT16_ABGR: {
5892 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32,
5893 is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
5894 LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->ac.i32, 3, 0);
5895
5896 for (unsigned chan = 0; chan < 4; chan++) {
5897 val[chan] = ac_to_integer(&ctx->ac, values[chan]);
5898 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
5899 }
5900
5901 args->compr = 1;
5902 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5903 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5904 break;
5905 }
5906
5907 case V_028714_SPI_SHADER_SINT16_ABGR: {
5908 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32,
5909 is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
5910 LLVMValueRef min_rgb = LLVMConstInt(ctx->ac.i32,
5911 is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
5912 LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->ac.i32_1;
5913 LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->ac.i32, -2, 0);
5914
5915 /* Clamp. */
5916 for (unsigned chan = 0; chan < 4; chan++) {
5917 val[chan] = ac_to_integer(&ctx->ac, values[chan]);
5918 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
5919 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
5920 }
5921
5922 args->compr = 1;
5923 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5924 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5925 break;
5926 }
5927
5928 default:
5929 case V_028714_SPI_SHADER_32_ABGR:
5930 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5931 break;
5932 }
5933 } else
5934 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5935
5936 for (unsigned i = 0; i < 4; ++i)
5937 args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
5938 }
5939
5940 static void
5941 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
5942 bool export_prim_id,
5943 struct ac_vs_output_info *outinfo)
5944 {
5945 uint32_t param_count = 0;
5946 unsigned target;
5947 unsigned pos_idx, num_pos_exports = 0;
5948 struct ac_export_args args, pos_args[4] = {};
5949 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
5950 int i;
5951
5952 if (ctx->options->key.has_multiview_view_index) {
5953 LLVMValueRef* tmp_out = &ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
5954 if (!*tmp_out) {
5955 for (unsigned i = 0; i < 4; ++i)
5956 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
5957 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5958 }
5959
5960 LLVMBuildStore(ctx->builder, ac_to_float(&ctx->ac, ctx->view_index), *tmp_out);
5961 ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
5962 }
5963
5964 memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
5965 sizeof(outinfo->vs_output_param_offset));
5966
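	/* Clip and cull distances all live in the CLIP_DIST0 slot here; they
	 * are gathered into up to 8 components and exported through POS+2
	 * (components 0-3) and, if needed, POS+3 (components 4-7). */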
5967 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
5968 LLVMValueRef slots[8];
5969 unsigned j;
5970
5971 if (outinfo->cull_dist_mask)
5972 outinfo->cull_dist_mask <<= ctx->num_output_clips;
5973
5974 i = VARYING_SLOT_CLIP_DIST0;
5975 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
5976 slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5977 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5978
5979 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
5980 slots[i] = LLVMGetUndef(ctx->ac.f32);
5981
5982 if (ctx->num_output_clips + ctx->num_output_culls > 4) {
5983 target = V_008DFC_SQ_EXP_POS + 3;
5984 si_llvm_init_export_args(ctx, &slots[4], target, &args);
5985 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5986 &args, sizeof(args));
5987 }
5988
5989 target = V_008DFC_SQ_EXP_POS + 2;
5990 si_llvm_init_export_args(ctx, &slots[0], target, &args);
5991 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5992 &args, sizeof(args));
5993
5994 }
5995
5996 LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1};
5997 if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
5998 for (unsigned j = 0; j < 4; j++)
5999 pos_values[j] = LLVMBuildLoad(ctx->builder,
6000 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
6001 }
6002 si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
6003
6004 if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
6005 outinfo->writes_pointsize = true;
6006 psize_value = LLVMBuildLoad(ctx->builder,
6007 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
6008 }
6009
6010 if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
6011 outinfo->writes_layer = true;
6012 layer_value = LLVMBuildLoad(ctx->builder,
6013 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
6014 }
6015
6016 if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
6017 outinfo->writes_viewport_index = true;
6018 viewport_index_value = LLVMBuildLoad(ctx->builder,
6019 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
6020 }
6021
6022 if (outinfo->writes_pointsize ||
6023 outinfo->writes_layer ||
6024 outinfo->writes_viewport_index) {
6025 pos_args[1].enabled_channels = ((outinfo->writes_pointsize ? 1 : 0) |
6026 (outinfo->writes_layer ? 4 : 0));
6027 pos_args[1].valid_mask = 0;
6028 pos_args[1].done = 0;
6029 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
6030 pos_args[1].compr = 0;
6031 pos_args[1].out[0] = ctx->ac.f32_0; /* X */
6032 pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
6033 pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
6034 pos_args[1].out[3] = ctx->ac.f32_0; /* W */
6035
6036 if (outinfo->writes_pointsize)
6037 pos_args[1].out[0] = psize_value;
6038 if (outinfo->writes_layer)
6039 pos_args[1].out[2] = layer_value;
6040 if (outinfo->writes_viewport_index) {
6041 if (ctx->options->chip_class >= GFX9) {
6042 /* GFX9 has the layer in out.z[10:0] and the viewport
6043 * index in out.z[19:16].
6044 */
6045 LLVMValueRef v = viewport_index_value;
6046 v = ac_to_integer(&ctx->ac, v);
6047 v = LLVMBuildShl(ctx->builder, v,
6048 LLVMConstInt(ctx->ac.i32, 16, false),
6049 "");
6050 v = LLVMBuildOr(ctx->builder, v,
6051 ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
6052
6053 pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
6054 pos_args[1].enabled_channels |= 1 << 2;
6055 } else {
6056 pos_args[1].out[3] = viewport_index_value;
6057 pos_args[1].enabled_channels |= 1 << 3;
6058 }
6059 }
6060 }
6061 for (i = 0; i < 4; i++) {
6062 if (pos_args[i].out[0])
6063 num_pos_exports++;
6064 }
6065
6066 pos_idx = 0;
6067 for (i = 0; i < 4; i++) {
6068 if (!pos_args[i].out[0])
6069 continue;
6070
6071 /* Specify the target we are exporting */
6072 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
6073 if (pos_idx == num_pos_exports)
6074 pos_args[i].done = 1;
6075 ac_build_export(&ctx->ac, &pos_args[i]);
6076 }
6077
6078 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6079 LLVMValueRef values[4];
6080 if (!(ctx->output_mask & (1ull << i)))
6081 continue;
6082
6083 for (unsigned j = 0; j < 4; j++)
6084 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6085 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
6086
6087 if (i == VARYING_SLOT_LAYER) {
6088 target = V_008DFC_SQ_EXP_PARAM + param_count;
6089 outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
6090 param_count++;
6091 } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
6092 target = V_008DFC_SQ_EXP_PARAM + param_count;
6093 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
6094 param_count++;
6095 } else if (i >= VARYING_SLOT_VAR0) {
6096 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
6097 target = V_008DFC_SQ_EXP_PARAM + param_count;
6098 outinfo->vs_output_param_offset[i] = param_count;
6099 param_count++;
6100 } else
6101 continue;
6102
6103 si_llvm_init_export_args(ctx, values, target, &args);
6104
6105 if (target >= V_008DFC_SQ_EXP_POS &&
6106 target <= (V_008DFC_SQ_EXP_POS + 3)) {
6107 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
6108 &args, sizeof(args));
6109 } else {
6110 ac_build_export(&ctx->ac, &args);
6111 }
6112 }
6113
6114 if (export_prim_id) {
6115 LLVMValueRef values[4];
6116 target = V_008DFC_SQ_EXP_PARAM + param_count;
6117 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
6118 param_count++;
6119
6120 values[0] = ctx->vs_prim_id;
6121 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2,
6122 ctx->shader_info->vs.vgpr_comp_cnt);
6123 for (unsigned j = 1; j < 4; j++)
6124 values[j] = ctx->ac.f32_0;
6125 si_llvm_init_export_args(ctx, values, target, &args);
6126 ac_build_export(&ctx->ac, &args);
6127 outinfo->export_prim_id = true;
6128 }
6129
6130 outinfo->pos_exports = num_pos_exports;
6131 outinfo->param_exports = param_count;
6132 }
6133
6134 static void
6135 handle_es_outputs_post(struct nir_to_llvm_context *ctx,
6136 struct ac_es_output_info *outinfo)
6137 {
6138 int j;
6139 uint64_t max_output_written = 0;
6140 LLVMValueRef lds_base = NULL;
6141
6142 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6143 int param_index;
6144 int length = 4;
6145
6146 if (!(ctx->output_mask & (1ull << i)))
6147 continue;
6148
6149 if (i == VARYING_SLOT_CLIP_DIST0)
6150 length = ctx->num_output_clips + ctx->num_output_culls;
6151
6152 param_index = shader_io_get_unique_index(i);
6153
6154 max_output_written = MAX2(param_index + (length > 4), max_output_written);
6155 }
6156
6157 outinfo->esgs_itemsize = (max_output_written + 1) * 16;
6158
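	/* On GFX9, ES and GS run as one merged shader, so ES outputs are
	 * handed to GS through LDS rather than the ESGS ring. Each vertex
	 * gets itemsize_dw dwords at an LDS base derived from its global
	 * thread index (wave index * 64 + lane). */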
6159 if (ctx->ac.chip_class >= GFX9) {
6160 unsigned itemsize_dw = outinfo->esgs_itemsize / 4;
6161 LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
6162 LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6163 LLVMConstInt(ctx->ac.i32, 24, false),
6164 LLVMConstInt(ctx->ac.i32, 4, false), false);
6165 vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
6166 LLVMBuildMul(ctx->ac.builder, wave_idx,
6167 LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
6168 lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
6169 LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
6170 }
6171
6172 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6173 LLVMValueRef dw_addr;
6174 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
6175 int param_index;
6176 int length = 4;
6177
6178 if (!(ctx->output_mask & (1ull << i)))
6179 continue;
6180
6181 if (i == VARYING_SLOT_CLIP_DIST0)
6182 length = ctx->num_output_clips + ctx->num_output_culls;
6183
6184 param_index = shader_io_get_unique_index(i);
6185
6186 if (lds_base) {
6187 dw_addr = LLVMBuildAdd(ctx->builder, lds_base,
6188 LLVMConstInt(ctx->ac.i32, param_index * 4, false),
6189 "");
6190 }
6191 for (j = 0; j < length; j++) {
6192 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
6193 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
6194
6195 if (ctx->ac.chip_class >= GFX9) {
6196 ac_lds_store(&ctx->ac, dw_addr,
6197 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
6198 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
6199 } else {
6200 ac_build_buffer_store_dword(&ctx->ac,
6201 ctx->esgs_ring,
6202 out_val, 1,
6203 NULL, ctx->es2gs_offset,
6204 (4 * param_index + j) * 4,
6205 1, 1, true, true);
6206 }
6207 }
6208 }
6209 }
6210
6211 static void
6212 handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
6213 {
6214 LLVMValueRef vertex_id = ctx->rel_auto_id;
6215 LLVMValueRef vertex_dw_stride = unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
6216 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
6217 vertex_dw_stride, "");
6218
6219 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6220 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
6221 int length = 4;
6222
6223 if (!(ctx->output_mask & (1ull << i)))
6224 continue;
6225
6226 if (i == VARYING_SLOT_CLIP_DIST0)
6227 length = ctx->num_output_clips + ctx->num_output_culls;
6228 int param = shader_io_get_unique_index(i);
6229 mark_tess_output(ctx, false, param);
6230 if (length > 4)
6231 mark_tess_output(ctx, false, param + 1);
6232 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr,
6233 LLVMConstInt(ctx->ac.i32, param * 4, false),
6234 "");
6235 for (unsigned j = 0; j < length; j++) {
6236 ac_lds_store(&ctx->ac, dw_addr,
6237 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
6238 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
6239 }
6240 }
6241 }
6242
6243 struct ac_build_if_state
6244 {
6245 struct nir_to_llvm_context *ctx;
6246 LLVMValueRef condition;
6247 LLVMBasicBlockRef entry_block;
6248 LLVMBasicBlockRef true_block;
6249 LLVMBasicBlockRef false_block;
6250 LLVMBasicBlockRef merge_block;
6251 };
6252
6253 static LLVMBasicBlockRef
6254 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name)
6255 {
6256 LLVMBasicBlockRef current_block;
6257 LLVMBasicBlockRef next_block;
6258 LLVMBasicBlockRef new_block;
6259
6260 /* get current basic block */
6261 current_block = LLVMGetInsertBlock(ctx->builder);
6262
6263 /* check whether there's another block after this one */
6264 next_block = LLVMGetNextBasicBlock(current_block);
6265 if (next_block) {
6266 /* insert the new block before the next block */
6267 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
6268 }
6269 else {
6270 /* append new block after current block */
6271 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
6272 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
6273 }
6274 return new_block;
6275 }
6276
6277 static void
6278 ac_nir_build_if(struct ac_build_if_state *ifthen,
6279 struct nir_to_llvm_context *ctx,
6280 LLVMValueRef condition)
6281 {
6282 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder);
6283
6284 memset(ifthen, 0, sizeof *ifthen);
6285 ifthen->ctx = ctx;
6286 ifthen->condition = condition;
6287 ifthen->entry_block = block;
6288
6289 /* create endif/merge basic block for the phi functions */
6290 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
6291
6292 /* create/insert true_block before merge_block */
6293 ifthen->true_block =
6294 LLVMInsertBasicBlockInContext(ctx->context,
6295 ifthen->merge_block,
6296 "if-true-block");
6297
6298 /* subsequent code goes into the true block */
6299 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
6300 }
6301
6302 /**
6303 * End a conditional.
6304 */
6305 static void
6306 ac_nir_build_endif(struct ac_build_if_state *ifthen)
6307 {
6308 LLVMBuilderRef builder = ifthen->ctx->builder;
6309
6310 /* Insert branch to the merge block from current block */
6311 LLVMBuildBr(builder, ifthen->merge_block);
6312
6313 /*
6314 * Now patch in the various branch instructions.
6315 */
6316
6317 /* Insert the conditional branch instruction at the end of entry_block */
6318 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
6319 if (ifthen->false_block) {
6320 /* we have an else clause */
6321 LLVMBuildCondBr(builder, ifthen->condition,
6322 ifthen->true_block, ifthen->false_block);
6323 }
6324 else {
6325 /* no else clause */
6326 LLVMBuildCondBr(builder, ifthen->condition,
6327 ifthen->true_block, ifthen->merge_block);
6328 }
6329
6330 /* Resume building code at end of the ifthen->merge_block */
6331 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
6332 }
6333
6334 static void
6335 write_tess_factors(struct nir_to_llvm_context *ctx)
6336 {
6337 unsigned stride, outer_comps, inner_comps;
6338 struct ac_build_if_state if_ctx, inner_if_ctx;
6339 LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5);
6340 LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
6341 unsigned tess_inner_index, tess_outer_index;
6342 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
6343 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
6344 int i;
6345 emit_barrier(&ctx->ac, ctx->stage);
6346
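	/* stride is the per-patch tess factor footprint in dwords:
	 * outer_comps + inner_comps, i.e. 2+0 for isolines, 3+1 for
	 * triangles, 4+2 for quads. */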
6347 switch (ctx->options->key.tcs.primitive_mode) {
6348 case GL_ISOLINES:
6349 stride = 2;
6350 outer_comps = 2;
6351 inner_comps = 0;
6352 break;
6353 case GL_TRIANGLES:
6354 stride = 4;
6355 outer_comps = 3;
6356 inner_comps = 1;
6357 break;
6358 case GL_QUADS:
6359 stride = 6;
6360 outer_comps = 4;
6361 inner_comps = 2;
6362 break;
6363 default:
6364 return;
6365 }
6366
6367 ac_nir_build_if(&if_ctx, ctx,
6368 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
6369 invocation_id, ctx->ac.i32_0, ""));
6370
6371 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
6372 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
6373
6374 mark_tess_output(ctx, true, tess_inner_index);
6375 mark_tess_output(ctx, true, tess_outer_index);
6376 lds_base = get_tcs_out_current_patch_data_offset(ctx);
6377 lds_inner = LLVMBuildAdd(ctx->builder, lds_base,
6378 LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
6379 lds_outer = LLVMBuildAdd(ctx->builder, lds_base,
6380 LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
6381
6382 for (i = 0; i < 4; i++) {
6383 inner[i] = LLVMGetUndef(ctx->ac.i32);
6384 outer[i] = LLVMGetUndef(ctx->ac.i32);
6385 }
6386
6387 // LINES reversal: the two outer factors are stored in swapped order in the TF buffer (out[]) relative to outer[]
6388 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
6389 outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
6390 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
6391 ctx->ac.i32_1, "");
6392 outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
6393 } else {
6394 for (i = 0; i < outer_comps; i++) {
6395 outer[i] = out[i] =
6396 ac_lds_load(&ctx->ac, lds_outer);
6397 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
6398 ctx->ac.i32_1, "");
6399 }
6400 for (i = 0; i < inner_comps; i++) {
6401 inner[i] = out[outer_comps+i] =
6402 ac_lds_load(&ctx->ac, lds_inner);
6403 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
6404 ctx->ac.i32_1, "");
6405 }
6406 }
6407
6408 /* Convert the outputs to vectors for stores. */
6409 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
6410 vec1 = NULL;
6411
6412 if (stride > 4)
6413 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
6414
6415
6416 buffer = ctx->hs_ring_tess_factor;
6417 tf_base = ctx->tess_factor_offset;
6418 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
6419 LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
6420 unsigned tf_offset = 0;
6421
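	/* On SI-VI the very first patch must write a control dword at the
	 * start of the tess factor ring before any factors are stored;
	 * GFX9+ no longer needs it, hence the chip class check. */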
6422 if (ctx->options->chip_class <= VI) {
6423 ac_nir_build_if(&inner_if_ctx, ctx,
6424 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
6425 rel_patch_id, ctx->ac.i32_0, ""));
6426
6427 /* Store the dynamic HS control word. */
6428 ac_build_buffer_store_dword(&ctx->ac, buffer,
6429 LLVMConstInt(ctx->ac.i32, 0x80000000, false),
6430 1, ctx->ac.i32_0, tf_base,
6431 0, 1, 0, true, false);
6432 tf_offset += 4;
6433
6434 ac_nir_build_endif(&inner_if_ctx);
6435 }
6436
6437 /* Store the tessellation factors. */
6438 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
6439 MIN2(stride, 4), byteoffset, tf_base,
6440 tf_offset, 1, 0, true, false);
6441 if (vec1)
6442 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
6443 stride - 4, byteoffset, tf_base,
6444 16 + tf_offset, 1, 0, true, false);
6445
6446 // Store the tess factors to the off-chip buffer for TES to read, but only if TES actually reads them
6447 if (ctx->options->key.tcs.tes_reads_tess_factors) {
6448 LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
6449 LLVMValueRef tf_inner_offset;
6450 unsigned param_outer, param_inner;
6451
6452 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
6453 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
6454 LLVMConstInt(ctx->ac.i32, param_outer, 0));
6455
6456 outer_vec = ac_build_gather_values(&ctx->ac, outer,
6457 util_next_power_of_two(outer_comps));
6458
6459 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
6460 outer_comps, tf_outer_offset,
6461 ctx->oc_lds, 0, 1, 0, true, false);
6462 if (inner_comps) {
6463 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
6464 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
6465 LLVMConstInt(ctx->ac.i32, param_inner, 0));
6466
6467 inner_vec = inner_comps == 1 ? inner[0] :
6468 ac_build_gather_values(&ctx->ac, inner, inner_comps);
6469 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
6470 inner_comps, tf_inner_offset,
6471 ctx->oc_lds, 0, 1, 0, true, false);
6472 }
6473 }
6474 ac_nir_build_endif(&if_ctx);
6475 }
6476
6477 static void
6478 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
6479 {
6480 write_tess_factors(ctx);
6481 }
6482
6483 static bool
6484 si_export_mrt_color(struct nir_to_llvm_context *ctx,
6485 LLVMValueRef *color, unsigned param, bool is_last,
6486 struct ac_export_args *args)
6487 {
6488 /* Export */
6489 si_llvm_init_export_args(ctx, color, param,
6490 args);
6491
6492 if (is_last) {
6493 args->valid_mask = 1; /* whether the EXEC mask is valid */
6494 args->done = 1; /* DONE bit */
6495 } else if (!args->enabled_channels)
6496 return false; /* unnecessary NULL export */
6497
6498 return true;
6499 }
6500
6501 static void
6502 radv_export_mrt_z(struct nir_to_llvm_context *ctx,
6503 LLVMValueRef depth, LLVMValueRef stencil,
6504 LLVMValueRef samplemask)
6505 {
6506 struct ac_export_args args;
6507
6508 ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
6509
6510 ac_build_export(&ctx->ac, &args);
6511 }
6512
6513 static void
6514 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
6515 {
6516 unsigned index = 0;
6517 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
6518 struct ac_export_args color_args[8];
6519
6520 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6521 LLVMValueRef values[4];
6522
6523 if (!(ctx->output_mask & (1ull << i)))
6524 continue;
6525
6526 if (i == FRAG_RESULT_DEPTH) {
6527 ctx->shader_info->fs.writes_z = true;
6528 depth = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6529 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6530 } else if (i == FRAG_RESULT_STENCIL) {
6531 ctx->shader_info->fs.writes_stencil = true;
6532 stencil = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6533 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6534 } else if (i == FRAG_RESULT_SAMPLE_MASK) {
6535 ctx->shader_info->fs.writes_sample_mask = true;
6536 samplemask = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6537 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6538 } else {
6539 bool last = false;
6540 for (unsigned j = 0; j < 4; j++)
6541 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6542 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
6543
6544 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
6545 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
6546
6547 bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]);
6548 if (ret)
6549 index++;
6550 }
6551 }
6552
6553 for (unsigned i = 0; i < index; i++)
6554 ac_build_export(&ctx->ac, &color_args[i]);
6555 if (depth || stencil || samplemask)
6556 radv_export_mrt_z(ctx, depth, stencil, samplemask);
6557 else if (!index) {
6558 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
6559 ac_build_export(&ctx->ac, &color_args[0]);
6560 }
6561 }
6562
6563 static void
6564 emit_gs_epilogue(struct nir_to_llvm_context *ctx)
6565 {
6566 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
6567 }
6568
6569 static void
6570 handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
6571 LLVMValueRef *addrs)
6572 {
6573 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
6574
6575 switch (ctx->stage) {
6576 case MESA_SHADER_VERTEX:
6577 if (ctx->options->key.vs.as_ls)
6578 handle_ls_outputs_post(ctx);
6579 else if (ctx->options->key.vs.as_es)
6580 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
6581 else
6582 handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
6583 &ctx->shader_info->vs.outinfo);
6584 break;
6585 case MESA_SHADER_FRAGMENT:
6586 handle_fs_outputs_post(ctx);
6587 break;
6588 case MESA_SHADER_GEOMETRY:
6589 emit_gs_epilogue(ctx);
6590 break;
6591 case MESA_SHADER_TESS_CTRL:
6592 handle_tcs_outputs_post(ctx);
6593 break;
6594 case MESA_SHADER_TESS_EVAL:
6595 if (ctx->options->key.tes.as_es)
6596 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
6597 else
6598 handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
6599 &ctx->shader_info->tes.outinfo);
6600 break;
6601 default:
6602 break;
6603 }
6604 }
6605
6606 static void ac_llvm_finalize_module(struct nir_to_llvm_context *ctx)
6607 {
6608 LLVMPassManagerRef passmgr;
6609 /* Create the pass manager */
6610 passmgr = LLVMCreateFunctionPassManagerForModule(
6611 ctx->module);
6612
6613 /* This pass should eliminate all the load and store instructions */
6614 LLVMAddPromoteMemoryToRegisterPass(passmgr);
6615
6616 /* Add some optimization passes */
6617 LLVMAddScalarReplAggregatesPass(passmgr);
6618 LLVMAddLICMPass(passmgr);
6619 LLVMAddAggressiveDCEPass(passmgr);
6620 LLVMAddCFGSimplificationPass(passmgr);
6621 LLVMAddInstructionCombiningPass(passmgr);
6622
6623 /* Run the pass */
6624 LLVMInitializeFunctionPassManager(passmgr);
6625 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
6626 LLVMFinalizeFunctionPassManager(passmgr);
6627
6628 LLVMDisposeBuilder(ctx->builder);
6629 LLVMDisposePassManager(passmgr);
6630 }
6631
6632 static void
6633 ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
6634 {
6635 struct ac_vs_output_info *outinfo;
6636
6637 switch (ctx->stage) {
6638 case MESA_SHADER_FRAGMENT:
6639 case MESA_SHADER_COMPUTE:
6640 case MESA_SHADER_TESS_CTRL:
6641 case MESA_SHADER_GEOMETRY:
6642 return;
6643 case MESA_SHADER_VERTEX:
6644 if (ctx->options->key.vs.as_ls ||
6645 ctx->options->key.vs.as_es)
6646 return;
6647 outinfo = &ctx->shader_info->vs.outinfo;
6648 break;
6649 case MESA_SHADER_TESS_EVAL:
6650 if (ctx->options->key.tes.as_es)
6651 return;
6652 outinfo = &ctx->shader_info->tes.outinfo;
6653 break;
6654 default:
6655 unreachable("Unhandled shader type");
6656 }
6657
6658 ac_optimize_vs_outputs(&ctx->ac,
6659 ctx->main_function,
6660 outinfo->vs_output_param_offset,
6661 VARYING_SLOT_MAX,
6662 &outinfo->param_exports);
6663 }
6664
6665 static void
6666 ac_setup_rings(struct nir_to_llvm_context *ctx)
6667 {
6668 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
6669 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
6670 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false));
6671 }
6672
6673 if (ctx->is_gs_copy_shader) {
6674 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false));
6675 }
6676 if (ctx->stage == MESA_SHADER_GEOMETRY) {
6677 LLVMValueRef tmp;
6678 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
6679 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
6680
6681 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
6682
6683 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), "");
6684 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->ac.i32_1, "");
6685 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
6686 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, "");
6687 }
6688
6689 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
6690 ctx->stage == MESA_SHADER_TESS_EVAL) {
6691 ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
6692 ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
6693 }
6694 }
6695
6696 static unsigned
6697 ac_nir_get_max_workgroup_size(enum chip_class chip_class,
6698 const struct nir_shader *nir)
6699 {
6700 switch (nir->info.stage) {
6701 case MESA_SHADER_TESS_CTRL:
6702 return chip_class >= CIK ? 128 : 64;
6703 case MESA_SHADER_GEOMETRY:
6704 return chip_class >= GFX9 ? 128 : 64;
6705 case MESA_SHADER_COMPUTE:
6706 break;
6707 default:
6708 return 0;
6709 }
6710
6711 unsigned max_workgroup_size = nir->info.cs.local_size[0] *
6712 nir->info.cs.local_size[1] *
6713 nir->info.cs.local_size[2];
6714 return max_workgroup_size;
6715 }
6716
6717 /* Fix up the input VGPRs for the case where the HW does not emit the TCS regs because there are no HS threads: the values then arrive in the LS layout and must be remapped. */
6718 static void ac_nir_fixup_ls_hs_input_vgprs(struct nir_to_llvm_context *ctx)
6719 {
6720 LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6721 LLVMConstInt(ctx->ac.i32, 8, false),
6722 LLVMConstInt(ctx->ac.i32, 8, false), false);
6723 LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
6724 ctx->ac.i32_0, "");
6725 ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
6726 ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
6727 ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
6728 ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
6729 }
6730
6731 static void prepare_gs_input_vgprs(struct nir_to_llvm_context *ctx)
6732 {
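	/* On GFX9 the six GS vertex offsets arrive packed as 16-bit pairs,
	 * two per VGPR; unpack them into separate 32-bit values. */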
6733 	for (int i = 5; i >= 0; --i) {
6734 ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1],
6735 LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false),
6736 LLVMConstInt(ctx->ac.i32, 16, false), false);
6737 }
6738
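	/* The GS wave ID lives in bits [23:16] of merged_wave_info. */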
6739 ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6740 LLVMConstInt(ctx->ac.i32, 16, false),
6741 LLVMConstInt(ctx->ac.i32, 8, false), false);
6742 }
6743
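/* Translate one NIR shader into LLVM IR inside the current function, using
 * the callbacks in the ABI for the stage-specific parts. */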
6744 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
6745 struct nir_shader *nir, struct nir_to_llvm_context *nctx)
6746 {
6747 struct ac_nir_context ctx = {};
6748 struct nir_function *func;
6749
6750 ctx.ac = *ac;
6751 ctx.abi = abi;
6752
6753 ctx.nctx = nctx;
6754 if (nctx)
6755 nctx->nir = &ctx;
6756
6757 ctx.stage = nir->info.stage;
6758
6759 ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
6760
6761 nir_foreach_variable(variable, &nir->outputs)
6762 handle_shader_output_decl(&ctx, nir, variable);
6763
6764 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6765 _mesa_key_pointer_equal);
6766 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6767 _mesa_key_pointer_equal);
6768 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6769 _mesa_key_pointer_equal);
6770
6771 func = (struct nir_function *)exec_list_get_head(&nir->functions);
6772
6773 setup_locals(&ctx, func);
6774
6775 if (nir->info.stage == MESA_SHADER_COMPUTE)
6776 setup_shared(&ctx, nir);
6777
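	/* Emit LLVM IR for the whole function body, then fill in the phi
	 * incoming values once all blocks exist. */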
6778 visit_cf_list(&ctx, &func->impl->body);
6779 phi_post_pass(&ctx);
6780
6781 ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS,
6782 ctx.outputs);
6783
6784 free(ctx.locals);
6785 ralloc_free(ctx.defs);
6786 ralloc_free(ctx.phis);
6787 ralloc_free(ctx.vars);
6788
6789 if (nctx)
6790 nctx->nir = NULL;
6791 }
6792
6793 static
6794 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
6795 struct nir_shader *const *shaders,
6796 int shader_count,
6797 struct ac_shader_variant_info *shader_info,
6798 const struct ac_nir_compiler_options *options)
6799 {
6800 struct nir_to_llvm_context ctx = {0};
6801 unsigned i;
6802 ctx.options = options;
6803 ctx.shader_info = shader_info;
6804 ctx.context = LLVMContextCreate();
6805 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
6806
6807 ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
6808 options->family);
6809 ctx.ac.module = ctx.module;
6810 LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
6811
6812 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
6813 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
6814 LLVMSetDataLayout(ctx.module, data_layout_str);
6815 LLVMDisposeTargetData(data_layout);
6816 LLVMDisposeMessage(data_layout_str);
6817
6818 enum ac_float_mode float_mode =
6819 options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
6820 AC_FLOAT_MODE_DEFAULT;
6821
6822 ctx.builder = ac_create_builder(ctx.context, float_mode);
6823 ctx.ac.builder = ctx.builder;
6824
6825 memset(shader_info, 0, sizeof(*shader_info));
6826
6827 	for (int i = 0; i < shader_count; ++i)
6828 ac_nir_shader_info_pass(shaders[i], options, &shader_info->info);
6829
6830 for (i = 0; i < AC_UD_MAX_SETS; i++)
6831 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
6832 for (i = 0; i < AC_UD_MAX_UD; i++)
6833 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
6834
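	/* For merged shaders the workgroup size limit is the maximum over
	 * all stages. */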
6835 ctx.max_workgroup_size = 0;
6836 for (int i = 0; i < shader_count; ++i) {
6837 ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
6838 ac_nir_get_max_workgroup_size(ctx.options->chip_class,
6839 shaders[i]));
6840 }
6841
6842 create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
6843 shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
6844
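	/* Hook up the driver ABI callbacks used by the common NIR->LLVM code
	 * for resource and I/O access. */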
6845 ctx.abi.inputs = &ctx.inputs[0];
6846 ctx.abi.emit_outputs = handle_shader_outputs_post;
6847 ctx.abi.emit_vertex = visit_emit_vertex;
6848 ctx.abi.load_ubo = radv_load_ubo;
6849 ctx.abi.load_ssbo = radv_load_ssbo;
6850 ctx.abi.load_sampler_desc = radv_get_sampler_desc;
6851 ctx.abi.clamp_shadow_reference = false;
6852
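	/* Merged shaders execute both stages in a single wave; start from a
	 * fully enabled EXEC mask and let the per-stage thread-count guards
	 * below select the live lanes. */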
6853 if (shader_count >= 2)
6854 ac_init_exec_full_mask(&ctx.ac);
6855
6856 if (ctx.ac.chip_class == GFX9 &&
6857 shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
6858 ac_nir_fixup_ls_hs_input_vgprs(&ctx);
6859
6860 	for (int i = 0; i < shader_count; ++i) {
6861 ctx.stage = shaders[i]->info.stage;
6862 ctx.output_mask = 0;
6863 ctx.tess_outputs_written = 0;
6864 ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
6865 ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
6866
6867 if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
6868 ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex");
6869 ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
6870 ctx.abi.load_inputs = load_gs_input;
6871 ctx.abi.emit_primitive = visit_end_primitive;
6872 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
6873 ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
6874 ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read;
6875 ctx.abi.load_tess_varyings = load_tcs_varyings;
6876 ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
6877 ctx.abi.store_tcs_outputs = store_tcs_output;
6878 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
6879 ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
6880 ctx.abi.load_tess_varyings = load_tes_input;
6881 ctx.abi.load_tess_coord = load_tess_coord;
6882 ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
6883 } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
6884 if (shader_info->info.vs.needs_instance_id) {
6885 if (ctx.options->key.vs.as_ls) {
6886 ctx.shader_info->vs.vgpr_comp_cnt =
6887 MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt);
6888 } else {
6889 ctx.shader_info->vs.vgpr_comp_cnt =
6890 MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt);
6891 }
6892 }
6893 } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
6894 shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
6895 }
6896
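		/* Every stage after the first must wait for the previous
		 * stage to finish writing its outputs before reading them. */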
6897 if (i)
6898 emit_barrier(&ctx.ac, ctx.stage);
6899
6900 ac_setup_rings(&ctx);
6901
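		/* Only the threads that belong to this stage run its body:
		 * branch on thread ID < this stage's thread count, taken from
		 * merged_wave_info. */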
6902 LLVMBasicBlockRef merge_block;
6903 if (shader_count >= 2) {
6904 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
6905 LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
6906 merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
6907
6908 LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info,
6909 LLVMConstInt(ctx.ac.i32, 8 * i, false),
6910 LLVMConstInt(ctx.ac.i32, 8, false), false);
6911 LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
6912 LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
6913 thread_id, count, "");
6914 LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
6915
6916 LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
6917 }
6918
6919 if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT)
6920 handle_fs_inputs(&ctx, shaders[i]);
6921 		else if (shaders[i]->info.stage == MESA_SHADER_VERTEX)
6922 			handle_vs_inputs(&ctx, shaders[i]);
6923 		else if (shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
6924 prepare_gs_input_vgprs(&ctx);
6925
6926 nir_foreach_variable(variable, &shaders[i]->outputs)
6927 scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
6928
6929 ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i], &ctx);
6930
6931 if (shader_count >= 2) {
6932 LLVMBuildBr(ctx.ac.builder, merge_block);
6933 LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
6934 }
6935
6936 if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
6937 unsigned addclip = shaders[i]->info.clip_distance_array_size +
6938 shaders[i]->info.cull_distance_array_size > 4;
6939 shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
6940 shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
6941 shaders[i]->info.gs.vertices_out;
6942 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
6943 shader_info->tcs.outputs_written = ctx.tess_outputs_written;
6944 shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
6945 } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
6946 shader_info->vs.outputs_written = ctx.tess_outputs_written;
6947 }
6948 }
6949
6950 LLVMBuildRetVoid(ctx.builder);
6951
6952 if (options->dump_preoptir)
6953 ac_dump_module(ctx.module);
6954
6955 ac_llvm_finalize_module(&ctx);
6956
6957 if (shader_count == 1)
6958 ac_nir_eliminate_const_vs_outputs(&ctx);
6959
6960 return ctx.module;
6961 }
6962
6963 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
6964 {
6965 unsigned *retval = (unsigned *)context;
6966 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
6967 char *description = LLVMGetDiagInfoDescription(di);
6968
6969 if (severity == LLVMDSError) {
6970 *retval = 1;
6971 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
6972 description);
6973 }
6974
6975 LLVMDisposeMessage(description);
6976 }
6977
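/* Run the LLVM backend on the module and parse the resulting ELF into the
 * shader binary. Returns non-zero on failure. */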
6978 static unsigned ac_llvm_compile(LLVMModuleRef M,
6979 struct ac_shader_binary *binary,
6980 LLVMTargetMachineRef tm)
6981 {
6982 unsigned retval = 0;
6983 char *err;
6984 LLVMContextRef llvm_ctx;
6985 LLVMMemoryBufferRef out_buffer;
6986 unsigned buffer_size;
6987 const char *buffer_data;
6988 LLVMBool mem_err;
6989
6990 	/* Set up the diagnostic handler. */
6991 llvm_ctx = LLVMGetModuleContext(M);
6992
6993 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
6994 &retval);
6995
6996 	/* Compile the IR. */
6997 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
6998 &err, &out_buffer);
6999
7000 /* Process Errors/Warnings */
7001 if (mem_err) {
7002 fprintf(stderr, "%s: %s", __FUNCTION__, err);
7003 free(err);
7004 retval = 1;
7005 goto out;
7006 }
7007
7008 	/* Extract the shader code. */
7009 buffer_size = LLVMGetBufferSize(out_buffer);
7010 buffer_data = LLVMGetBufferStart(out_buffer);
7011
7012 ac_elf_read(buffer_data, buffer_size, binary);
7013
7014 /* Clean up */
7015 LLVMDisposeMemoryBuffer(out_buffer);
7016
7017 out:
7018 return retval;
7019 }
7020
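/* Compile the module to machine code and derive the hardware config
 * (register counts, float mode) from the resulting binary. */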
7021 static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
7022 LLVMModuleRef llvm_module,
7023 struct ac_shader_binary *binary,
7024 struct ac_shader_config *config,
7025 struct ac_shader_variant_info *shader_info,
7026 gl_shader_stage stage,
7027 bool dump_shader, bool supports_spill)
7028 {
7029 if (dump_shader)
7030 ac_dump_module(llvm_module);
7031
7032 memset(binary, 0, sizeof(*binary));
7033 int v = ac_llvm_compile(llvm_module, binary, tm);
7034 if (v) {
7035 fprintf(stderr, "compile failed\n");
7036 }
7037
7038 if (dump_shader)
7039 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
7040
7041 ac_shader_binary_read_config(binary, config, 0, supports_spill);
7042
7043 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
7044 LLVMDisposeModule(llvm_module);
7045 LLVMContextDispose(ctx);
7046
7047 if (stage == MESA_SHADER_FRAGMENT) {
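		/* Recompute the PS input VGPR count from the enabled
		 * SPI_PS_INPUT_ADDR fields; each enabled input takes 1-3
		 * VGPRs. */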
7048 shader_info->num_input_vgprs = 0;
7049 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
7050 shader_info->num_input_vgprs += 2;
7051 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
7052 shader_info->num_input_vgprs += 2;
7053 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
7054 shader_info->num_input_vgprs += 2;
7055 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
7056 shader_info->num_input_vgprs += 3;
7057 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
7058 shader_info->num_input_vgprs += 2;
7059 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
7060 shader_info->num_input_vgprs += 2;
7061 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
7062 shader_info->num_input_vgprs += 2;
7063 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
7064 shader_info->num_input_vgprs += 1;
7065 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
7066 shader_info->num_input_vgprs += 1;
7067 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
7068 shader_info->num_input_vgprs += 1;
7069 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
7070 shader_info->num_input_vgprs += 1;
7071 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
7072 shader_info->num_input_vgprs += 1;
7073 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
7074 shader_info->num_input_vgprs += 1;
7075 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
7076 shader_info->num_input_vgprs += 1;
7077 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
7078 shader_info->num_input_vgprs += 1;
7079 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
7080 shader_info->num_input_vgprs += 1;
7081 }
7082 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
7083
7084 /* +3 for scratch wave offset and VCC */
7085 config->num_sgprs = MAX2(config->num_sgprs,
7086 shader_info->num_input_sgprs + 3);
7087
7088 /* Enable 64-bit and 16-bit denormals, because there is no performance
7089 * cost.
7090 *
7091 * If denormals are enabled, all floating-point output modifiers are
7092 * ignored.
7093 *
7094 * Don't enable denormals for 32-bit floats, because:
7095 * - Floating-point output modifiers would be ignored by the hw.
7096 * - Some opcodes don't support denormals, such as v_mad_f32. We would
7097 * have to stop using those.
7098 * - SI & CI would be very slow.
7099 */
7100 config->float_mode |= V_00B028_FP_64_DENORMS;
7101 }
7102
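/* Copy the stage-specific information into the variant info. */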
7103 static void
7104 ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
7105 {
7106 switch (nir->info.stage) {
7107 case MESA_SHADER_COMPUTE:
7108 for (int i = 0; i < 3; ++i)
7109 shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
7110 break;
7111 case MESA_SHADER_FRAGMENT:
7112 shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
7113 break;
7114 case MESA_SHADER_GEOMETRY:
7115 shader_info->gs.vertices_in = nir->info.gs.vertices_in;
7116 shader_info->gs.vertices_out = nir->info.gs.vertices_out;
7117 shader_info->gs.output_prim = nir->info.gs.output_primitive;
7118 shader_info->gs.invocations = nir->info.gs.invocations;
7119 break;
7120 case MESA_SHADER_TESS_EVAL:
7121 shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
7122 shader_info->tes.spacing = nir->info.tess.spacing;
7123 shader_info->tes.ccw = nir->info.tess.ccw;
7124 shader_info->tes.point_mode = nir->info.tess.point_mode;
7125 shader_info->tes.as_es = options->key.tes.as_es;
7126 break;
7127 case MESA_SHADER_TESS_CTRL:
7128 shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
7129 break;
7130 case MESA_SHADER_VERTEX:
7131 shader_info->vs.as_es = options->key.vs.as_es;
7132 shader_info->vs.as_ls = options->key.vs.as_ls;
7133 		/* In LS mode we need at least 1 VGPR component; the invocation ID needs 2, which is handled elsewhere. */
7134 if (options->key.vs.as_ls)
7135 shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
7136 break;
7137 default:
7138 break;
7139 }
7140 }
7141
7142 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
7143 struct ac_shader_binary *binary,
7144 struct ac_shader_config *config,
7145 struct ac_shader_variant_info *shader_info,
7146 struct nir_shader *const *nir,
7147 int nir_count,
7148 const struct ac_nir_compiler_options *options,
7149 bool dump_shader)
7150 {
7152 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
7153 options);
7154
7155 ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill);
7156 for (int i = 0; i < nir_count; ++i)
7157 ac_fill_shader_info(shader_info, nir[i], options);
7158
7159 /* Determine the ES type (VS or TES) for the GS on GFX9. */
7160 if (options->chip_class == GFX9) {
7161 if (nir_count == 2 &&
7162 nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
7163 shader_info->gs.es_type = nir[0]->info.stage;
7164 }
7165 }
7166 }
7167
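/* The GS copy shader runs as a hardware VS: it reads one vertex's outputs
 * back from the GSVS ring and exports them like regular VS outputs. */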
7168 static void
7169 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
7170 {
7171 LLVMValueRef args[9];
7172 args[0] = ctx->gsvs_ring;
7173 args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), "");
7174 args[3] = ctx->ac.i32_0;
7175 args[4] = ctx->ac.i32_1; /* OFFEN */
7176 args[5] = ctx->ac.i32_0; /* IDXEN */
7177 args[6] = ctx->ac.i32_1; /* GLC */
7178 args[7] = ctx->ac.i32_1; /* SLC */
7179 args[8] = ctx->ac.i32_0; /* TFE */
7180
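	/* Load every written output component of the current vertex from the
	 * GSVS ring and store it into the output slots consumed by
	 * handle_vs_outputs_post(). */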
7181 int idx = 0;
7182
7183 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
7184 int length = 4;
7185 int slot = idx;
7186 int slot_inc = 1;
7187 if (!(ctx->output_mask & (1ull << i)))
7188 continue;
7189
7190 if (i == VARYING_SLOT_CLIP_DIST0) {
7191 /* unpack clip and cull from a single set of slots */
7192 length = ctx->num_output_clips + ctx->num_output_culls;
7193 if (length > 4)
7194 slot_inc = 2;
7195 }
7196
7197 for (unsigned j = 0; j < length; j++) {
7198 LLVMValueRef value;
7199 args[2] = LLVMConstInt(ctx->ac.i32,
7200 (slot * 4 + j) *
7201 ctx->gs_max_out_vertices * 16 * 4, false);
7202
7203 value = ac_build_intrinsic(&ctx->ac,
7204 "llvm.SI.buffer.load.dword.i32.i32",
7205 ctx->ac.i32, args, 9,
7206 AC_FUNC_ATTR_READONLY |
7207 AC_FUNC_ATTR_LEGACY);
7208
7209 LLVMBuildStore(ctx->builder,
7210 ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
7211 }
7212 idx += slot_inc;
7213 }
7214 handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
7215 }
7216
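/* Build and compile a standalone module containing only the GS copy
 * shader. */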
7217 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
7218 struct nir_shader *geom_shader,
7219 struct ac_shader_binary *binary,
7220 struct ac_shader_config *config,
7221 struct ac_shader_variant_info *shader_info,
7222 const struct ac_nir_compiler_options *options,
7223 bool dump_shader)
7224 {
7225 struct nir_to_llvm_context ctx = {0};
7226 ctx.context = LLVMContextCreate();
7227 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
7228 ctx.options = options;
7229 ctx.shader_info = shader_info;
7230
7231 ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
7232 options->family);
7233 ctx.ac.module = ctx.module;
7234
7235 ctx.is_gs_copy_shader = true;
7236 LLVMSetTarget(ctx.module, "amdgcn--");
7237
7238 enum ac_float_mode float_mode =
7239 options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
7240 AC_FLOAT_MODE_DEFAULT;
7241
7242 ctx.builder = ac_create_builder(ctx.context, float_mode);
7243 ctx.ac.builder = ctx.builder;
7244 ctx.stage = MESA_SHADER_VERTEX;
7245
7246 create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
7247
7248 ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
7249 ac_setup_rings(&ctx);
7250
7251 ctx.num_output_clips = geom_shader->info.clip_distance_array_size;
7252 ctx.num_output_culls = geom_shader->info.cull_distance_array_size;
7253
7254 struct ac_nir_context nir_ctx = {};
7255 nir_ctx.ac = ctx.ac;
7256 nir_ctx.abi = &ctx.abi;
7257
7258 nir_ctx.nctx = &ctx;
7259 ctx.nir = &nir_ctx;
7260
7261 nir_foreach_variable(variable, &geom_shader->outputs) {
7262 scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
7263 handle_shader_output_decl(&nir_ctx, geom_shader, variable);
7264 }
7265
7266 ac_gs_copy_shader_emit(&ctx);
7267
7268 ctx.nir = NULL;
7269
7270 LLVMBuildRetVoid(ctx.builder);
7271
7272 ac_llvm_finalize_module(&ctx);
7273
7274 ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info,
7275 MESA_SHADER_VERTEX,
7276 dump_shader, options->supports_spill);
7277 }