/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_nir_to_llvm.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "ac_binary.h"
#include "sid.h"
#include "nir/nir.h"
#include "../vulkan/radv_descriptor_set.h"
#include "util/bitscan.h"
#include <llvm-c/Transforms/Scalar.h>
#include "ac_shader_abi.h"
#include "ac_shader_info.h"
#include "ac_shader_util.h"
#include "ac_exp_param.h"

enum radeon_llvm_calling_convention {
    RADEON_LLVM_AMDGPU_VS = 87,
    RADEON_LLVM_AMDGPU_GS = 88,
    RADEON_LLVM_AMDGPU_PS = 89,
    RADEON_LLVM_AMDGPU_CS = 90,
    RADEON_LLVM_AMDGPU_HS = 93,
};

#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)

struct nir_to_llvm_context;

struct ac_nir_context {
    struct ac_llvm_context ac;
    struct ac_shader_abi *abi;

    gl_shader_stage stage;

    struct hash_table *defs;
    struct hash_table *phis;
    struct hash_table *vars;

    LLVMValueRef main_function;
    LLVMBasicBlockRef continue_block;
    LLVMBasicBlockRef break_block;

    LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];

    int num_locals;
    LLVMValueRef *locals;

    struct nir_to_llvm_context *nctx; /* TODO get rid of this */
};

struct nir_to_llvm_context {
    struct ac_llvm_context ac;
    const struct ac_nir_compiler_options *options;
    struct ac_shader_variant_info *shader_info;
    struct ac_shader_abi abi;
    struct ac_nir_context *nir;

    unsigned max_workgroup_size;
    LLVMContextRef context;
    LLVMModuleRef module;
    LLVMBuilderRef builder;
    LLVMValueRef main_function;

    struct hash_table *defs;
    struct hash_table *phis;

    LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
    LLVMValueRef ring_offsets;
    LLVMValueRef push_constants;
    LLVMValueRef view_index;
    LLVMValueRef num_work_groups;
    LLVMValueRef workgroup_ids[3];
    LLVMValueRef local_invocation_ids;
    LLVMValueRef tg_size;

    LLVMValueRef vertex_buffers;
    LLVMValueRef rel_auto_id;
    LLVMValueRef vs_prim_id;
    LLVMValueRef ls_out_layout;
    LLVMValueRef es2gs_offset;

    LLVMValueRef tcs_offchip_layout;
    LLVMValueRef tcs_out_offsets;
    LLVMValueRef tcs_out_layout;
    LLVMValueRef tcs_in_layout;
    LLVMValueRef oc_lds;
    LLVMValueRef merged_wave_info;
    LLVMValueRef tess_factor_offset;
    LLVMValueRef tes_rel_patch_id;
    LLVMValueRef tes_u;
    LLVMValueRef tes_v;

    LLVMValueRef gsvs_ring_stride;
    LLVMValueRef gsvs_num_entries;
    LLVMValueRef gs2vs_offset;
    LLVMValueRef gs_wave_id;
    LLVMValueRef gs_vtx_offset[6];

    LLVMValueRef esgs_ring;
    LLVMValueRef gsvs_ring;
    LLVMValueRef hs_ring_tess_offchip;
    LLVMValueRef hs_ring_tess_factor;

    LLVMValueRef prim_mask;
    LLVMValueRef sample_pos_offset;
    LLVMValueRef persp_sample, persp_center, persp_centroid;
    LLVMValueRef linear_sample, linear_center, linear_centroid;

    gl_shader_stage stage;

    LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];

    uint64_t input_mask;
    uint64_t output_mask;
    uint8_t num_output_clips;
    uint8_t num_output_culls;

    bool is_gs_copy_shader;
    LLVMValueRef gs_next_vertex;
    unsigned gs_max_out_vertices;

    unsigned tes_primitive_mode;
    uint64_t tess_outputs_written;
    uint64_t tess_patch_outputs_written;

    uint32_t tcs_patch_outputs_read;
    uint64_t tcs_outputs_read;
};

static inline struct nir_to_llvm_context *
nir_to_llvm_context_from_abi(struct ac_shader_abi *abi)
{
    struct nir_to_llvm_context *ctx = NULL;
    return container_of(abi, ctx, abi);
}

static LLVMTypeRef
nir2llvmtype(struct ac_nir_context *ctx,
             const struct glsl_type *type)
{
    switch (glsl_get_base_type(glsl_without_array(type))) {
    case GLSL_TYPE_UINT:
    case GLSL_TYPE_INT:
        return ctx->ac.i32;
    case GLSL_TYPE_UINT64:
    case GLSL_TYPE_INT64:
        return ctx->ac.i64;
    case GLSL_TYPE_DOUBLE:
        return ctx->ac.f64;
    case GLSL_TYPE_FLOAT:
        return ctx->ac.f32;
    default:
        assert(!"Unsupported type in nir2llvmtype()");
        break;
    }
    return 0;
}

static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
                                     const nir_deref_var *deref,
                                     enum ac_descriptor_type desc_type,
                                     const nir_tex_instr *instr,
                                     bool image, bool write);

static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
    return (index * 4) + chan;
}

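/* Map a varying slot to a dense index so tess I/O can be laid out compactly.
 * Patch varyings (tess levels and per-patch slots) get their own index space
 * starting at 0; per-vertex varyings start at 0 for POS, so e.g.
 * VARYING_SLOT_VAR3 maps to 4 + 3 = 7.
 */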
193 {
194 /* handle patch indices separate */
195 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
196 return 0;
197 if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
198 return 1;
199 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
200 return 2 + (slot - VARYING_SLOT_PATCH0);
201
202 if (slot == VARYING_SLOT_POS)
203 return 0;
204 if (slot == VARYING_SLOT_PSIZ)
205 return 1;
206 if (slot == VARYING_SLOT_CLIP_DIST0)
207 return 2;
208 /* 3 is reserved for clip dist as well */
209 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
210 return 4 + (slot - VARYING_SLOT_VAR0);
211 unreachable("illegal slot in get unique index\n");
212 }

static void set_llvm_calling_convention(LLVMValueRef func,
                                        gl_shader_stage stage)
{
    enum radeon_llvm_calling_convention calling_conv;

    switch (stage) {
    case MESA_SHADER_VERTEX:
    case MESA_SHADER_TESS_EVAL:
        calling_conv = RADEON_LLVM_AMDGPU_VS;
        break;
    case MESA_SHADER_GEOMETRY:
        calling_conv = RADEON_LLVM_AMDGPU_GS;
        break;
    case MESA_SHADER_TESS_CTRL:
        calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
        break;
    case MESA_SHADER_FRAGMENT:
        calling_conv = RADEON_LLVM_AMDGPU_PS;
        break;
    case MESA_SHADER_COMPUTE:
        calling_conv = RADEON_LLVM_AMDGPU_CS;
        break;
    default:
        unreachable("Unhandled shader type");
    }

    LLVMSetFunctionCallConv(func, calling_conv);
}

#define MAX_ARGS 23
struct arg_info {
    LLVMTypeRef types[MAX_ARGS];
    LLVMValueRef *assign[MAX_ARGS];
    unsigned array_params_mask;
    uint8_t count;
    uint8_t sgpr_count;
    uint8_t num_sgprs_used;
    uint8_t num_vgprs_used;
};

enum ac_arg_regfile {
    ARG_SGPR,
    ARG_VGPR,
};

static void
add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type,
        LLVMValueRef *param_ptr)
{
    assert(info->count < MAX_ARGS);

    info->assign[info->count] = param_ptr;
    info->types[info->count] = type;
    info->count++;

    if (regfile == ARG_SGPR) {
        info->num_sgprs_used += ac_get_type_size(type) / 4;
        info->sgpr_count++;
    } else {
        assert(regfile == ARG_VGPR);
        info->num_vgprs_used += ac_get_type_size(type) / 4;
    }
}

static inline void
add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr)
{
    info->array_params_mask |= (1 << info->count);
    add_arg(info, ARG_SGPR, type, param_ptr);
}

static void assign_arguments(LLVMValueRef main_function,
                             struct arg_info *info)
{
    unsigned i;
    for (i = 0; i < info->count; i++) {
        if (info->assign[i])
            *info->assign[i] = LLVMGetParam(main_function, i);
    }
}

static LLVMValueRef
create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
                     LLVMBuilderRef builder, LLVMTypeRef *return_types,
                     unsigned num_return_elems,
                     struct arg_info *args,
                     unsigned max_workgroup_size,
                     bool unsafe_math)
{
    LLVMTypeRef main_function_type, ret_type;
    LLVMBasicBlockRef main_function_body;

    if (num_return_elems)
        ret_type = LLVMStructTypeInContext(ctx, return_types,
                                           num_return_elems, true);
    else
        ret_type = LLVMVoidTypeInContext(ctx);

    /* Set up the function. */
    main_function_type =
        LLVMFunctionType(ret_type, args->types, args->count, 0);
    LLVMValueRef main_function =
        LLVMAddFunction(module, "main", main_function_type);
    main_function_body =
        LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
    LLVMPositionBuilderAtEnd(builder, main_function_body);

    LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
    for (unsigned i = 0; i < args->sgpr_count; ++i) {
        ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);

        if (args->array_params_mask & (1 << i)) {
            LLVMValueRef P = LLVMGetParam(main_function, i);
            ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_NOALIAS);
            ac_add_attr_dereferenceable(P, UINT64_MAX);
        }
    }

    if (max_workgroup_size) {
        ac_llvm_add_target_dep_function_attr(main_function,
                                             "amdgpu-max-work-group-size",
                                             max_workgroup_size);
    }
    if (unsafe_math) {
        /* These were copied from some LLVM test. */
        LLVMAddTargetDependentFunctionAttr(main_function,
                                           "less-precise-fpmad",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(main_function,
                                           "no-infs-fp-math",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(main_function,
                                           "no-nans-fp-math",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(main_function,
                                           "unsafe-fp-math",
                                           "true");
        LLVMAddTargetDependentFunctionAttr(main_function,
                                           "no-signed-zeros-fp-math",
                                           "true");
    }
    return main_function;
}

static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
{
    return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
                           AC_CONST_ADDR_SPACE);
}

static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
{
    if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
        type = LLVMGetElementType(type);

    if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
        return LLVMGetIntTypeWidth(type);

    if (type == ctx->f16)
        return 16;
    if (type == ctx->f32)
        return 32;
    if (type == ctx->f64)
        return 64;

    unreachable("Unhandled type kind in get_elem_bits");
}

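/* Extract a (possibly shifted) bitfield from a 32-bit SGPR parameter.
 * For example, unpack_param(ctx, param, 16, 16) yields bits [31:16] and
 * unpack_param(ctx, param, 0, 13) is equivalent to param & 0x1fff.
 */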
static LLVMValueRef unpack_param(struct ac_llvm_context *ctx,
                                 LLVMValueRef param, unsigned rshift,
                                 unsigned bitwidth)
{
    LLVMValueRef value = param;
    if (rshift)
        value = LLVMBuildLShr(ctx->builder, value,
                              LLVMConstInt(ctx->i32, rshift, false), "");

    if (rshift + bitwidth < 32) {
        unsigned mask = (1 << bitwidth) - 1;
        value = LLVMBuildAnd(ctx->builder, value,
                             LLVMConstInt(ctx->i32, mask, false), "");
    }
    return value;
}

static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
{
    switch (ctx->stage) {
    case MESA_SHADER_TESS_CTRL:
        return unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
    case MESA_SHADER_TESS_EVAL:
        return ctx->tes_rel_patch_id;
    default:
        unreachable("Illegal stage");
    }
}

/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
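
/* A worked example of the layout above: with RelPatchID == 2, this patch's
 * outputs start at
 *     get_tcs_out_patch0_offset() + 2 * get_tcs_out_patch_stride(),
 * which is exactly what get_tcs_out_current_patch_offset() computes below.
 */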
static LLVMValueRef
get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
{
    if (ctx->stage == MESA_SHADER_VERTEX)
        return unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
    else if (ctx->stage == MESA_SHADER_TESS_CTRL)
        return unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
    else {
        assert(0);
        return NULL;
    }
}

static LLVMValueRef
get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
{
    return unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
}

static LLVMValueRef
get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
{
    return LLVMBuildMul(ctx->builder,
                        unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
                        LLVMConstInt(ctx->ac.i32, 4, false), "");
}

static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
{
    return LLVMBuildMul(ctx->builder,
                        unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
                        LLVMConstInt(ctx->ac.i32, 4, false), "");
}

static LLVMValueRef
get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
{
    LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
    LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

    return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
}

static LLVMValueRef
get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx)
{
    LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
    LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
    LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

    return LLVMBuildAdd(ctx->builder, patch0_offset,
                        LLVMBuildMul(ctx->builder, patch_stride,
                                     rel_patch_id, ""),
                        "");
}

static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx)
{
    LLVMValueRef patch0_patch_data_offset =
        get_tcs_out_patch0_patch_data_offset(ctx);
    LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
    LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

    return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset,
                        LLVMBuildMul(ctx->builder, patch_stride,
                                     rel_patch_id, ""),
                        "");
}

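/* Record where a piece of user data lives in the user SGPRs: note the current
 * SGPR index and size, then advance the running index, so the driver can
 * later emit the user data into the right registers.
 */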
static void
set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs,
        uint32_t indirect_offset)
{
    ud_info->sgpr_idx = *sgpr_idx;
    ud_info->num_sgprs = num_sgprs;
    ud_info->indirect = indirect_offset > 0;
    ud_info->indirect_offset = indirect_offset;
    *sgpr_idx += num_sgprs;
}

static void
set_loc_shader(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx,
               uint8_t num_sgprs)
{
    struct ac_userdata_info *ud_info =
        &ctx->shader_info->user_sgprs_locs.shader_data[idx];
    assert(ud_info);

    set_loc(ud_info, sgpr_idx, num_sgprs, 0);
}

static void
set_loc_desc(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx,
             uint32_t indirect_offset)
{
    struct ac_userdata_info *ud_info =
        &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
    assert(ud_info);

    set_loc(ud_info, sgpr_idx, 2, indirect_offset);
}

struct user_sgpr_info {
    bool need_ring_offsets;
    uint8_t sgpr_count;
    bool indirect_all_descriptor_sets;
};

static bool needs_view_index_sgpr(struct nir_to_llvm_context *ctx,
                                  gl_shader_stage stage)
{
    switch (stage) {
    case MESA_SHADER_VERTEX:
        if (ctx->shader_info->info.needs_multiview_view_index ||
            (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
            return true;
        break;
    case MESA_SHADER_TESS_EVAL:
        if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
            return true;
        break;
    case MESA_SHADER_GEOMETRY:
    case MESA_SHADER_TESS_CTRL:
        if (ctx->shader_info->info.needs_multiview_view_index)
            return true;
        break;
    default:
        break;
    }
    return false;
}

static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
                                gl_shader_stage stage,
                                bool needs_view_index,
                                struct user_sgpr_info *user_sgpr_info)
{
    memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));

    /* Until we sort out scratch/global buffers, always assign ring offsets for gs/vs/es. */
    if (stage == MESA_SHADER_GEOMETRY ||
        stage == MESA_SHADER_VERTEX ||
        stage == MESA_SHADER_TESS_CTRL ||
        stage == MESA_SHADER_TESS_EVAL ||
        ctx->is_gs_copy_shader)
        user_sgpr_info->need_ring_offsets = true;

    if (stage == MESA_SHADER_FRAGMENT &&
        ctx->shader_info->info.ps.needs_sample_positions)
        user_sgpr_info->need_ring_offsets = true;

    /* 2 user sgprs will nearly always be allocated for scratch/rings */
    if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
        user_sgpr_info->sgpr_count += 2;
    }

    /* FIXME: fix the number of user sgprs for merged shaders on GFX9 */
    switch (stage) {
    case MESA_SHADER_COMPUTE:
        if (ctx->shader_info->info.cs.uses_grid_size)
            user_sgpr_info->sgpr_count += 3;
        break;
    case MESA_SHADER_FRAGMENT:
        user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
        break;
    case MESA_SHADER_VERTEX:
        if (!ctx->is_gs_copy_shader) {
            user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
            if (ctx->shader_info->info.vs.needs_draw_id) {
                user_sgpr_info->sgpr_count += 3;
            } else {
                user_sgpr_info->sgpr_count += 2;
            }
        }
        if (ctx->options->key.vs.as_ls)
            user_sgpr_info->sgpr_count++;
        break;
    case MESA_SHADER_TESS_CTRL:
        user_sgpr_info->sgpr_count += 4;
        break;
    case MESA_SHADER_TESS_EVAL:
        user_sgpr_info->sgpr_count += 1;
        break;
    case MESA_SHADER_GEOMETRY:
        user_sgpr_info->sgpr_count += 2;
        break;
    default:
        break;
    }

    if (needs_view_index)
        user_sgpr_info->sgpr_count++;

    if (ctx->shader_info->info.loads_push_constants)
        user_sgpr_info->sgpr_count += 2;

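    /* Each descriptor set binding is a 64-bit pointer, i.e. 2 SGPRs. If the
     * used sets don't all fit in the remaining budget, fall back to a single
     * indirectly addressed table of sets (2 SGPRs total).
     */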
    uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
    uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;

    if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
        user_sgpr_info->sgpr_count += 2;
        user_sgpr_info->indirect_all_descriptor_sets = true;
    } else {
        user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
    }
}

static void
declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
                           gl_shader_stage stage,
                           bool has_previous_stage,
                           gl_shader_stage previous_stage,
                           const struct user_sgpr_info *user_sgpr_info,
                           struct arg_info *args,
                           LLVMValueRef *desc_sets)
{
    LLVMTypeRef type = const_array(ctx->ac.i8, 1024 * 1024);
    unsigned num_sets = ctx->options->layout ?
                        ctx->options->layout->num_sets : 0;
    unsigned stage_mask = 1 << stage;

    if (has_previous_stage)
        stage_mask |= 1 << previous_stage;

    /* 1 for each descriptor set */
    if (!user_sgpr_info->indirect_all_descriptor_sets) {
        for (unsigned i = 0; i < num_sets; ++i) {
            if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
                add_array_arg(args, type,
                              &ctx->descriptor_sets[i]);
            }
        }
    } else {
        add_array_arg(args, const_array(type, 32), desc_sets);
    }

    if (ctx->shader_info->info.loads_push_constants) {
        /* 1 for push constants and dynamic descriptors */
        add_array_arg(args, type, &ctx->push_constants);
    }
}

static void
declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx,
                                gl_shader_stage stage,
                                bool has_previous_stage,
                                gl_shader_stage previous_stage,
                                struct arg_info *args)
{
    if (!ctx->is_gs_copy_shader &&
        (stage == MESA_SHADER_VERTEX ||
         (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
        if (ctx->shader_info->info.vs.has_vertex_buffers) {
            add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
                    &ctx->vertex_buffers);
        }
        add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
        add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance);
        if (ctx->shader_info->info.vs.needs_draw_id) {
            add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id);
        }
    }
}

static void
declare_vs_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
{
    add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id);
    if (!ctx->is_gs_copy_shader) {
        if (ctx->options->key.vs.as_ls) {
            add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id);
            add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
        } else {
            add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
            add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id);
        }
        add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */
    }
}

static void
declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
{
    add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u);
    add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v);
    add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id);
    add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id);
}

static void
set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage,
                      bool has_previous_stage, gl_shader_stage previous_stage,
                      const struct user_sgpr_info *user_sgpr_info,
                      LLVMValueRef desc_sets, uint8_t *user_sgpr_idx)
{
    unsigned num_sets = ctx->options->layout ?
                        ctx->options->layout->num_sets : 0;
    unsigned stage_mask = 1 << stage;

    if (has_previous_stage)
        stage_mask |= 1 << previous_stage;

    if (!user_sgpr_info->indirect_all_descriptor_sets) {
        for (unsigned i = 0; i < num_sets; ++i) {
            if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
                set_loc_desc(ctx, i, user_sgpr_idx, 0);
            } else
                ctx->descriptor_sets[i] = NULL;
        }
    } else {
        set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
                       user_sgpr_idx, 2);

        for (unsigned i = 0; i < num_sets; ++i) {
            if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
                set_loc_desc(ctx, i, user_sgpr_idx, i * 8);
                ctx->descriptor_sets[i] =
                    ac_build_load_to_sgpr(&ctx->ac,
                                          desc_sets,
                                          LLVMConstInt(ctx->ac.i32, i, false));

            } else
                ctx->descriptor_sets[i] = NULL;
        }
        ctx->shader_info->need_indirect_descriptor_sets = true;
    }

    if (ctx->shader_info->info.loads_push_constants) {
        set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
    }
}

static void
set_vs_specific_input_locs(struct nir_to_llvm_context *ctx,
                           gl_shader_stage stage, bool has_previous_stage,
                           gl_shader_stage previous_stage,
                           uint8_t *user_sgpr_idx)
{
    if (!ctx->is_gs_copy_shader &&
        (stage == MESA_SHADER_VERTEX ||
         (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
        if (ctx->shader_info->info.vs.has_vertex_buffers) {
            set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
                           user_sgpr_idx, 2);
        }

        unsigned vs_num = 2;
        if (ctx->shader_info->info.vs.needs_draw_id)
            vs_num++;

        set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
                       user_sgpr_idx, vs_num);
    }
}

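/* Declare the LLVM function for this shader stage. Argument order follows the
 * hardware ABI: the SGPR arguments (user data and system SGPRs) come first
 * and are marked inreg by create_llvm_function(), followed by the per-stage
 * VGPR system values.
 */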
static void create_function(struct nir_to_llvm_context *ctx,
                            gl_shader_stage stage,
                            bool has_previous_stage,
                            gl_shader_stage previous_stage)
{
    uint8_t user_sgpr_idx;
    struct user_sgpr_info user_sgpr_info;
    struct arg_info args = {};
    LLVMValueRef desc_sets;
    bool needs_view_index = needs_view_index_sgpr(ctx, stage);
    allocate_user_sgprs(ctx, stage, needs_view_index, &user_sgpr_info);

    if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
        add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
                &ctx->ring_offsets);
    }

    switch (stage) {
    case MESA_SHADER_COMPUTE:
        declare_global_input_sgprs(ctx, stage, has_previous_stage,
                                   previous_stage, &user_sgpr_info,
                                   &args, &desc_sets);

        if (ctx->shader_info->info.cs.uses_grid_size) {
            add_arg(&args, ARG_SGPR, ctx->ac.v3i32,
                    &ctx->num_work_groups);
        }

        for (int i = 0; i < 3; i++) {
            ctx->workgroup_ids[i] = NULL;
            if (ctx->shader_info->info.cs.uses_block_id[i]) {
                add_arg(&args, ARG_SGPR, ctx->ac.i32,
                        &ctx->workgroup_ids[i]);
            }
        }

        if (ctx->shader_info->info.cs.uses_local_invocation_idx)
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size);
        add_arg(&args, ARG_VGPR, ctx->ac.v3i32,
                &ctx->local_invocation_ids);
        break;
    case MESA_SHADER_VERTEX:
        declare_global_input_sgprs(ctx, stage, has_previous_stage,
                                   previous_stage, &user_sgpr_info,
                                   &args, &desc_sets);
        declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage,
                                        previous_stage, &args);

        if (needs_view_index)
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);
        if (ctx->options->key.vs.as_es)
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->es2gs_offset);
        else if (ctx->options->key.vs.as_ls)
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->ls_out_layout);

        declare_vs_input_vgprs(ctx, &args);
        break;
    case MESA_SHADER_TESS_CTRL:
        if (has_previous_stage) {
            // First 6 system regs
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->merged_wave_info);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tess_factor_offset);

            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown

            declare_global_input_sgprs(ctx, stage,
                                       has_previous_stage,
                                       previous_stage,
                                       &user_sgpr_info, &args,
                                       &desc_sets);
            declare_vs_specific_input_sgprs(ctx, stage,
                                            has_previous_stage,
                                            previous_stage, &args);

            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->ls_out_layout);

            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_offchip_layout);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_out_offsets);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_out_layout);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_in_layout);
            if (needs_view_index)
                add_arg(&args, ARG_SGPR, ctx->ac.i32,
                        &ctx->view_index);

            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.tcs_patch_id);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.tcs_rel_ids);

            declare_vs_input_vgprs(ctx, &args);
        } else {
            declare_global_input_sgprs(ctx, stage,
                                       has_previous_stage,
                                       previous_stage,
                                       &user_sgpr_info, &args,
                                       &desc_sets);

            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_offchip_layout);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_out_offsets);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_out_layout);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tcs_in_layout);
            if (needs_view_index)
                add_arg(&args, ARG_SGPR, ctx->ac.i32,
                        &ctx->view_index);

            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->tess_factor_offset);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.tcs_patch_id);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.tcs_rel_ids);
        }
        break;
    case MESA_SHADER_TESS_EVAL:
        declare_global_input_sgprs(ctx, stage, has_previous_stage,
                                   previous_stage, &user_sgpr_info,
                                   &args, &desc_sets);

        add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout);
        if (needs_view_index)
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);

        if (ctx->options->key.tes.as_es) {
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->es2gs_offset);
        } else {
            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
        }
        declare_tes_input_vgprs(ctx, &args);
        break;
    case MESA_SHADER_GEOMETRY:
        if (has_previous_stage) {
            // First 6 system regs
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->gs2vs_offset);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->merged_wave_info);
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);

            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
            add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown

            declare_global_input_sgprs(ctx, stage,
                                       has_previous_stage,
                                       previous_stage,
                                       &user_sgpr_info, &args,
                                       &desc_sets);

            if (previous_stage == MESA_SHADER_TESS_EVAL) {
                add_arg(&args, ARG_SGPR, ctx->ac.i32,
                        &ctx->tcs_offchip_layout);
            } else {
                declare_vs_specific_input_sgprs(ctx, stage,
                                                has_previous_stage,
                                                previous_stage,
                                                &args);
            }

            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->gsvs_ring_stride);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->gsvs_num_entries);
            if (needs_view_index)
                add_arg(&args, ARG_SGPR, ctx->ac.i32,
                        &ctx->view_index);

            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[0]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[2]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.gs_prim_id);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.gs_invocation_id);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[4]);

            if (previous_stage == MESA_SHADER_VERTEX) {
                declare_vs_input_vgprs(ctx, &args);
            } else {
                declare_tes_input_vgprs(ctx, &args);
            }
        } else {
            declare_global_input_sgprs(ctx, stage,
                                       has_previous_stage,
                                       previous_stage,
                                       &user_sgpr_info, &args,
                                       &desc_sets);

            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->gsvs_ring_stride);
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->gsvs_num_entries);
            if (needs_view_index)
                add_arg(&args, ARG_SGPR, ctx->ac.i32,
                        &ctx->view_index);

            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset);
            add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[0]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[1]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.gs_prim_id);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[2]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[3]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[4]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->gs_vtx_offset[5]);
            add_arg(&args, ARG_VGPR, ctx->ac.i32,
                    &ctx->abi.gs_invocation_id);
        }
        break;
    case MESA_SHADER_FRAGMENT:
        declare_global_input_sgprs(ctx, stage, has_previous_stage,
                                   previous_stage, &user_sgpr_info,
                                   &args, &desc_sets);

        if (ctx->shader_info->info.ps.needs_sample_positions)
            add_arg(&args, ARG_SGPR, ctx->ac.i32,
                    &ctx->sample_pos_offset);

        add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->prim_mask);
        add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample);
        add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center);
        add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid);
        add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */
        add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample);
        add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center);
        add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid);
        add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */
        add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]);
        add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]);
        add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]);
        add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]);
        add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face);
        add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary);
        add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage);
        add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */
        break;
    default:
        unreachable("Shader stage not implemented");
    }

    ctx->main_function = create_llvm_function(
        ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
        ctx->max_workgroup_size,
        ctx->options->unsafe_math);
    set_llvm_calling_convention(ctx->main_function, stage);

    ctx->shader_info->num_input_vgprs = 0;
    ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;

    ctx->shader_info->num_input_sgprs += args.num_sgprs_used;

    if (ctx->stage != MESA_SHADER_FRAGMENT)
        ctx->shader_info->num_input_vgprs = args.num_vgprs_used;

    assign_arguments(ctx->main_function, &args);

    user_sgpr_idx = 0;

    if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
        set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
                       &user_sgpr_idx, 2);
        if (ctx->options->supports_spill) {
            ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
                                                   LLVMPointerType(ctx->ac.i8, AC_CONST_ADDR_SPACE),
                                                   NULL, 0, AC_FUNC_ATTR_READNONE);
            ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
                                                 const_array(ctx->ac.v4i32, 16), "");
        }
    }

    /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front
     * (including the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart
     * the count from 0. */
    if (has_previous_stage)
        user_sgpr_idx = 0;

    set_global_input_locs(ctx, stage, has_previous_stage, previous_stage,
                          &user_sgpr_info, desc_sets, &user_sgpr_idx);

    switch (stage) {
    case MESA_SHADER_COMPUTE:
        if (ctx->shader_info->info.cs.uses_grid_size) {
            set_loc_shader(ctx, AC_UD_CS_GRID_SIZE,
                           &user_sgpr_idx, 3);
        }
        break;
    case MESA_SHADER_VERTEX:
        set_vs_specific_input_locs(ctx, stage, has_previous_stage,
                                   previous_stage, &user_sgpr_idx);
        if (ctx->view_index)
            set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
        if (ctx->options->key.vs.as_ls) {
            set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
                           &user_sgpr_idx, 1);
        }
        if (ctx->options->key.vs.as_ls)
            ac_declare_lds_as_pointer(&ctx->ac);
        break;
    case MESA_SHADER_TESS_CTRL:
        set_vs_specific_input_locs(ctx, stage, has_previous_stage,
                                   previous_stage, &user_sgpr_idx);
        if (has_previous_stage)
            set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
                           &user_sgpr_idx, 1);
        set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
        if (ctx->view_index)
            set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
        ac_declare_lds_as_pointer(&ctx->ac);
        break;
    case MESA_SHADER_TESS_EVAL:
        set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
        if (ctx->view_index)
            set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
        break;
    case MESA_SHADER_GEOMETRY:
        if (has_previous_stage) {
            if (previous_stage == MESA_SHADER_VERTEX)
                set_vs_specific_input_locs(ctx, stage,
                                           has_previous_stage,
                                           previous_stage,
                                           &user_sgpr_idx);
            else
                set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
                               &user_sgpr_idx, 1);
        }
        set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
                       &user_sgpr_idx, 2);
        if (ctx->view_index)
            set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
        if (has_previous_stage)
            ac_declare_lds_as_pointer(&ctx->ac);
        break;
    case MESA_SHADER_FRAGMENT:
        if (ctx->shader_info->info.ps.needs_sample_positions) {
            set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
                           &user_sgpr_idx, 1);
        }
        break;
    default:
        unreachable("Shader stage not implemented");
    }

    ctx->shader_info->num_user_sgprs = user_sgpr_idx;
}

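/* Return the first `count` components of `value`; e.g. trimming a vec4 down
 * to a vec2 emits a shufflevector with mask <0, 1>. A single-element result
 * becomes a scalar rather than a 1-wide vector.
 */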
static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
                                LLVMValueRef value, unsigned count)
{
    unsigned num_components = ac_get_llvm_num_components(value);
    if (count == num_components)
        return value;

    LLVMValueRef masks[] = {
        LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
        LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};

    if (count == 1)
        return LLVMBuildExtractElement(ctx->builder, value, masks[0],
                                       "");

    LLVMValueRef swizzle = LLVMConstVector(masks, count);
    return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
}

static void
build_store_values_extended(struct ac_llvm_context *ac,
                            LLVMValueRef *values,
                            unsigned value_count,
                            unsigned value_stride,
                            LLVMValueRef vec)
{
    LLVMBuilderRef builder = ac->builder;
    unsigned i;

    for (i = 0; i < value_count; i++) {
        LLVMValueRef ptr = values[i * value_stride];
        LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
        LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
        LLVMBuildStore(builder, value, ptr);
    }
}

static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
                                const nir_ssa_def *def)
{
    LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
    if (def->num_components > 1) {
        type = LLVMVectorType(type, def->num_components);
    }
    return type;
}

static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
{
    assert(src.is_ssa);
    struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
    return (LLVMValueRef)entry->data;
}

static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
                                   const struct nir_block *b)
{
    struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
    return (LLVMBasicBlockRef)entry->data;
}

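/* Fetch an ALU source and apply its NIR swizzle. If the requested components
 * are not simply the identity prefix of the source, emit an extract
 * (vector -> scalar), a broadcast (scalar -> vector) or a shufflevector as
 * appropriate. Negate/abs source modifiers are expected to have been lowered
 * away already.
 */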
static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
                                nir_alu_src src,
                                unsigned num_components)
{
    LLVMValueRef value = get_src(ctx, src.src);
    bool need_swizzle = false;

    assert(value);
    LLVMTypeRef type = LLVMTypeOf(value);
    unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
                              ? LLVMGetVectorSize(type)
                              : 1;

    for (unsigned i = 0; i < num_components; ++i) {
        assert(src.swizzle[i] < src_components);
        if (src.swizzle[i] != i)
            need_swizzle = true;
    }

    if (need_swizzle || num_components != src_components) {
        LLVMValueRef masks[] = {
            LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
            LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
            LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
            LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};

        if (src_components > 1 && num_components == 1) {
            value = LLVMBuildExtractElement(ctx->ac.builder, value,
                                            masks[0], "");
        } else if (src_components == 1 && num_components > 1) {
            LLVMValueRef values[] = {value, value, value, value};
            value = ac_build_gather_values(&ctx->ac, values, num_components);
        } else {
            LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
            value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
                                           swizzle, "");
        }
    }
    assert(!src.negate);
    assert(!src.abs);
    return value;
}

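/* Comparison results are consumed as 32-bit masks here: true is 0xffffffff
 * and false is 0, so the i1 produced by the compare is widened with a select
 * rather than a zext.
 */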
static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
                                 LLVMIntPredicate pred, LLVMValueRef src0,
                                 LLVMValueRef src1)
{
    LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
    return LLVMBuildSelect(ctx->builder, result,
                           LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
                           ctx->i32_0, "");
}

static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
                                   LLVMRealPredicate pred, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
    LLVMValueRef result;
    src0 = ac_to_float(ctx, src0);
    src1 = ac_to_float(ctx, src1);
    result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
    return LLVMBuildSelect(ctx->builder, result,
                           LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
                           ctx->i32_0, "");
}

static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
                                         const char *intrin,
                                         LLVMTypeRef result_type,
                                         LLVMValueRef src0)
{
    char name[64];
    LLVMValueRef params[] = {
        ac_to_float(ctx, src0),
    };

    MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
                                             get_elem_bits(ctx, result_type));
    assert(length < sizeof(name));
    return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
                                         const char *intrin,
                                         LLVMTypeRef result_type,
                                         LLVMValueRef src0, LLVMValueRef src1)
{
    char name[64];
    LLVMValueRef params[] = {
        ac_to_float(ctx, src0),
        ac_to_float(ctx, src1),
    };

    MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
                                             get_elem_bits(ctx, result_type));
    assert(length < sizeof(name));
    return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
                                         const char *intrin,
                                         LLVMTypeRef result_type,
                                         LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
    char name[64];
    LLVMValueRef params[] = {
        ac_to_float(ctx, src0),
        ac_to_float(ctx, src1),
        ac_to_float(ctx, src2),
    };

    MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
                                             get_elem_bits(ctx, result_type));
    assert(length < sizeof(name));
    return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
                               LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
    LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
                                   ctx->i32_0, "");
    return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
}

static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
                                    LLVMIntPredicate pred,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
    return LLVMBuildSelect(ctx->builder,
                           LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
                           src0,
                           src1, "");
}

static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
                              LLVMValueRef src0)
{
    return emit_minmax_int(ctx, LLVMIntSGT, src0,
                           LLVMBuildNeg(ctx->builder, src0, ""));
}

static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx,
                               LLVMValueRef src0,
                               unsigned bitsize)
{
    LLVMValueRef cmp, val, zero, one;
    LLVMTypeRef type;

    if (bitsize == 32) {
        type = ctx->f32;
        zero = ctx->f32_0;
        one = ctx->f32_1;
    } else {
        type = ctx->f64;
        zero = ctx->f64_0;
        one = ctx->f64_1;
    }

    cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
    val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
    cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
    val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), "");
    return val;
}

static LLVMValueRef emit_isign(struct ac_llvm_context *ctx,
                               LLVMValueRef src0, unsigned bitsize)
{
    LLVMValueRef cmp, val, zero, one;
    LLVMTypeRef type;

    if (bitsize == 32) {
        type = ctx->i32;
        zero = ctx->i32_0;
        one = ctx->i32_1;
    } else {
        type = ctx->i64;
        zero = ctx->i64_0;
        one = ctx->i64_1;
    }

    cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, zero, "");
    val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
    cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, zero, "");
    val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(type, -1, true), "");
    return val;
}

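/* fract(x) = x - floor(x), e.g. fract(1.25) = 0.25. */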
static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx,
                                LLVMValueRef src0)
{
    const char *intr = "llvm.floor.f32";
    LLVMValueRef fsrc0 = ac_to_float(ctx, src0);
    LLVMValueRef params[] = {
        fsrc0,
    };
    LLVMValueRef floor = ac_build_intrinsic(ctx, intr,
                                            ctx->f32, params, 1,
                                            AC_FUNC_ATTR_READNONE);
    return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
}

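/* Build a call to an add/sub-with-overflow style intrinsic (which returns an
 * {i32, i1} pair) and hand back only the carry/borrow bit, zero-extended
 * to i32.
 */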
static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
                                    const char *intrin,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
    LLVMTypeRef ret_type;
    LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
    LLVMValueRef res;
    LLVMValueRef params[] = { src0, src1 };
    ret_type = LLVMStructTypeInContext(ctx->context, types,
                                       2, true);

    res = ac_build_intrinsic(ctx, intrin, ret_type,
                             params, 2, AC_FUNC_ATTR_READNONE);

    res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
    res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
    return res;
}

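/* Booleans are 0 or ~0, so ANDing with the bit pattern of 1.0f (0x3f800000)
 * turns them directly into 0.0f or 1.0f.
 */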
static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
                             LLVMValueRef src0)
{
    return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
}

static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
                             LLVMValueRef src0)
{
    src0 = ac_to_float(ctx, src0);
    return LLVMBuildSExt(ctx->builder,
                         LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""),
                         ctx->i32, "");
}

static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
                             LLVMValueRef src0,
                             unsigned bitsize)
{
    LLVMValueRef result = LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");

    if (bitsize == 32)
        return result;

    return LLVMBuildZExt(ctx->builder, result, ctx->i64, "");
}

static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
                             LLVMValueRef src0)
{
    return LLVMBuildSExt(ctx->builder,
                         LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""),
                         ctx->i32, "");
}

static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
                               LLVMValueRef src0)
{
    LLVMValueRef result;
    LLVMValueRef cond = NULL;

    src0 = ac_to_float(&ctx->ac, src0);
    result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->ac.f16, "");

    if (ctx->options->chip_class >= VI) {
        LLVMValueRef args[2];
        /* Check if the result is a denormal - and flush to 0 if so. */
        args[0] = result;
        args[1] = LLVMConstInt(ctx->ac.i32, N_SUBNORMAL | P_SUBNORMAL, false);
        cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->ac.i1, args, 2, AC_FUNC_ATTR_READNONE);
    }

    /* need to convert back up to f32 */
    result = LLVMBuildFPExt(ctx->builder, result, ctx->ac.f32, "");

    if (ctx->options->chip_class >= VI)
        result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, "");
    else {
        /* for SI/CIK */
        /* 0x38800000 is the smallest normalized half-float value (2^-14) as a
         * 32-bit float, so compare the result and flush to 0 if it's smaller.
         */
        LLVMValueRef temp, cond2;
        temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
                                    ctx->ac.f32, result);
        cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
                             LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->ac.i32, 0x38800000, false), ctx->ac.f32, ""),
                             temp, "");
        cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
                              temp, ctx->ac.f32_0, "");
        cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
        result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, "");
    }
    return result;
}

static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
                                   LLVMValueRef src0, LLVMValueRef src1)
{
    LLVMValueRef dst64, result;
    src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
    src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");

    dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
    dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
    result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
    return result;
}

static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
                                   LLVMValueRef src0, LLVMValueRef src1)
{
    LLVMValueRef dst64, result;
    src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
    src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

    dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
    dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
    result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
    return result;
}

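/* Note: the hardware BFE presumably only behaves for widths < 32 (the width
 * operand is effectively taken modulo 32), so a full 32-bit extract is
 * special-cased to return src0 unchanged.
 */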
static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
                                          bool is_signed,
                                          const LLVMValueRef srcs[3])
{
    LLVMValueRef result;
    LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");

    result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
    result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
    return result;
}

static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
                                         LLVMValueRef src0, LLVMValueRef src1,
                                         LLVMValueRef src2, LLVMValueRef src3)
{
    LLVMValueRef bfi_args[3], result;

    bfi_args[0] = LLVMBuildShl(ctx->builder,
                               LLVMBuildSub(ctx->builder,
                                            LLVMBuildShl(ctx->builder,
                                                         ctx->i32_1,
                                                         src3, ""),
                                            ctx->i32_1, ""),
                               src2, "");
    bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
    bfi_args[2] = src0;

    LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");

    /* Calculate:
     * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
     * Use the right-hand side, which the LLVM backend can convert to V_BFI.
     */
    result = LLVMBuildXor(ctx->builder, bfi_args[2],
                          LLVMBuildAnd(ctx->builder, bfi_args[0],
                                       LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");

    result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
    return result;
}

static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
                                        LLVMValueRef src0)
{
    LLVMValueRef comp[2];

    src0 = ac_to_float(ctx, src0);
    comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
    comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");

    return ac_build_cvt_pkrtz_f16(ctx, comp);
}

static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
                                          LLVMValueRef src0)
{
    LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
    LLVMValueRef temps[2], result, val;
    int i;

    for (i = 0; i < 2; i++) {
        val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
        val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
        val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
        temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
    }

    result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
                                    ctx->i32_0, "");
    result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
                                    ctx->i32_1, "");
    return result;
}

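/* Compute a screen-space derivative within the 2x2 pixel quad: `mask` picks
 * which lane of the quad acts as the reference, and `idx` selects the
 * horizontal (1) or vertical (2) neighbor to difference against; the actual
 * lane shuffling happens in ac_build_ddxy().
 */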
static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
                              nir_op op,
                              LLVMValueRef src0)
{
    unsigned mask;
    int idx;
    LLVMValueRef result;

    if (op == nir_op_fddx_fine || op == nir_op_fddx)
        mask = AC_TID_MASK_LEFT;
    else if (op == nir_op_fddy_fine || op == nir_op_fddy)
        mask = AC_TID_MASK_TOP;
    else
        mask = AC_TID_MASK_TOP_LEFT;

    /* For DDX we want the next X pixel, for DDY the next Y pixel. */
    if (op == nir_op_fddx_fine ||
        op == nir_op_fddx_coarse ||
        op == nir_op_fddx)
        idx = 1;
    else
        idx = 2;

    result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
    return result;
}

/*
 * This takes an I,J coordinate pair and works out the X and Y derivatives.
 * It returns DDX(I), DDX(J), DDY(I), DDY(J).
 */
static LLVMValueRef emit_ddxy_interp(
    struct ac_nir_context *ctx,
    LLVMValueRef interp_ij)
{
    LLVMValueRef result[4], a;
    unsigned i;

    for (i = 0; i < 2; i++) {
        a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
                                    LLVMConstInt(ctx->ac.i32, i, false), "");
        result[i] = emit_ddxy(ctx, nir_op_fddx, a);
        result[2 + i] = emit_ddxy(ctx, nir_op_fddy, a);
    }
    return ac_build_gather_values(&ctx->ac, result, 4);
}

static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
{
    LLVMValueRef src[4], result = NULL;
    unsigned num_components = instr->dest.dest.ssa.num_components;
    unsigned src_components;
    LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

    assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
    switch (instr->op) {
    case nir_op_vec2:
    case nir_op_vec3:
    case nir_op_vec4:
        src_components = 1;
        break;
    case nir_op_pack_half_2x16:
        src_components = 2;
        break;
    case nir_op_unpack_half_2x16:
        src_components = 1;
        break;
    default:
        src_components = num_components;
        break;
    }
    for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
        src[i] = get_alu_src(ctx, instr->src[i], src_components);

    switch (instr->op) {
    case nir_op_fmov:
    case nir_op_imov:
        result = src[0];
        break;
    case nir_op_fneg:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
        break;
    case nir_op_ineg:
        result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
        break;
    case nir_op_inot:
        result = LLVMBuildNot(ctx->ac.builder, src[0], "");
        break;
    case nir_op_iadd:
        result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_fadd:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        src[1] = ac_to_float(&ctx->ac, src[1]);
        result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_fsub:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        src[1] = ac_to_float(&ctx->ac, src[1]);
        result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_isub:
        result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_imul:
        result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_imod:
        result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_umod:
        result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_fmod:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        src[1] = ac_to_float(&ctx->ac, src[1]);
        result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
        result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
                                      ac_to_float_type(&ctx->ac, def_type), result);
        result = LLVMBuildFMul(ctx->ac.builder, src[1], result, "");
        result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
        break;
    case nir_op_frem:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        src[1] = ac_to_float(&ctx->ac, src[1]);
        result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_irem:
        result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_idiv:
        result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_udiv:
        result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_fmul:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        src[1] = ac_to_float(&ctx->ac, src[1]);
        result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_fdiv:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        src[1] = ac_to_float(&ctx->ac, src[1]);
        result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
        break;
    case nir_op_frcp:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
                               src[0]);
        break;
    case nir_op_iand:
        result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_ior:
        result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_ixor:
        result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
        break;
    case nir_op_ishl:
        result = LLVMBuildShl(ctx->ac.builder, src[0],
                              LLVMBuildZExt(ctx->ac.builder, src[1],
                                            LLVMTypeOf(src[0]), ""),
                              "");
        break;
    case nir_op_ishr:
        result = LLVMBuildAShr(ctx->ac.builder, src[0],
                               LLVMBuildZExt(ctx->ac.builder, src[1],
                                             LLVMTypeOf(src[0]), ""),
                               "");
        break;
    case nir_op_ushr:
        result = LLVMBuildLShr(ctx->ac.builder, src[0],
                               LLVMBuildZExt(ctx->ac.builder, src[1],
                                             LLVMTypeOf(src[0]), ""),
                               "");
        break;
    case nir_op_ilt:
        result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
        break;
    case nir_op_ine:
        result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
        break;
    case nir_op_ieq:
        result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
        break;
    case nir_op_ige:
        result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
        break;
    case nir_op_ult:
        result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
        break;
    case nir_op_uge:
        result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
        break;
    case nir_op_feq:
        result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
        break;
    case nir_op_fne:
        result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
        break;
    case nir_op_flt:
        result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
        break;
    case nir_op_fge:
        result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
        break;
    case nir_op_fabs:
        result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
                                      ac_to_float_type(&ctx->ac, def_type), src[0]);
        break;
    case nir_op_iabs:
        result = emit_iabs(&ctx->ac, src[0]);
        break;
    case nir_op_imax:
        result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
        break;
    case nir_op_imin:
        result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
        break;
    case nir_op_umax:
        result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
        break;
    case nir_op_umin:
        result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
        break;
    case nir_op_isign:
        result = emit_isign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
        break;
    case nir_op_fsign:
        src[0] = ac_to_float(&ctx->ac, src[0]);
        result = emit_fsign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
        break;
    case nir_op_ffloor:
        result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1862 ac_to_float_type(&ctx->ac, def_type), src[0]);
1863 break;
1864 case nir_op_ftrunc:
1865 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
1866 ac_to_float_type(&ctx->ac, def_type), src[0]);
1867 break;
1868 case nir_op_fceil:
1869 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
1870 ac_to_float_type(&ctx->ac, def_type), src[0]);
1871 break;
1872 case nir_op_fround_even:
1873 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
1874 ac_to_float_type(&ctx->ac, def_type), src[0]);
1875 break;
1876 case nir_op_ffract:
1877 result = emit_ffract(&ctx->ac, src[0]);
1878 break;
1879 case nir_op_fsin:
1880 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
1881 ac_to_float_type(&ctx->ac, def_type), src[0]);
1882 break;
1883 case nir_op_fcos:
1884 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
1885 ac_to_float_type(&ctx->ac, def_type), src[0]);
1886 break;
1887 case nir_op_fsqrt:
1888 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1889 ac_to_float_type(&ctx->ac, def_type), src[0]);
1890 break;
1891 case nir_op_fexp2:
1892 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
1893 ac_to_float_type(&ctx->ac, def_type), src[0]);
1894 break;
1895 case nir_op_flog2:
1896 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
1897 ac_to_float_type(&ctx->ac, def_type), src[0]);
1898 break;
1899 case nir_op_frsq:
1900 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1901 ac_to_float_type(&ctx->ac, def_type), src[0]);
1902 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
1903 result);
1904 break;
1905 case nir_op_fpow:
1906 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
1907 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1908 break;
1909 case nir_op_fmax:
1910 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1911 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1912 if (ctx->ac.chip_class < GFX9 &&
1913 instr->dest.dest.ssa.bit_size == 32) {
1914 /* Only pre-GFX9 chips do not flush denorms. */
1915 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1916 ac_to_float_type(&ctx->ac, def_type),
1917 result);
1918 }
1919 break;
1920 case nir_op_fmin:
1921 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1922 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1923 if (ctx->ac.chip_class < GFX9 &&
1924 instr->dest.dest.ssa.bit_size == 32) {
1925 /* Only pre-GFX9 chips do not flush denorms. */
1926 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1927 ac_to_float_type(&ctx->ac, def_type),
1928 result);
1929 }
1930 break;
1931 case nir_op_ffma:
1932 result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
1933 ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
1934 break;
1935 case nir_op_ibitfield_extract:
1936 result = emit_bitfield_extract(&ctx->ac, true, src);
1937 break;
1938 case nir_op_ubitfield_extract:
1939 result = emit_bitfield_extract(&ctx->ac, false, src);
1940 break;
1941 case nir_op_bitfield_insert:
1942 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
1943 break;
1944 case nir_op_bitfield_reverse:
1945 result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1946 break;
1947 case nir_op_bit_count:
1948 result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1949 break;
1950 case nir_op_vec2:
1951 case nir_op_vec3:
1952 case nir_op_vec4:
1953 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1954 src[i] = ac_to_integer(&ctx->ac, src[i]);
1955 result = ac_build_gather_values(&ctx->ac, src, num_components);
1956 break;
1957 case nir_op_f2i32:
1958 case nir_op_f2i64:
1959 src[0] = ac_to_float(&ctx->ac, src[0]);
1960 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
1961 break;
1962 case nir_op_f2u32:
1963 case nir_op_f2u64:
1964 src[0] = ac_to_float(&ctx->ac, src[0]);
1965 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
1966 break;
1967 case nir_op_i2f32:
1968 case nir_op_i2f64:
1969 src[0] = ac_to_integer(&ctx->ac, src[0]);
1970 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1971 break;
1972 case nir_op_u2f32:
1973 case nir_op_u2f64:
1974 src[0] = ac_to_integer(&ctx->ac, src[0]);
1975 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1976 break;
1977 case nir_op_f2f64:
1978 src[0] = ac_to_float(&ctx->ac, src[0]);
1979 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1980 break;
1981 case nir_op_f2f32:
1982 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1983 break;
1984 case nir_op_u2u32:
1985 case nir_op_u2u64:
1986 src[0] = ac_to_integer(&ctx->ac, src[0]);
1987 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1988 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1989 else
1990 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1991 break;
1992 case nir_op_i2i32:
1993 case nir_op_i2i64:
1994 src[0] = ac_to_integer(&ctx->ac, src[0]);
1995 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1996 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
1997 else
1998 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1999 break;
2000 case nir_op_bcsel:
2001 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
2002 break;
2003 case nir_op_find_lsb:
2004 src[0] = ac_to_integer(&ctx->ac, src[0]);
2005 result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
2006 break;
2007 case nir_op_ufind_msb:
2008 src[0] = ac_to_integer(&ctx->ac, src[0]);
2009 result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
2010 break;
2011 case nir_op_ifind_msb:
2012 src[0] = ac_to_integer(&ctx->ac, src[0]);
2013 result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
2014 break;
2015 case nir_op_uadd_carry:
2016 src[0] = ac_to_integer(&ctx->ac, src[0]);
2017 src[1] = ac_to_integer(&ctx->ac, src[1]);
2018 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
2019 break;
2020 case nir_op_usub_borrow:
2021 src[0] = ac_to_integer(&ctx->ac, src[0]);
2022 src[1] = ac_to_integer(&ctx->ac, src[1]);
2023 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
2024 break;
2025 case nir_op_b2f:
2026 result = emit_b2f(&ctx->ac, src[0]);
2027 break;
2028 case nir_op_f2b:
2029 result = emit_f2b(&ctx->ac, src[0]);
2030 break;
2031 case nir_op_b2i:
2032 result = emit_b2i(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
2033 break;
2034 case nir_op_i2b:
2035 src[0] = ac_to_integer(&ctx->ac, src[0]);
2036 result = emit_i2b(&ctx->ac, src[0]);
2037 break;
2038 case nir_op_fquantize2f16:
2039 result = emit_f2f16(ctx->nctx, src[0]);
2040 break;
2041 case nir_op_umul_high:
2042 src[0] = ac_to_integer(&ctx->ac, src[0]);
2043 src[1] = ac_to_integer(&ctx->ac, src[1]);
2044 result = emit_umul_high(&ctx->ac, src[0], src[1]);
2045 break;
2046 case nir_op_imul_high:
2047 src[0] = ac_to_integer(&ctx->ac, src[0]);
2048 src[1] = ac_to_integer(&ctx->ac, src[1]);
2049 result = emit_imul_high(&ctx->ac, src[0], src[1]);
2050 break;
2051 case nir_op_pack_half_2x16:
2052 result = emit_pack_half_2x16(&ctx->ac, src[0]);
2053 break;
2054 case nir_op_unpack_half_2x16:
2055 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
2056 break;
2057 case nir_op_fddx:
2058 case nir_op_fddy:
2059 case nir_op_fddx_fine:
2060 case nir_op_fddy_fine:
2061 case nir_op_fddx_coarse:
2062 case nir_op_fddy_coarse:
2063 result = emit_ddxy(ctx, instr->op, src[0]);
2064 break;
2065
2066 case nir_op_unpack_64_2x32_split_x: {
2067 assert(instr->src[0].src.ssa->num_components == 1);
2068 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
2069 ctx->ac.v2i32,
2070 "");
2071 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
2072 ctx->ac.i32_0, "");
2073 break;
2074 }
2075
2076 case nir_op_unpack_64_2x32_split_y: {
2077 assert(instr->src[0].src.ssa->num_components == 1);
2078 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
2079 ctx->ac.v2i32,
2080 "");
2081 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
2082 ctx->ac.i32_1, "");
2083 break;
2084 }
2085
2086 case nir_op_pack_64_2x32_split: {
2087 LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
2088 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
2089 src[0], ctx->ac.i32_0, "");
2090 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
2091 src[1], ctx->ac.i32_1, "");
2092 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
2093 break;
2094 }
2095
2096 default:
2097 fprintf(stderr, "Unknown NIR alu instr: ");
2098 nir_print_instr(&instr->instr, stderr);
2099 fprintf(stderr, "\n");
2100 abort();
2101 }
2102
2103 if (result) {
2104 assert(instr->dest.dest.is_ssa);
2105 result = ac_to_integer(&ctx->ac, result);
2106 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
2107 result);
2108 }
2109 }
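/* Note (editorial summary, not in the original source): every ALU result is
 * stored in the defs table in integer form via ac_to_integer() above;
 * consumers bitcast back to float as needed, which keeps the SSA-def hash
 * table type-uniform.
 */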
2110
2111 static void visit_load_const(struct ac_nir_context *ctx,
2112 const nir_load_const_instr *instr)
2113 {
2114 LLVMValueRef values[4], value = NULL;
2115 LLVMTypeRef element_type =
2116 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
2117
2118 for (unsigned i = 0; i < instr->def.num_components; ++i) {
2119 switch (instr->def.bit_size) {
2120 case 32:
2121 values[i] = LLVMConstInt(element_type,
2122 instr->value.u32[i], false);
2123 break;
2124 case 64:
2125 values[i] = LLVMConstInt(element_type,
2126 instr->value.u64[i], false);
2127 break;
2128 default:
2129 fprintf(stderr,
2130 "unsupported nir load_const bit_size: %d\n",
2131 instr->def.bit_size);
2132 abort();
2133 }
2134 }
2135 if (instr->def.num_components > 1) {
2136 value = LLVMConstVector(values, instr->def.num_components);
2137 } else
2138 value = values[0];
2139
2140 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
2141 }
2142
2143 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
2144 LLVMTypeRef type)
2145 {
2146 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2147 return LLVMBuildBitCast(ctx->builder, ptr,
2148 LLVMPointerType(type, addr_space), "");
2149 }
2150
2151 static LLVMValueRef
2152 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
2153 {
2154 LLVMValueRef size =
2155 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
2156 LLVMConstInt(ctx->ac.i32, 2, false), "");
2157
2158 /* VI only */
2159 if (ctx->ac.chip_class == VI && in_elements) {
2160 /* On VI, the descriptor contains the size in bytes,
2161 * but TXQ must return the size in elements.
2162 * The stride is always non-zero for resources using TXQ.
2163 */
2164 LLVMValueRef stride =
2165 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
2166 ctx->ac.i32_1, "");
2167 stride = LLVMBuildLShr(ctx->ac.builder, stride,
2168 LLVMConstInt(ctx->ac.i32, 16, false), "");
2169 stride = LLVMBuildAnd(ctx->ac.builder, stride,
2170 LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
2171
2172 size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
2173 }
2174 return size;
2175 }
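/* Illustrative example (assumed values, not from the original source): on
 * VI, a descriptor for a buffer of 64 RGBA32F texels holds
 * size = 64 * 16 = 1024 bytes with stride = 16, so the element-count path
 * above returns 1024 / 16 = 64 for TXQ.
 */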
2176
2177 /**
2178 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
2179 * intrinsic names).
2180 */
2181 static void build_int_type_name(
2182 LLVMTypeRef type,
2183 char *buf, unsigned bufsize)
2184 {
2185 assert(bufsize >= 6);
2186
2187 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
2188 snprintf(buf, bufsize, "v%ui32",
2189 LLVMGetVectorSize(type));
2190 else
2191 strcpy(buf, "i32");
2192 }
2193
2194 static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx,
2195 struct ac_image_args *args,
2196 const nir_tex_instr *instr)
2197 {
2198 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2199 LLVMValueRef coord = args->addr;
2200 LLVMValueRef half_texel[2];
2201 LLVMValueRef compare_cube_wa = NULL;
2202 LLVMValueRef result;
2203 int c;
2204 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
2205
2206 //TODO Rect
2207 {
2208 struct ac_image_args txq_args = { 0 };
2209
2210 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
2211 txq_args.opcode = ac_image_get_resinfo;
2212 txq_args.dmask = 0xf;
2213 txq_args.addr = ctx->i32_0;
2214 txq_args.resource = args->resource;
2215 LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
2216
2217 for (c = 0; c < 2; c++) {
2218 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
2219 LLVMConstInt(ctx->i32, c, false), "");
2220 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
2221 half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
2222 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
2223 LLVMConstReal(ctx->f32, -0.5), "");
2224 }
2225 }
2226
2227 LLVMValueRef orig_coords = args->addr;
2228
2229 for (c = 0; c < 2; c++) {
2230 LLVMValueRef tmp;
2231 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
2232 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
2233 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2234 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
2235 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2236 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
2237 }
2238
2240 /*
2241 * Apparently cube gathers have an issue with integer types that the
2242 * workaround doesn't solve, so if the format is 8_8_8_8 and the result
2243 * is an integer type, do an alternate workaround: sample using a scaled
2244 * type and convert back. This is taken from amdgpu-pro shaders.
2245 */
2246 /* NOTE: this produces some ugly code compared to amdgpu-pro.
2247 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
2248 * and then reads them back. -pro generates two selects:
2249 * one s_cmp for the descriptor rewriting,
2250 * one v_cmp for the coordinate and result changes.
2251 */
2252 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2253 LLVMValueRef tmp, tmp2;
2254
2255 /* workaround 8/8/8/8 uint/sint cube gather bug */
2256 /* first detect it then change to a scaled read and f2i */
2257 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
2258 tmp2 = tmp;
2259
2260 /* extract the DATA_FORMAT */
2261 tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
2262 LLVMConstInt(ctx->i32, 6, false), false);
2263
2264 /* is the DATA_FORMAT == 8_8_8_8 */
2265 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
2266
2267 if (stype == GLSL_TYPE_UINT)
2268 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
2269 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
2270 LLVMConstInt(ctx->i32, 0x10000000, false), "");
2271 else
2272 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
2273 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
2274 LLVMConstInt(ctx->i32, 0x14000000, false), "");
2275
2276 /* replace the NUM FORMAT in the descriptor */
2277 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
2278 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
2279
2280 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
2281
2282 /* don't modify the coordinates for this case */
2283 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
2284 }
2285 args->addr = coord;
2286 result = ac_build_image_opcode(ctx, args);
2287
2288 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2289 LLVMValueRef tmp, tmp2;
2290
2291 /* if the cube workaround is in place, f2i the result. */
2292 for (c = 0; c < 4; c++) {
2293 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
2294 if (stype == GLSL_TYPE_UINT)
2295 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
2296 else
2297 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
2298 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2299 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
2300 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
2301 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2302 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
2303 }
2304 }
2305 return result;
2306 }
2307
2308 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
2309 const nir_tex_instr *instr,
2310 bool lod_is_zero,
2311 struct ac_image_args *args)
2312 {
2313 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
2314 unsigned mask = nir_ssa_def_components_read(&instr->dest.ssa);
2315
2316 return ac_build_buffer_load_format(&ctx->ac,
2317 args->resource,
2318 args->addr,
2319 ctx->ac.i32_0,
2320 util_last_bit(mask),
2321 true);
2322 }
2323
2324 args->opcode = ac_image_sample;
2325 args->compare = instr->is_shadow;
2326
2327 switch (instr->op) {
2328 case nir_texop_txf:
2329 case nir_texop_txf_ms:
2330 case nir_texop_samples_identical:
2331 args->opcode = lod_is_zero ||
2332 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
2333 ac_image_load : ac_image_load_mip;
2334 args->compare = false;
2335 args->offset = false;
2336 break;
2337 case nir_texop_txb:
2338 args->bias = true;
2339 break;
2340 case nir_texop_txl:
2341 if (lod_is_zero)
2342 args->level_zero = true;
2343 else
2344 args->lod = true;
2345 break;
2346 case nir_texop_txs:
2347 case nir_texop_query_levels:
2348 args->opcode = ac_image_get_resinfo;
2349 break;
2350 case nir_texop_tex:
2351 if (ctx->stage != MESA_SHADER_FRAGMENT)
2352 args->level_zero = true;
2353 break;
2354 case nir_texop_txd:
2355 args->deriv = true;
2356 break;
2357 case nir_texop_tg4:
2358 args->opcode = ac_image_gather4;
2359 args->level_zero = true;
2360 break;
2361 case nir_texop_lod:
2362 args->opcode = ac_image_get_lod;
2363 args->compare = false;
2364 args->offset = false;
2365 break;
2366 default:
2367 break;
2368 }
2369
2370 if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
2371 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2372 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
2373 return radv_lower_gather4_integer(&ctx->ac, args, instr);
2374 }
2375 }
2376 return ac_build_image_opcode(&ctx->ac, args);
2377 }
2378
2379 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
2380 nir_intrinsic_instr *instr)
2381 {
2382 LLVMValueRef index = get_src(ctx->nir, instr->src[0]);
2383 unsigned desc_set = nir_intrinsic_desc_set(instr);
2384 unsigned binding = nir_intrinsic_binding(instr);
2385 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
2386 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
2387 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
2388 unsigned base_offset = layout->binding[binding].offset;
2389 LLVMValueRef offset, stride;
2390
2391 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2392 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2393 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
2394 layout->binding[binding].dynamic_offset_offset;
2395 desc_ptr = ctx->push_constants;
2396 base_offset = pipeline_layout->push_constant_size + 16 * idx;
2397 stride = LLVMConstInt(ctx->ac.i32, 16, false);
2398 } else
2399 stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
2400
2401 offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
2402 index = LLVMBuildMul(ctx->builder, index, stride, "");
2403 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
2404
2405 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
2406 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->ac.v4i32);
2407 LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
2408
2409 return desc_ptr;
2410 }
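/* Sketch of the addressing above (editorial summary): for regular bindings
 * the result points at descriptor_sets[desc_set] + binding_offset +
 * index * binding_size, viewed as a v4i32 descriptor; dynamic
 * uniform/storage buffers are redirected into the push-constant area
 * instead, with a fixed 16-byte descriptor stride per dynamic offset slot.
 */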
2411
2412 static LLVMValueRef visit_vulkan_resource_reindex(struct nir_to_llvm_context *ctx,
2413 nir_intrinsic_instr *instr)
2414 {
2415 LLVMValueRef ptr = get_src(ctx->nir, instr->src[0]);
2416 LLVMValueRef index = get_src(ctx->nir, instr->src[1]);
2417
2418 LLVMValueRef result = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2419 LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
2420 return result;
2421 }
2422
2423 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
2424 nir_intrinsic_instr *instr)
2425 {
2426 LLVMValueRef ptr, addr;
2427
2428 addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
2429 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), "");
2430
2431 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
2432 ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
2433
2434 return LLVMBuildLoad(ctx->builder, ptr, "");
2435 }
2436
2437 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
2438 const nir_intrinsic_instr *instr)
2439 {
2440 LLVMValueRef index = get_src(ctx, instr->src[0]);
2441
2442 return get_buffer_size(ctx, ctx->abi->load_ssbo(ctx->abi, index, false), false);
2443 }
2444
2445 static uint32_t widen_mask(uint32_t mask, unsigned multiplier)
2446 {
2447 uint32_t new_mask = 0;
2448 for (unsigned i = 0; i < 32 && (1u << i) <= mask; ++i)
2449 if (mask & (1u << i))
2450 new_mask |= ((1u << multiplier) - 1u) << (i * multiplier);
2451 return new_mask;
2452 }
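/* Worked example (illustration only): widen_mask(0x5, 2) == 0x33, i.e. each
 * set bit i of the input expands into `multiplier` consecutive set bits
 * starting at bit i * multiplier. This is how 32-bit writemasks are widened
 * for 64-bit stores below.
 */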
2453
2454 static LLVMValueRef extract_vector_range(struct ac_llvm_context *ctx, LLVMValueRef src,
2455 unsigned start, unsigned count)
2456 {
2457 LLVMTypeRef type = LLVMTypeOf(src);
2458
2459 if (LLVMGetTypeKind(type) != LLVMVectorTypeKind) {
2460 assert(start == 0);
2461 assert(count == 1);
2462 return src;
2463 }
2464
2465 unsigned src_elements = LLVMGetVectorSize(type);
2466 assert(start < src_elements);
2467 assert(start + count <= src_elements);
2468
2469 if (start == 0 && count == src_elements)
2470 return src;
2471
2472 if (count == 1)
2473 return LLVMBuildExtractElement(ctx->builder, src, LLVMConstInt(ctx->i32, start, false), "");
2474
2475 assert(count <= 8);
2476 LLVMValueRef indices[8];
2477 for (unsigned i = 0; i < count; ++i)
2478 indices[i] = LLVMConstInt(ctx->i32, start + i, false);
2479
2480 LLVMValueRef swizzle = LLVMConstVector(indices, count);
2481 return LLVMBuildShuffleVector(ctx->builder, src, src, swizzle, "");
2482 }
2483
2484 static void visit_store_ssbo(struct ac_nir_context *ctx,
2485 nir_intrinsic_instr *instr)
2486 {
2487 const char *store_name;
2488 LLVMValueRef src_data = get_src(ctx, instr->src[0]);
2489 LLVMTypeRef data_type = ctx->ac.f32;
2490 int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
2491 int components_32bit = elem_size_mult * instr->num_components;
2492 unsigned writemask = nir_intrinsic_write_mask(instr);
2493 LLVMValueRef base_data, base_offset;
2494 LLVMValueRef params[6];
2495
2496 params[1] = ctx->abi->load_ssbo(ctx->abi,
2497 get_src(ctx, instr->src[1]), true);
2498 params[2] = ctx->ac.i32_0; /* vindex */
2499 params[4] = ctx->ac.i1false; /* glc */
2500 params[5] = ctx->ac.i1false; /* slc */
2501
2502 if (components_32bit > 1)
2503 data_type = LLVMVectorType(ctx->ac.f32, components_32bit);
2504
2505 writemask = widen_mask(writemask, elem_size_mult);
2506
2507 base_data = ac_to_float(&ctx->ac, src_data);
2508 base_data = trim_vector(&ctx->ac, base_data, instr->num_components);
2509 base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
2510 data_type, "");
2511 base_offset = get_src(ctx, instr->src[2]); /* voffset */
2512 while (writemask) {
2513 int start, count;
2514 LLVMValueRef data;
2515 LLVMValueRef offset;
2516
2517 u_bit_scan_consecutive_range(&writemask, &start, &count);
2518
2519 /* Due to an LLVM limitation, split 3-element writes
2520 * into a 2-element and a 1-element write. */
2521 if (count == 3) {
2522 writemask |= 1 << (start + 2);
2523 count = 2;
2524 }
2525
2526 if (count > 4) {
2527 writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
2528 count = 4;
2529 }
2530
2531 if (count == 4) {
2532 store_name = "llvm.amdgcn.buffer.store.v4f32";
2533 } else if (count == 2) {
2534 store_name = "llvm.amdgcn.buffer.store.v2f32";
2536 } else {
2537 assert(count == 1);
2538 store_name = "llvm.amdgcn.buffer.store.f32";
2539 }
2540 data = extract_vector_range(&ctx->ac, base_data, start, count);
2541
2542 offset = base_offset;
2543 if (start != 0) {
2544 offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), "");
2545 }
2546 params[0] = data;
2547 params[3] = offset;
2548 ac_build_intrinsic(&ctx->ac, store_name,
2549 ctx->ac.voidt, params, 6, 0);
2550 }
2551 }
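/* Worked example (illustration only): a 3-component 32-bit store with
 * writemask 0x7 is split by the loop above into one
 * llvm.amdgcn.buffer.store.v2f32 at voffset + 0 and one
 * llvm.amdgcn.buffer.store.f32 at voffset + 8, per the 3-element limitation
 * noted above.
 */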
2552
2553 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
2554 const nir_intrinsic_instr *instr)
2555 {
2556 const char *name;
2557 LLVMValueRef params[6];
2558 int arg_count = 0;
2559
2560 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2561 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
2562 }
2563 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
2564 params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
2565 get_src(ctx, instr->src[0]),
2566 true);
2567 params[arg_count++] = ctx->ac.i32_0; /* vindex */
2568 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
2569 params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */
2570
2571 switch (instr->intrinsic) {
2572 case nir_intrinsic_ssbo_atomic_add:
2573 name = "llvm.amdgcn.buffer.atomic.add";
2574 break;
2575 case nir_intrinsic_ssbo_atomic_imin:
2576 name = "llvm.amdgcn.buffer.atomic.smin";
2577 break;
2578 case nir_intrinsic_ssbo_atomic_umin:
2579 name = "llvm.amdgcn.buffer.atomic.umin";
2580 break;
2581 case nir_intrinsic_ssbo_atomic_imax:
2582 name = "llvm.amdgcn.buffer.atomic.smax";
2583 break;
2584 case nir_intrinsic_ssbo_atomic_umax:
2585 name = "llvm.amdgcn.buffer.atomic.umax";
2586 break;
2587 case nir_intrinsic_ssbo_atomic_and:
2588 name = "llvm.amdgcn.buffer.atomic.and";
2589 break;
2590 case nir_intrinsic_ssbo_atomic_or:
2591 name = "llvm.amdgcn.buffer.atomic.or";
2592 break;
2593 case nir_intrinsic_ssbo_atomic_xor:
2594 name = "llvm.amdgcn.buffer.atomic.xor";
2595 break;
2596 case nir_intrinsic_ssbo_atomic_exchange:
2597 name = "llvm.amdgcn.buffer.atomic.swap";
2598 break;
2599 case nir_intrinsic_ssbo_atomic_comp_swap:
2600 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2601 break;
2602 default:
2603 abort();
2604 }
2605
2606 return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0);
2607 }
2608
2609 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
2610 const nir_intrinsic_instr *instr)
2611 {
2612 LLVMValueRef results[2];
2613 int load_components;
2614 int num_components = instr->num_components;
2615 if (instr->dest.ssa.bit_size == 64)
2616 num_components *= 2;
2617
2618 for (int i = 0; i < num_components; i += load_components) {
2619 load_components = MIN2(num_components - i, 4);
2620 const char *load_name;
2621 LLVMTypeRef data_type = ctx->ac.f32;
2622 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false);
2623 offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, "");
2624
2625 if (load_components == 3)
2626 data_type = LLVMVectorType(ctx->ac.f32, 4);
2627 else if (load_components > 1)
2628 data_type = LLVMVectorType(ctx->ac.f32, load_components);
2629
2630 if (load_components >= 3)
2631 load_name = "llvm.amdgcn.buffer.load.v4f32";
2632 else if (load_components == 2)
2633 load_name = "llvm.amdgcn.buffer.load.v2f32";
2634 else if (load_components == 1)
2635 load_name = "llvm.amdgcn.buffer.load.f32";
2636 else
2637 unreachable("unhandled number of components");
2638
2639 LLVMValueRef params[] = {
2640 ctx->abi->load_ssbo(ctx->abi,
2641 get_src(ctx, instr->src[0]),
2642 false),
2643 ctx->ac.i32_0,
2644 offset,
2645 ctx->ac.i1false,
2646 ctx->ac.i1false,
2647 };
2648
2649 results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
2650 }
2651
2652 assume(results[0]);
2653 LLVMValueRef ret = results[0];
2654 if (num_components > 4 || num_components == 3) {
2655 LLVMValueRef masks[] = {
2656 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
2657 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
2658 LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false),
2659 LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false)
2660 };
2661
2662 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
2663 ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
2664 results[num_components > 4 ? 1 : 0], swizzle, "");
2665 }
2666
2667 return LLVMBuildBitCast(ctx->ac.builder, ret,
2668 get_def_type(ctx, &instr->dest.ssa), "");
2669 }
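/* Worked example (illustration only): a 3-component 64-bit load needs six
 * dwords, so the loop above emits one llvm.amdgcn.buffer.load.v4f32 plus one
 * llvm.amdgcn.buffer.load.v2f32, and the shuffle then gathers the six live
 * components before the final bitcast to the destination type.
 */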
2670
2671 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
2672 const nir_intrinsic_instr *instr)
2673 {
2674 LLVMValueRef ret;
2675 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
2676 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2677 int num_components = instr->num_components;
2678
2679 if (ctx->abi->load_ubo)
2680 rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
2681
2682 if (instr->dest.ssa.bit_size == 64)
2683 num_components *= 2;
2684
2685 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
2686 NULL, 0, false, false, true, true);
2687 ret = trim_vector(&ctx->ac, ret, num_components);
2688 return LLVMBuildBitCast(ctx->ac.builder, ret,
2689 get_def_type(ctx, &instr->dest.ssa), "");
2690 }
2691
2692 static void
2693 get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref,
2694 bool vs_in, unsigned *vertex_index_out,
2695 LLVMValueRef *vertex_index_ref,
2696 unsigned *const_out, LLVMValueRef *indir_out)
2697 {
2698 unsigned const_offset = 0;
2699 nir_deref *tail = &deref->deref;
2700 LLVMValueRef offset = NULL;
2701
2702 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2703 tail = tail->child;
2704 nir_deref_array *deref_array = nir_deref_as_array(tail);
2705 if (vertex_index_out)
2706 *vertex_index_out = deref_array->base_offset;
2707
2708 if (vertex_index_ref) {
2709 LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false);
2710 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
2711 vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), "");
2712 }
2713 *vertex_index_ref = vtx;
2714 }
2715 }
2716
2717 if (deref->var->data.compact) {
2718 assert(tail->child->deref_type == nir_deref_type_array);
2719 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
2720 nir_deref_array *deref_array = nir_deref_as_array(tail->child);
2721 /* We always lower indirect dereferences for "compact" array vars. */
2722 assert(deref_array->deref_array_type == nir_deref_array_type_direct);
2723
2724 const_offset = deref_array->base_offset;
2725 goto out;
2726 }
2727
2728 while (tail->child != NULL) {
2729 const struct glsl_type *parent_type = tail->type;
2730 tail = tail->child;
2731
2732 if (tail->deref_type == nir_deref_type_array) {
2733 nir_deref_array *deref_array = nir_deref_as_array(tail);
2734 LLVMValueRef index, stride, local_offset;
2735 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2736
2737 const_offset += size * deref_array->base_offset;
2738 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2739 continue;
2740
2741 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2742 index = get_src(ctx, deref_array->indirect);
2743 stride = LLVMConstInt(ctx->ac.i32, size, 0);
2744 local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, "");
2745
2746 if (offset)
2747 offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, "");
2748 else
2749 offset = local_offset;
2750 } else if (tail->deref_type == nir_deref_type_struct) {
2751 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2752
2753 for (unsigned i = 0; i < deref_struct->index; i++) {
2754 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2755 const_offset += glsl_count_attribute_slots(ft, vs_in);
2756 }
2757 } else
2758 unreachable("unsupported deref type");
2759
2760 }
2761 out:
2762 if (const_offset && offset)
2763 offset = LLVMBuildAdd(ctx->ac.builder, offset,
2764 LLVMConstInt(ctx->ac.i32, const_offset, 0),
2765 "");
2766
2767 *const_out = const_offset;
2768 *indir_out = offset;
2769 }
2770
2772 /* The offchip buffer layout for TCS->TES is
2773 *
2774 * - attribute 0 of patch 0 vertex 0
2775 * - attribute 0 of patch 0 vertex 1
2776 * - attribute 0 of patch 0 vertex 2
2777 * ...
2778 * - attribute 0 of patch 1 vertex 0
2779 * - attribute 0 of patch 1 vertex 1
2780 * ...
2781 * - attribute 1 of patch 0 vertex 0
2782 * - attribute 1 of patch 0 vertex 1
2783 * ...
2784 * - per patch attribute 0 of patch 0
2785 * - per patch attribute 0 of patch 1
2786 * ...
2787 *
2788 * Note that every attribute has 4 components.
2789 */
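/* Summarizing the address computation below, with 16 bytes per attribute
 * slot:
 *
 * per-vertex: offset = 16 * (param_index * num_patches * vertices_per_patch
 * + rel_patch_id * vertices_per_patch + vertex_index)
 * per-patch: offset = 16 * (param_index * num_patches + rel_patch_id)
 * + patch_data_offset
 */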
2790 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
2791 LLVMValueRef vertex_index,
2792 LLVMValueRef param_index)
2793 {
2794 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
2795 LLVMValueRef param_stride, constant16;
2796 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
2797
2798 vertices_per_patch = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 9, 6);
2799 num_patches = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
2800 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
2801 num_patches, "");
2802
2803 constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
2804 if (vertex_index) {
2805 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id,
2806 vertices_per_patch, "");
2807
2808 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2809 vertex_index, "");
2810
2811 param_stride = total_vertices;
2812 } else {
2813 base_addr = rel_patch_id;
2814 param_stride = num_patches;
2815 }
2816
2817 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2818 LLVMBuildMul(ctx->builder, param_index,
2819 param_stride, ""), "");
2820
2821 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, "");
2822
2823 if (!vertex_index) {
2824 LLVMValueRef patch_data_offset =
2825 unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
2826
2827 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2828 patch_data_offset, "");
2829 }
2830 return base_addr;
2831 }
2832
2833 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx,
2834 unsigned param,
2835 unsigned const_index,
2836 bool is_compact,
2837 LLVMValueRef vertex_index,
2838 LLVMValueRef indir_index)
2839 {
2840 LLVMValueRef param_index;
2841
2842 if (indir_index)
2843 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->ac.i32, param, false),
2844 indir_index, "");
2845 else {
2846 if (const_index && !is_compact)
2847 param += const_index;
2848 param_index = LLVMConstInt(ctx->ac.i32, param, false);
2849 }
2850 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
2851 }
2852
2853 static void
2854 mark_tess_output(struct nir_to_llvm_context *ctx,
2855 bool is_patch, uint32_t param)
2857 {
2858 if (is_patch) {
2859 ctx->tess_patch_outputs_written |= (1ull << param);
2860 } else
2861 ctx->tess_outputs_written |= (1ull << param);
2862 }
2863
2864 static LLVMValueRef
2865 get_dw_address(struct nir_to_llvm_context *ctx,
2866 LLVMValueRef dw_addr,
2867 unsigned param,
2868 unsigned const_index,
2869 bool compact_const_index,
2870 LLVMValueRef vertex_index,
2871 LLVMValueRef stride,
2872 LLVMValueRef indir_index)
2874 {
2875
2876 if (vertex_index) {
2877 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2878 LLVMBuildMul(ctx->builder,
2879 vertex_index,
2880 stride, ""), "");
2881 }
2882
2883 if (indir_index)
2884 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2885 LLVMBuildMul(ctx->builder, indir_index,
2886 LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
2887 else if (const_index && !compact_const_index)
2888 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2889 LLVMConstInt(ctx->ac.i32, const_index, false), "");
2890
2891 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2892 LLVMConstInt(ctx->ac.i32, param * 4, false), "");
2893
2894 if (const_index && compact_const_index)
2895 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2896 LLVMConstInt(ctx->ac.i32, const_index, false), "");
2897 return dw_addr;
2898 }
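/* Summarizing the math above (editorial): the returned LDS dword address is
 * dw_addr + vertex_index * stride + indir_index * 4 (or + const_index for
 * direct non-compact accesses) + param * 4, with a compact const_index
 * added last as individual dwords rather than whole vec4 slots.
 */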
2899
2900 static LLVMValueRef
2901 load_tcs_varyings(struct ac_shader_abi *abi,
2902 LLVMValueRef vertex_index,
2903 LLVMValueRef indir_index,
2904 unsigned const_index,
2905 unsigned location,
2906 unsigned driver_location,
2907 unsigned component,
2908 unsigned num_components,
2909 bool is_patch,
2910 bool is_compact,
2911 bool load_input)
2912 {
2913 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2914 LLVMValueRef dw_addr, stride;
2915 LLVMValueRef value[4], result;
2916 unsigned param = shader_io_get_unique_index(location);
2917
2918 if (load_input) {
2919 stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
2920 dw_addr = get_tcs_in_current_patch_offset(ctx);
2921 } else {
2922 if (!is_patch) {
2923 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
2924 dw_addr = get_tcs_out_current_patch_offset(ctx);
2925 } else {
2926 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2927 stride = NULL;
2928 }
2929 }
2930
2931 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2932 indir_index);
2933
2934 for (unsigned i = 0; i < num_components + component; i++) {
2935 value[i] = ac_lds_load(&ctx->ac, dw_addr);
2936 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2937 ctx->ac.i32_1, "");
2938 }
2939 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
2940 return result;
2941 }
2942
2943 static void
2944 store_tcs_output(struct ac_shader_abi *abi,
2945 LLVMValueRef vertex_index,
2946 LLVMValueRef param_index,
2947 unsigned const_index,
2948 unsigned location,
2949 unsigned driver_location,
2950 LLVMValueRef src,
2951 unsigned component,
2952 bool is_patch,
2953 bool is_compact,
2954 unsigned writemask)
2955 {
2956 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2957 LLVMValueRef dw_addr;
2958 LLVMValueRef stride = NULL;
2959 LLVMValueRef buf_addr = NULL;
2960 unsigned param;
2961 bool store_lds = true;
2962
2963 if (is_patch) {
2964 if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0))))
2965 store_lds = false;
2966 } else {
2967 if (!(ctx->tcs_outputs_read & (1ULL << location)))
2968 store_lds = false;
2969 }
2970
2971 param = shader_io_get_unique_index(location);
2972 if (location == VARYING_SLOT_CLIP_DIST0 &&
2973 is_compact && const_index > 3) {
2974 const_index -= 3;
2975 param++;
2976 }
2977
2978 if (!is_patch) {
2979 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
2980 dw_addr = get_tcs_out_current_patch_offset(ctx);
2981 } else {
2982 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2983 }
2984
2985 mark_tess_output(ctx, is_patch, param);
2986
2987 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2988 param_index);
2989 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
2990 vertex_index, param_index);
2991
2992 bool is_tess_factor = false;
2993 if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
2994 location == VARYING_SLOT_TESS_LEVEL_OUTER)
2995 is_tess_factor = true;
2996
2997 unsigned base = is_compact ? const_index : 0;
2998 for (unsigned chan = 0; chan < 8; chan++) {
2999 if (!(writemask & (1 << chan)))
3000 continue;
3001 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
3002
3003 if (store_lds || is_tess_factor) {
3004 LLVMValueRef dw_addr_chan =
3005 LLVMBuildAdd(ctx->builder, dw_addr,
3006 LLVMConstInt(ctx->ac.i32, chan, false), "");
3007 ac_lds_store(&ctx->ac, dw_addr_chan, value);
3008 }
3009
3010 if (!is_tess_factor && writemask != 0xF)
3011 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
3012 buf_addr, ctx->oc_lds,
3013 4 * (base + chan), 1, 0, true, false);
3014 }
3015
3016 if (writemask == 0xF) {
3017 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
3018 buf_addr, ctx->oc_lds,
3019 (base * 4), 1, 0, true, false);
3020 }
3021 }
3022
3023 static LLVMValueRef
3024 load_tes_input(struct ac_shader_abi *abi,
3025 LLVMValueRef vertex_index,
3026 LLVMValueRef param_index,
3027 unsigned const_index,
3028 unsigned location,
3029 unsigned driver_location,
3030 unsigned component,
3031 unsigned num_components,
3032 bool is_patch,
3033 bool is_compact,
3034 bool load_input)
3035 {
3036 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
3037 LLVMValueRef buf_addr;
3038 LLVMValueRef result;
3039 unsigned param = shader_io_get_unique_index(location);
3040
3041 if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) {
3042 const_index -= 3;
3043 param++;
3044 }
3045
3046 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
3047 is_compact, vertex_index, param_index);
3048
3049 LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false);
3050 buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
3051
3052 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL,
3053 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
3054 result = trim_vector(&ctx->ac, result, num_components);
3055 return result;
3056 }
3057
3058 static LLVMValueRef
3059 load_gs_input(struct ac_shader_abi *abi,
3060 unsigned location,
3061 unsigned driver_location,
3062 unsigned component,
3063 unsigned num_components,
3064 unsigned vertex_index,
3065 unsigned const_index,
3066 LLVMTypeRef type)
3067 {
3068 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
3069 LLVMValueRef vtx_offset;
3070 LLVMValueRef args[9];
3071 unsigned param, vtx_offset_param;
3072 LLVMValueRef value[4], result;
3073
3074 vtx_offset_param = vertex_index;
3075 assert(vtx_offset_param < 6);
3076 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
3077 LLVMConstInt(ctx->ac.i32, 4, false), "");
3078
3079 param = shader_io_get_unique_index(location);
3080
3081 for (unsigned i = component; i < num_components + component; i++) {
3082 if (ctx->ac.chip_class >= GFX9) {
3083 LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
3084 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
3085 LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
3086 value[i] = ac_lds_load(&ctx->ac, dw_addr);
3087 } else {
3088 args[0] = ctx->esgs_ring;
3089 args[1] = vtx_offset;
3090 args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
3091 args[3] = ctx->ac.i32_0;
3092 args[4] = ctx->ac.i32_1; /* OFFEN */
3093 args[5] = ctx->ac.i32_0; /* IDXEN */
3094 args[6] = ctx->ac.i32_1; /* GLC */
3095 args[7] = ctx->ac.i32_0; /* SLC */
3096 args[8] = ctx->ac.i32_0; /* TFE */
3097
3098 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
3099 ctx->ac.i32, args, 9,
3100 AC_FUNC_ATTR_READONLY |
3101 AC_FUNC_ATTR_LEGACY);
3102 }
3103 }
3104 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
3105
3106 return result;
3107 }
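/* Note (editorial, not in the original source): on GFX9 the ES outputs are
 * read directly from LDS, while older chips fetch them from the ESGS ring,
 * where consecutive dwords of one vertex are 256 bytes apart
 * (64 lanes * 4 bytes), hence the "* 256" in the ring offset above.
 */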
3108
3109 static LLVMValueRef
3110 build_gep_for_deref(struct ac_nir_context *ctx,
3111 nir_deref_var *deref)
3112 {
3113 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
3114 assert(entry->data);
3115 LLVMValueRef val = entry->data;
3116 nir_deref *tail = deref->deref.child;
3117 while (tail != NULL) {
3118 LLVMValueRef offset;
3119 switch (tail->deref_type) {
3120 case nir_deref_type_array: {
3121 nir_deref_array *array = nir_deref_as_array(tail);
3122 offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0);
3123 if (array->deref_array_type ==
3124 nir_deref_array_type_indirect) {
3125 offset = LLVMBuildAdd(ctx->ac.builder, offset,
3126 get_src(ctx,
3127 array->indirect),
3128 "");
3129 }
3130 break;
3131 }
3132 case nir_deref_type_struct: {
3133 nir_deref_struct *deref_struct =
3134 nir_deref_as_struct(tail);
3135 offset = LLVMConstInt(ctx->ac.i32,
3136 deref_struct->index, 0);
3137 break;
3138 }
3139 default:
3140 unreachable("bad deref type");
3141 }
3142 val = ac_build_gep0(&ctx->ac, val, offset);
3143 tail = tail->child;
3144 }
3145 return val;
3146 }
3147
3148 static LLVMValueRef load_tess_varyings(struct ac_nir_context *ctx,
3149 nir_intrinsic_instr *instr,
3150 bool load_inputs)
3151 {
3152 LLVMValueRef result;
3153 LLVMValueRef vertex_index = NULL;
3154 LLVMValueRef indir_index = NULL;
3155 unsigned const_index = 0;
3156 unsigned location = instr->variables[0]->var->data.location;
3157 unsigned driver_location = instr->variables[0]->var->data.driver_location;
3158 const bool is_patch = instr->variables[0]->var->data.patch;
3159 const bool is_compact = instr->variables[0]->var->data.compact;
3160
3161 get_deref_offset(ctx, instr->variables[0],
3162 false, NULL, is_patch ? NULL : &vertex_index,
3163 &const_index, &indir_index);
3164
3165 result = ctx->abi->load_tess_varyings(ctx->abi, vertex_index, indir_index,
3166 const_index, location, driver_location,
3167 instr->variables[0]->var->data.location_frac,
3168 instr->num_components,
3169 is_patch, is_compact, load_inputs);
3170 return LLVMBuildBitCast(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), "");
3171 }
3172
3173 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
3174 nir_intrinsic_instr *instr)
3175 {
3176 LLVMValueRef values[8];
3177 int idx = instr->variables[0]->var->data.driver_location;
3178 int ve = instr->dest.ssa.num_components;
3179 unsigned comp = instr->variables[0]->var->data.location_frac;
3180 LLVMValueRef indir_index;
3181 LLVMValueRef ret;
3182 unsigned const_index;
3183 unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
3184 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
3185 instr->variables[0]->var->data.mode == nir_var_shader_in;
3186 get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
3187 &const_index, &indir_index);
3188
3189 if (instr->dest.ssa.bit_size == 64)
3190 ve *= 2;
3191
3192 switch (instr->variables[0]->var->data.mode) {
3193 case nir_var_shader_in:
3194 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
3195 ctx->stage == MESA_SHADER_TESS_EVAL) {
3196 return load_tess_varyings(ctx, instr, true);
3197 }
3198
3199 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3200 LLVMValueRef indir_index;
3201 unsigned const_index, vertex_index;
3202 get_deref_offset(ctx, instr->variables[0],
3203 false, &vertex_index, NULL,
3204 &const_index, &indir_index);
3205 return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location,
3206 instr->variables[0]->var->data.driver_location,
3207 instr->variables[0]->var->data.location_frac, ve,
3208 vertex_index, const_index,
3209 nir2llvmtype(ctx, instr->variables[0]->var->type));
3210 }
3211
3212 for (unsigned chan = comp; chan < ve + comp; chan++) {
3213 if (indir_index) {
3214 unsigned count = glsl_count_attribute_slots(
3215 instr->variables[0]->var->type,
3216 ctx->stage == MESA_SHADER_VERTEX);
3217 count -= chan / 4;
3218 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3219 &ctx->ac, ctx->abi->inputs + idx + chan, count,
3220 stride, false, true);
3221
3222 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3223 tmp_vec,
3224 indir_index, "");
3225 } else
3226 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
3227 }
3228 break;
3229 case nir_var_local:
3230 for (unsigned chan = 0; chan < ve; chan++) {
3231 if (indir_index) {
3232 unsigned count = glsl_count_attribute_slots(
3233 instr->variables[0]->var->type, false);
3234 count -= chan / 4;
3235 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3236 &ctx->ac, ctx->locals + idx + chan, count,
3237 stride, true, true);
3238
3239 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3240 tmp_vec,
3241 indir_index, "");
3242 } else {
3243 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
3244 }
3245 }
3246 break;
3247 case nir_var_shared: {
3248 LLVMValueRef address = build_gep_for_deref(ctx,
3249 instr->variables[0]);
3250 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
3251 return LLVMBuildBitCast(ctx->ac.builder, val,
3252 get_def_type(ctx, &instr->dest.ssa),
3253 "");
3254 }
3255 case nir_var_shader_out:
3256 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3257 return load_tess_varyings(ctx, instr, false);
3258 }
3259
3260 for (unsigned chan = comp; chan < ve + comp; chan++) {
3261 if (indir_index) {
3262 unsigned count = glsl_count_attribute_slots(
3263 instr->variables[0]->var->type, false);
3264 count -= chan / 4;
3265 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3266 &ctx->ac, ctx->outputs + idx + chan, count,
3267 stride, true, true);
3268
3269 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3270 tmp_vec,
3271 indir_index, "");
3272 } else {
3273 values[chan] = LLVMBuildLoad(ctx->ac.builder,
3274 ctx->outputs[idx + chan + const_index * stride],
3275 "");
3276 }
3277 }
3278 break;
3279 default:
3280 unreachable("unhandled variable mode");
3281 }
3282 ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
3283 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
3284 }
3285
3286 static void
3287 visit_store_var(struct ac_nir_context *ctx,
3288 nir_intrinsic_instr *instr)
3289 {
3290 LLVMValueRef temp_ptr, value;
3291 int idx = instr->variables[0]->var->data.driver_location;
3292 unsigned comp = instr->variables[0]->var->data.location_frac;
3293 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
3294 int writemask = instr->const_index[0] << comp;
3295 LLVMValueRef indir_index;
3296 unsigned const_index;
3297 get_deref_offset(ctx, instr->variables[0], false,
3298 NULL, NULL, &const_index, &indir_index);
3299
3300 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
3301
3302 src = LLVMBuildBitCast(ctx->ac.builder, src,
3303 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
3304 "");
3305
3306 writemask = widen_mask(writemask, 2);
3307 }
3308
3309 switch (instr->variables[0]->var->data.mode) {
3310 case nir_var_shader_out:
3311
3312 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3313 LLVMValueRef vertex_index = NULL;
3314 LLVMValueRef indir_index = NULL;
3315 unsigned const_index = 0;
3316 const unsigned location = instr->variables[0]->var->data.location;
3317 const unsigned driver_location = instr->variables[0]->var->data.driver_location;
3318 const unsigned comp = instr->variables[0]->var->data.location_frac;
3319 const bool is_patch = instr->variables[0]->var->data.patch;
3320 const bool is_compact = instr->variables[0]->var->data.compact;
3321
3322 get_deref_offset(ctx, instr->variables[0],
3323 false, NULL, is_patch ? NULL : &vertex_index,
3324 &const_index, &indir_index);
3325
3326 ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index,
3327 const_index, location, driver_location,
3328 src, comp, is_patch, is_compact, writemask);
3329 return;
3330 }
3331
3332 for (unsigned chan = 0; chan < 8; chan++) {
3333 int stride = 4;
3334 if (!(writemask & (1 << chan)))
3335 continue;
3336
3337 value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
3338
3339 if (instr->variables[0]->var->data.compact)
3340 stride = 1;
3341 if (indir_index) {
3342 unsigned count = glsl_count_attribute_slots(
3343 instr->variables[0]->var->type, false);
3344 count -= chan / 4;
3345 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3346 &ctx->ac, ctx->outputs + idx + chan, count,
3347 stride, true, true);
3348
3349 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
3350 value, indir_index, "");
3351 build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan,
3352 count, stride, tmp_vec);
3353
3354 } else {
3355 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
3356
3357 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
3358 }
3359 }
3360 break;
3361 case nir_var_local:
3362 for (unsigned chan = 0; chan < 8; chan++) {
3363 if (!(writemask & (1 << chan)))
3364 continue;
3365
3366 value = ac_llvm_extract_elem(&ctx->ac, src, chan);
3367 if (indir_index) {
3368 unsigned count = glsl_count_attribute_slots(
3369 instr->variables[0]->var->type, false);
3370 count -= chan / 4;
3371 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3372 &ctx->ac, ctx->locals + idx + chan, count,
3373 4, true, true);
3374
3375 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
3376 value, indir_index, "");
3377 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
3378 count, 4, tmp_vec);
3379 } else {
3380 temp_ptr = ctx->locals[idx + chan + const_index * 4];
3381
3382 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
3383 }
3384 }
3385 break;
3386 case nir_var_shared: {
3387 int writemask = instr->const_index[0];
3388 LLVMValueRef address = build_gep_for_deref(ctx,
3389 instr->variables[0]);
3390 LLVMValueRef val = get_src(ctx, instr->src[0]);
3391 unsigned components =
3392 glsl_get_vector_elements(
3393 nir_deref_tail(&instr->variables[0]->deref)->type);
3394 if (writemask == (1 << components) - 1) {
3395 val = LLVMBuildBitCast(
3396 ctx->ac.builder, val,
3397 LLVMGetElementType(LLVMTypeOf(address)), "");
3398 LLVMBuildStore(ctx->ac.builder, val, address);
3399 } else {
3400 for (unsigned chan = 0; chan < 4; chan++) {
3401 if (!(writemask & (1 << chan)))
3402 continue;
3403 LLVMValueRef ptr =
3404 LLVMBuildStructGEP(ctx->ac.builder,
3405 address, chan, "");
3406 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
3407 chan);
3408 src = LLVMBuildBitCast(
3409 ctx->ac.builder, src,
3410 LLVMGetElementType(LLVMTypeOf(ptr)), "");
3411 LLVMBuildStore(ctx->ac.builder, src, ptr);
3412 }
3413 }
3414 break;
3415 }
3416 default:
3417 break;
3418 }
3419 }
3420
3421 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
3422 {
3423 switch (dim) {
3424 case GLSL_SAMPLER_DIM_BUF:
3425 return 1;
3426 case GLSL_SAMPLER_DIM_1D:
3427 return array ? 2 : 1;
3428 case GLSL_SAMPLER_DIM_2D:
3429 return array ? 3 : 2;
3430 case GLSL_SAMPLER_DIM_MS:
3431 return array ? 4 : 3;
3432 case GLSL_SAMPLER_DIM_3D:
3433 case GLSL_SAMPLER_DIM_CUBE:
3434 return 3;
3435 case GLSL_SAMPLER_DIM_RECT:
3436 case GLSL_SAMPLER_DIM_SUBPASS:
3437 return 2;
3438 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3439 return 3;
3440 default:
3441 break;
3442 }
3443 return 0;
3444 }
3445
3446
3447
3448 /* Adjust the sample index according to FMASK.
3449 *
3450 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3451 * which is the identity mapping. Each nibble says which physical sample
3452 * should be fetched to get that sample.
3453 *
3454 * For example, 0x11111100 means there are only 2 samples stored and
3455 * the second sample covers 3/4 of the pixel. When reading samples 0
3456 * and 1, return physical sample 0 (determined by the first two 0s
3457 * in FMASK), otherwise return physical sample 1.
3458 *
3459 * The sample index should be adjusted as follows:
3460 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
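* For example, with the FMASK value 0x11111100 above, a request for
* sample 2 computes (0x11111100 >> 8) & 0xF = 1, so physical sample 1
* is fetched.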
3461 */
3462 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
3463 LLVMValueRef coord_x, LLVMValueRef coord_y,
3464 LLVMValueRef coord_z,
3465 LLVMValueRef sample_index,
3466 LLVMValueRef fmask_desc_ptr)
3467 {
3468 LLVMValueRef fmask_load_address[4];
3469 LLVMValueRef res;
3470
3471 fmask_load_address[0] = coord_x;
3472 fmask_load_address[1] = coord_y;
3473 if (coord_z) {
3474 fmask_load_address[2] = coord_z;
3475 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
3476 }
3477
3478 struct ac_image_args args = {0};
3479
3480 args.opcode = ac_image_load;
3481 args.da = coord_z ? true : false;
3482 args.resource = fmask_desc_ptr;
3483 args.dmask = 0xf;
3484 args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
3485
3486 res = ac_build_image_opcode(ctx, &args);
3487
3488 res = ac_to_integer(ctx, res);
3489 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3490 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3491
3492 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3493 res,
3494 ctx->i32_0, "");
3495
3496 LLVMValueRef sample_index4 =
3497 LLVMBuildMul(ctx->builder, sample_index, four, "");
3498 LLVMValueRef shifted_fmask =
3499 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3500 LLVMValueRef final_sample =
3501 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3502
3503 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3504 * resource descriptor is 0 (invalid).
3505 */
3506 LLVMValueRef fmask_desc =
3507 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
3508 ctx->v8i32, "");
3509
3510 LLVMValueRef fmask_word1 =
3511 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3512 ctx->i32_1, "");
3513
3514 LLVMValueRef word1_is_nonzero =
3515 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3516 fmask_word1, ctx->i32_0, "");
3517
3518 /* Replace the MSAA sample index. */
3519 sample_index =
3520 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3521 final_sample, sample_index, "");
3522 return sample_index;
3523 }
3524
3525 static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
3526 const nir_intrinsic_instr *instr)
3527 {
3528 const struct glsl_type *type = glsl_without_array(instr->variables[0]->var->type);
3529
3530 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
3531 LLVMValueRef coords[4];
3532 LLVMValueRef masks[] = {
3533 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
3534 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
3535 };
3536 LLVMValueRef res;
3537 LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
3538
3539 int count;
3540 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
3541 bool is_array = glsl_sampler_type_is_array(type);
3542 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
3543 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3544 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
3545 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3546 bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
3547 count = image_type_to_components_count(dim, is_array);
3548
3549 if (is_ms) {
3550 LLVMValueRef fmask_load_address[3];
3551 int chan;
3552
3553 fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
3554 fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
3555 if (is_array)
3556 fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
3557 else
3558 fmask_load_address[2] = NULL;
3559 if (add_frag_pos) {
3560 for (chan = 0; chan < 2; ++chan)
3561 fmask_load_address[chan] =
3562 LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan],
3563 LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
3564 ctx->ac.i32, ""), "");
3565 fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
3566 }
3567 sample_index = adjust_sample_index_using_fmask(&ctx->ac,
3568 fmask_load_address[0],
3569 fmask_load_address[1],
3570 fmask_load_address[2],
3571 sample_index,
3572 get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false));
3573 }
3574 if (count == 1 && !gfx9_1d) {
3575 if (instr->src[0].ssa->num_components)
3576 res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
3577 else
3578 res = src0;
3579 } else {
3580 int chan;
3581 if (is_ms)
3582 count--;
3583 for (chan = 0; chan < count; ++chan) {
3584 coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
3585 }
3586 if (add_frag_pos) {
3587 for (chan = 0; chan < 2; ++chan)
3588 coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
3589 ctx->ac.i32, ""), "");
3590 coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
3591 count++;
3592 }
3593
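/* GFX9 addresses 1D images as 2D, so a zero Y coordinate is
 * inserted; for arrays this moves the layer index into the Z slot. */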
3594 if (gfx9_1d) {
3595 if (is_array) {
3596 coords[2] = coords[1];
3597 coords[1] = ctx->ac.i32_0;
3598 } else
3599 coords[1] = ctx->ac.i32_0;
3600 count++;
3601 }
3602
3603 if (is_ms) {
3604 coords[count] = sample_index;
3605 count++;
3606 }
3607
3608 if (count == 3) {
3609 coords[3] = LLVMGetUndef(ctx->ac.i32);
3610 count = 4;
3611 }
3612 res = ac_build_gather_values(&ctx->ac, coords, count);
3613 }
3614 return res;
3615 }
3616
3617 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
3618 const nir_intrinsic_instr *instr)
3619 {
3620 LLVMValueRef params[7];
3621 LLVMValueRef res;
3622 char intrinsic_name[64];
3623 const nir_variable *var = instr->variables[0]->var;
3624 const struct glsl_type *type = var->type;
3625
3626 	if (instr->variables[0]->deref.child)
3627 type = instr->variables[0]->deref.child->type;
3628
3629 type = glsl_without_array(type);
3630 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3631 params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false);
3632 params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3633 ctx->ac.i32_0, ""); /* vindex */
3634 params[2] = ctx->ac.i32_0; /* voffset */
3635 params[3] = ctx->ac.i1false; /* glc */
3636 params[4] = ctx->ac.i1false; /* slc */
3637 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32,
3638 params, 5, 0);
3639
3640 res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
3641 res = ac_to_integer(&ctx->ac, res);
3642 } else {
3643 bool is_da = glsl_sampler_type_is_array(type) ||
3644 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
3645 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D ||
3646 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS ||
3647 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
3648 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
3649 LLVMValueRef glc = ctx->ac.i1false;
3650 LLVMValueRef slc = ctx->ac.i1false;
3651
3652 params[0] = get_image_coords(ctx, instr);
3653 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
3654 params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
3655 if (HAVE_LLVM <= 0x0309) {
3656 params[3] = ctx->ac.i1false; /* r128 */
3657 params[4] = da;
3658 params[5] = glc;
3659 params[6] = slc;
3660 } else {
3661 LLVMValueRef lwe = ctx->ac.i1false;
3662 params[3] = glc;
3663 params[4] = slc;
3664 params[5] = lwe;
3665 params[6] = da;
3666 }
3667
3668 ac_get_image_intr_name("llvm.amdgcn.image.load",
3669 ctx->ac.v4f32, /* vdata */
3670 LLVMTypeOf(params[0]), /* coords */
3671 LLVMTypeOf(params[1]), /* rsrc */
3672 intrinsic_name, sizeof(intrinsic_name));
3673
3674 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32,
3675 params, 7, AC_FUNC_ATTR_READONLY);
3676 }
3677 return ac_to_integer(&ctx->ac, res);
3678 }
3679
3680 static void visit_image_store(struct ac_nir_context *ctx,
3681 nir_intrinsic_instr *instr)
3682 {
3683 LLVMValueRef params[8];
3684 char intrinsic_name[64];
3685 const nir_variable *var = instr->variables[0]->var;
3686 const struct glsl_type *type = glsl_without_array(var->type);
3687 LLVMValueRef glc = ctx->ac.i1false;
3688 bool force_glc = ctx->ac.chip_class == SI;
3689 if (force_glc)
3690 glc = ctx->ac.i1true;
3691
3692 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3693 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
3694 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
3695 params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3696 ctx->ac.i32_0, ""); /* vindex */
3697 params[3] = ctx->ac.i32_0; /* voffset */
3698 params[4] = glc; /* glc */
3699 params[5] = ctx->ac.i1false; /* slc */
3700 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
3701 params, 6, 0);
3702 } else {
3703 bool is_da = glsl_sampler_type_is_array(type) ||
3704 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
3705 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_3D;
3706 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
3707 LLVMValueRef slc = ctx->ac.i1false;
3708
3709 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2]));
3710 params[1] = get_image_coords(ctx, instr); /* coords */
3711 params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true);
3712 params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
3713 if (HAVE_LLVM <= 0x0309) {
3714 params[4] = ctx->ac.i1false; /* r128 */
3715 params[5] = da;
3716 params[6] = glc;
3717 params[7] = slc;
3718 } else {
3719 LLVMValueRef lwe = ctx->ac.i1false;
3720 params[4] = glc;
3721 params[5] = slc;
3722 params[6] = lwe;
3723 params[7] = da;
3724 }
3725
3726 ac_get_image_intr_name("llvm.amdgcn.image.store",
3727 LLVMTypeOf(params[0]), /* vdata */
3728 LLVMTypeOf(params[1]), /* coords */
3729 LLVMTypeOf(params[2]), /* rsrc */
3730 intrinsic_name, sizeof(intrinsic_name));
3731
3732 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt,
3733 params, 8, 0);
3734 }
3735
3736 }
3737
3738 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
3739 const nir_intrinsic_instr *instr)
3740 {
3741 LLVMValueRef params[7];
3742 int param_count = 0;
3743 const nir_variable *var = instr->variables[0]->var;
3744
3745 const char *atomic_name;
3746 char intrinsic_name[41];
3747 const struct glsl_type *type = glsl_without_array(var->type);
3748 MAYBE_UNUSED int length;
3749
3750 bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
3751
3752 switch (instr->intrinsic) {
3753 case nir_intrinsic_image_atomic_add:
3754 atomic_name = "add";
3755 break;
3756 case nir_intrinsic_image_atomic_min:
3757 atomic_name = is_unsigned ? "umin" : "smin";
3758 break;
3759 case nir_intrinsic_image_atomic_max:
3760 atomic_name = is_unsigned ? "umax" : "smax";
3761 break;
3762 case nir_intrinsic_image_atomic_and:
3763 atomic_name = "and";
3764 break;
3765 case nir_intrinsic_image_atomic_or:
3766 atomic_name = "or";
3767 break;
3768 case nir_intrinsic_image_atomic_xor:
3769 atomic_name = "xor";
3770 break;
3771 case nir_intrinsic_image_atomic_exchange:
3772 atomic_name = "swap";
3773 break;
3774 case nir_intrinsic_image_atomic_comp_swap:
3775 atomic_name = "cmpswap";
3776 break;
3777 default:
3778 abort();
3779 }
3780
3781 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
3782 params[param_count++] = get_src(ctx, instr->src[3]);
3783 params[param_count++] = get_src(ctx, instr->src[2]);
3784
3785 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3786 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
3787 NULL, true, true);
3788 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3789 ctx->ac.i32_0, ""); /* vindex */
3790 params[param_count++] = ctx->ac.i32_0; /* voffset */
3791 params[param_count++] = ctx->ac.i1false; /* slc */
3792
3793 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3794 "llvm.amdgcn.buffer.atomic.%s", atomic_name);
3795 } else {
3796 char coords_type[8];
3797
3798 bool da = glsl_sampler_type_is_array(type) ||
3799 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3800
3801 LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr);
3802 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE,
3803 NULL, true, true);
3804 params[param_count++] = ctx->ac.i1false; /* r128 */
3805 params[param_count++] = da ? ctx->ac.i1true : ctx->ac.i1false; /* da */
3806 params[param_count++] = ctx->ac.i1false; /* slc */
3807
3808 build_int_type_name(LLVMTypeOf(coords),
3809 coords_type, sizeof(coords_type));
3810
3811 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3812 "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type);
3813 }
3814
3815 assert(length < sizeof(intrinsic_name));
3816 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
3817 }
3818
3819 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
3820 const nir_intrinsic_instr *instr)
3821 {
3822 LLVMValueRef res;
3823 const nir_variable *var = instr->variables[0]->var;
3824 const struct glsl_type *type = instr->variables[0]->var->type;
3825 bool da = glsl_sampler_type_is_array(var->type) ||
3826 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE ||
3827 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_3D;
3828 	if (instr->variables[0]->deref.child)
3829 type = instr->variables[0]->deref.child->type;
3830
3831 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
3832 return get_buffer_size(ctx,
3833 get_sampler_desc(ctx, instr->variables[0],
3834 AC_DESC_BUFFER, NULL, true, false), true);
3835
3836 struct ac_image_args args = { 0 };
3837
3838 args.da = da;
3839 args.dmask = 0xf;
3840 args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
3841 args.opcode = ac_image_get_resinfo;
3842 args.addr = ctx->ac.i32_0;
3843
3844 res = ac_build_image_opcode(&ctx->ac, &args);
3845
3846 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
3847
3848 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
3849 glsl_sampler_type_is_array(type)) {
3850 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
3851 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3852 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
3853 res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
3854 }
3855 if (ctx->ac.chip_class >= GFX9 &&
3856 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
3857 glsl_sampler_type_is_array(type)) {
3858 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3859 res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
3860 ctx->ac.i32_1, "");
3861
3862 }
3863 return res;
3864 }
3865
3866 #define NOOP_WAITCNT 0xf7f
3867 #define LGKM_CNT 0x07f
3868 #define VM_CNT 0xf70
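/* s_waitcnt immediate layout: vmcnt lives in the low bits and lgkmcnt
 * in the high bits; a field of all ones means "don't wait on this
 * counter". AND-ing NOOP_WAITCNT with LGKM_CNT or VM_CNT zeroes the
 * corresponding field, making the s_waitcnt wait for those operations
 * to drain. */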
3869
3870 static void emit_membar(struct nir_to_llvm_context *ctx,
3871 const nir_intrinsic_instr *instr)
3872 {
3873 unsigned waitcnt = NOOP_WAITCNT;
3874
3875 switch (instr->intrinsic) {
3876 case nir_intrinsic_memory_barrier:
3877 case nir_intrinsic_group_memory_barrier:
3878 waitcnt &= VM_CNT & LGKM_CNT;
3879 break;
3880 case nir_intrinsic_memory_barrier_atomic_counter:
3881 case nir_intrinsic_memory_barrier_buffer:
3882 case nir_intrinsic_memory_barrier_image:
3883 waitcnt &= VM_CNT;
3884 break;
3885 case nir_intrinsic_memory_barrier_shared:
3886 waitcnt &= LGKM_CNT;
3887 break;
3888 default:
3889 break;
3890 }
3891 if (waitcnt != NOOP_WAITCNT)
3892 ac_build_waitcnt(&ctx->ac, waitcnt);
3893 }
3894
3895 static void emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
3896 {
3897 /* SI only (thanks to a hw bug workaround):
3898 * The real barrier instruction isn't needed, because an entire patch
3899 * always fits into a single wave.
3900 */
3901 if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
3902 ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
3903 return;
3904 }
3905 ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier",
3906 ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3907 }
3908
3909 static void emit_discard(struct ac_nir_context *ctx,
3910 const nir_intrinsic_instr *instr)
3911 {
3912 LLVMValueRef cond;
3913
3914 if (instr->intrinsic == nir_intrinsic_discard_if) {
3915 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
3916 get_src(ctx, instr->src[0]),
3917 ctx->ac.i32_0, "");
3918 } else {
3919 assert(instr->intrinsic == nir_intrinsic_discard);
3920 cond = LLVMConstInt(ctx->ac.i1, false, 0);
3921 }
3922
3923 ac_build_kill_if_false(&ctx->ac, cond);
3924 }
3925
3926 static LLVMValueRef
3927 visit_load_helper_invocation(struct ac_nir_context *ctx)
3928 {
3929 LLVMValueRef result = ac_build_intrinsic(&ctx->ac,
3930 "llvm.amdgcn.ps.live",
3931 ctx->ac.i1, NULL, 0,
3932 AC_FUNC_ATTR_READNONE);
3933 result = LLVMBuildNot(ctx->ac.builder, result, "");
3934 return LLVMBuildSExt(ctx->ac.builder, result, ctx->ac.i32, "");
3935 }
3936
3937 static LLVMValueRef
3938 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
3939 {
3940 LLVMValueRef result;
3941 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
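/* Bits [11:6] of tg_size hold the wave's index within the
 * threadgroup, already in units of the 64-lane wave size, so masking
 * with 0xfc0 and adding the lane's thread id yields the flat local
 * invocation index. */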
3942 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
3943 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3944
3945 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
3946 }
3947
3948 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
3949 const nir_intrinsic_instr *instr)
3950 {
3951 LLVMValueRef ptr, result;
3952 LLVMValueRef src = get_src(ctx->nir, instr->src[0]);
3953 ptr = build_gep_for_deref(ctx->nir, instr->variables[0]);
3954
3955 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
3956 LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]);
3957 result = LLVMBuildAtomicCmpXchg(ctx->builder,
3958 ptr, src, src1,
3959 LLVMAtomicOrderingSequentiallyConsistent,
3960 LLVMAtomicOrderingSequentiallyConsistent,
3961 false);
3962 } else {
3963 LLVMAtomicRMWBinOp op;
3964 switch (instr->intrinsic) {
3965 case nir_intrinsic_var_atomic_add:
3966 op = LLVMAtomicRMWBinOpAdd;
3967 break;
3968 case nir_intrinsic_var_atomic_umin:
3969 op = LLVMAtomicRMWBinOpUMin;
3970 break;
3971 case nir_intrinsic_var_atomic_umax:
3972 op = LLVMAtomicRMWBinOpUMax;
3973 break;
3974 case nir_intrinsic_var_atomic_imin:
3975 op = LLVMAtomicRMWBinOpMin;
3976 break;
3977 case nir_intrinsic_var_atomic_imax:
3978 op = LLVMAtomicRMWBinOpMax;
3979 break;
3980 case nir_intrinsic_var_atomic_and:
3981 op = LLVMAtomicRMWBinOpAnd;
3982 break;
3983 case nir_intrinsic_var_atomic_or:
3984 op = LLVMAtomicRMWBinOpOr;
3985 break;
3986 case nir_intrinsic_var_atomic_xor:
3987 op = LLVMAtomicRMWBinOpXor;
3988 break;
3989 case nir_intrinsic_var_atomic_exchange:
3990 op = LLVMAtomicRMWBinOpXchg;
3991 break;
3992 default:
3993 return NULL;
3994 }
3995
3996 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, ac_to_integer(&ctx->ac, src),
3997 LLVMAtomicOrderingSequentiallyConsistent,
3998 false);
3999 }
4000 return result;
4001 }
4002
4003 #define INTERP_CENTER 0
4004 #define INTERP_CENTROID 1
4005 #define INTERP_SAMPLE 2
4006
4007 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
4008 enum glsl_interp_mode interp, unsigned location)
4009 {
4010 switch (interp) {
4011 case INTERP_MODE_FLAT:
4012 default:
4013 return NULL;
4014 case INTERP_MODE_SMOOTH:
4015 case INTERP_MODE_NONE:
4016 if (location == INTERP_CENTER)
4017 return ctx->persp_center;
4018 else if (location == INTERP_CENTROID)
4019 return ctx->persp_centroid;
4020 else if (location == INTERP_SAMPLE)
4021 return ctx->persp_sample;
4022 break;
4023 case INTERP_MODE_NOPERSPECTIVE:
4024 if (location == INTERP_CENTER)
4025 return ctx->linear_center;
4026 else if (location == INTERP_CENTROID)
4027 return ctx->linear_centroid;
4028 else if (location == INTERP_SAMPLE)
4029 return ctx->linear_sample;
4030 break;
4031 }
4032 return NULL;
4033 }
4034
4035 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
4036 LLVMValueRef sample_id)
4037 {
4038 LLVMValueRef result;
4039 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false));
4040
4041 ptr = LLVMBuildBitCast(ctx->builder, ptr,
4042 const_array(ctx->ac.v2f32, 64), "");
4043
4044 sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
4045 result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
4046
4047 return result;
4048 }
4049
4050 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
4051 {
4052 LLVMValueRef values[2];
4053
4054 values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
4055 values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
4056 return ac_build_gather_values(&ctx->ac, values, 2);
4057 }
4058
4059 static LLVMValueRef load_sample_mask_in(struct ac_nir_context *ctx)
4060 {
4061 uint8_t log2_ps_iter_samples = ctx->nctx->shader_info->info.ps.force_persample ? ctx->nctx->options->key.fs.log2_num_samples : ctx->nctx->options->key.fs.log2_ps_iter_samples;
4062
4063 /* The bit pattern matches that used by fixed function fragment
4064 * processing. */
4065 static const uint16_t ps_iter_masks[] = {
4066 0xffff, /* not used */
4067 0x5555,
4068 0x1111,
4069 0x0101,
4070 0x0001,
4071 };
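/* E.g. with log2_ps_iter_samples = 1, the mask 0x5555 covers every
 * second sample; shifting it left by the current sample id (below)
 * keeps only the coverage bits this iteration is responsible for. */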
4072 assert(log2_ps_iter_samples < ARRAY_SIZE(ps_iter_masks));
4073
4074 uint32_t ps_iter_mask = ps_iter_masks[log2_ps_iter_samples];
4075
4076 LLVMValueRef result, sample_id;
4077 sample_id = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
4078 sample_id = LLVMBuildShl(ctx->ac.builder, LLVMConstInt(ctx->ac.i32, ps_iter_mask, false), sample_id, "");
4079 result = LLVMBuildAnd(ctx->ac.builder, sample_id, ctx->abi->sample_coverage, "");
4080 return result;
4081 }
4082
4083 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
4084 const nir_intrinsic_instr *instr)
4085 {
4086 LLVMValueRef result[4];
4087 LLVMValueRef interp_param, attr_number;
4088 unsigned location;
4089 unsigned chan;
4090 LLVMValueRef src_c0 = NULL;
4091 LLVMValueRef src_c1 = NULL;
4092 LLVMValueRef src0 = NULL;
4093 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
4094 switch (instr->intrinsic) {
4095 case nir_intrinsic_interp_var_at_centroid:
4096 location = INTERP_CENTROID;
4097 break;
4098 case nir_intrinsic_interp_var_at_sample:
4099 case nir_intrinsic_interp_var_at_offset:
4100 location = INTERP_CENTER;
4101 src0 = get_src(ctx->nir, instr->src[0]);
4102 break;
4103 default:
4104 break;
4105 }
4106
4107 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
4108 src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_0, ""));
4109 src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_1, ""));
4110 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
4111 LLVMValueRef sample_position;
4112 LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
4113
4114 /* fetch sample ID */
4115 sample_position = load_sample_position(ctx, src0);
4116
4117 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_0, "");
4118 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
4119 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_1, "");
4120 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
4121 }
4122 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
4123 attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
4124
4125 if (location == INTERP_CENTER) {
4126 LLVMValueRef ij_out[2];
4127 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param);
4128
4129 /*
4130 * take the I then J parameters, and the DDX/Y for it, and
4131 * calculate the IJ inputs for the interpolator.
4132 * temp1 = ddx * offset/sample.x + I;
4133 * interp_param.I = ddy * offset/sample.y + temp1;
4134 * temp1 = ddx * offset/sample.x + J;
4135 * interp_param.J = ddy * offset/sample.y + temp1;
4136 */
4137 for (unsigned i = 0; i < 2; i++) {
4138 LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
4139 LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
4140 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
4141 ddxy_out, ix_ll, "");
4142 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
4143 ddxy_out, iy_ll, "");
4144 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
4145 interp_param, ix_ll, "");
4146 LLVMValueRef temp1, temp2;
4147
4148 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
4149 ctx->ac.f32, "");
4150
4151 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
4152 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
4153
4154 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
4155 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
4156
4157 ij_out[i] = LLVMBuildBitCast(ctx->builder,
4158 temp2, ctx->ac.i32, "");
4159 }
4160 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
4161
4162 }
4163
4164 for (chan = 0; chan < 4; chan++) {
4165 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
4166
4167 if (interp_param) {
4168 interp_param = LLVMBuildBitCast(ctx->builder,
4169 interp_param, ctx->ac.v2f32, "");
4170 LLVMValueRef i = LLVMBuildExtractElement(
4171 ctx->builder, interp_param, ctx->ac.i32_0, "");
4172 LLVMValueRef j = LLVMBuildExtractElement(
4173 ctx->builder, interp_param, ctx->ac.i32_1, "");
4174
4175 result[chan] = ac_build_fs_interp(&ctx->ac,
4176 llvm_chan, attr_number,
4177 ctx->prim_mask, i, j);
4178 } else {
4179 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
4180 LLVMConstInt(ctx->ac.i32, 2, false),
4181 llvm_chan, attr_number,
4182 ctx->prim_mask);
4183 }
4184 }
4185 return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
4186 instr->variables[0]->var->data.location_frac);
4187 }
4188
4189 static void
4190 visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
4191 {
4192 LLVMValueRef gs_next_vertex;
4193 LLVMValueRef can_emit;
4194 int idx;
4195 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4196
4197 assert(stream == 0);
4198
4199 /* Write vertex attribute values to GSVS ring */
4200 gs_next_vertex = LLVMBuildLoad(ctx->builder,
4201 ctx->gs_next_vertex,
4202 "");
4203
4204 /* If this thread has already emitted the declared maximum number of
4205 * vertices, kill it: excessive vertex emissions are not supposed to
4206 * have any effect, and GS threads have no externally observable
4207 * effects other than emitting vertices.
4208 */
4209 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
4210 LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
4211 ac_build_kill_if_false(&ctx->ac, can_emit);
4212
4213 /* loop num outputs */
4214 idx = 0;
4215 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4216 LLVMValueRef *out_ptr = &addrs[i * 4];
4217 int length = 4;
4218 int slot = idx;
4219 int slot_inc = 1;
4220
4221 if (!(ctx->output_mask & (1ull << i)))
4222 continue;
4223
4224 if (i == VARYING_SLOT_CLIP_DIST0) {
4225 /* pack clip and cull into a single set of slots */
4226 length = ctx->num_output_clips + ctx->num_output_culls;
4227 if (length > 4)
4228 slot_inc = 2;
4229 }
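/* The GSVS ring is component-major: dword (slot * 4 + j) of every
 * vertex is strided by gs_max_out_vertices, so adding gs_next_vertex
 * and scaling by 4 below gives this vertex's byte offset. */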
4230 for (unsigned j = 0; j < length; j++) {
4231 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
4232 out_ptr[j], "");
4233 LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
4234 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
4235 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
4236
4237 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
4238
4239 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
4240 out_val, 1,
4241 voffset, ctx->gs2vs_offset, 0,
4242 1, 1, true, true);
4243 }
4244 idx += slot_inc;
4245 }
4246
4247 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
4248 ctx->ac.i32_1, "");
4249 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
4250
4251 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
4252 }
4253
4254 static void
4255 visit_end_primitive(struct ac_shader_abi *abi, unsigned stream)
4256 {
4257 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4258 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (stream << 8), ctx->gs_wave_id);
4259 }
4260
4261 static LLVMValueRef
4262 load_tess_coord(struct ac_shader_abi *abi, LLVMTypeRef type,
4263 unsigned num_components)
4264 {
4265 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4266
4267 LLVMValueRef coord[4] = {
4268 ctx->tes_u,
4269 ctx->tes_v,
4270 ctx->ac.f32_0,
4271 ctx->ac.f32_0,
4272 };
4273
4274 if (ctx->tes_primitive_mode == GL_TRIANGLES)
4275 coord[2] = LLVMBuildFSub(ctx->builder, ctx->ac.f32_1,
4276 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), "");
4277
4278 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, num_components);
4279 return LLVMBuildBitCast(ctx->builder, result, type, "");
4280 }
4281
4282 static LLVMValueRef
4283 load_patch_vertices_in(struct ac_shader_abi *abi)
4284 {
4285 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4286 return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false);
4287 }
4288
4289 static void visit_intrinsic(struct ac_nir_context *ctx,
4290 nir_intrinsic_instr *instr)
4291 {
4292 LLVMValueRef result = NULL;
4293
4294 switch (instr->intrinsic) {
4295 case nir_intrinsic_ballot:
4296 result = ac_build_ballot(&ctx->ac, get_src(ctx, instr->src[0]));
4297 break;
4298 case nir_intrinsic_read_invocation:
4299 case nir_intrinsic_read_first_invocation: {
4300 LLVMValueRef args[2];
4301
4302 /* Value */
4303 args[0] = get_src(ctx, instr->src[0]);
4304
4305 unsigned num_args;
4306 const char *intr_name;
4307 if (instr->intrinsic == nir_intrinsic_read_invocation) {
4308 num_args = 2;
4309 intr_name = "llvm.amdgcn.readlane";
4310
4311 /* Invocation */
4312 args[1] = get_src(ctx, instr->src[1]);
4313 } else {
4314 num_args = 1;
4315 intr_name = "llvm.amdgcn.readfirstlane";
4316 }
4317
4318 /* We currently have no other way to prevent LLVM from lifting the icmp
4319 * calls to a dominating basic block.
4320 */
4321 ac_build_optimization_barrier(&ctx->ac, &args[0]);
4322
4323 result = ac_build_intrinsic(&ctx->ac, intr_name,
4324 ctx->ac.i32, args, num_args,
4325 AC_FUNC_ATTR_READNONE |
4326 AC_FUNC_ATTR_CONVERGENT);
4327 break;
4328 }
4329 case nir_intrinsic_load_subgroup_invocation:
4330 result = ac_get_thread_id(&ctx->ac);
4331 break;
4332 case nir_intrinsic_load_work_group_id: {
4333 LLVMValueRef values[3];
4334
4335 for (int i = 0; i < 3; i++) {
4336 values[i] = ctx->nctx->workgroup_ids[i] ?
4337 ctx->nctx->workgroup_ids[i] : ctx->ac.i32_0;
4338 }
4339
4340 result = ac_build_gather_values(&ctx->ac, values, 3);
4341 break;
4342 }
4343 case nir_intrinsic_load_base_vertex: {
4344 result = ctx->abi->base_vertex;
4345 break;
4346 }
4347 case nir_intrinsic_load_vertex_id_zero_base: {
4348 result = ctx->abi->vertex_id;
4349 break;
4350 }
4351 case nir_intrinsic_load_local_invocation_id: {
4352 result = ctx->nctx->local_invocation_ids;
4353 break;
4354 }
4355 case nir_intrinsic_load_base_instance:
4356 result = ctx->abi->start_instance;
4357 break;
4358 case nir_intrinsic_load_draw_id:
4359 result = ctx->abi->draw_id;
4360 break;
4361 case nir_intrinsic_load_view_index:
4362 result = ctx->nctx->view_index ? ctx->nctx->view_index : ctx->ac.i32_0;
4363 break;
4364 case nir_intrinsic_load_invocation_id:
4365 if (ctx->stage == MESA_SHADER_TESS_CTRL)
4366 result = unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
4367 else
4368 result = ctx->abi->gs_invocation_id;
4369 break;
4370 case nir_intrinsic_load_primitive_id:
4371 if (ctx->stage == MESA_SHADER_GEOMETRY) {
4372 result = ctx->abi->gs_prim_id;
4373 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
4374 result = ctx->abi->tcs_patch_id;
4375 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
4376 result = ctx->abi->tes_patch_id;
4377 } else
4378 			fprintf(stderr, "Unknown primitive id intrinsic: %d\n", ctx->stage);
4379 break;
4380 case nir_intrinsic_load_sample_id:
4381 result = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
4382 break;
4383 case nir_intrinsic_load_sample_pos:
4384 result = load_sample_pos(ctx);
4385 break;
4386 case nir_intrinsic_load_sample_mask_in:
4387 if (ctx->nctx)
4388 result = load_sample_mask_in(ctx);
4389 else
4390 result = ctx->abi->sample_coverage;
4391 break;
4392 case nir_intrinsic_load_frag_coord: {
4393 LLVMValueRef values[4] = {
4394 ctx->abi->frag_pos[0],
4395 ctx->abi->frag_pos[1],
4396 ctx->abi->frag_pos[2],
4397 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
4398 };
4399 result = ac_build_gather_values(&ctx->ac, values, 4);
4400 break;
4401 }
4402 case nir_intrinsic_load_front_face:
4403 result = ctx->abi->front_face;
4404 break;
4405 case nir_intrinsic_load_helper_invocation:
4406 result = visit_load_helper_invocation(ctx);
4407 break;
4408 case nir_intrinsic_load_instance_id:
4409 result = ctx->abi->instance_id;
4410 break;
4411 case nir_intrinsic_load_num_work_groups:
4412 result = ctx->nctx->num_work_groups;
4413 break;
4414 case nir_intrinsic_load_local_invocation_index:
4415 result = visit_load_local_invocation_index(ctx->nctx);
4416 break;
4417 case nir_intrinsic_load_push_constant:
4418 result = visit_load_push_constant(ctx->nctx, instr);
4419 break;
4420 case nir_intrinsic_vulkan_resource_index:
4421 result = visit_vulkan_resource_index(ctx->nctx, instr);
4422 break;
4423 case nir_intrinsic_vulkan_resource_reindex:
4424 result = visit_vulkan_resource_reindex(ctx->nctx, instr);
4425 break;
4426 case nir_intrinsic_store_ssbo:
4427 visit_store_ssbo(ctx, instr);
4428 break;
4429 case nir_intrinsic_load_ssbo:
4430 result = visit_load_buffer(ctx, instr);
4431 break;
4432 case nir_intrinsic_ssbo_atomic_add:
4433 case nir_intrinsic_ssbo_atomic_imin:
4434 case nir_intrinsic_ssbo_atomic_umin:
4435 case nir_intrinsic_ssbo_atomic_imax:
4436 case nir_intrinsic_ssbo_atomic_umax:
4437 case nir_intrinsic_ssbo_atomic_and:
4438 case nir_intrinsic_ssbo_atomic_or:
4439 case nir_intrinsic_ssbo_atomic_xor:
4440 case nir_intrinsic_ssbo_atomic_exchange:
4441 case nir_intrinsic_ssbo_atomic_comp_swap:
4442 result = visit_atomic_ssbo(ctx, instr);
4443 break;
4444 case nir_intrinsic_load_ubo:
4445 result = visit_load_ubo_buffer(ctx, instr);
4446 break;
4447 case nir_intrinsic_get_buffer_size:
4448 result = visit_get_buffer_size(ctx, instr);
4449 break;
4450 case nir_intrinsic_load_var:
4451 result = visit_load_var(ctx, instr);
4452 break;
4453 case nir_intrinsic_store_var:
4454 visit_store_var(ctx, instr);
4455 break;
4456 case nir_intrinsic_image_load:
4457 result = visit_image_load(ctx, instr);
4458 break;
4459 case nir_intrinsic_image_store:
4460 visit_image_store(ctx, instr);
4461 break;
4462 case nir_intrinsic_image_atomic_add:
4463 case nir_intrinsic_image_atomic_min:
4464 case nir_intrinsic_image_atomic_max:
4465 case nir_intrinsic_image_atomic_and:
4466 case nir_intrinsic_image_atomic_or:
4467 case nir_intrinsic_image_atomic_xor:
4468 case nir_intrinsic_image_atomic_exchange:
4469 case nir_intrinsic_image_atomic_comp_swap:
4470 result = visit_image_atomic(ctx, instr);
4471 break;
4472 case nir_intrinsic_image_size:
4473 result = visit_image_size(ctx, instr);
4474 break;
4475 case nir_intrinsic_discard:
4476 case nir_intrinsic_discard_if:
4477 emit_discard(ctx, instr);
4478 break;
4479 case nir_intrinsic_memory_barrier:
4480 case nir_intrinsic_group_memory_barrier:
4481 case nir_intrinsic_memory_barrier_atomic_counter:
4482 case nir_intrinsic_memory_barrier_buffer:
4483 case nir_intrinsic_memory_barrier_image:
4484 case nir_intrinsic_memory_barrier_shared:
4485 emit_membar(ctx->nctx, instr);
4486 break;
4487 case nir_intrinsic_barrier:
4488 emit_barrier(&ctx->ac, ctx->stage);
4489 break;
4490 case nir_intrinsic_var_atomic_add:
4491 case nir_intrinsic_var_atomic_imin:
4492 case nir_intrinsic_var_atomic_umin:
4493 case nir_intrinsic_var_atomic_imax:
4494 case nir_intrinsic_var_atomic_umax:
4495 case nir_intrinsic_var_atomic_and:
4496 case nir_intrinsic_var_atomic_or:
4497 case nir_intrinsic_var_atomic_xor:
4498 case nir_intrinsic_var_atomic_exchange:
4499 case nir_intrinsic_var_atomic_comp_swap:
4500 result = visit_var_atomic(ctx->nctx, instr);
4501 break;
4502 case nir_intrinsic_interp_var_at_centroid:
4503 case nir_intrinsic_interp_var_at_sample:
4504 case nir_intrinsic_interp_var_at_offset:
4505 result = visit_interp(ctx->nctx, instr);
4506 break;
4507 case nir_intrinsic_emit_vertex:
4508 ctx->abi->emit_vertex(ctx->abi, nir_intrinsic_stream_id(instr), ctx->outputs);
4509 break;
4510 case nir_intrinsic_end_primitive:
4511 ctx->abi->emit_primitive(ctx->abi, nir_intrinsic_stream_id(instr));
4512 break;
4513 case nir_intrinsic_load_tess_coord: {
4514 LLVMTypeRef type = ctx->nctx ?
4515 get_def_type(ctx->nctx->nir, &instr->dest.ssa) :
4516 NULL;
4517 result = ctx->abi->load_tess_coord(ctx->abi, type, instr->num_components);
4518 break;
4519 }
4520 case nir_intrinsic_load_tess_level_outer:
4521 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
4522 break;
4523 case nir_intrinsic_load_tess_level_inner:
4524 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
4525 break;
4526 case nir_intrinsic_load_patch_vertices_in:
4527 result = ctx->abi->load_patch_vertices_in(ctx->abi);
4528 break;
4529 case nir_intrinsic_vote_all: {
4530 LLVMValueRef tmp = ac_build_vote_all(&ctx->ac, get_src(ctx, instr->src[0]));
4531 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4532 break;
4533 }
4534 case nir_intrinsic_vote_any: {
4535 LLVMValueRef tmp = ac_build_vote_any(&ctx->ac, get_src(ctx, instr->src[0]));
4536 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4537 break;
4538 }
4539 case nir_intrinsic_vote_eq: {
4540 LLVMValueRef tmp = ac_build_vote_eq(&ctx->ac, get_src(ctx, instr->src[0]));
4541 result = LLVMBuildSExt(ctx->ac.builder, tmp, ctx->ac.i32, "");
4542 break;
4543 }
4544 default:
4545 fprintf(stderr, "Unknown intrinsic: ");
4546 nir_print_instr(&instr->instr, stderr);
4547 fprintf(stderr, "\n");
4548 break;
4549 }
4550 if (result) {
4551 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4552 }
4553 }
4554
4555 static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
4556 LLVMValueRef buffer_ptr, bool write)
4557 {
4558 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4559 LLVMValueRef result;
4560
4561 LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
4562
4563 result = LLVMBuildLoad(ctx->builder, buffer_ptr, "");
4564 LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
4565
4566 return result;
4567 }
4568
4569 static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
4570 {
4571 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4572 LLVMValueRef result;
4573
4574 LLVMSetMetadata(buffer_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
4575
4576 result = LLVMBuildLoad(ctx->builder, buffer_ptr, "");
4577 LLVMSetMetadata(result, ctx->ac.invariant_load_md_kind, ctx->ac.empty_md);
4578
4579 return result;
4580 }
4581
4582 static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
4583 unsigned descriptor_set,
4584 unsigned base_index,
4585 unsigned constant_index,
4586 LLVMValueRef index,
4587 enum ac_descriptor_type desc_type,
4588 bool image, bool write)
4589 {
4590 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4591 LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
4592 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout;
4593 struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
4594 unsigned offset = binding->offset;
4595 unsigned stride = binding->size;
4596 unsigned type_size;
4597 LLVMBuilderRef builder = ctx->builder;
4598 LLVMTypeRef type;
4599
4600 assert(base_index < layout->binding_count);
4601
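/* Image and FMASK descriptors are 8 dwords (32 bytes); sampler and
 * buffer descriptors are 4 dwords (16 bytes). The offset adjustments
 * below follow radv's set layout: an FMASK descriptor is placed 32
 * bytes after its image, and a combined image+sampler binding keeps
 * the sampler 64 bytes in. */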
4602 switch (desc_type) {
4603 case AC_DESC_IMAGE:
4604 type = ctx->ac.v8i32;
4605 type_size = 32;
4606 break;
4607 case AC_DESC_FMASK:
4608 type = ctx->ac.v8i32;
4609 offset += 32;
4610 type_size = 32;
4611 break;
4612 case AC_DESC_SAMPLER:
4613 type = ctx->ac.v4i32;
4614 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4615 offset += 64;
4616
4617 type_size = 16;
4618 break;
4619 case AC_DESC_BUFFER:
4620 type = ctx->ac.v4i32;
4621 type_size = 16;
4622 break;
4623 default:
4624 unreachable("invalid desc_type\n");
4625 }
4626
4627 offset += constant_index * stride;
4628
4629 if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
4630 (!index || binding->immutable_samplers_equal)) {
4631 if (binding->immutable_samplers_equal)
4632 constant_index = 0;
4633
4634 const uint32_t *samplers = radv_immutable_samplers(layout, binding);
4635
4636 LLVMValueRef constants[] = {
4637 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
4638 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
4639 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
4640 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
4641 };
4642 return ac_build_gather_values(&ctx->ac, constants, 4);
4643 }
4644
4645 assert(stride % type_size == 0);
4646
4647 if (!index)
4648 index = ctx->ac.i32_0;
4649
4650 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
4651
4652 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
4653 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
4654
4655 return ac_build_load_to_sgpr(&ctx->ac, list, index);
4656 }
4657
4658 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
4659 const nir_deref_var *deref,
4660 enum ac_descriptor_type desc_type,
4661 const nir_tex_instr *tex_instr,
4662 bool image, bool write)
4663 {
4664 LLVMValueRef index = NULL;
4665 unsigned constant_index = 0;
4666 unsigned descriptor_set;
4667 unsigned base_index;
4668
4669 if (!deref) {
4670 assert(tex_instr && !image);
4671 descriptor_set = 0;
4672 base_index = tex_instr->sampler_index;
4673 } else {
4674 const nir_deref *tail = &deref->deref;
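/* Flatten the (possibly multi-dimensional) array deref chain into a
 * single linear index: constant subscripts accumulate into
 * constant_index, dynamic ones into index, each scaled by the
 * array-of-arrays size of the remaining dimensions. */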
4675 while (tail->child) {
4676 const nir_deref_array *child = nir_deref_as_array(tail->child);
4677 unsigned array_size = glsl_get_aoa_size(tail->child->type);
4678
4679 if (!array_size)
4680 array_size = 1;
4681
4682 assert(child->deref_array_type != nir_deref_array_type_wildcard);
4683
4684 if (child->deref_array_type == nir_deref_array_type_indirect) {
4685 LLVMValueRef indirect = get_src(ctx, child->indirect);
4686
4687 indirect = LLVMBuildMul(ctx->ac.builder, indirect,
4688 LLVMConstInt(ctx->ac.i32, array_size, false), "");
4689
4690 if (!index)
4691 index = indirect;
4692 else
4693 index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
4694 }
4695
4696 constant_index += child->base_offset * array_size;
4697
4698 tail = &child->deref;
4699 }
4700 descriptor_set = deref->var->data.descriptor_set;
4701 base_index = deref->var->data.binding;
4702 }
4703
4704 return ctx->abi->load_sampler_desc(ctx->abi,
4705 descriptor_set,
4706 base_index,
4707 constant_index, index,
4708 desc_type, image, write);
4709 }
4710
4711 static void set_tex_fetch_args(struct ac_llvm_context *ctx,
4712 struct ac_image_args *args,
4713 const nir_tex_instr *instr,
4714 nir_texop op,
4715 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
4716 LLVMValueRef *param, unsigned count,
4717 unsigned dmask)
4718 {
4719 unsigned is_rect = 0;
4720 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
4721
4722 if (op == nir_texop_lod)
4723 da = false;
4724 /* Pad to power of two vector */
4725 while (count < util_next_power_of_two(count))
4726 param[count++] = LLVMGetUndef(ctx->i32);
4727
4728 if (count > 1)
4729 args->addr = ac_build_gather_values(ctx, param, count);
4730 else
4731 args->addr = param[0];
4732
4733 args->resource = res_ptr;
4734 args->sampler = samp_ptr;
4735
4736 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
4737 args->addr = param[0];
4738 return;
4739 }
4740
4741 args->dmask = dmask;
4742 args->unorm = is_rect;
4743 args->da = da;
4744 }
4745
4746 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4747 *
4748 * SI-CI:
4749 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4750 * filtering manually. The driver sets img7 to a mask clearing
4751 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4752 * s_and_b32 samp0, samp0, img7
4753 *
4754 * VI:
4755 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4756 */
4757 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
4758 LLVMValueRef res, LLVMValueRef samp)
4759 {
4760 LLVMBuilderRef builder = ctx->ac.builder;
4761 LLVMValueRef img7, samp0;
4762
4763 if (ctx->ac.chip_class >= VI)
4764 return samp;
4765
4766 img7 = LLVMBuildExtractElement(builder, res,
4767 LLVMConstInt(ctx->ac.i32, 7, 0), "");
4768 samp0 = LLVMBuildExtractElement(builder, samp,
4769 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4770 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4771 return LLVMBuildInsertElement(builder, samp, samp0,
4772 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4773 }
4774
4775 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
4776 nir_tex_instr *instr,
4777 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4778 LLVMValueRef *fmask_ptr)
4779 {
4780 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4781 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false);
4782 else
4783 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false);
4784 if (samp_ptr) {
4785 if (instr->sampler)
4786 *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false);
4787 else
4788 *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false);
4789 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4790 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4791 }
4792 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
4793 instr->op == nir_texop_samples_identical))
4794 *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false);
4795 }
4796
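/* Array slice coordinates are rounded to the nearest integer before
 * use; llvm.rint rounds to nearest, ties to even. */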
4797 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
4798 LLVMValueRef coord)
4799 {
4800 coord = ac_to_float(ctx, coord);
4801 coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
4802 coord = ac_to_integer(ctx, coord);
4803 return coord;
4804 }
4805
4806 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4807 {
4808 LLVMValueRef result = NULL;
4809 struct ac_image_args args = { 0 };
4810 unsigned dmask = 0xf;
4811 LLVMValueRef address[16];
4812 LLVMValueRef coords[5];
4813 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
4814 LLVMValueRef bias = NULL, offsets = NULL;
4815 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
4816 LLVMValueRef ddx = NULL, ddy = NULL;
4817 LLVMValueRef derivs[6];
4818 unsigned chan, count = 0;
4819 unsigned const_src = 0, num_deriv_comp = 0;
4820 bool lod_is_zero = false;
4821
4822 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
4823
4824 for (unsigned i = 0; i < instr->num_srcs; i++) {
4825 switch (instr->src[i].src_type) {
4826 case nir_tex_src_coord:
4827 coord = get_src(ctx, instr->src[i].src);
4828 break;
4829 case nir_tex_src_projector:
4830 break;
4831 case nir_tex_src_comparator:
4832 comparator = get_src(ctx, instr->src[i].src);
4833 break;
4834 case nir_tex_src_offset:
4835 offsets = get_src(ctx, instr->src[i].src);
4836 const_src = i;
4837 break;
4838 case nir_tex_src_bias:
4839 bias = get_src(ctx, instr->src[i].src);
4840 break;
4841 case nir_tex_src_lod: {
4842 nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
4843
4844 if (val && val->i32[0] == 0)
4845 lod_is_zero = true;
4846 lod = get_src(ctx, instr->src[i].src);
4847 break;
4848 }
4849 case nir_tex_src_ms_index:
4850 sample_index = get_src(ctx, instr->src[i].src);
4851 break;
4852 case nir_tex_src_ms_mcs:
4853 break;
4854 case nir_tex_src_ddx:
4855 ddx = get_src(ctx, instr->src[i].src);
4856 num_deriv_comp = instr->src[i].src.ssa->num_components;
4857 break;
4858 case nir_tex_src_ddy:
4859 ddy = get_src(ctx, instr->src[i].src);
4860 break;
4861 case nir_tex_src_texture_offset:
4862 case nir_tex_src_sampler_offset:
4863 case nir_tex_src_plane:
4864 default:
4865 break;
4866 }
4867 }
4868
4869 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4870 result = get_buffer_size(ctx, res_ptr, true);
4871 goto write_result;
4872 }
4873
4874 if (instr->op == nir_texop_texture_samples) {
4875 LLVMValueRef res, samples, is_msaa;
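/* The sample count is decoded from word 3 of the image descriptor:
 * the TYPE field in the top four bits matches 0xe for both MSAA
 * types once the lowest bit is masked off, and bits [19:16] hold
 * log2(samples) for MSAA resources. Non-MSAA resources report a
 * single sample. */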
4876 res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
4877 samples = LLVMBuildExtractElement(ctx->ac.builder, res,
4878 LLVMConstInt(ctx->ac.i32, 3, false), "");
4879 is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
4880 LLVMConstInt(ctx->ac.i32, 28, false), "");
4881 is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
4882 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4883 is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4884 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4885
4886 samples = LLVMBuildLShr(ctx->ac.builder, samples,
4887 LLVMConstInt(ctx->ac.i32, 16, false), "");
4888 samples = LLVMBuildAnd(ctx->ac.builder, samples,
4889 LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4890 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
4891 samples, "");
4892 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
4893 ctx->ac.i32_1, "");
4894 result = samples;
4895 goto write_result;
4896 }
4897
4898 if (coord)
4899 for (chan = 0; chan < instr->coord_components; chan++)
4900 coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4901
4902 if (offsets && instr->op != nir_texop_txf) {
4903 LLVMValueRef offset[3], pack;
4904 for (chan = 0; chan < 3; ++chan)
4905 offset[chan] = ctx->ac.i32_0;
4906
4907 args.offset = true;
4908 for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
4909 offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
4910 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
4911 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4912 if (chan)
4913 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4914 LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4915 }
4916 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4917 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4918 address[count++] = pack;
4919
4920 }
4921 /* pack LOD bias value */
4922 if (instr->op == nir_texop_txb && bias) {
4923 address[count++] = bias;
4924 }
4925
4926 /* Pack depth comparison value */
4927 if (instr->is_shadow && comparator) {
4928 LLVMValueRef z = ac_to_float(&ctx->ac,
4929 ac_llvm_extract_elem(&ctx->ac, comparator, 0));
4930
4931 /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
4932 * so the depth comparison value isn't clamped for Z16 and
4933 * Z24 anymore. Do it manually here.
4934 *
4935 * It's unnecessary if the original texture format was
4936 * Z32_FLOAT, but we don't know that here.
4937 */
4938 if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
4939 z = ac_build_clamp(&ctx->ac, z);
4940
4941 address[count++] = z;
4942 }
4943
4944 /* pack derivatives */
4945 if (ddx || ddy) {
4946 int num_src_deriv_channels, num_dest_deriv_channels;
4947 switch (instr->sampler_dim) {
4948 case GLSL_SAMPLER_DIM_3D:
4949 case GLSL_SAMPLER_DIM_CUBE:
4950 num_deriv_comp = 3;
4951 num_src_deriv_channels = 3;
4952 num_dest_deriv_channels = 3;
4953 break;
4954 case GLSL_SAMPLER_DIM_2D:
4955 default:
4956 num_src_deriv_channels = 2;
4957 num_dest_deriv_channels = 2;
4958 num_deriv_comp = 2;
4959 break;
4960 case GLSL_SAMPLER_DIM_1D:
4961 num_src_deriv_channels = 1;
4962 if (ctx->ac.chip_class >= GFX9) {
4963 num_dest_deriv_channels = 2;
4964 num_deriv_comp = 2;
4965 } else {
4966 num_dest_deriv_channels = 1;
4967 num_deriv_comp = 1;
4968 }
4969 break;
4970 }
4971
4972 for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4973 derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
4974 derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
4975 }
4976 for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4977 derivs[i] = ctx->ac.f32_0;
4978 derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
4979 }
4980 }
4981
4982 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
4983 for (chan = 0; chan < instr->coord_components; chan++)
4984 coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
4985 if (instr->coord_components == 3)
4986 coords[3] = LLVMGetUndef(ctx->ac.f32);
4987 ac_prepare_cube_coords(&ctx->ac,
4988 instr->op == nir_texop_txd, instr->is_array,
4989 instr->op == nir_texop_lod, coords, derivs);
4990 if (num_deriv_comp)
4991 num_deriv_comp--;
4992 }
4993
4994 if (ddx || ddy) {
4995 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
4996 address[count++] = derivs[i];
4997 }
4998
4999 /* Pack texture coordinates */
5000 if (coord) {
5001 address[count++] = coords[0];
5002 if (instr->coord_components > 1) {
5003 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
5004 coords[1] = apply_round_slice(&ctx->ac, coords[1]);
5005 }
5006 address[count++] = coords[1];
5007 }
5008 if (instr->coord_components > 2) {
5009 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
5010 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
5011 instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
5012 instr->op != nir_texop_txf) {
5013 coords[2] = apply_round_slice(&ctx->ac, coords[2]);
5014 }
5015 address[count++] = coords[2];
5016 }
5017
5018 if (ctx->ac.chip_class >= GFX9) {
5019 LLVMValueRef filler;
5020 if (instr->op == nir_texop_txf)
5021 filler = ctx->ac.i32_0;
5022 else
5023 filler = LLVMConstReal(ctx->ac.f32, 0.5);
5024
5025 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
5026 /* No nir_texop_lod, because it does not take a slice
5027 * even with array textures. */
5028 				if (instr->is_array && instr->op != nir_texop_lod) {
5029 address[count] = address[count - 1];
5030 address[count - 1] = filler;
5031 count++;
5032 } else
5033 address[count++] = filler;
5034 }
5035 }
5036 }
5037
5038 /* Pack LOD */
5039 if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) ||
5040 instr->op == nir_texop_txf)) {
5041 address[count++] = lod;
5042 } else if (instr->op == nir_texop_txf_ms && sample_index) {
5043 address[count++] = sample_index;
5044 } else if (instr->op == nir_texop_txs) {
5045 count = 0;
5046 if (lod)
5047 address[count++] = lod;
5048 else
5049 address[count++] = ctx->ac.i32_0;
5050 }
5051
5052 for (chan = 0; chan < count; chan++) {
5053 address[chan] = LLVMBuildBitCast(ctx->ac.builder,
5054 address[chan], ctx->ac.i32, "");
5055 }
5056
5057 if (instr->op == nir_texop_samples_identical) {
5058 LLVMValueRef txf_address[4];
5059 struct ac_image_args txf_args = { 0 };
5060 unsigned txf_count = count;
5061 memcpy(txf_address, address, sizeof(txf_address));
5062
5063 if (!instr->is_array)
5064 txf_address[2] = ctx->ac.i32_0;
5065 txf_address[3] = ctx->ac.i32_0;
5066
5067 set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
5068 fmask_ptr, NULL,
5069 txf_address, txf_count, 0xf);
5070
5071 result = build_tex_intrinsic(ctx, instr, false, &txf_args);
5072
5073 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
5074 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
5075 goto write_result;
5076 }
5077
5078 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
5079 instr->op != nir_texop_txs) {
5080 unsigned sample_chan = instr->is_array ? 3 : 2;
5081 address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
5082 address[0],
5083 address[1],
5084 instr->is_array ? address[2] : NULL,
5085 address[sample_chan],
5086 fmask_ptr);
5087 }
5088
5089 if (offsets && instr->op == nir_texop_txf) {
5090 nir_const_value *const_offset =
5091 nir_src_as_const_value(instr->src[const_src].src);
5092 int num_offsets = instr->src[const_src].src.ssa->num_components;
5093 assert(const_offset);
5094 num_offsets = MIN2(num_offsets, instr->coord_components);
5095 if (num_offsets > 2)
5096 address[2] = LLVMBuildAdd(ctx->ac.builder,
5097 address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
5098 if (num_offsets > 1)
5099 address[1] = LLVMBuildAdd(ctx->ac.builder,
5100 address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
5101 address[0] = LLVMBuildAdd(ctx->ac.builder,
5102 address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
5103
5104 }
5105
5106 /* TODO TG4 support */
5107 if (instr->op == nir_texop_tg4) {
5108 if (instr->is_shadow)
5109 dmask = 1;
5110 else
5111 dmask = 1 << instr->component;
5112 }
5113 set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
5114 res_ptr, samp_ptr, address, count, dmask);
5115
5116 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
5117
5118 if (instr->op == nir_texop_query_levels)
5119 result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
5120 else if (instr->is_shadow && instr->is_new_style_shadow &&
5121 instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
5122 instr->op != nir_texop_tg4)
5123 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
5124 else if (instr->op == nir_texop_txs &&
5125 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
5126 instr->is_array) {
5127 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
5128 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
5129 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
5130 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
5131 result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
5132 } else if (ctx->ac.chip_class >= GFX9 &&
5133 instr->op == nir_texop_txs &&
5134 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
5135 instr->is_array) {
5136 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
5137 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
5138 result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
5139 ctx->ac.i32_1, "");
5140 } else if (instr->dest.ssa.num_components != 4)
5141 result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
5142
5143 write_result:
5144 if (result) {
5145 assert(instr->dest.is_ssa);
5146 result = ac_to_integer(&ctx->ac, result);
5147 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
5148 }
5149 }
5150
5151
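/* Phi handling is split in two passes: visit_phi creates an empty LLVM
 * phi while the blocks are being emitted, and phi_post_pass fills in
 * the incoming values once all predecessor blocks exist.
 */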
5152 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
5153 {
5154 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
5155 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
5156
5157 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
5158 _mesa_hash_table_insert(ctx->phis, instr, result);
5159 }
5160
5161 static void visit_post_phi(struct ac_nir_context *ctx,
5162 nir_phi_instr *instr,
5163 LLVMValueRef llvm_phi)
5164 {
5165 nir_foreach_phi_src(src, instr) {
5166 LLVMBasicBlockRef block = get_block(ctx, src->pred);
5167 LLVMValueRef llvm_src = get_src(ctx, src->src);
5168
5169 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
5170 }
5171 }
5172
5173 static void phi_post_pass(struct ac_nir_context *ctx)
5174 {
5175 struct hash_entry *entry;
5176 hash_table_foreach(ctx->phis, entry) {
5177 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
5178 (LLVMValueRef)entry->data);
5179 }
5180 }
5181
5182
5183 static void visit_ssa_undef(struct ac_nir_context *ctx,
5184 const nir_ssa_undef_instr *instr)
5185 {
5186 unsigned num_components = instr->def.num_components;
5187 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
5188 LLVMValueRef undef;
5189
5190 if (num_components == 1)
5191 undef = LLVMGetUndef(type);
5192 else {
5193 undef = LLVMGetUndef(LLVMVectorType(type, num_components));
5194 }
5195 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
5196 }
5197
5198 static void visit_jump(struct ac_nir_context *ctx,
5199 const nir_jump_instr *instr)
5200 {
5201 switch (instr->type) {
5202 case nir_jump_break:
5203 LLVMBuildBr(ctx->ac.builder, ctx->break_block);
5204 LLVMClearInsertionPosition(ctx->ac.builder);
5205 break;
5206 case nir_jump_continue:
5207 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5208 LLVMClearInsertionPosition(ctx->ac.builder);
5209 break;
5210 default:
5211 fprintf(stderr, "Unknown NIR jump instr: ");
5212 nir_print_instr(&instr->instr, stderr);
5213 fprintf(stderr, "\n");
5214 abort();
5215 }
5216 }
5217
5218 static void visit_cf_list(struct ac_nir_context *ctx,
5219 struct exec_list *list);
5220
5221 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
5222 {
5223 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
5224 nir_foreach_instr(instr, block)
5225 {
5226 switch (instr->type) {
5227 case nir_instr_type_alu:
5228 visit_alu(ctx, nir_instr_as_alu(instr));
5229 break;
5230 case nir_instr_type_load_const:
5231 visit_load_const(ctx, nir_instr_as_load_const(instr));
5232 break;
5233 case nir_instr_type_intrinsic:
5234 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5235 break;
5236 case nir_instr_type_tex:
5237 visit_tex(ctx, nir_instr_as_tex(instr));
5238 break;
5239 case nir_instr_type_phi:
5240 visit_phi(ctx, nir_instr_as_phi(instr));
5241 break;
5242 case nir_instr_type_ssa_undef:
5243 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
5244 break;
5245 case nir_instr_type_jump:
5246 visit_jump(ctx, nir_instr_as_jump(instr));
5247 break;
5248 default:
5249 fprintf(stderr, "Unknown NIR instr type: ");
5250 nir_print_instr(instr, stderr);
5251 fprintf(stderr, "\n");
5252 abort();
5253 }
5254 }
5255
5256 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
5257 }
5258
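/* NIR booleans are 32-bit (0 or ~0), so the condition is compared
 * against zero to get the i1 needed for the conditional branch; an
 * else block is created only when the else list is non-empty.
 */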
5259 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
5260 {
5261 LLVMValueRef value = get_src(ctx, if_stmt->condition);
5262
5263 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
5264 LLVMBasicBlockRef merge_block =
5265 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5266 LLVMBasicBlockRef if_block =
5267 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5268 LLVMBasicBlockRef else_block = merge_block;
5269 if (!exec_list_is_empty(&if_stmt->else_list))
5270 else_block = LLVMAppendBasicBlockInContext(
5271 ctx->ac.context, fn, "");
5272
5273 LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
5274 ctx->ac.i32_0, "");
5275 LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
5276
5277 LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
5278 visit_cf_list(ctx, &if_stmt->then_list);
5279 if (LLVMGetInsertBlock(ctx->ac.builder))
5280 LLVMBuildBr(ctx->ac.builder, merge_block);
5281
5282 if (!exec_list_is_empty(&if_stmt->else_list)) {
5283 LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
5284 visit_cf_list(ctx, &if_stmt->else_list);
5285 if (LLVMGetInsertBlock(ctx->ac.builder))
5286 LLVMBuildBr(ctx->ac.builder, merge_block);
5287 }
5288
5289 LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
5290 }
5291
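/* Loops are lowered to a header block (the continue target) and an
 * exit block (the break target); the parent targets are saved and
 * restored around the body so nested loops resolve correctly.
 */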
5292 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
5293 {
5294 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
5295 LLVMBasicBlockRef continue_parent = ctx->continue_block;
5296 LLVMBasicBlockRef break_parent = ctx->break_block;
5297
5298 ctx->continue_block =
5299 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5300 ctx->break_block =
5301 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5302
5303 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5304 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
5305 visit_cf_list(ctx, &loop->body);
5306
5307 if (LLVMGetInsertBlock(ctx->ac.builder))
5308 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5309 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
5310
5311 ctx->continue_block = continue_parent;
5312 ctx->break_block = break_parent;
5313 }
5314
5315 static void visit_cf_list(struct ac_nir_context *ctx,
5316 struct exec_list *list)
5317 {
5318 foreach_list_typed(nir_cf_node, node, node, list)
5319 {
5320 switch (node->type) {
5321 case nir_cf_node_block:
5322 visit_block(ctx, nir_cf_node_as_block(node));
5323 break;
5324
5325 case nir_cf_node_if:
5326 visit_if(ctx, nir_cf_node_as_if(node));
5327 break;
5328
5329 case nir_cf_node_loop:
5330 visit_loop(ctx, nir_cf_node_as_loop(node));
5331 break;
5332
5333 default:
5334 assert(0);
5335 }
5336 }
5337 }
5338
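/* Fetch VS inputs through the vertex buffer descriptors: instanced
 * attributes are indexed with instance_id + start_instance, the rest
 * with vertex_id + base_vertex, and each slot is loaded as a vec4.
 */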
5339 static void
5340 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
5341 struct nir_variable *variable)
5342 {
5343 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
5344 LLVMValueRef t_offset;
5345 LLVMValueRef t_list;
5346 LLVMValueRef input;
5347 LLVMValueRef buffer_index;
5348 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
5349 int idx = variable->data.location;
5350 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
5351
5352 variable->data.driver_location = idx * 4;
5353
5354 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
5355 if (ctx->options->key.vs.instance_rate_inputs & (1u << (index + i))) {
5356 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
5357 ctx->abi.start_instance, "");
5358 if (ctx->options->key.vs.as_ls) {
5359 ctx->shader_info->vs.vgpr_comp_cnt =
5360 MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
5361 } else {
5362 ctx->shader_info->vs.vgpr_comp_cnt =
5363 MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
5364 }
5365 } else
5366 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
5367 ctx->abi.base_vertex, "");
5368 t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
5369
5370 t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
5371
5372 input = ac_build_buffer_load_format(&ctx->ac, t_list,
5373 buffer_index,
5374 ctx->ac.i32_0,
5375 4, true);
5376
5377 for (unsigned chan = 0; chan < 4; chan++) {
5378 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
5379 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
5380 ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
5381 input, llvm_chan, ""));
5382 }
5383 }
5384 }
5385
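/* Interpolate one FS input attribute: with barycentric coordinates
 * (i, j) via fs.interp when interp_param is set, otherwise with
 * fs.constant for flat-shaded attributes.
 */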
5386 static void interp_fs_input(struct nir_to_llvm_context *ctx,
5387 unsigned attr,
5388 LLVMValueRef interp_param,
5389 LLVMValueRef prim_mask,
5390 LLVMValueRef result[4])
5391 {
5392 LLVMValueRef attr_number;
5393 unsigned chan;
5394 LLVMValueRef i, j;
5395 bool interp = interp_param != NULL;
5396
5397 attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
5398
5399 /* fs.constant returns the param from the middle vertex, so it's not
5400 * really useful for flat shading. It's meant to be used for custom
5401 * interpolation (but the intrinsic can't fetch from the other two
5402 * vertices).
5403 *
5404 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
5405 * to do the right thing. The only reason we use fs.constant is that
5406 * fs.interp cannot be used on integers, because they can be equal
5407 * to NaN.
5408 */
5409 if (interp) {
5410 interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
5411 ctx->ac.v2f32, "");
5412
5413 i = LLVMBuildExtractElement(ctx->builder, interp_param,
5414 ctx->ac.i32_0, "");
5415 j = LLVMBuildExtractElement(ctx->builder, interp_param,
5416 ctx->ac.i32_1, "");
5417 }
5418
5419 for (chan = 0; chan < 4; chan++) {
5420 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
5421
5422 if (interp) {
5423 result[chan] = ac_build_fs_interp(&ctx->ac,
5424 llvm_chan,
5425 attr_number,
5426 prim_mask, i, j);
5427 } else {
5428 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
5429 LLVMConstInt(ctx->ac.i32, 2, false),
5430 llvm_chan,
5431 attr_number,
5432 prim_mask);
5433 }
5434 }
5435 }
5436
5437 static void
5438 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
5439 struct nir_variable *variable)
5440 {
5441 int idx = variable->data.location;
5442 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5443 LLVMValueRef interp;
5444
5445 variable->data.driver_location = idx * 4;
5446 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
5447
5448 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
5449 unsigned interp_type;
5450 if (variable->data.sample) {
5451 interp_type = INTERP_SAMPLE;
5452 ctx->shader_info->info.ps.force_persample = true;
5453 } else if (variable->data.centroid)
5454 interp_type = INTERP_CENTROID;
5455 else
5456 interp_type = INTERP_CENTER;
5457
5458 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
5459 } else
5460 interp = NULL;
5461
5462 for (unsigned i = 0; i < attrib_count; ++i)
5463 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
5464
5465 }
5466
5467 static void
5468 handle_vs_inputs(struct nir_to_llvm_context *ctx,
5469 struct nir_shader *nir) {
5470 nir_foreach_variable(variable, &nir->inputs)
5471 handle_vs_input_decl(ctx, variable);
5472 }
5473
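/* If a multisampled shader uses both center and centroid, substitute
 * the center I/J for the centroid I/J when the sign bit of PRIM_MASK
 * is set; centroid and center coincide for fully covered pixels, which
 * is what that bit indicates here.
 */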
5474 static void
5475 prepare_interp_optimize(struct nir_to_llvm_context *ctx,
5476 struct nir_shader *nir)
5477 {
5478 if (!ctx->options->key.fs.multisample)
5479 return;
5480
5481 bool uses_center = false;
5482 bool uses_centroid = false;
5483 nir_foreach_variable(variable, &nir->inputs) {
5484 if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
5485 variable->data.sample)
5486 continue;
5487
5488 if (variable->data.centroid)
5489 uses_centroid = true;
5490 else
5491 uses_center = true;
5492 }
5493
5494 if (uses_center && uses_centroid) {
5495 LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->prim_mask, ctx->ac.i32_0, "");
5496 ctx->persp_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->persp_center, ctx->persp_centroid, "");
5497 ctx->linear_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->linear_center, ctx->linear_centroid, "");
5498 }
5499 }
5500
5501 static void
5502 handle_fs_inputs(struct nir_to_llvm_context *ctx,
5503 struct nir_shader *nir)
5504 {
5505 prepare_interp_optimize(ctx, nir);
5506
5507 nir_foreach_variable(variable, &nir->inputs)
5508 handle_fs_input_decl(ctx, variable);
5509
5510 unsigned index = 0;
5511
5512 if (ctx->shader_info->info.ps.uses_input_attachments ||
5513 ctx->shader_info->info.needs_multiview_view_index)
5514 ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
5515
5516 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
5517 LLVMValueRef interp_param;
5518 LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
5519
5520 if (!(ctx->input_mask & (1ull << i)))
5521 continue;
5522
5523 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
5524 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
5525 interp_param = *inputs;
5526 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
5527 inputs);
5528
5529 if (!interp_param)
5530 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
5531 ++index;
5532 } else if (i == VARYING_SLOT_POS) {
5533 for (int j = 0; j < 3; ++j)
5534 inputs[j] = ctx->abi.frag_pos[j];
5535
5536 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
5537 ctx->abi.frag_pos[3]);
5538 }
5539 }
5540 ctx->shader_info->fs.num_interp = index;
5541 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
5542 ctx->shader_info->fs.has_pcoord = true;
5543 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID))
5544 ctx->shader_info->fs.prim_id_input = true;
5545 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
5546 ctx->shader_info->fs.layer_input = true;
5547 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
5548
5549 if (ctx->shader_info->info.needs_multiview_view_index)
5550 ctx->view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
5551 }
5552
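/* Emit allocas in the function entry block so the mem2reg pass run in
 * ac_llvm_finalize_module can promote them to SSA values; the store of
 * a null constant gives them defined initial contents.
 */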
5553 static LLVMValueRef
5554 ac_build_alloca(struct ac_llvm_context *ac,
5555 LLVMTypeRef type,
5556 const char *name)
5557 {
5558 LLVMBuilderRef builder = ac->builder;
5559 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
5560 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
5561 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
5562 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
5563 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
5564 LLVMValueRef res;
5565
5566 if (first_instr) {
5567 LLVMPositionBuilderBefore(first_builder, first_instr);
5568 } else {
5569 LLVMPositionBuilderAtEnd(first_builder, first_block);
5570 }
5571
5572 res = LLVMBuildAlloca(first_builder, type, name);
5573 LLVMBuildStore(builder, LLVMConstNull(type), res);
5574
5575 LLVMDisposeBuilder(first_builder);
5576
5577 return res;
5578 }
5579
5580 static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac,
5581 LLVMTypeRef type,
5582 const char *name)
5583 {
5584 LLVMValueRef ptr = ac_build_alloca(ac, type, name);
5585 LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr);
5586 return ptr;
5587 }
5588
5589 static void
5590 scan_shader_output_decl(struct nir_to_llvm_context *ctx,
5591 struct nir_variable *variable,
5592 struct nir_shader *shader,
5593 gl_shader_stage stage)
5594 {
5595 int idx = variable->data.location + variable->data.index;
5596 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5597 uint64_t mask_attribs;
5598
5599 variable->data.driver_location = idx * 4;
5600
5601 /* tess ctrl has its own load/store paths for outputs */
5602 if (stage == MESA_SHADER_TESS_CTRL)
5603 return;
5604
5605 mask_attribs = ((1ull << attrib_count) - 1) << idx;
5606 if (stage == MESA_SHADER_VERTEX ||
5607 stage == MESA_SHADER_TESS_EVAL ||
5608 stage == MESA_SHADER_GEOMETRY) {
5609 if (idx == VARYING_SLOT_CLIP_DIST0) {
5610 int length = shader->info.clip_distance_array_size +
5611 shader->info.cull_distance_array_size;
5612 if (stage == MESA_SHADER_VERTEX) {
5613 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
5614 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
5615 }
5616 if (stage == MESA_SHADER_TESS_EVAL) {
5617 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
5618 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
5619 }
5620
5621 if (length > 4)
5622 attrib_count = 2;
5623 else
5624 attrib_count = 1;
5625 mask_attribs = 1ull << idx;
5626 }
5627 }
5628
5629 ctx->output_mask |= mask_attribs;
5630 }
5631
5632 static void
5633 handle_shader_output_decl(struct ac_nir_context *ctx,
5634 struct nir_shader *nir,
5635 struct nir_variable *variable)
5636 {
5637 unsigned output_loc = variable->data.driver_location / 4;
5638 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5639
5640 /* tess ctrl has its own load/store paths for outputs */
5641 if (ctx->stage == MESA_SHADER_TESS_CTRL)
5642 return;
5643
5644 if (ctx->stage == MESA_SHADER_VERTEX ||
5645 ctx->stage == MESA_SHADER_TESS_EVAL ||
5646 ctx->stage == MESA_SHADER_GEOMETRY) {
5647 int idx = variable->data.location + variable->data.index;
5648 if (idx == VARYING_SLOT_CLIP_DIST0) {
5649 int length = nir->info.clip_distance_array_size +
5650 nir->info.cull_distance_array_size;
5651
5652 if (length > 4)
5653 attrib_count = 2;
5654 else
5655 attrib_count = 1;
5656 }
5657 }
5658
5659 for (unsigned i = 0; i < attrib_count; ++i) {
5660 for (unsigned chan = 0; chan < 4; chan++) {
5661 ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] =
5662 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5663 }
5664 }
5665 }
5666
5667 static LLVMTypeRef
5668 glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
5669 enum glsl_base_type type)
5670 {
5671 switch (type) {
5672 case GLSL_TYPE_INT:
5673 case GLSL_TYPE_UINT:
5674 case GLSL_TYPE_BOOL:
5675 case GLSL_TYPE_SUBROUTINE:
5676 return ctx->ac.i32;
5677 case GLSL_TYPE_FLOAT: /* TODO handle mediump */
5678 return ctx->ac.f32;
5679 case GLSL_TYPE_INT64:
5680 case GLSL_TYPE_UINT64:
5681 return ctx->ac.i64;
5682 case GLSL_TYPE_DOUBLE:
5683 return ctx->ac.f64;
5684 default:
5685 unreachable("unknown GLSL type");
5686 }
5687 }
5688
5689 static LLVMTypeRef
5690 glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
5691 const struct glsl_type *type)
5692 {
5693 if (glsl_type_is_scalar(type)) {
5694 return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
5695 }
5696
5697 if (glsl_type_is_vector(type)) {
5698 return LLVMVectorType(
5699 glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
5700 glsl_get_vector_elements(type));
5701 }
5702
5703 if (glsl_type_is_matrix(type)) {
5704 return LLVMArrayType(
5705 glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
5706 glsl_get_matrix_columns(type));
5707 }
5708
5709 if (glsl_type_is_array(type)) {
5710 return LLVMArrayType(
5711 glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
5712 glsl_get_length(type));
5713 }
5714
5715 assert(glsl_type_is_struct(type));
5716
5717 LLVMTypeRef member_types[glsl_get_length(type)];
5718
5719 for (unsigned i = 0; i < glsl_get_length(type); i++) {
5720 member_types[i] =
5721 glsl_to_llvm_type(ctx,
5722 glsl_get_struct_field(type, i));
5723 }
5724
5725 return LLVMStructTypeInContext(ctx->context, member_types,
5726 glsl_get_length(type), false);
5727 }
5728
5729 static void
5730 setup_locals(struct ac_nir_context *ctx,
5731 struct nir_function *func)
5732 {
5733 int i, j;
5734 ctx->num_locals = 0;
5735 nir_foreach_variable(variable, &func->impl->locals) {
5736 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5737 variable->data.driver_location = ctx->num_locals * 4;
5738 variable->data.location_frac = 0;
5739 ctx->num_locals += attrib_count;
5740 }
5741 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
5742 if (!ctx->locals)
5743 return;
5744
5745 for (i = 0; i < ctx->num_locals; i++) {
5746 for (j = 0; j < 4; j++) {
5747 ctx->locals[i * 4 + j] =
5748 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
5749 }
5750 }
5751 }
5752
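/* Compute shader shared variables become LLVM globals in the LDS
 * address space.
 */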
5753 static void
5754 setup_shared(struct ac_nir_context *ctx,
5755 struct nir_shader *nir)
5756 {
5757 nir_foreach_variable(variable, &nir->shared) {
5758 LLVMValueRef shared =
5759 LLVMAddGlobalInAddressSpace(
5760 ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type),
5761 variable->name ? variable->name : "",
5762 AC_LOCAL_ADDR_SPACE);
5763 _mesa_hash_table_insert(ctx->vars, variable, shared);
5764 }
5765 }
5766
5767 static LLVMValueRef
5768 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
5769 {
5770 v = ac_to_float(ctx, v);
5771 v = emit_intrin_2f_param(ctx, "llvm.maxnum", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
5772 return emit_intrin_2f_param(ctx, "llvm.minnum", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
5773 }
5774
5775
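/* Pack two 16-bit integers into a single 32-bit value: src0 in the
 * low half, src1 in the high half.
 */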
5776 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
5777 LLVMValueRef src0, LLVMValueRef src1)
5778 {
5779 LLVMValueRef const16 = LLVMConstInt(ctx->ac.i32, 16, false);
5780 LLVMValueRef comp[2];
5781
5782 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->ac.i32, 65535, 0), "");
5783 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->ac.i32, 65535, 0), "");
5784 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
5785 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
5786 }
5787
5788 /* Initialize arguments for the shader export intrinsic */
5789 static void
5790 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
5791 LLVMValueRef *values,
5792 unsigned target,
5793 struct ac_export_args *args)
5794 {
5795 /* Default is 0xf. Adjusted below depending on the format. */
5796 args->enabled_channels = 0xf;
5797
5798 /* Specify whether the EXEC mask represents the valid mask */
5799 args->valid_mask = 0;
5800
5801 /* Specify whether this is the last export */
5802 args->done = 0;
5803
5804 /* Specify the target we are exporting */
5805 args->target = target;
5806
5807 args->compr = false;
5808 args->out[0] = LLVMGetUndef(ctx->ac.f32);
5809 args->out[1] = LLVMGetUndef(ctx->ac.f32);
5810 args->out[2] = LLVMGetUndef(ctx->ac.f32);
5811 args->out[3] = LLVMGetUndef(ctx->ac.f32);
5812
5813 if (!values)
5814 return;
5815
5816 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
5817 LLVMValueRef val[4];
5818 unsigned index = target - V_008DFC_SQ_EXP_MRT;
5819 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
5820 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
5821 bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;
5822
5823 switch(col_format) {
5824 case V_028714_SPI_SHADER_ZERO:
5825 args->enabled_channels = 0; /* writemask */
5826 args->target = V_008DFC_SQ_EXP_NULL;
5827 break;
5828
5829 case V_028714_SPI_SHADER_32_R:
5830 args->enabled_channels = 1;
5831 args->out[0] = values[0];
5832 break;
5833
5834 case V_028714_SPI_SHADER_32_GR:
5835 args->enabled_channels = 0x3;
5836 args->out[0] = values[0];
5837 args->out[1] = values[1];
5838 break;
5839
5840 case V_028714_SPI_SHADER_32_AR:
5841 args->enabled_channels = 0x9;
5842 args->out[0] = values[0];
5843 args->out[3] = values[3];
5844 break;
5845
5846 case V_028714_SPI_SHADER_FP16_ABGR:
5847 args->compr = 1;
5848
5849 for (unsigned chan = 0; chan < 2; chan++) {
5850 LLVMValueRef pack_args[2] = {
5851 values[2 * chan],
5852 values[2 * chan + 1]
5853 };
5854 LLVMValueRef packed;
5855
5856 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
5857 args->out[chan] = packed;
5858 }
5859 break;
5860
5861 case V_028714_SPI_SHADER_UNORM16_ABGR:
5862 for (unsigned chan = 0; chan < 4; chan++) {
5863 val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
5864 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5865 LLVMConstReal(ctx->ac.f32, 65535), "");
5866 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5867 LLVMConstReal(ctx->ac.f32, 0.5), "");
5868 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
5869 ctx->ac.i32, "");
5870 }
5871
5872 args->compr = 1;
5873 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5874 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5875 break;
5876
5877 case V_028714_SPI_SHADER_SNORM16_ABGR:
5878 for (unsigned chan = 0; chan < 4; chan++) {
5879 val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1);
5880 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5881 LLVMConstReal(ctx->ac.f32, 32767), "");
5882
5883 /* If positive, add 0.5, else add -0.5. */
5884 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5885 LLVMBuildSelect(ctx->builder,
5886 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
5887 val[chan], ctx->ac.f32_0, ""),
5888 LLVMConstReal(ctx->ac.f32, 0.5),
5889 LLVMConstReal(ctx->ac.f32, -0.5), ""), "");
5890 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->ac.i32, "");
5891 }
5892
5893 args->compr = 1;
5894 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5895 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5896 break;
5897
5898 case V_028714_SPI_SHADER_UINT16_ABGR: {
5899 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32,
5900 is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
5901 LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->ac.i32, 3, 0);
5902
5903 for (unsigned chan = 0; chan < 4; chan++) {
5904 val[chan] = ac_to_integer(&ctx->ac, values[chan]);
5905 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
5906 }
5907
5908 args->compr = 1;
5909 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5910 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5911 break;
5912 }
5913
5914 case V_028714_SPI_SHADER_SINT16_ABGR: {
5915 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32,
5916 is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
5917 LLVMValueRef min_rgb = LLVMConstInt(ctx->ac.i32,
5918 is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
5919 LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->ac.i32_1;
5920 LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->ac.i32, -2, 0);
5921
5922 /* Clamp. */
5923 for (unsigned chan = 0; chan < 4; chan++) {
5924 val[chan] = ac_to_integer(&ctx->ac, values[chan]);
5925 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
5926 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
5927 }
5928
5929 args->compr = 1;
5930 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5931 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5932 break;
5933 }
5934
5935 default:
5936 case V_028714_SPI_SHADER_32_ABGR:
5937 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5938 break;
5939 }
5940 } else
5941 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5942
5943 for (unsigned i = 0; i < 4; ++i)
5944 args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
5945 }
5946
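/* Emit the VS/TES hardware exports: position, point size, layer,
 * viewport index and clip/cull distances go to the POS exports, the
 * remaining varyings to consecutive PARAM exports.
 */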
5947 static void
5948 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
5949 bool export_prim_id,
5950 struct ac_vs_output_info *outinfo)
5951 {
5952 uint32_t param_count = 0;
5953 unsigned target;
5954 unsigned pos_idx, num_pos_exports = 0;
5955 struct ac_export_args args, pos_args[4] = {};
5956 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
5957 int i;
5958
5959 if (ctx->options->key.has_multiview_view_index) {
5960 LLVMValueRef* tmp_out = &ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
5961 if (!*tmp_out) {
5962 for (unsigned i = 0; i < 4; ++i)
5963 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
5964 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5965 }
5966
5967 LLVMBuildStore(ctx->builder, ac_to_float(&ctx->ac, ctx->view_index), *tmp_out);
5968 ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
5969 }
5970
5971 memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
5972 sizeof(outinfo->vs_output_param_offset));
5973
5974 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
5975 LLVMValueRef slots[8];
5976 unsigned j;
5977
5978 if (outinfo->cull_dist_mask)
5979 outinfo->cull_dist_mask <<= ctx->num_output_clips;
5980
5981 i = VARYING_SLOT_CLIP_DIST0;
5982 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
5983 slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5984 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5985
5986 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
5987 slots[i] = LLVMGetUndef(ctx->ac.f32);
5988
5989 if (ctx->num_output_clips + ctx->num_output_culls > 4) {
5990 target = V_008DFC_SQ_EXP_POS + 3;
5991 si_llvm_init_export_args(ctx, &slots[4], target, &args);
5992 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5993 &args, sizeof(args));
5994 }
5995
5996 target = V_008DFC_SQ_EXP_POS + 2;
5997 si_llvm_init_export_args(ctx, &slots[0], target, &args);
5998 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5999 &args, sizeof(args));
6000
6001 }
6002
6003 LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1};
6004 if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
6005 for (unsigned j = 0; j < 4; j++)
6006 pos_values[j] = LLVMBuildLoad(ctx->builder,
6007 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
6008 }
6009 si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
6010
6011 if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
6012 outinfo->writes_pointsize = true;
6013 psize_value = LLVMBuildLoad(ctx->builder,
6014 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
6015 }
6016
6017 if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
6018 outinfo->writes_layer = true;
6019 layer_value = LLVMBuildLoad(ctx->builder,
6020 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
6021 }
6022
6023 if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
6024 outinfo->writes_viewport_index = true;
6025 viewport_index_value = LLVMBuildLoad(ctx->builder,
6026 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
6027 }
6028
6029 if (outinfo->writes_pointsize ||
6030 outinfo->writes_layer ||
6031 outinfo->writes_viewport_index) {
6032 pos_args[1].enabled_channels = ((outinfo->writes_pointsize ? 1 : 0) |
6033 (outinfo->writes_layer ? 4 : 0));
6034 pos_args[1].valid_mask = 0;
6035 pos_args[1].done = 0;
6036 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
6037 pos_args[1].compr = 0;
6038 pos_args[1].out[0] = ctx->ac.f32_0; /* X */
6039 pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
6040 pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
6041 pos_args[1].out[3] = ctx->ac.f32_0; /* W */
6042
6043 if (outinfo->writes_pointsize)
6044 pos_args[1].out[0] = psize_value;
6045 if (outinfo->writes_layer)
6046 pos_args[1].out[2] = layer_value;
6047 if (outinfo->writes_viewport_index) {
6048 if (ctx->options->chip_class >= GFX9) {
6049 /* GFX9 has the layer in out.z[10:0] and the viewport
6050 * index in out.z[19:16].
6051 */
6052 LLVMValueRef v = viewport_index_value;
6053 v = ac_to_integer(&ctx->ac, v);
6054 v = LLVMBuildShl(ctx->builder, v,
6055 LLVMConstInt(ctx->ac.i32, 16, false),
6056 "");
6057 v = LLVMBuildOr(ctx->builder, v,
6058 ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
6059
6060 pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
6061 pos_args[1].enabled_channels |= 1 << 2;
6062 } else {
6063 pos_args[1].out[3] = viewport_index_value;
6064 pos_args[1].enabled_channels |= 1 << 3;
6065 }
6066 }
6067 }
6068 for (i = 0; i < 4; i++) {
6069 if (pos_args[i].out[0])
6070 num_pos_exports++;
6071 }
6072
6073 pos_idx = 0;
6074 for (i = 0; i < 4; i++) {
6075 if (!pos_args[i].out[0])
6076 continue;
6077
6078 /* Specify the target we are exporting */
6079 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
6080 if (pos_idx == num_pos_exports)
6081 pos_args[i].done = 1;
6082 ac_build_export(&ctx->ac, &pos_args[i]);
6083 }
6084
6085 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6086 LLVMValueRef values[4];
6087 if (!(ctx->output_mask & (1ull << i)))
6088 continue;
6089
6090 for (unsigned j = 0; j < 4; j++)
6091 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6092 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
6093
6094 if (i == VARYING_SLOT_LAYER) {
6095 target = V_008DFC_SQ_EXP_PARAM + param_count;
6096 outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
6097 param_count++;
6098 } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
6099 target = V_008DFC_SQ_EXP_PARAM + param_count;
6100 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
6101 param_count++;
6102 } else if (i >= VARYING_SLOT_VAR0) {
6103 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
6104 target = V_008DFC_SQ_EXP_PARAM + param_count;
6105 outinfo->vs_output_param_offset[i] = param_count;
6106 param_count++;
6107 } else
6108 continue;
6109
6110 si_llvm_init_export_args(ctx, values, target, &args);
6111
6112 if (target >= V_008DFC_SQ_EXP_POS &&
6113 target <= (V_008DFC_SQ_EXP_POS + 3)) {
6114 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
6115 &args, sizeof(args));
6116 } else {
6117 ac_build_export(&ctx->ac, &args);
6118 }
6119 }
6120
6121 if (export_prim_id) {
6122 LLVMValueRef values[4];
6123 target = V_008DFC_SQ_EXP_PARAM + param_count;
6124 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
6125 param_count++;
6126
6127 values[0] = ctx->vs_prim_id;
6128 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2,
6129 ctx->shader_info->vs.vgpr_comp_cnt);
6130 for (unsigned j = 1; j < 4; j++)
6131 values[j] = ctx->ac.f32_0;
6132 si_llvm_init_export_args(ctx, values, target, &args);
6133 ac_build_export(&ctx->ac, &args);
6134 outinfo->export_prim_id = true;
6135 }
6136
6137 outinfo->pos_exports = num_pos_exports;
6138 outinfo->param_exports = param_count;
6139 }
6140
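/* Store ES outputs for the GS: on GFX9 the merged ES/GS stage writes
 * them to LDS at a per-vertex offset, on earlier chips they go to the
 * ESGS ring buffer.
 */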
6141 static void
6142 handle_es_outputs_post(struct nir_to_llvm_context *ctx,
6143 struct ac_es_output_info *outinfo)
6144 {
6145 int j;
6146 uint64_t max_output_written = 0;
6147 LLVMValueRef lds_base = NULL;
6148
6149 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6150 int param_index;
6151 int length = 4;
6152
6153 if (!(ctx->output_mask & (1ull << i)))
6154 continue;
6155
6156 if (i == VARYING_SLOT_CLIP_DIST0)
6157 length = ctx->num_output_clips + ctx->num_output_culls;
6158
6159 param_index = shader_io_get_unique_index(i);
6160
6161 max_output_written = MAX2(param_index + (length > 4), max_output_written);
6162 }
6163
6164 outinfo->esgs_itemsize = (max_output_written + 1) * 16;
6165
6166 if (ctx->ac.chip_class >= GFX9) {
6167 unsigned itemsize_dw = outinfo->esgs_itemsize / 4;
6168 LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
6169 LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6170 LLVMConstInt(ctx->ac.i32, 24, false),
6171 LLVMConstInt(ctx->ac.i32, 4, false), false);
6172 vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
6173 LLVMBuildMul(ctx->ac.builder, wave_idx,
6174 LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
6175 lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
6176 LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
6177 }
6178
6179 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6180 LLVMValueRef dw_addr;
6181 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
6182 int param_index;
6183 int length = 4;
6184
6185 if (!(ctx->output_mask & (1ull << i)))
6186 continue;
6187
6188 if (i == VARYING_SLOT_CLIP_DIST0)
6189 length = ctx->num_output_clips + ctx->num_output_culls;
6190
6191 param_index = shader_io_get_unique_index(i);
6192
6193 if (lds_base) {
6194 dw_addr = LLVMBuildAdd(ctx->builder, lds_base,
6195 LLVMConstInt(ctx->ac.i32, param_index * 4, false),
6196 "");
6197 }
6198 for (j = 0; j < length; j++) {
6199 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
6200 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
6201
6202 if (ctx->ac.chip_class >= GFX9) {
6203 ac_lds_store(&ctx->ac, dw_addr,
6204 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
6205 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
6206 } else {
6207 ac_build_buffer_store_dword(&ctx->ac,
6208 ctx->esgs_ring,
6209 out_val, 1,
6210 NULL, ctx->es2gs_offset,
6211 (4 * param_index + j) * 4,
6212 1, 1, true, true);
6213 }
6214 }
6215 }
6216 }
6217
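/* Store LS outputs to LDS for the TCS, at rel_auto_id times the
 * per-vertex dword stride unpacked from ls_out_layout.
 */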
6218 static void
6219 handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
6220 {
6221 LLVMValueRef vertex_id = ctx->rel_auto_id;
6222 LLVMValueRef vertex_dw_stride = unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
6223 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
6224 vertex_dw_stride, "");
6225
6226 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6227 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
6228 int length = 4;
6229
6230 if (!(ctx->output_mask & (1ull << i)))
6231 continue;
6232
6233 if (i == VARYING_SLOT_CLIP_DIST0)
6234 length = ctx->num_output_clips + ctx->num_output_culls;
6235 int param = shader_io_get_unique_index(i);
6236 mark_tess_output(ctx, false, param);
6237 if (length > 4)
6238 mark_tess_output(ctx, false, param + 1);
6239 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr,
6240 LLVMConstInt(ctx->ac.i32, param * 4, false),
6241 "");
6242 for (unsigned j = 0; j < length; j++) {
6243 ac_lds_store(&ctx->ac, dw_addr,
6244 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
6245 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
6246 }
6247 }
6248 }
6249
6250 struct ac_build_if_state
6251 {
6252 struct nir_to_llvm_context *ctx;
6253 LLVMValueRef condition;
6254 LLVMBasicBlockRef entry_block;
6255 LLVMBasicBlockRef true_block;
6256 LLVMBasicBlockRef false_block;
6257 LLVMBasicBlockRef merge_block;
6258 };
6259
6260 static LLVMBasicBlockRef
6261 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name)
6262 {
6263 LLVMBasicBlockRef current_block;
6264 LLVMBasicBlockRef next_block;
6265 LLVMBasicBlockRef new_block;
6266
6267 /* get current basic block */
6268 current_block = LLVMGetInsertBlock(ctx->builder);
6269
6270 /* check if there's another block after this one */
6271 next_block = LLVMGetNextBasicBlock(current_block);
6272 if (next_block) {
6273 /* insert the new block before the next block */
6274 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
6275 }
6276 else {
6277 /* append new block after current block */
6278 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
6279 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
6280 }
6281 return new_block;
6282 }
6283
6284 static void
6285 ac_nir_build_if(struct ac_build_if_state *ifthen,
6286 struct nir_to_llvm_context *ctx,
6287 LLVMValueRef condition)
6288 {
6289 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder);
6290
6291 memset(ifthen, 0, sizeof *ifthen);
6292 ifthen->ctx = ctx;
6293 ifthen->condition = condition;
6294 ifthen->entry_block = block;
6295
6296 /* create endif/merge basic block for the phi functions */
6297 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
6298
6299 /* create/insert true_block before merge_block */
6300 ifthen->true_block =
6301 LLVMInsertBasicBlockInContext(ctx->context,
6302 ifthen->merge_block,
6303 "if-true-block");
6304
6305 /* subsequent code goes into the true block */
6306 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
6307 }
6308
6309 /**
6310 * End a conditional.
6311 */
6312 static void
6313 ac_nir_build_endif(struct ac_build_if_state *ifthen)
6314 {
6315 LLVMBuilderRef builder = ifthen->ctx->builder;
6316
6317 /* Insert branch to the merge block from current block */
6318 LLVMBuildBr(builder, ifthen->merge_block);
6319
6320 /*
6321 * Now patch in the various branch instructions.
6322 */
6323
6324 /* Insert the conditional branch instruction at the end of entry_block */
6325 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
6326 if (ifthen->false_block) {
6327 /* we have an else clause */
6328 LLVMBuildCondBr(builder, ifthen->condition,
6329 ifthen->true_block, ifthen->false_block);
6330 }
6331 else {
6332 /* no else clause */
6333 LLVMBuildCondBr(builder, ifthen->condition,
6334 ifthen->true_block, ifthen->merge_block);
6335 }
6336
6337 /* Resume building code at end of the ifthen->merge_block */
6338 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
6339 }
6340
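/* Write the tessellation factors for the current patch: invocation 0
 * reads them back from LDS and stores them to the TF ring (preceded by
 * the dynamic HS control word on chips up to VI), and also to the
 * offchip buffer when the TES reads them.
 */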
6341 static void
6342 write_tess_factors(struct nir_to_llvm_context *ctx)
6343 {
6344 unsigned stride, outer_comps, inner_comps;
6345 struct ac_build_if_state if_ctx, inner_if_ctx;
6346 LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5);
6347 LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
6348 unsigned tess_inner_index, tess_outer_index;
6349 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
6350 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
6351 int i;
6352 emit_barrier(&ctx->ac, ctx->stage);
6353
6354 switch (ctx->options->key.tcs.primitive_mode) {
6355 case GL_ISOLINES:
6356 stride = 2;
6357 outer_comps = 2;
6358 inner_comps = 0;
6359 break;
6360 case GL_TRIANGLES:
6361 stride = 4;
6362 outer_comps = 3;
6363 inner_comps = 1;
6364 break;
6365 case GL_QUADS:
6366 stride = 6;
6367 outer_comps = 4;
6368 inner_comps = 2;
6369 break;
6370 default:
6371 return;
6372 }
6373
6374 ac_nir_build_if(&if_ctx, ctx,
6375 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
6376 invocation_id, ctx->ac.i32_0, ""));
6377
6378 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
6379 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
6380
6381 mark_tess_output(ctx, true, tess_inner_index);
6382 mark_tess_output(ctx, true, tess_outer_index);
6383 lds_base = get_tcs_out_current_patch_data_offset(ctx);
6384 lds_inner = LLVMBuildAdd(ctx->builder, lds_base,
6385 LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
6386 lds_outer = LLVMBuildAdd(ctx->builder, lds_base,
6387 LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
6388
6389 for (i = 0; i < 4; i++) {
6390 inner[i] = LLVMGetUndef(ctx->ac.i32);
6391 outer[i] = LLVMGetUndef(ctx->ac.i32);
6392 }
6393
6394 /* Isolines: the two outer tess factors are stored in reversed order. */
6395 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
6396 outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
6397 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
6398 ctx->ac.i32_1, "");
6399 outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
6400 } else {
6401 for (i = 0; i < outer_comps; i++) {
6402 outer[i] = out[i] =
6403 ac_lds_load(&ctx->ac, lds_outer);
6404 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
6405 ctx->ac.i32_1, "");
6406 }
6407 for (i = 0; i < inner_comps; i++) {
6408 inner[i] = out[outer_comps+i] =
6409 ac_lds_load(&ctx->ac, lds_inner);
6410 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
6411 ctx->ac.i32_1, "");
6412 }
6413 }
6414
6415 /* Convert the outputs to vectors for stores. */
6416 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
6417 vec1 = NULL;
6418
6419 if (stride > 4)
6420 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
6421
6422
6423 buffer = ctx->hs_ring_tess_factor;
6424 tf_base = ctx->tess_factor_offset;
6425 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
6426 LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
6427 unsigned tf_offset = 0;
6428
6429 if (ctx->options->chip_class <= VI) {
6430 ac_nir_build_if(&inner_if_ctx, ctx,
6431 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
6432 rel_patch_id, ctx->ac.i32_0, ""));
6433
6434 /* Store the dynamic HS control word. */
6435 ac_build_buffer_store_dword(&ctx->ac, buffer,
6436 LLVMConstInt(ctx->ac.i32, 0x80000000, false),
6437 1, ctx->ac.i32_0, tf_base,
6438 0, 1, 0, true, false);
6439 tf_offset += 4;
6440
6441 ac_nir_build_endif(&inner_if_ctx);
6442 }
6443
6444 /* Store the tessellation factors. */
6445 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
6446 MIN2(stride, 4), byteoffset, tf_base,
6447 tf_offset, 1, 0, true, false);
6448 if (vec1)
6449 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
6450 stride - 4, byteoffset, tf_base,
6451 16 + tf_offset, 1, 0, true, false);
6452
6453 /* Store to the offchip buffer for TES to read - only if TES reads them */
6454 if (ctx->options->key.tcs.tes_reads_tess_factors) {
6455 LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
6456 LLVMValueRef tf_inner_offset;
6457 unsigned param_outer, param_inner;
6458
6459 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
6460 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
6461 LLVMConstInt(ctx->ac.i32, param_outer, 0));
6462
6463 outer_vec = ac_build_gather_values(&ctx->ac, outer,
6464 util_next_power_of_two(outer_comps));
6465
6466 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
6467 outer_comps, tf_outer_offset,
6468 ctx->oc_lds, 0, 1, 0, true, false);
6469 if (inner_comps) {
6470 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
6471 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
6472 LLVMConstInt(ctx->ac.i32, param_inner, 0));
6473
6474 inner_vec = inner_comps == 1 ? inner[0] :
6475 ac_build_gather_values(&ctx->ac, inner, inner_comps);
6476 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
6477 inner_comps, tf_inner_offset,
6478 ctx->oc_lds, 0, 1, 0, true, false);
6479 }
6480 }
6481 ac_nir_build_endif(&if_ctx);
6482 }
6483
6484 static void
6485 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
6486 {
6487 write_tess_factors(ctx);
6488 }
6489
6490 static bool
6491 si_export_mrt_color(struct nir_to_llvm_context *ctx,
6492 LLVMValueRef *color, unsigned param, bool is_last,
6493 struct ac_export_args *args)
6494 {
6495 /* Export */
6496 si_llvm_init_export_args(ctx, color, param,
6497 args);
6498
6499 if (is_last) {
6500 args->valid_mask = 1; /* whether the EXEC mask is valid */
6501 args->done = 1; /* DONE bit */
6502 } else if (!args->enabled_channels)
6503 return false; /* unnecessary NULL export */
6504
6505 return true;
6506 }
6507
6508 static void
6509 radv_export_mrt_z(struct nir_to_llvm_context *ctx,
6510 LLVMValueRef depth, LLVMValueRef stencil,
6511 LLVMValueRef samplemask)
6512 {
6513 struct ac_export_args args;
6514
6515 ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
6516
6517 ac_build_export(&ctx->ac, &args);
6518 }
6519
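/* Export FS outputs: depth, stencil and sample mask are combined into
 * the MRTZ export, colors go to MRT exports, and a null export is
 * emitted when nothing else is written, since the hardware expects at
 * least one export.
 */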
6520 static void
6521 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
6522 {
6523 unsigned index = 0;
6524 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
6525 struct ac_export_args color_args[8];
6526
6527 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6528 LLVMValueRef values[4];
6529
6530 if (!(ctx->output_mask & (1ull << i)))
6531 continue;
6532
6533 if (i == FRAG_RESULT_DEPTH) {
6534 ctx->shader_info->fs.writes_z = true;
6535 depth = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6536 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6537 } else if (i == FRAG_RESULT_STENCIL) {
6538 ctx->shader_info->fs.writes_stencil = true;
6539 stencil = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6540 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6541 } else if (i == FRAG_RESULT_SAMPLE_MASK) {
6542 ctx->shader_info->fs.writes_sample_mask = true;
6543 samplemask = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6544 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6545 } else {
6546 bool last = false;
6547 for (unsigned j = 0; j < 4; j++)
6548 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6549 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
6550
6551 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
6552 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
6553
6554 bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]);
6555 if (ret)
6556 index++;
6557 }
6558 }
6559
6560 for (unsigned i = 0; i < index; i++)
6561 ac_build_export(&ctx->ac, &color_args[i]);
6562 if (depth || stencil || samplemask)
6563 radv_export_mrt_z(ctx, depth, stencil, samplemask);
6564 else if (!index) {
6565 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
6566 ac_build_export(&ctx->ac, &color_args[0]);
6567 }
6568 }
6569
6570 static void
6571 emit_gs_epilogue(struct nir_to_llvm_context *ctx)
6572 {
6573 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
6574 }
6575
6576 static void
6577 handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
6578 LLVMValueRef *addrs)
6579 {
6580 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
6581
6582 switch (ctx->stage) {
6583 case MESA_SHADER_VERTEX:
6584 if (ctx->options->key.vs.as_ls)
6585 handle_ls_outputs_post(ctx);
6586 else if (ctx->options->key.vs.as_es)
6587 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
6588 else
6589 handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
6590 &ctx->shader_info->vs.outinfo);
6591 break;
6592 case MESA_SHADER_FRAGMENT:
6593 handle_fs_outputs_post(ctx);
6594 break;
6595 case MESA_SHADER_GEOMETRY:
6596 emit_gs_epilogue(ctx);
6597 break;
6598 case MESA_SHADER_TESS_CTRL:
6599 handle_tcs_outputs_post(ctx);
6600 break;
6601 case MESA_SHADER_TESS_EVAL:
6602 if (ctx->options->key.tes.as_es)
6603 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
6604 else
6605 handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
6606 &ctx->shader_info->tes.outinfo);
6607 break;
6608 default:
6609 break;
6610 }
6611 }
6612
6613 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
6614 {
6615 LLVMPassManagerRef passmgr;
6616 /* Create the pass manager */
6617 passmgr = LLVMCreateFunctionPassManagerForModule(
6618 ctx->module);
6619
6620 /* This pass should eliminate all the load and store instructions */
6621 LLVMAddPromoteMemoryToRegisterPass(passmgr);
6622
6623 /* Add some optimization passes */
6624 LLVMAddScalarReplAggregatesPass(passmgr);
6625 LLVMAddLICMPass(passmgr);
6626 LLVMAddAggressiveDCEPass(passmgr);
6627 LLVMAddCFGSimplificationPass(passmgr);
6628 LLVMAddInstructionCombiningPass(passmgr);
6629
6630 /* Run the passes on the main function */
6631 LLVMInitializeFunctionPassManager(passmgr);
6632 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
6633 LLVMFinalizeFunctionPassManager(passmgr);
6634
6635 LLVMDisposeBuilder(ctx->builder);
6636 LLVMDisposePassManager(passmgr);
6637 }
6638
6639 static void
6640 ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
6641 {
6642 struct ac_vs_output_info *outinfo;
6643
6644 switch (ctx->stage) {
6645 case MESA_SHADER_FRAGMENT:
6646 case MESA_SHADER_COMPUTE:
6647 case MESA_SHADER_TESS_CTRL:
6648 case MESA_SHADER_GEOMETRY:
6649 return;
6650 case MESA_SHADER_VERTEX:
6651 if (ctx->options->key.vs.as_ls ||
6652 ctx->options->key.vs.as_es)
6653 return;
6654 outinfo = &ctx->shader_info->vs.outinfo;
6655 break;
6656 case MESA_SHADER_TESS_EVAL:
6657 if (ctx->options->key.tes.as_es)
6658 return;
6659 outinfo = &ctx->shader_info->tes.outinfo;
6660 break;
6661 default:
6662 unreachable("Unhandled shader type");
6663 }
6664
6665 ac_optimize_vs_outputs(&ctx->ac,
6666 ctx->main_function,
6667 outinfo->vs_output_param_offset,
6668 VARYING_SLOT_MAX,
6669 &outinfo->param_exports);
6670 }
6671
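/* Load the ring buffer descriptors needed by this stage from the
 * ring_offsets array; the GS stage additionally patches the GSVS
 * descriptor with the number of entries and the ring stride.
 */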
6672 static void
6673 ac_setup_rings(struct nir_to_llvm_context *ctx)
6674 {
6675 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
6676 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
6677 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false));
6678 }
6679
6680 if (ctx->is_gs_copy_shader) {
6681 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false));
6682 }
6683 if (ctx->stage == MESA_SHADER_GEOMETRY) {
6684 LLVMValueRef tmp;
6685 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
6686 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
6687
6688 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
6689
6690 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), "");
6691 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->ac.i32_1, "");
6692 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
6693 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, "");
6694 }
6695
6696 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
6697 ctx->stage == MESA_SHADER_TESS_EVAL) {
6698 ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
6699 ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
6700 }
6701 }

static unsigned
ac_nir_get_max_workgroup_size(enum chip_class chip_class,
			      const struct nir_shader *nir)
{
	switch (nir->info.stage) {
	case MESA_SHADER_TESS_CTRL:
		return chip_class >= CIK ? 128 : 64;
	case MESA_SHADER_GEOMETRY:
		return chip_class >= GFX9 ? 128 : 64;
	case MESA_SHADER_COMPUTE:
		break;
	default:
		return 0;
	}

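	/* For compute, the limit is the flattened local workgroup size,
	 * e.g. an 8x8x1 workgroup yields 64 and a 16x16x1 workgroup 256.
	 */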
	unsigned max_workgroup_size = nir->info.cs.local_size[0] *
		nir->info.cs.local_size[1] *
		nir->info.cs.local_size[2];
	return max_workgroup_size;
}

/* Fix up for the HW not emitting the TCS regs if there are no HS threads. */
static void ac_nir_fixup_ls_hs_input_vgprs(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
					  LLVMConstInt(ctx->ac.i32, 8, false),
					  LLVMConstInt(ctx->ac.i32, 8, false), false);
	LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
					      ctx->ac.i32_0, "");
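	/* merged_wave_info bits [15:8] hold the HS thread count. With no HS
	 * threads the hardware does not load the TCS input VGPRs, so the LS
	 * inputs arrive shifted into the slots the TCS values would normally
	 * occupy; move each one back to the register the ABI declared.
	 */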
	ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
	ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
	ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
	ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
}

static void prepare_gs_input_vgprs(struct nir_to_llvm_context *ctx)
{
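	/* On merged ES-GS, the vertex-offset inputs arrive packed: each even
	 * element of gs_vtx_offset holds a pair of 16-bit offsets. Unpack
	 * them into six separate values; even i takes the low 16 bits and
	 * odd i the high 16 bits of element i & ~1.
	 */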
	for (int i = 5; i >= 0; --i) {
		ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1],
						     LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false),
						     LLVMConstInt(ctx->ac.i32, 16, false), false);
	}

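	/* merged_wave_info bits [23:16] carry this wave's id within the GS
	 * group; it is needed later for the GS message (sendmsg) operations.
	 */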
	ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
				       LLVMConstInt(ctx->ac.i32, 16, false),
				       LLVMConstInt(ctx->ac.i32, 8, false), false);
}

void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
		      struct nir_shader *nir, struct nir_to_llvm_context *nctx)
{
	struct ac_nir_context ctx = {};
	struct nir_function *func;

	ctx.ac = *ac;
	ctx.abi = abi;

	ctx.nctx = nctx;
	if (nctx)
		nctx->nir = &ctx;

	ctx.stage = nir->info.stage;

	ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));

	nir_foreach_variable(variable, &nir->outputs)
		handle_shader_output_decl(&ctx, nir, variable);

	ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
					   _mesa_key_pointer_equal);
	ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
					   _mesa_key_pointer_equal);
	ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
					   _mesa_key_pointer_equal);

	func = (struct nir_function *)exec_list_get_head(&nir->functions);

	setup_locals(&ctx, func);

	if (nir->info.stage == MESA_SHADER_COMPUTE)
		setup_shared(&ctx, nir);

	visit_cf_list(&ctx, &func->impl->body);
	phi_post_pass(&ctx);

	ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS,
			      ctx.outputs);

	free(ctx.locals);
	ralloc_free(ctx.defs);
	ralloc_free(ctx.phis);
	ralloc_free(ctx.vars);

	if (nctx)
		nctx->nir = NULL;
}

static
LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
				       struct nir_shader *const *shaders,
				       int shader_count,
				       struct ac_shader_variant_info *shader_info,
				       const struct ac_nir_compiler_options *options)
{
	struct nir_to_llvm_context ctx = {0};
	unsigned i;
	ctx.options = options;
	ctx.shader_info = shader_info;
	ctx.context = LLVMContextCreate();
	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);

	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
			     options->family);
	ctx.ac.module = ctx.module;
	LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");

	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
	LLVMSetDataLayout(ctx.module, data_layout_str);
	LLVMDisposeTargetData(data_layout);
	LLVMDisposeMessage(data_layout_str);

	enum ac_float_mode float_mode =
		options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
				       AC_FLOAT_MODE_DEFAULT;

	ctx.builder = ac_create_builder(ctx.context, float_mode);
	ctx.ac.builder = ctx.builder;

	memset(shader_info, 0, sizeof(*shader_info));

	for (int i = 0; i < shader_count; ++i)
		ac_nir_shader_info_pass(shaders[i], options, &shader_info->info);

	for (i = 0; i < AC_UD_MAX_SETS; i++)
		shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
	for (i = 0; i < AC_UD_MAX_UD; i++)
		shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;

	ctx.max_workgroup_size = 0;
	for (int i = 0; i < shader_count; ++i) {
		ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
					      ac_nir_get_max_workgroup_size(ctx.options->chip_class,
									    shaders[i]));
	}

	create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
			shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);

	ctx.abi.inputs = &ctx.inputs[0];
	ctx.abi.emit_outputs = handle_shader_outputs_post;
	ctx.abi.emit_vertex = visit_emit_vertex;
	ctx.abi.load_ubo = radv_load_ubo;
	ctx.abi.load_ssbo = radv_load_ssbo;
	ctx.abi.load_sampler_desc = radv_get_sampler_desc;
	ctx.abi.clamp_shadow_reference = false;

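	/* Merged shaders (GFX9) start with threads for more than one stage
	 * in the wave, so begin with a full exec mask; each stage's code is
	 * then gated on its own thread count below.
	 */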
	if (shader_count >= 2)
		ac_init_exec_full_mask(&ctx.ac);

	if (ctx.ac.chip_class == GFX9 &&
	    shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
		ac_nir_fixup_ls_hs_input_vgprs(&ctx);

	for (int i = 0; i < shader_count; ++i) {
		ctx.stage = shaders[i]->info.stage;
		ctx.output_mask = 0;
		ctx.tess_outputs_written = 0;
		ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
		ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;

		if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
			ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex");
			ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
			ctx.abi.load_inputs = load_gs_input;
			ctx.abi.emit_primitive = visit_end_primitive;
		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
			ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
			ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read;
			ctx.abi.load_tess_varyings = load_tcs_varyings;
			ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
			ctx.abi.store_tcs_outputs = store_tcs_output;
		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
			ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
			ctx.abi.load_tess_varyings = load_tes_input;
			ctx.abi.load_tess_coord = load_tess_coord;
			ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
		} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
			if (shader_info->info.vs.needs_instance_id) {
				if (ctx.options->key.vs.as_ls) {
					ctx.shader_info->vs.vgpr_comp_cnt =
						MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt);
				} else {
					ctx.shader_info->vs.vgpr_comp_cnt =
						MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt);
				}
			}
		} else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
			shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
		}

		if (i)
			emit_barrier(&ctx.ac, ctx.stage);

		ac_setup_rings(&ctx);

		LLVMBasicBlockRef merge_block;
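		/* For merged shaders, only the threads belonging to this
		 * stage may run its code: bits [8*i+7:8*i] of
		 * merged_wave_info give the stage's thread count, and
		 * threads at or above it branch straight to the merge block.
		 */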
		if (shader_count >= 2) {
			LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
			LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
			merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");

			LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info,
							  LLVMConstInt(ctx.ac.i32, 8 * i, false),
							  LLVMConstInt(ctx.ac.i32, 8, false), false);
			LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
			LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
							  thread_id, count, "");
			LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);

			LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
		}

		if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT)
			handle_fs_inputs(&ctx, shaders[i]);
		else if (shaders[i]->info.stage == MESA_SHADER_VERTEX)
			handle_vs_inputs(&ctx, shaders[i]);
		else if (shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
			prepare_gs_input_vgprs(&ctx);

		nir_foreach_variable(variable, &shaders[i]->outputs)
			scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);

		ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i], &ctx);

		if (shader_count >= 2) {
			LLVMBuildBr(ctx.ac.builder, merge_block);
			LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
		}

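		/* GSVS ring sizing: every output slot takes one vec4 (16
		 * bytes) per vertex; when clip + cull distances exceed 4
		 * components they spill into a second slot, hence addclip.
		 */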
		if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
			unsigned addclip = shaders[i]->info.clip_distance_array_size +
					   shaders[i]->info.cull_distance_array_size > 4;
			shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
			shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
							     shaders[i]->info.gs.vertices_out;
		} else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
			shader_info->tcs.outputs_written = ctx.tess_outputs_written;
			shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
		} else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
			shader_info->vs.outputs_written = ctx.tess_outputs_written;
		}
	}

	LLVMBuildRetVoid(ctx.builder);

	if (options->dump_preoptir)
		ac_dump_module(ctx.module);

	ac_llvm_finalize_module(&ctx);

	if (shader_count == 1)
		ac_nir_eliminate_const_vs_outputs(&ctx);

	return ctx.module;
}

static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
	unsigned *retval = (unsigned *)context;
	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
	char *description = LLVMGetDiagInfoDescription(di);

	if (severity == LLVMDSError) {
		*retval = 1;
		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
			description);
	}

	LLVMDisposeMessage(description);
}

static unsigned ac_llvm_compile(LLVMModuleRef M,
				struct ac_shader_binary *binary,
				LLVMTargetMachineRef tm)
{
	unsigned retval = 0;
	char *err;
	LLVMContextRef llvm_ctx;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	LLVMBool mem_err;

	/* Set up the diagnostic handler. */
	llvm_ctx = LLVMGetModuleContext(M);

	LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
					&retval);

	/* Compile the IR. */
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
						      &err, &out_buffer);

	/* Process errors/warnings. */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		free(err);
		retval = 1;
		goto out;
	}

	/* Extract the shader code from the emitted ELF object. */
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	ac_elf_read(buffer_data, buffer_size, binary);

	/* Clean up. */
	LLVMDisposeMemoryBuffer(out_buffer);

out:
	return retval;
}

static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
				   LLVMModuleRef llvm_module,
				   struct ac_shader_binary *binary,
				   struct ac_shader_config *config,
				   struct ac_shader_variant_info *shader_info,
				   gl_shader_stage stage,
				   bool dump_shader, bool supports_spill)
{
	if (dump_shader)
		ac_dump_module(llvm_module);

	memset(binary, 0, sizeof(*binary));
	int v = ac_llvm_compile(llvm_module, binary, tm);
	if (v) {
		fprintf(stderr, "compile failed\n");
	}

	if (dump_shader)
		fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);

	ac_shader_binary_read_config(binary, config, 0, supports_spill);

	LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
	LLVMDisposeModule(llvm_module);
	LLVMContextDispose(ctx);

	if (stage == MESA_SHADER_FRAGMENT) {
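		/* Count the input VGPRs the hardware will load, as selected
		 * by SPI_PS_INPUT_ADDR: each barycentric pair costs 2 VGPRs,
		 * the pull-model barycentrics 3, and each scalar input 1.
		 */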
		shader_info->num_input_vgprs = 0;
		if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 2;
		if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 2;
		if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 2;
		if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 3;
		if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 2;
		if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 2;
		if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 2;
		if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
		if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
			shader_info->num_input_vgprs += 1;
	}
	config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);

	/* +3 = 1 SGPR for the scratch wave offset + 2 for VCC */
	config->num_sgprs = MAX2(config->num_sgprs,
				 shader_info->num_input_sgprs + 3);

	/* Enable 64-bit and 16-bit denormals, because there is no performance
	 * cost.
	 *
	 * If denormals are enabled, all floating-point output modifiers are
	 * ignored.
	 *
	 * Don't enable denormals for 32-bit floats, because:
	 * - Floating-point output modifiers would be ignored by the hw.
	 * - Some opcodes don't support denormals, such as v_mad_f32. We would
	 *   have to stop using those.
	 * - SI & CI would be very slow.
	 */
	config->float_mode |= V_00B028_FP_64_DENORMS;
}

static void
ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
{
	switch (nir->info.stage) {
	case MESA_SHADER_COMPUTE:
		for (int i = 0; i < 3; ++i)
			shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
		break;
	case MESA_SHADER_FRAGMENT:
		shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
		break;
	case MESA_SHADER_GEOMETRY:
		shader_info->gs.vertices_in = nir->info.gs.vertices_in;
		shader_info->gs.vertices_out = nir->info.gs.vertices_out;
		shader_info->gs.output_prim = nir->info.gs.output_primitive;
		shader_info->gs.invocations = nir->info.gs.invocations;
		break;
	case MESA_SHADER_TESS_EVAL:
		shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
		shader_info->tes.spacing = nir->info.tess.spacing;
		shader_info->tes.ccw = nir->info.tess.ccw;
		shader_info->tes.point_mode = nir->info.tess.point_mode;
		shader_info->tes.as_es = options->key.tes.as_es;
		break;
	case MESA_SHADER_TESS_CTRL:
		shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
		break;
	case MESA_SHADER_VERTEX:
		shader_info->vs.as_es = options->key.vs.as_es;
		shader_info->vs.as_ls = options->key.vs.as_ls;
		/* In LS mode we need at least 1; the invocation id needs 2 and is handled elsewhere. */
		if (options->key.vs.as_ls)
			shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
		break;
	default:
		break;
	}
}

void ac_compile_nir_shader(LLVMTargetMachineRef tm,
			   struct ac_shader_binary *binary,
			   struct ac_shader_config *config,
			   struct ac_shader_variant_info *shader_info,
			   struct nir_shader *const *nir,
			   int nir_count,
			   const struct ac_nir_compiler_options *options,
			   bool dump_shader)
{
	LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
							     options);

	ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill);
	for (int i = 0; i < nir_count; ++i)
		ac_fill_shader_info(shader_info, nir[i], options);

	/* Determine the ES type (VS or TES) for the GS on GFX9. */
	if (options->chip_class == GFX9) {
		if (nir_count == 2 &&
		    nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
			shader_info->gs.es_type = nir[0]->info.stage;
		}
	}
}

static void
ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
{
	LLVMValueRef args[9];
	args[0] = ctx->gsvs_ring;
	args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), "");
	/* args[2] (the ring offset) is filled in per component below. */
	args[3] = ctx->ac.i32_0;
	args[4] = ctx->ac.i32_1; /* OFFEN */
	args[5] = ctx->ac.i32_0; /* IDXEN */
	args[6] = ctx->ac.i32_1; /* GLC */
	args[7] = ctx->ac.i32_1; /* SLC */
	args[8] = ctx->ac.i32_0; /* TFE */

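	/* Read each output component back from the GSVS ring. The ring is
	 * laid out component-major: component j of slot `slot` starts at
	 * (slot * 4 + j) * gs_max_out_vertices * 16 * 4 bytes, and args[1]
	 * (vertex_id * 4) selects this vertex's dword within that region.
	 */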
	int idx = 0;

	for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
		int length = 4;
		int slot = idx;
		int slot_inc = 1;
		if (!(ctx->output_mask & (1ull << i)))
			continue;

		if (i == VARYING_SLOT_CLIP_DIST0) {
			/* Unpack clip and cull distances from a single set of slots. */
			length = ctx->num_output_clips + ctx->num_output_culls;
			if (length > 4)
				slot_inc = 2;
		}

		for (int j = 0; j < length; j++) {
			LLVMValueRef value;
			args[2] = LLVMConstInt(ctx->ac.i32,
					       (slot * 4 + j) *
					       ctx->gs_max_out_vertices * 16 * 4, false);

			value = ac_build_intrinsic(&ctx->ac,
						   "llvm.SI.buffer.load.dword.i32.i32",
						   ctx->ac.i32, args, 9,
						   AC_FUNC_ATTR_READONLY |
						   AC_FUNC_ATTR_LEGACY);

			LLVMBuildStore(ctx->builder,
				       ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
		}
		idx += slot_inc;
	}
	handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
}

void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
			      struct nir_shader *geom_shader,
			      struct ac_shader_binary *binary,
			      struct ac_shader_config *config,
			      struct ac_shader_variant_info *shader_info,
			      const struct ac_nir_compiler_options *options,
			      bool dump_shader)
{
	struct nir_to_llvm_context ctx = {0};
	ctx.context = LLVMContextCreate();
	ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
	ctx.options = options;
	ctx.shader_info = shader_info;

	ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
			     options->family);
	ctx.ac.module = ctx.module;

	ctx.is_gs_copy_shader = true;
	LLVMSetTarget(ctx.module, "amdgcn--");

	enum ac_float_mode float_mode =
		options->unsafe_math ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
				       AC_FLOAT_MODE_DEFAULT;

	ctx.builder = ac_create_builder(ctx.context, float_mode);
	ctx.ac.builder = ctx.builder;
	ctx.stage = MESA_SHADER_VERTEX;

	create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);

	ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
	ac_setup_rings(&ctx);

	ctx.num_output_clips = geom_shader->info.clip_distance_array_size;
	ctx.num_output_culls = geom_shader->info.cull_distance_array_size;

	struct ac_nir_context nir_ctx = {};
	nir_ctx.ac = ctx.ac;
	nir_ctx.abi = &ctx.abi;

	nir_ctx.nctx = &ctx;
	ctx.nir = &nir_ctx;

	nir_foreach_variable(variable, &geom_shader->outputs) {
		scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
		handle_shader_output_decl(&nir_ctx, geom_shader, variable);
	}

	ac_gs_copy_shader_emit(&ctx);

	ctx.nir = NULL;

	LLVMBuildRetVoid(ctx.builder);

	ac_llvm_finalize_module(&ctx);

	ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info,
			       MESA_SHADER_VERTEX,
			       dump_shader, options->supports_spill);
}