ac/nir: fix translation of nir_op_fsign for doubles
[mesa.git] / src/amd/common/ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_build.h"
26 #include "ac_llvm_util.h"
27 #include "ac_binary.h"
28 #include "sid.h"
29 #include "nir/nir.h"
30 #include "../vulkan/radv_descriptor_set.h"
31 #include "util/bitscan.h"
32 #include <llvm-c/Transforms/Scalar.h>
33 #include "ac_shader_abi.h"
34 #include "ac_shader_info.h"
35 #include "ac_shader_util.h"
36 #include "ac_exp_param.h"
37
38 enum radeon_llvm_calling_convention {
39 RADEON_LLVM_AMDGPU_VS = 87,
40 RADEON_LLVM_AMDGPU_GS = 88,
41 RADEON_LLVM_AMDGPU_PS = 89,
42 RADEON_LLVM_AMDGPU_CS = 90,
43 RADEON_LLVM_AMDGPU_HS = 93,
44 };
45
46 #define CONST_ADDR_SPACE 2
47 #define LOCAL_ADDR_SPACE 3
48
49 #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
50 #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
51
52 struct nir_to_llvm_context;
53
54 struct ac_nir_context {
55 struct ac_llvm_context ac;
56 struct ac_shader_abi *abi;
57
58 gl_shader_stage stage;
59
60 struct hash_table *defs;
61 struct hash_table *phis;
62 struct hash_table *vars;
63
64 LLVMValueRef main_function;
65 LLVMBasicBlockRef continue_block;
66 LLVMBasicBlockRef break_block;
67
68 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
69
70 int num_locals;
71 LLVMValueRef *locals;
72
73 struct nir_to_llvm_context *nctx; /* TODO get rid of this */
74 };
75
76 struct nir_to_llvm_context {
77 struct ac_llvm_context ac;
78 const struct ac_nir_compiler_options *options;
79 struct ac_shader_variant_info *shader_info;
80 struct ac_shader_abi abi;
81 struct ac_nir_context *nir;
82
83 unsigned max_workgroup_size;
84 LLVMContextRef context;
85 LLVMModuleRef module;
86 LLVMBuilderRef builder;
87 LLVMValueRef main_function;
88
89 struct hash_table *defs;
90 struct hash_table *phis;
91
92 LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
93 LLVMValueRef ring_offsets;
94 LLVMValueRef push_constants;
95 LLVMValueRef view_index;
96 LLVMValueRef num_work_groups;
97 LLVMValueRef workgroup_ids[3];
98 LLVMValueRef local_invocation_ids;
99 LLVMValueRef tg_size;
100
101 LLVMValueRef vertex_buffers;
102 LLVMValueRef rel_auto_id;
103 LLVMValueRef vs_prim_id;
104 LLVMValueRef ls_out_layout;
105 LLVMValueRef es2gs_offset;
106
107 LLVMValueRef tcs_offchip_layout;
108 LLVMValueRef tcs_out_offsets;
109 LLVMValueRef tcs_out_layout;
110 LLVMValueRef tcs_in_layout;
111 LLVMValueRef oc_lds;
112 LLVMValueRef merged_wave_info;
113 LLVMValueRef tess_factor_offset;
114 LLVMValueRef tes_rel_patch_id;
115 LLVMValueRef tes_u;
116 LLVMValueRef tes_v;
117
118 LLVMValueRef gsvs_ring_stride;
119 LLVMValueRef gsvs_num_entries;
120 LLVMValueRef gs2vs_offset;
121 LLVMValueRef gs_wave_id;
122 LLVMValueRef gs_vtx_offset[6];
123
124 LLVMValueRef esgs_ring;
125 LLVMValueRef gsvs_ring;
126 LLVMValueRef hs_ring_tess_offchip;
127 LLVMValueRef hs_ring_tess_factor;
128
129 LLVMValueRef prim_mask;
130 LLVMValueRef sample_pos_offset;
131 LLVMValueRef persp_sample, persp_center, persp_centroid;
132 LLVMValueRef linear_sample, linear_center, linear_centroid;
133
134 gl_shader_stage stage;
135
136 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
137
138 uint64_t input_mask;
139 uint64_t output_mask;
140 uint8_t num_output_clips;
141 uint8_t num_output_culls;
142
143 bool is_gs_copy_shader;
144 LLVMValueRef gs_next_vertex;
145 unsigned gs_max_out_vertices;
146
147 unsigned tes_primitive_mode;
148 uint64_t tess_outputs_written;
149 uint64_t tess_patch_outputs_written;
150
151 uint32_t tcs_patch_outputs_read;
152 uint64_t tcs_outputs_read;
153 };
154
155 static inline struct nir_to_llvm_context *
156 nir_to_llvm_context_from_abi(struct ac_shader_abi *abi)
157 {
158 struct nir_to_llvm_context *ctx = NULL;
159 return container_of(abi, ctx, abi);
160 }
161
162 static LLVMTypeRef
163 nir2llvmtype(struct ac_nir_context *ctx,
164 const struct glsl_type *type)
165 {
166 switch (glsl_get_base_type(glsl_without_array(type))) {
167 case GLSL_TYPE_UINT:
168 case GLSL_TYPE_INT:
169 return ctx->ac.i32;
170 case GLSL_TYPE_UINT64:
171 case GLSL_TYPE_INT64:
172 return ctx->ac.i64;
173 case GLSL_TYPE_DOUBLE:
174 return ctx->ac.f64;
175 case GLSL_TYPE_FLOAT:
176 return ctx->ac.f32;
177 default:
178 assert(!"Unsupported type in nir2llvmtype()");
179 break;
180 }
181 return 0;
182 }
183
184 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
185 const nir_deref_var *deref,
186 enum ac_descriptor_type desc_type,
187 const nir_tex_instr *instr,
188 bool image, bool write);
189
190 static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
191 {
192 return (index * 4) + chan;
193 }
194
195 static unsigned shader_io_get_unique_index(gl_varying_slot slot)
196 {
197 /* handle patch indices separately */
198 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
199 return 0;
200 if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
201 return 1;
202 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
203 return 2 + (slot - VARYING_SLOT_PATCH0);
204
205 if (slot == VARYING_SLOT_POS)
206 return 0;
207 if (slot == VARYING_SLOT_PSIZ)
208 return 1;
209 if (slot == VARYING_SLOT_CLIP_DIST0)
210 return 2;
211 /* index 3 is also reserved, for the second vec4 of clip distances */
212 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
213 return 4 + (slot - VARYING_SLOT_VAR0);
214 unreachable("illegal slot in get unique index\n");
215 }
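/* Illustrative examples of the mapping above (a sketch derived from the
 * branches in shader_io_get_unique_index, not an exhaustive table):
 *   VARYING_SLOT_TESS_LEVEL_OUTER -> 0  (patch index space)
 *   VARYING_SLOT_PATCH0 + 3       -> 5  (2 + 3, patch index space)
 *   VARYING_SLOT_POS              -> 0  (per-vertex index space)
 *   VARYING_SLOT_CLIP_DIST0       -> 2  (index 3 stays reserved, see above)
 *   VARYING_SLOT_VAR5             -> 9  (4 + 5)
 */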
216
217 static void set_llvm_calling_convention(LLVMValueRef func,
218 gl_shader_stage stage)
219 {
220 enum radeon_llvm_calling_convention calling_conv;
221
222 switch (stage) {
223 case MESA_SHADER_VERTEX:
224 case MESA_SHADER_TESS_EVAL:
225 calling_conv = RADEON_LLVM_AMDGPU_VS;
226 break;
227 case MESA_SHADER_GEOMETRY:
228 calling_conv = RADEON_LLVM_AMDGPU_GS;
229 break;
230 case MESA_SHADER_TESS_CTRL:
231 calling_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS : RADEON_LLVM_AMDGPU_VS;
232 break;
233 case MESA_SHADER_FRAGMENT:
234 calling_conv = RADEON_LLVM_AMDGPU_PS;
235 break;
236 case MESA_SHADER_COMPUTE:
237 calling_conv = RADEON_LLVM_AMDGPU_CS;
238 break;
239 default:
240 unreachable("Unhandled shader type");
241 }
242
243 LLVMSetFunctionCallConv(func, calling_conv);
244 }
245
246 #define MAX_ARGS 23
247 struct arg_info {
248 LLVMTypeRef types[MAX_ARGS];
249 LLVMValueRef *assign[MAX_ARGS];
250 unsigned array_params_mask;
251 uint8_t count;
252 uint8_t sgpr_count;
253 uint8_t num_sgprs_used;
254 uint8_t num_vgprs_used;
255 };
256
257 enum ac_arg_regfile {
258 ARG_SGPR,
259 ARG_VGPR,
260 };
261
262 static void
263 add_arg(struct arg_info *info, enum ac_arg_regfile regfile, LLVMTypeRef type,
264 LLVMValueRef *param_ptr)
265 {
266 assert(info->count < MAX_ARGS);
267
268 info->assign[info->count] = param_ptr;
269 info->types[info->count] = type;
270 info->count++;
271
272 if (regfile == ARG_SGPR) {
273 info->num_sgprs_used += ac_get_type_size(type) / 4;
274 info->sgpr_count++;
275 } else {
276 assert(regfile == ARG_VGPR);
277 info->num_vgprs_used += ac_get_type_size(type) / 4;
278 }
279 }
280
281 static inline void
282 add_array_arg(struct arg_info *info, LLVMTypeRef type, LLVMValueRef *param_ptr)
283 {
284 info->array_params_mask |= (1 << info->count);
285 add_arg(info, ARG_SGPR, type, param_ptr);
286 }
287
288 static void assign_arguments(LLVMValueRef main_function,
289 struct arg_info *info)
290 {
291 unsigned i;
292 for (i = 0; i < info->count; i++) {
293 if (info->assign[i])
294 *info->assign[i] = LLVMGetParam(main_function, i);
295 }
296 }
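/* A minimal usage sketch for the argument helpers above; the identifiers are
 * hypothetical, the real call sites live in create_function() below:
 *
 *   struct arg_info args = {};
 *   LLVMValueRef desc_set, vertex_id;
 *   add_array_arg(&args, type, &desc_set);             // SGPR, byval array
 *   add_arg(&args, ARG_VGPR, ctx->ac.i32, &vertex_id); // plain VGPR
 *   LLVMValueRef fn = create_llvm_function(..., &args, ...);
 *   assign_arguments(fn, &args); // writes LLVMGetParam(fn, i) into each slot
 */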
297
298 static LLVMValueRef
299 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
300 LLVMBuilderRef builder, LLVMTypeRef *return_types,
301 unsigned num_return_elems,
302 struct arg_info *args,
303 unsigned max_workgroup_size,
304 bool unsafe_math)
305 {
306 LLVMTypeRef main_function_type, ret_type;
307 LLVMBasicBlockRef main_function_body;
308
309 if (num_return_elems)
310 ret_type = LLVMStructTypeInContext(ctx, return_types,
311 num_return_elems, true);
312 else
313 ret_type = LLVMVoidTypeInContext(ctx);
314
315 /* Set up the function */
316 main_function_type =
317 LLVMFunctionType(ret_type, args->types, args->count, 0);
318 LLVMValueRef main_function =
319 LLVMAddFunction(module, "main", main_function_type);
320 main_function_body =
321 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
322 LLVMPositionBuilderAtEnd(builder, main_function_body);
323
324 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
325 for (unsigned i = 0; i < args->sgpr_count; ++i) {
326 if (args->array_params_mask & (1 << i)) {
327 LLVMValueRef P = LLVMGetParam(main_function, i);
328 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
329 ac_add_attr_dereferenceable(P, UINT64_MAX);
330 }
331 else {
332 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
333 }
334 }
335
336 if (max_workgroup_size) {
337 ac_llvm_add_target_dep_function_attr(main_function,
338 "amdgpu-max-work-group-size",
339 max_workgroup_size);
340 }
341 if (unsafe_math) {
342 /* These were copied from some LLVM test. */
343 LLVMAddTargetDependentFunctionAttr(main_function,
344 "less-precise-fpmad",
345 "true");
346 LLVMAddTargetDependentFunctionAttr(main_function,
347 "no-infs-fp-math",
348 "true");
349 LLVMAddTargetDependentFunctionAttr(main_function,
350 "no-nans-fp-math",
351 "true");
352 LLVMAddTargetDependentFunctionAttr(main_function,
353 "unsafe-fp-math",
354 "true");
355 }
356 return main_function;
357 }
358
359 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
360 {
361 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
362 CONST_ADDR_SPACE);
363 }
364
365 static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
366 {
367 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
368 type = LLVMGetElementType(type);
369
370 if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
371 return LLVMGetIntTypeWidth(type);
372
373 if (type == ctx->f16)
374 return 16;
375 if (type == ctx->f32)
376 return 32;
377 if (type == ctx->f64)
378 return 64;
379
380 unreachable("Unhandled type kind in get_elem_bits");
381 }
382
383 static LLVMValueRef unpack_param(struct ac_llvm_context *ctx,
384 LLVMValueRef param, unsigned rshift,
385 unsigned bitwidth)
386 {
387 LLVMValueRef value = param;
388 if (rshift)
389 value = LLVMBuildLShr(ctx->builder, value,
390 LLVMConstInt(ctx->i32, rshift, false), "");
391
392 if (rshift + bitwidth < 32) {
393 unsigned mask = (1 << bitwidth) - 1;
394 value = LLVMBuildAnd(ctx->builder, value,
395 LLVMConstInt(ctx->i32, mask, false), "");
396 }
397 return value;
398 }
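/* Example: unpack_param(ctx, param, 16, 8) shifts the SGPR right by 16 and
 * masks with 0xff, i.e. it extracts bits [23:16]. When the field reaches
 * bit 31 (rshift + bitwidth == 32), the mask is skipped entirely. */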
399
400 static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
401 {
402 switch (ctx->stage) {
403 case MESA_SHADER_TESS_CTRL:
404 return unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
405 case MESA_SHADER_TESS_EVAL:
406 return ctx->tes_rel_patch_id;
408 default:
409 unreachable("Illegal stage");
410 }
411 }
412
413 /* Tessellation shaders pass outputs to the next shader using LDS.
414 *
415 * LS outputs = TCS inputs
416 * TCS outputs = TES inputs
417 *
418 * The LDS layout is:
419 * - TCS inputs for patch 0
420 * - TCS inputs for patch 1
421 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
422 * - ...
423 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
424 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
425 * - TCS outputs for patch 1
426 * - Per-patch TCS outputs for patch 1
427 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
428 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
429 * - ...
430 *
431 * All three shaders VS(LS), TCS, TES share the same LDS space.
432 */
433 static LLVMValueRef
434 get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
435 {
436 if (ctx->stage == MESA_SHADER_VERTEX)
437 return unpack_param(&ctx->ac, ctx->ls_out_layout, 0, 13);
438 else if (ctx->stage == MESA_SHADER_TESS_CTRL)
439 return unpack_param(&ctx->ac, ctx->tcs_in_layout, 0, 13);
440 else {
441 assert(0);
442 return NULL;
443 }
444 }
445
446 static LLVMValueRef
447 get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
448 {
449 return unpack_param(&ctx->ac, ctx->tcs_out_layout, 0, 13);
450 }
451
452 static LLVMValueRef
453 get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
454 {
455 return LLVMBuildMul(ctx->builder,
456 unpack_param(&ctx->ac, ctx->tcs_out_offsets, 0, 16),
457 LLVMConstInt(ctx->ac.i32, 4, false), "");
458 }
459
460 static LLVMValueRef
461 get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
462 {
463 return LLVMBuildMul(ctx->builder,
464 unpack_param(&ctx->ac, ctx->tcs_out_offsets, 16, 16),
465 LLVMConstInt(ctx->ac.i32, 4, false), "");
466 }
467
468 static LLVMValueRef
469 get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
470 {
471 LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
472 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
473
474 return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
475 }
476
477 static LLVMValueRef
478 get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx)
479 {
480 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
481 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
482 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
483
484 return LLVMBuildAdd(ctx->builder, patch0_offset,
485 LLVMBuildMul(ctx->builder, patch_stride,
486 rel_patch_id, ""),
487 "");
488 }
489
490 static LLVMValueRef
491 get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx)
492 {
493 LLVMValueRef patch0_patch_data_offset =
494 get_tcs_out_patch0_patch_data_offset(ctx);
495 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
496 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
497
498 return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset,
499 LLVMBuildMul(ctx->builder, patch_stride,
500 rel_patch_id, ""),
501 "");
502 }
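/* A worked sketch of how the helpers above compose, with hypothetical
 * constants (the real values come from the layout user SGPRs):
 *
 *   out_patch0_offset = 512, out_patch_stride = 64, rel_patch_id = 2
 *   get_tcs_out_current_patch_offset = 512 + 2 * 64 = 640
 *
 * i.e. the patch0 base plus RelPatchID times the per-patch stride, matching
 * the LDS layout description at the top of this block. */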
503
504 static void
505 set_loc(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs,
506 uint32_t indirect_offset)
507 {
508 ud_info->sgpr_idx = *sgpr_idx;
509 ud_info->num_sgprs = num_sgprs;
510 ud_info->indirect = indirect_offset > 0;
511 ud_info->indirect_offset = indirect_offset;
512 *sgpr_idx += num_sgprs;
513 }
514
515 static void
516 set_loc_shader(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx,
517 uint8_t num_sgprs)
518 {
519 struct ac_userdata_info *ud_info =
520 &ctx->shader_info->user_sgprs_locs.shader_data[idx];
521 assert(ud_info);
522
523 set_loc(ud_info, sgpr_idx, num_sgprs, 0);
524 }
525
526 static void
527 set_loc_desc(struct nir_to_llvm_context *ctx, int idx, uint8_t *sgpr_idx,
528 uint32_t indirect_offset)
529 {
530 struct ac_userdata_info *ud_info =
531 &ctx->shader_info->user_sgprs_locs.descriptor_sets[idx];
532 assert(ud_info);
533
534 set_loc(ud_info, sgpr_idx, 2, indirect_offset);
535 }
536
537 struct user_sgpr_info {
538 bool need_ring_offsets;
539 uint8_t sgpr_count;
540 bool indirect_all_descriptor_sets;
541 };
542
543 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
544 gl_shader_stage stage,
545 struct user_sgpr_info *user_sgpr_info)
546 {
547 memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
548
549 /* until we sort out scratch/global buffers, always assign ring offsets for gs/vs/es */
550 if (stage == MESA_SHADER_GEOMETRY ||
551 stage == MESA_SHADER_VERTEX ||
552 stage == MESA_SHADER_TESS_CTRL ||
553 stage == MESA_SHADER_TESS_EVAL ||
554 ctx->is_gs_copy_shader)
555 user_sgpr_info->need_ring_offsets = true;
556
557 if (stage == MESA_SHADER_FRAGMENT &&
558 ctx->shader_info->info.ps.needs_sample_positions)
559 user_sgpr_info->need_ring_offsets = true;
560
561 /* 2 user sgprs will nearly always be allocated for scratch/rings */
562 if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
563 user_sgpr_info->sgpr_count += 2;
564 }
565
566 /* FIXME: fix the number of user sgprs for merged shaders on GFX9 */
567 switch (stage) {
568 case MESA_SHADER_COMPUTE:
569 if (ctx->shader_info->info.cs.uses_grid_size)
570 user_sgpr_info->sgpr_count += 3;
571 break;
572 case MESA_SHADER_FRAGMENT:
573 user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
574 break;
575 case MESA_SHADER_VERTEX:
576 if (!ctx->is_gs_copy_shader) {
577 user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
578 if (ctx->shader_info->info.vs.needs_draw_id) {
579 user_sgpr_info->sgpr_count += 3;
580 } else {
581 user_sgpr_info->sgpr_count += 2;
582 }
583 }
584 if (ctx->options->key.vs.as_ls)
585 user_sgpr_info->sgpr_count++;
586 break;
587 case MESA_SHADER_TESS_CTRL:
588 user_sgpr_info->sgpr_count += 4;
589 break;
590 case MESA_SHADER_TESS_EVAL:
591 user_sgpr_info->sgpr_count += 1;
592 break;
593 case MESA_SHADER_GEOMETRY:
594 user_sgpr_info->sgpr_count += 2;
595 break;
596 default:
597 break;
598 }
599
600 if (ctx->shader_info->info.loads_push_constants)
601 user_sgpr_info->sgpr_count += 2;
602
603 uint32_t available_sgprs = ctx->options->chip_class >= GFX9 ? 32 : 16;
604 uint32_t remaining_sgprs = available_sgprs - user_sgpr_info->sgpr_count;
605
606 if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
607 user_sgpr_info->sgpr_count += 2;
608 user_sgpr_info->indirect_all_descriptor_sets = true;
609 } else {
610 user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
611 }
612 }
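/* A worked example of this budgeting (a sketch; assumes a pre-GFX9 chip,
 * i.e. 16 available SGPRs): a vertex shader with vertex buffers, draw id and
 * push constants uses 2 (rings) + 2 (vertex buffers) + 3 (base/start/draw id)
 * + 2 (push constants) = 9. The remaining 7 SGPRs fit 7 / 2 = 3 descriptor-set
 * pointers, so up to 3 sets stay direct; more than that flips the shader to
 * indirect_all_descriptor_sets with a single 2-SGPR table pointer. */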
613
614 static void
615 declare_global_input_sgprs(struct nir_to_llvm_context *ctx,
616 gl_shader_stage stage,
617 bool has_previous_stage,
618 gl_shader_stage previous_stage,
619 const struct user_sgpr_info *user_sgpr_info,
620 struct arg_info *args,
621 LLVMValueRef *desc_sets)
622 {
623 LLVMTypeRef type = const_array(ctx->ac.i8, 1024 * 1024);
624 unsigned num_sets = ctx->options->layout ?
625 ctx->options->layout->num_sets : 0;
626 unsigned stage_mask = 1 << stage;
627
628 if (has_previous_stage)
629 stage_mask |= 1 << previous_stage;
630
631 /* 1 for each descriptor set */
632 if (!user_sgpr_info->indirect_all_descriptor_sets) {
633 for (unsigned i = 0; i < num_sets; ++i) {
634 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
635 add_array_arg(args, type,
636 &ctx->descriptor_sets[i]);
637 }
638 }
639 } else {
640 add_array_arg(args, const_array(type, 32), desc_sets);
641 }
642
643 if (ctx->shader_info->info.loads_push_constants) {
644 /* 1 for push constants and dynamic descriptors */
645 add_array_arg(args, type, &ctx->push_constants);
646 }
647 }
648
649 static void
650 declare_vs_specific_input_sgprs(struct nir_to_llvm_context *ctx,
651 gl_shader_stage stage,
652 bool has_previous_stage,
653 gl_shader_stage previous_stage,
654 struct arg_info *args)
655 {
656 if (!ctx->is_gs_copy_shader &&
657 (stage == MESA_SHADER_VERTEX ||
658 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
659 if (ctx->shader_info->info.vs.has_vertex_buffers) {
660 add_arg(args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
661 &ctx->vertex_buffers);
662 }
663 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.base_vertex);
664 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.start_instance);
665 if (ctx->shader_info->info.vs.needs_draw_id) {
666 add_arg(args, ARG_SGPR, ctx->ac.i32, &ctx->abi.draw_id);
667 }
668 }
669 }
670
671 static void
672 declare_vs_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
673 {
674 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.vertex_id);
675 if (!ctx->is_gs_copy_shader) {
676 if (ctx->options->key.vs.as_ls) {
677 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->rel_auto_id);
678 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
679 } else {
680 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.instance_id);
681 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->vs_prim_id);
682 }
683 add_arg(args, ARG_VGPR, ctx->ac.i32, NULL); /* unused */
684 }
685 }
686
687 static void
688 declare_tes_input_vgprs(struct nir_to_llvm_context *ctx, struct arg_info *args)
689 {
690 add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_u);
691 add_arg(args, ARG_VGPR, ctx->ac.f32, &ctx->tes_v);
692 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->tes_rel_patch_id);
693 add_arg(args, ARG_VGPR, ctx->ac.i32, &ctx->abi.tes_patch_id);
694 }
695
696 static void
697 set_global_input_locs(struct nir_to_llvm_context *ctx, gl_shader_stage stage,
698 bool has_previous_stage, gl_shader_stage previous_stage,
699 const struct user_sgpr_info *user_sgpr_info,
700 LLVMValueRef desc_sets, uint8_t *user_sgpr_idx)
701 {
702 unsigned num_sets = ctx->options->layout ?
703 ctx->options->layout->num_sets : 0;
704 unsigned stage_mask = 1 << stage;
705
706 if (has_previous_stage)
707 stage_mask |= 1 << previous_stage;
708
709 if (!user_sgpr_info->indirect_all_descriptor_sets) {
710 for (unsigned i = 0; i < num_sets; ++i) {
711 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
712 set_loc_desc(ctx, i, user_sgpr_idx, 0);
713 } else
714 ctx->descriptor_sets[i] = NULL;
715 }
716 } else {
717 set_loc_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS,
718 user_sgpr_idx, 2);
719
720 for (unsigned i = 0; i < num_sets; ++i) {
721 if (ctx->options->layout->set[i].layout->shader_stages & stage_mask) {
722 set_loc_desc(ctx, i, user_sgpr_idx, i * 8);
723 ctx->descriptor_sets[i] =
724 ac_build_load_to_sgpr(&ctx->ac,
725 desc_sets,
726 LLVMConstInt(ctx->ac.i32, i, false));
727
728 } else
729 ctx->descriptor_sets[i] = NULL;
730 }
731 ctx->shader_info->need_indirect_descriptor_sets = true;
732 }
733
734 if (ctx->shader_info->info.loads_push_constants) {
735 set_loc_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
736 }
737 }
738
739 static void
740 set_vs_specific_input_locs(struct nir_to_llvm_context *ctx,
741 gl_shader_stage stage, bool has_previous_stage,
742 gl_shader_stage previous_stage,
743 uint8_t *user_sgpr_idx)
744 {
745 if (!ctx->is_gs_copy_shader &&
746 (stage == MESA_SHADER_VERTEX ||
747 (has_previous_stage && previous_stage == MESA_SHADER_VERTEX))) {
748 if (ctx->shader_info->info.vs.has_vertex_buffers) {
749 set_loc_shader(ctx, AC_UD_VS_VERTEX_BUFFERS,
750 user_sgpr_idx, 2);
751 }
752
753 unsigned vs_num = 2;
754 if (ctx->shader_info->info.vs.needs_draw_id)
755 vs_num++;
756
757 set_loc_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE,
758 user_sgpr_idx, vs_num);
759 }
760 }
761
762 static void create_function(struct nir_to_llvm_context *ctx,
763 gl_shader_stage stage,
764 bool has_previous_stage,
765 gl_shader_stage previous_stage)
766 {
767 uint8_t user_sgpr_idx;
768 struct user_sgpr_info user_sgpr_info;
769 struct arg_info args = {};
770 LLVMValueRef desc_sets;
771
772 allocate_user_sgprs(ctx, stage, &user_sgpr_info);
773
774 if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
775 add_arg(&args, ARG_SGPR, const_array(ctx->ac.v4i32, 16),
776 &ctx->ring_offsets);
777 }
778
779 switch (stage) {
780 case MESA_SHADER_COMPUTE:
781 declare_global_input_sgprs(ctx, stage, has_previous_stage,
782 previous_stage, &user_sgpr_info,
783 &args, &desc_sets);
784
785 if (ctx->shader_info->info.cs.uses_grid_size) {
786 add_arg(&args, ARG_SGPR, ctx->ac.v3i32,
787 &ctx->num_work_groups);
788 }
789
790 for (int i = 0; i < 3; i++) {
791 ctx->workgroup_ids[i] = NULL;
792 if (ctx->shader_info->info.cs.uses_block_id[i]) {
793 add_arg(&args, ARG_SGPR, ctx->ac.i32,
794 &ctx->workgroup_ids[i]);
795 }
796 }
797
798 if (ctx->shader_info->info.cs.uses_local_invocation_idx)
799 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tg_size);
800 add_arg(&args, ARG_VGPR, ctx->ac.v3i32,
801 &ctx->local_invocation_ids);
802 break;
803 case MESA_SHADER_VERTEX:
804 declare_global_input_sgprs(ctx, stage, has_previous_stage,
805 previous_stage, &user_sgpr_info,
806 &args, &desc_sets);
807 declare_vs_specific_input_sgprs(ctx, stage, has_previous_stage,
808 previous_stage, &args);
809
810 if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.vs.as_es && !ctx->options->key.vs.as_ls && ctx->options->key.has_multiview_view_index))
811 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);
812 if (ctx->options->key.vs.as_es)
813 add_arg(&args, ARG_SGPR, ctx->ac.i32,
814 &ctx->es2gs_offset);
815 else if (ctx->options->key.vs.as_ls)
816 add_arg(&args, ARG_SGPR, ctx->ac.i32,
817 &ctx->ls_out_layout);
818
819 declare_vs_input_vgprs(ctx, &args);
820 break;
821 case MESA_SHADER_TESS_CTRL:
822 if (has_previous_stage) {
823 // First 6 system regs
824 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
825 add_arg(&args, ARG_SGPR, ctx->ac.i32,
826 &ctx->merged_wave_info);
827 add_arg(&args, ARG_SGPR, ctx->ac.i32,
828 &ctx->tess_factor_offset);
829
830 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
831 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
832 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
833
834 declare_global_input_sgprs(ctx, stage,
835 has_previous_stage,
836 previous_stage,
837 &user_sgpr_info, &args,
838 &desc_sets);
839 declare_vs_specific_input_sgprs(ctx, stage,
840 has_previous_stage,
841 previous_stage, &args);
842
843 add_arg(&args, ARG_SGPR, ctx->ac.i32,
844 &ctx->ls_out_layout);
845
846 add_arg(&args, ARG_SGPR, ctx->ac.i32,
847 &ctx->tcs_offchip_layout);
848 add_arg(&args, ARG_SGPR, ctx->ac.i32,
849 &ctx->tcs_out_offsets);
850 add_arg(&args, ARG_SGPR, ctx->ac.i32,
851 &ctx->tcs_out_layout);
852 add_arg(&args, ARG_SGPR, ctx->ac.i32,
853 &ctx->tcs_in_layout);
854 if (ctx->shader_info->info.needs_multiview_view_index)
855 add_arg(&args, ARG_SGPR, ctx->ac.i32,
856 &ctx->view_index);
857
858 add_arg(&args, ARG_VGPR, ctx->ac.i32,
859 &ctx->abi.tcs_patch_id);
860 add_arg(&args, ARG_VGPR, ctx->ac.i32,
861 &ctx->abi.tcs_rel_ids);
862
863 declare_vs_input_vgprs(ctx, &args);
864 } else {
865 declare_global_input_sgprs(ctx, stage,
866 has_previous_stage,
867 previous_stage,
868 &user_sgpr_info, &args,
869 &desc_sets);
870
871 add_arg(&args, ARG_SGPR, ctx->ac.i32,
872 &ctx->tcs_offchip_layout);
873 add_arg(&args, ARG_SGPR, ctx->ac.i32,
874 &ctx->tcs_out_offsets);
875 add_arg(&args, ARG_SGPR, ctx->ac.i32,
876 &ctx->tcs_out_layout);
877 add_arg(&args, ARG_SGPR, ctx->ac.i32,
878 &ctx->tcs_in_layout);
879 if (ctx->shader_info->info.needs_multiview_view_index)
880 add_arg(&args, ARG_SGPR, ctx->ac.i32,
881 &ctx->view_index);
882
883 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
884 add_arg(&args, ARG_SGPR, ctx->ac.i32,
885 &ctx->tess_factor_offset);
886 add_arg(&args, ARG_VGPR, ctx->ac.i32,
887 &ctx->abi.tcs_patch_id);
888 add_arg(&args, ARG_VGPR, ctx->ac.i32,
889 &ctx->abi.tcs_rel_ids);
890 }
891 break;
892 case MESA_SHADER_TESS_EVAL:
893 declare_global_input_sgprs(ctx, stage, has_previous_stage,
894 previous_stage, &user_sgpr_info,
895 &args, &desc_sets);
896
897 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->tcs_offchip_layout);
898 if (ctx->shader_info->info.needs_multiview_view_index || (!ctx->options->key.tes.as_es && ctx->options->key.has_multiview_view_index))
899 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->view_index);
900
901 if (ctx->options->key.tes.as_es) {
902 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
903 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
904 add_arg(&args, ARG_SGPR, ctx->ac.i32,
905 &ctx->es2gs_offset);
906 } else {
907 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL);
908 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
909 }
910 declare_tes_input_vgprs(ctx, &args);
911 break;
912 case MESA_SHADER_GEOMETRY:
913 if (has_previous_stage) {
914 // First 6 system regs
915 add_arg(&args, ARG_SGPR, ctx->ac.i32,
916 &ctx->gs2vs_offset);
917 add_arg(&args, ARG_SGPR, ctx->ac.i32,
918 &ctx->merged_wave_info);
919 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->oc_lds);
920
921 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // scratch offset
922 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
923 add_arg(&args, ARG_SGPR, ctx->ac.i32, NULL); // unknown
924
925 declare_global_input_sgprs(ctx, stage,
926 has_previous_stage,
927 previous_stage,
928 &user_sgpr_info, &args,
929 &desc_sets);
930
931 if (previous_stage == MESA_SHADER_TESS_EVAL) {
932 add_arg(&args, ARG_SGPR, ctx->ac.i32,
933 &ctx->tcs_offchip_layout);
934 } else {
935 declare_vs_specific_input_sgprs(ctx, stage,
936 has_previous_stage,
937 previous_stage,
938 &args);
939 }
940
941 add_arg(&args, ARG_SGPR, ctx->ac.i32,
942 &ctx->gsvs_ring_stride);
943 add_arg(&args, ARG_SGPR, ctx->ac.i32,
944 &ctx->gsvs_num_entries);
945 if (ctx->shader_info->info.needs_multiview_view_index)
946 add_arg(&args, ARG_SGPR, ctx->ac.i32,
947 &ctx->view_index);
948
949 add_arg(&args, ARG_VGPR, ctx->ac.i32,
950 &ctx->gs_vtx_offset[0]);
951 add_arg(&args, ARG_VGPR, ctx->ac.i32,
952 &ctx->gs_vtx_offset[2]);
953 add_arg(&args, ARG_VGPR, ctx->ac.i32,
954 &ctx->abi.gs_prim_id);
955 add_arg(&args, ARG_VGPR, ctx->ac.i32,
956 &ctx->abi.gs_invocation_id);
957 add_arg(&args, ARG_VGPR, ctx->ac.i32,
958 &ctx->gs_vtx_offset[4]);
959
960 if (previous_stage == MESA_SHADER_VERTEX) {
961 declare_vs_input_vgprs(ctx, &args);
962 } else {
963 declare_tes_input_vgprs(ctx, &args);
964 }
965 } else {
966 declare_global_input_sgprs(ctx, stage,
967 has_previous_stage,
968 previous_stage,
969 &user_sgpr_info, &args,
970 &desc_sets);
971
972 add_arg(&args, ARG_SGPR, ctx->ac.i32,
973 &ctx->gsvs_ring_stride);
974 add_arg(&args, ARG_SGPR, ctx->ac.i32,
975 &ctx->gsvs_num_entries);
976 if (ctx->shader_info->info.needs_multiview_view_index)
977 add_arg(&args, ARG_SGPR, ctx->ac.i32,
978 &ctx->view_index);
979
980 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs2vs_offset);
981 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->gs_wave_id);
982 add_arg(&args, ARG_VGPR, ctx->ac.i32,
983 &ctx->gs_vtx_offset[0]);
984 add_arg(&args, ARG_VGPR, ctx->ac.i32,
985 &ctx->gs_vtx_offset[1]);
986 add_arg(&args, ARG_VGPR, ctx->ac.i32,
987 &ctx->abi.gs_prim_id);
988 add_arg(&args, ARG_VGPR, ctx->ac.i32,
989 &ctx->gs_vtx_offset[2]);
990 add_arg(&args, ARG_VGPR, ctx->ac.i32,
991 &ctx->gs_vtx_offset[3]);
992 add_arg(&args, ARG_VGPR, ctx->ac.i32,
993 &ctx->gs_vtx_offset[4]);
994 add_arg(&args, ARG_VGPR, ctx->ac.i32,
995 &ctx->gs_vtx_offset[5]);
996 add_arg(&args, ARG_VGPR, ctx->ac.i32,
997 &ctx->abi.gs_invocation_id);
998 }
999 break;
1000 case MESA_SHADER_FRAGMENT:
1001 declare_global_input_sgprs(ctx, stage, has_previous_stage,
1002 previous_stage, &user_sgpr_info,
1003 &args, &desc_sets);
1004
1005 if (ctx->shader_info->info.ps.needs_sample_positions)
1006 add_arg(&args, ARG_SGPR, ctx->ac.i32,
1007 &ctx->sample_pos_offset);
1008
1009 add_arg(&args, ARG_SGPR, ctx->ac.i32, &ctx->prim_mask);
1010 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_sample);
1011 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_center);
1012 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->persp_centroid);
1013 add_arg(&args, ARG_VGPR, ctx->ac.v3i32, NULL); /* persp pull model */
1014 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_sample);
1015 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_center);
1016 add_arg(&args, ARG_VGPR, ctx->ac.v2i32, &ctx->linear_centroid);
1017 add_arg(&args, ARG_VGPR, ctx->ac.f32, NULL); /* line stipple tex */
1018 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[0]);
1019 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[1]);
1020 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[2]);
1021 add_arg(&args, ARG_VGPR, ctx->ac.f32, &ctx->abi.frag_pos[3]);
1022 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.front_face);
1023 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.ancillary);
1024 add_arg(&args, ARG_VGPR, ctx->ac.i32, &ctx->abi.sample_coverage);
1025 add_arg(&args, ARG_VGPR, ctx->ac.i32, NULL); /* fixed pt */
1026 break;
1027 default:
1028 unreachable("Shader stage not implemented");
1029 }
1030
1031 ctx->main_function = create_llvm_function(
1032 ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
1033 ctx->max_workgroup_size,
1034 ctx->options->unsafe_math);
1035 set_llvm_calling_convention(ctx->main_function, stage);
1036
1037
1038 ctx->shader_info->num_input_vgprs = 0;
1039 ctx->shader_info->num_input_sgprs = ctx->options->supports_spill ? 2 : 0;
1040
1041 ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
1042
1043 if (ctx->stage != MESA_SHADER_FRAGMENT)
1044 ctx->shader_info->num_input_vgprs = args.num_vgprs_used;
1045
1046 assign_arguments(ctx->main_function, &args);
1047
1048 user_sgpr_idx = 0;
1049
1050 if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
1051 set_loc_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS,
1052 &user_sgpr_idx, 2);
1053 if (ctx->options->supports_spill) {
1054 ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
1055 LLVMPointerType(ctx->ac.i8, CONST_ADDR_SPACE),
1056 NULL, 0, AC_FUNC_ATTR_READNONE);
1057 ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
1058 const_array(ctx->ac.v4i32, 16), "");
1059 }
1060 }
1061
1062 /* For merged shaders the user SGPRs start at 8, with 8 system SGPRs in front (including
1063 * the rw_buffers at s0/s1). With user SGPR0 = s8, let's restart the count from 0. */
1064 if (has_previous_stage)
1065 user_sgpr_idx = 0;
1066
1067 set_global_input_locs(ctx, stage, has_previous_stage, previous_stage,
1068 &user_sgpr_info, desc_sets, &user_sgpr_idx);
1069
1070 switch (stage) {
1071 case MESA_SHADER_COMPUTE:
1072 if (ctx->shader_info->info.cs.uses_grid_size) {
1073 set_loc_shader(ctx, AC_UD_CS_GRID_SIZE,
1074 &user_sgpr_idx, 3);
1075 }
1076 break;
1077 case MESA_SHADER_VERTEX:
1078 set_vs_specific_input_locs(ctx, stage, has_previous_stage,
1079 previous_stage, &user_sgpr_idx);
1080 if (ctx->view_index)
1081 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1082 if (ctx->options->key.vs.as_ls) {
1083 set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
1084 &user_sgpr_idx, 1);
1085 }
1086 if (ctx->options->key.vs.as_ls)
1087 ac_declare_lds_as_pointer(&ctx->ac);
1088 break;
1089 case MESA_SHADER_TESS_CTRL:
1090 set_vs_specific_input_locs(ctx, stage, has_previous_stage,
1091 previous_stage, &user_sgpr_idx);
1092 if (has_previous_stage)
1093 set_loc_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT,
1094 &user_sgpr_idx, 1);
1095 set_loc_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
1096 if (ctx->view_index)
1097 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1098 ac_declare_lds_as_pointer(&ctx->ac);
1099 break;
1100 case MESA_SHADER_TESS_EVAL:
1101 set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
1102 if (ctx->view_index)
1103 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1104 break;
1105 case MESA_SHADER_GEOMETRY:
1106 if (has_previous_stage) {
1107 if (previous_stage == MESA_SHADER_VERTEX)
1108 set_vs_specific_input_locs(ctx, stage,
1109 has_previous_stage,
1110 previous_stage,
1111 &user_sgpr_idx);
1112 else
1113 set_loc_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT,
1114 &user_sgpr_idx, 1);
1115 }
1116 set_loc_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES,
1117 &user_sgpr_idx, 2);
1118 if (ctx->view_index)
1119 set_loc_shader(ctx, AC_UD_VIEW_INDEX, &user_sgpr_idx, 1);
1120 if (has_previous_stage)
1121 ac_declare_lds_as_pointer(&ctx->ac);
1122 break;
1123 case MESA_SHADER_FRAGMENT:
1124 if (ctx->shader_info->info.ps.needs_sample_positions) {
1125 set_loc_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET,
1126 &user_sgpr_idx, 1);
1127 }
1128 break;
1129 default:
1130 unreachable("Shader stage not implemented");
1131 }
1132
1133 ctx->shader_info->num_user_sgprs = user_sgpr_idx;
1134 }
1135
1136 static LLVMValueRef trim_vector(struct ac_llvm_context *ctx,
1137 LLVMValueRef value, unsigned count)
1138 {
1139 unsigned num_components = ac_get_llvm_num_components(value);
1140 if (count == num_components)
1141 return value;
1142
1143 LLVMValueRef masks[] = {
1144 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
1145 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
1146
1147 if (count == 1)
1148 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
1149 "");
1150
1151 LLVMValueRef swizzle = LLVMConstVector(masks, count);
1152 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
1153 }
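/* Example: trim_vector(ctx, v, 2) on a 4-component value emits a
 * shufflevector with mask <0, 1> and yields the first two components;
 * count == 1 degenerates to a single extractelement rather than a
 * one-element vector. */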
1154
1155 static void
1156 build_store_values_extended(struct ac_llvm_context *ac,
1157 LLVMValueRef *values,
1158 unsigned value_count,
1159 unsigned value_stride,
1160 LLVMValueRef vec)
1161 {
1162 LLVMBuilderRef builder = ac->builder;
1163 unsigned i;
1164
1165 for (i = 0; i < value_count; i++) {
1166 LLVMValueRef ptr = values[i * value_stride];
1167 LLVMValueRef index = LLVMConstInt(ac->i32, i, false);
1168 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
1169 LLVMBuildStore(builder, value, ptr);
1170 }
1171 }
1172
1173 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
1174 const nir_ssa_def *def)
1175 {
1176 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
1177 if (def->num_components > 1) {
1178 type = LLVMVectorType(type, def->num_components);
1179 }
1180 return type;
1181 }
1182
1183 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
1184 {
1185 assert(src.is_ssa);
1186 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
1187 return (LLVMValueRef)entry->data;
1188 }
1189
1190
1191 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
1192 const struct nir_block *b)
1193 {
1194 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
1195 return (LLVMBasicBlockRef)entry->data;
1196 }
1197
1198 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
1199 nir_alu_src src,
1200 unsigned num_components)
1201 {
1202 LLVMValueRef value = get_src(ctx, src.src);
1203 bool need_swizzle = false;
1204
1205 assert(value);
1206 LLVMTypeRef type = LLVMTypeOf(value);
1207 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
1208 ? LLVMGetVectorSize(type)
1209 : 1;
1210
1211 for (unsigned i = 0; i < num_components; ++i) {
1212 assert(src.swizzle[i] < src_components);
1213 if (src.swizzle[i] != i)
1214 need_swizzle = true;
1215 }
1216
1217 if (need_swizzle || num_components != src_components) {
1218 LLVMValueRef masks[] = {
1219 LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
1220 LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
1221 LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
1222 LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
1223
1224 if (src_components > 1 && num_components == 1) {
1225 value = LLVMBuildExtractElement(ctx->ac.builder, value,
1226 masks[0], "");
1227 } else if (src_components == 1 && num_components > 1) {
1228 LLVMValueRef values[] = {value, value, value, value};
1229 value = ac_build_gather_values(&ctx->ac, values, num_components);
1230 } else {
1231 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
1232 value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
1233 swizzle, "");
1234 }
1235 }
1236 assert(!src.negate);
1237 assert(!src.abs);
1238 return value;
1239 }
1240
1241 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
1242 LLVMIntPredicate pred, LLVMValueRef src0,
1243 LLVMValueRef src1)
1244 {
1245 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
1246 return LLVMBuildSelect(ctx->builder, result,
1247 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
1248 ctx->i32_0, "");
1249 }
1250
1251 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
1252 LLVMRealPredicate pred, LLVMValueRef src0,
1253 LLVMValueRef src1)
1254 {
1255 LLVMValueRef result;
1256 src0 = ac_to_float(ctx, src0);
1257 src1 = ac_to_float(ctx, src1);
1258 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
1259 return LLVMBuildSelect(ctx->builder, result,
1260 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
1261 ctx->i32_0, "");
1262 }
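/* NIR booleans are 32-bit 0 / ~0 at this point, so both compare helpers
 * widen the i1 result of the compare back to that convention, e.g.
 * emit_int_cmp(ctx, LLVMIntEQ, a, b) yields 0xffffffff iff a == b. */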
1263
1264 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
1265 const char *intrin,
1266 LLVMTypeRef result_type,
1267 LLVMValueRef src0)
1268 {
1269 char name[64];
1270 LLVMValueRef params[] = {
1271 ac_to_float(ctx, src0),
1272 };
1273
1274 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1275 get_elem_bits(ctx, result_type));
1276 assert(length < sizeof(name));
1277 return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
1278 }
1279
1280 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
1281 const char *intrin,
1282 LLVMTypeRef result_type,
1283 LLVMValueRef src0, LLVMValueRef src1)
1284 {
1285 char name[64];
1286 LLVMValueRef params[] = {
1287 ac_to_float(ctx, src0),
1288 ac_to_float(ctx, src1),
1289 };
1290
1291 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1292 get_elem_bits(ctx, result_type));
1293 assert(length < sizeof(name));
1294 return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
1295 }
1296
1297 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
1298 const char *intrin,
1299 LLVMTypeRef result_type,
1300 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
1301 {
1302 char name[64];
1303 LLVMValueRef params[] = {
1304 ac_to_float(ctx, src0),
1305 ac_to_float(ctx, src1),
1306 ac_to_float(ctx, src2),
1307 };
1308
1309 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1310 get_elem_bits(ctx, result_type));
1311 assert(length < sizeof(name));
1312 return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
1313 }
1314
1315 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
1316 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
1317 {
1318 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
1319 ctx->i32_0, "");
1320 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
1321 }
1322
1323 static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
1324 LLVMIntPredicate pred,
1325 LLVMValueRef src0, LLVMValueRef src1)
1326 {
1327 return LLVMBuildSelect(ctx->builder,
1328 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
1329 src0,
1330 src1, "");
1331
1332 }
1333 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
1334 LLVMValueRef src0)
1335 {
1336 return emit_minmax_int(ctx, LLVMIntSGT, src0,
1337 LLVMBuildNeg(ctx->builder, src0, ""));
1338 }
1339
1340 static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx,
1341 LLVMValueRef src0,
1342 unsigned bitsize)
1343 {
1344 LLVMValueRef cmp, val, zero, one;
1345 LLVMTypeRef type;
1346
1347 if (bitsize == 32) {
1348 type = ctx->f32;
1349 zero = ctx->f32_0;
1350 one = ctx->f32_1;
1351 } else {
1352 type = ctx->f64;
1353 zero = ctx->f64_0;
1354 one = ctx->f64_1;
1355 }
1356
1357 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, zero, "");
1358 val = LLVMBuildSelect(ctx->builder, cmp, one, src0, "");
1359 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, zero, "");
1360 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(type, -1.0), "");
1361 return val;
1362 }
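/* A quick truth table for the select chain above (32-bit case; the bitsize
 * parameter only switches to the f64 constants, which is what the doubles
 * fix in this commit addresses):
 *
 *   src0 > 0.0          -> 1.0   (first select)
 *   src0 == +/-0.0      -> src0  (val >= 0.0 holds, the zero is kept)
 *   src0 < 0.0          -> -1.0  (second select; NaN also falls through here)
 */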
1363
1364 static LLVMValueRef emit_isign(struct ac_llvm_context *ctx,
1365 LLVMValueRef src0)
1366 {
1367 LLVMValueRef cmp, val;
1368
1369 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32_0, "");
1370 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32_1, src0, "");
1371 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32_0, "");
1372 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
1373 return val;
1374 }
1375
1376 static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx,
1377 LLVMValueRef src0)
1378 {
1379 const char *intr = "llvm.floor.f32";
1380 LLVMValueRef fsrc0 = ac_to_float(ctx, src0);
1381 LLVMValueRef params[] = {
1382 fsrc0,
1383 };
1384 LLVMValueRef floor = ac_build_intrinsic(ctx, intr,
1385 ctx->f32, params, 1,
1386 AC_FUNC_ATTR_READNONE);
1387 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
1388 }
1389
1390 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
1391 const char *intrin,
1392 LLVMValueRef src0, LLVMValueRef src1)
1393 {
1394 LLVMTypeRef ret_type;
1395 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
1396 LLVMValueRef res;
1397 LLVMValueRef params[] = { src0, src1 };
1398 ret_type = LLVMStructTypeInContext(ctx->context, types,
1399 2, true);
1400
1401 res = ac_build_intrinsic(ctx, intrin, ret_type,
1402 params, 2, AC_FUNC_ATTR_READNONE);
1403
1404 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
1405 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
1406 return res;
1407 }
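/* The intrin parameter is expected to name an LLVM overflow intrinsic such
 * as llvm.uadd.with.overflow.i32 (an assumption based on the {i32, i1}
 * return type built here); the helper discards the wrapped result and
 * returns the carry/borrow bit zero-extended to i32. */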
1408
1409 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
1410 LLVMValueRef src0)
1411 {
1412 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
1413 }
1414
1415 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
1416 LLVMValueRef src0)
1417 {
1418 src0 = ac_to_float(ctx, src0);
1419 return LLVMBuildSExt(ctx->builder,
1420 LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""),
1421 ctx->i32, "");
1422 }
1423
1424 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
1425 LLVMValueRef src0)
1426 {
1427 return LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
1428 }
1429
1430 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
1431 LLVMValueRef src0)
1432 {
1433 return LLVMBuildSExt(ctx->builder,
1434 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""),
1435 ctx->i32, "");
1436 }
1437
1438 static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
1439 LLVMValueRef src0)
1440 {
1441 LLVMValueRef result;
1442 LLVMValueRef cond = NULL;
1443
1444 src0 = ac_to_float(&ctx->ac, src0);
1445 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->ac.f16, "");
1446
1447 if (ctx->options->chip_class >= VI) {
1448 LLVMValueRef args[2];
1449 /* Check if the result is a denormal - and flush to 0 if so. */
1450 args[0] = result;
1451 args[1] = LLVMConstInt(ctx->ac.i32, N_SUBNORMAL | P_SUBNORMAL, false);
1452 cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->ac.i1, args, 2, AC_FUNC_ATTR_READNONE);
1453 }
1454
1455 /* need to convert back up to f32 */
1456 result = LLVMBuildFPExt(ctx->builder, result, ctx->ac.f32, "");
1457
1458 if (ctx->options->chip_class >= VI)
1459 result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, "");
1460 else {
1461 /* for SI/CIK */
1462 /* 0x38800000 is the smallest normal half-float value (2^-14) as a 32-bit float,
1463 * so compare the result and flush to 0 if it's smaller.
1464 */
1465 LLVMValueRef temp, cond2;
1466 temp = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
1467 ctx->ac.f32, result);
1468 cond = LLVMBuildFCmp(ctx->builder, LLVMRealUGT,
1469 LLVMBuildBitCast(ctx->builder, LLVMConstInt(ctx->ac.i32, 0x38800000, false), ctx->ac.f32, ""),
1470 temp, "");
1471 cond2 = LLVMBuildFCmp(ctx->builder, LLVMRealUNE,
1472 temp, ctx->ac.f32_0, "");
1473 cond = LLVMBuildAnd(ctx->builder, cond, cond2, "");
1474 result = LLVMBuildSelect(ctx->builder, cond, ctx->ac.f32_0, result, "");
1475 }
1476 return result;
1477 }
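/* Sketch of the pre-VI flush threshold above: 0x38800000 is the f32 bit
 * pattern of 2^-14, the smallest normal f16 value. E.g. |result| = 2^-15
 * satisfies 2^-14 > |result| and |result| != 0, so it is flushed to 0.0,
 * mirroring what the llvm.amdgcn.class.f16 path does on VI+. */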
1478
1479 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
1480 LLVMValueRef src0, LLVMValueRef src1)
1481 {
1482 LLVMValueRef dst64, result;
1483 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
1484 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
1485
1486 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1487 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1488 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1489 return result;
1490 }
1491
1492 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
1493 LLVMValueRef src0, LLVMValueRef src1)
1494 {
1495 LLVMValueRef dst64, result;
1496 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
1497 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
1498
1499 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1500 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1501 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1502 return result;
1503 }
1504
1505 static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
1506 bool is_signed,
1507 const LLVMValueRef srcs[3])
1508 {
1509 LLVMValueRef result;
1510 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1511
1512 result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
1513 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1514 return result;
1515 }
1516
1517 static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
1518 LLVMValueRef src0, LLVMValueRef src1,
1519 LLVMValueRef src2, LLVMValueRef src3)
1520 {
1521 LLVMValueRef bfi_args[3], result;
1522
1523 bfi_args[0] = LLVMBuildShl(ctx->builder,
1524 LLVMBuildSub(ctx->builder,
1525 LLVMBuildShl(ctx->builder,
1526 ctx->i32_1,
1527 src3, ""),
1528 ctx->i32_1, ""),
1529 src2, "");
1530 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1531 bfi_args[2] = src0;
1532
1533 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1534
1535 /* Calculate:
1536 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
1537 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1538 */
1539 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1540 LLVMBuildAnd(ctx->builder, bfi_args[0],
1541 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1542
1543 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1544 return result;
1545 }
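/* Spot-check of that identity with hypothetical constants: for
 * arg0 = 0x0000ff00, arg1 = 0xaaaaaaaa, arg2 = 0x11111111 both
 * (arg0 & arg1) | (~arg0 & arg2) and arg2 ^ (arg0 & (arg1 ^ arg2))
 * evaluate to 0x1111aa11. */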
1546
1547 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
1548 LLVMValueRef src0)
1549 {
1550 LLVMValueRef comp[2];
1551
1552 src0 = ac_to_float(ctx, src0);
1553 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
1554 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
1555
1556 return ac_build_cvt_pkrtz_f16(ctx, comp);
1557 }
1558
1559 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
1560 LLVMValueRef src0)
1561 {
1562 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1563 LLVMValueRef temps[2], result, val;
1564 int i;
1565
1566 for (i = 0; i < 2; i++) {
1567 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1568 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1569 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1570 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1571 }
1572
1573 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1574 ctx->i32_0, "");
1575 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1576 ctx->i32_1, "");
1577 return result;
1578 }
1579
1580 static LLVMValueRef emit_ddxy(struct ac_nir_context *ctx,
1581 nir_op op,
1582 LLVMValueRef src0)
1583 {
1584 unsigned mask;
1585 int idx;
1586 LLVMValueRef result;
1587
1588 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1589 mask = AC_TID_MASK_LEFT;
1590 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1591 mask = AC_TID_MASK_TOP;
1592 else
1593 mask = AC_TID_MASK_TOP_LEFT;
1594
1595 /* for DDX we want the next X pixel, for DDY the next Y pixel. */
1596 if (op == nir_op_fddx_fine ||
1597 op == nir_op_fddx_coarse ||
1598 op == nir_op_fddx)
1599 idx = 1;
1600 else
1601 idx = 2;
1602
1603 result = ac_build_ddxy(&ctx->ac, mask, idx, src0);
1604 return result;
1605 }
1606
1607 /*
1608 * This takes an I,J coordinate pair,
1609 * and works out the X and Y derivatives.
1610 * It returns DDX(I), DDX(J), DDY(I), DDY(J).
1611 */
1612 static LLVMValueRef emit_ddxy_interp(
1613 struct ac_nir_context *ctx,
1614 LLVMValueRef interp_ij)
1615 {
1616 LLVMValueRef result[4], a;
1617 unsigned i;
1618
1619 for (i = 0; i < 2; i++) {
1620 a = LLVMBuildExtractElement(ctx->ac.builder, interp_ij,
1621 LLVMConstInt(ctx->ac.i32, i, false), "");
1622 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1623 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1624 }
1625 return ac_build_gather_values(&ctx->ac, result, 4);
1626 }
1627
1628 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
1629 {
1630 LLVMValueRef src[4], result = NULL;
1631 unsigned num_components = instr->dest.dest.ssa.num_components;
1632 unsigned src_components;
1633 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
1634
1635 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1636 switch (instr->op) {
1637 case nir_op_vec2:
1638 case nir_op_vec3:
1639 case nir_op_vec4:
1640 src_components = 1;
1641 break;
1642 case nir_op_pack_half_2x16:
1643 src_components = 2;
1644 break;
1645 case nir_op_unpack_half_2x16:
1646 src_components = 1;
1647 break;
1648 default:
1649 src_components = num_components;
1650 break;
1651 }
1652 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1653 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1654
1655 switch (instr->op) {
1656 case nir_op_fmov:
1657 case nir_op_imov:
1658 result = src[0];
1659 break;
1660 case nir_op_fneg:
1661 src[0] = ac_to_float(&ctx->ac, src[0]);
1662 result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
1663 break;
1664 case nir_op_ineg:
1665 result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
1666 break;
1667 case nir_op_inot:
1668 result = LLVMBuildNot(ctx->ac.builder, src[0], "");
1669 break;
1670 case nir_op_iadd:
1671 result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
1672 break;
1673 case nir_op_fadd:
1674 src[0] = ac_to_float(&ctx->ac, src[0]);
1675 src[1] = ac_to_float(&ctx->ac, src[1]);
1676 result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
1677 break;
1678 case nir_op_fsub:
1679 src[0] = ac_to_float(&ctx->ac, src[0]);
1680 src[1] = ac_to_float(&ctx->ac, src[1]);
1681 result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
1682 break;
1683 case nir_op_isub:
1684 result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
1685 break;
1686 case nir_op_imul:
1687 result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
1688 break;
1689 case nir_op_imod:
1690 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
1691 break;
1692 case nir_op_umod:
1693 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
1694 break;
1695 case nir_op_fmod:
1696 src[0] = ac_to_float(&ctx->ac, src[0]);
1697 src[1] = ac_to_float(&ctx->ac, src[1]);
1698 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1699 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1700 ac_to_float_type(&ctx->ac, def_type), result);
1701 result = LLVMBuildFMul(ctx->ac.builder, src[1], result, "");
1702 result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
1703 break;
1704 case nir_op_frem:
1705 src[0] = ac_to_float(&ctx->ac, src[0]);
1706 src[1] = ac_to_float(&ctx->ac, src[1]);
1707 result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
1708 break;
1709 case nir_op_irem:
1710 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
1711 break;
1712 case nir_op_idiv:
1713 result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
1714 break;
1715 case nir_op_udiv:
1716 result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
1717 break;
1718 case nir_op_fmul:
1719 src[0] = ac_to_float(&ctx->ac, src[0]);
1720 src[1] = ac_to_float(&ctx->ac, src[1]);
1721 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
1722 break;
1723 case nir_op_fdiv:
1724 src[0] = ac_to_float(&ctx->ac, src[0]);
1725 src[1] = ac_to_float(&ctx->ac, src[1]);
1726 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1727 break;
1728 case nir_op_frcp:
1729 src[0] = ac_to_float(&ctx->ac, src[0]);
1730 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
1731 src[0]);
1732 break;
1733 case nir_op_iand:
1734 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
1735 break;
1736 case nir_op_ior:
1737 result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
1738 break;
1739 case nir_op_ixor:
1740 result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
1741 break;
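/* LLVM requires both shift operands to have the same type, while the
 * NIR shift count may be narrower than the value, so zero-extend the
 * count first (a same-width zext simply folds away). */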
1742 case nir_op_ishl:
1743 result = LLVMBuildShl(ctx->ac.builder, src[0],
1744 LLVMBuildZExt(ctx->ac.builder, src[1],
1745 LLVMTypeOf(src[0]), ""),
1746 "");
1747 break;
1748 case nir_op_ishr:
1749 result = LLVMBuildAShr(ctx->ac.builder, src[0],
1750 LLVMBuildZExt(ctx->ac.builder, src[1],
1751 LLVMTypeOf(src[0]), ""),
1752 "");
1753 break;
1754 case nir_op_ushr:
1755 result = LLVMBuildLShr(ctx->ac.builder, src[0],
1756 LLVMBuildZExt(ctx->ac.builder, src[1],
1757 LLVMTypeOf(src[0]), ""),
1758 "");
1759 break;
1760 case nir_op_ilt:
1761 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1762 break;
1763 case nir_op_ine:
1764 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
1765 break;
1766 case nir_op_ieq:
1767 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
1768 break;
1769 case nir_op_ige:
1770 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
1771 break;
1772 case nir_op_ult:
1773 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
1774 break;
1775 case nir_op_uge:
1776 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
1777 break;
1778 case nir_op_feq:
1779 result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
1780 break;
1781 case nir_op_fne:
1782 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
1783 break;
1784 case nir_op_flt:
1785 result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
1786 break;
1787 case nir_op_fge:
1788 result = emit_float_cmp(&ctx->ac, LLVMRealUGE, src[0], src[1]);
1789 break;
1790 case nir_op_fabs:
1791 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
1792 ac_to_float_type(&ctx->ac, def_type), src[0]);
1793 break;
1794 case nir_op_iabs:
1795 result = emit_iabs(&ctx->ac, src[0]);
1796 break;
1797 case nir_op_imax:
1798 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
1799 break;
1800 case nir_op_imin:
1801 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1802 break;
1803 case nir_op_umax:
1804 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
1805 break;
1806 case nir_op_umin:
1807 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
1808 break;
1809 case nir_op_isign:
1810 result = emit_isign(&ctx->ac, src[0]);
1811 break;
1812 case nir_op_fsign:
1813 src[0] = ac_to_float(&ctx->ac, src[0]);
1814 result = emit_fsign(&ctx->ac, src[0], instr->dest.dest.ssa.bit_size);
1815 break;
1816 case nir_op_ffloor:
1817 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1818 ac_to_float_type(&ctx->ac, def_type), src[0]);
1819 break;
1820 case nir_op_ftrunc:
1821 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
1822 ac_to_float_type(&ctx->ac, def_type), src[0]);
1823 break;
1824 case nir_op_fceil:
1825 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
1826 ac_to_float_type(&ctx->ac, def_type), src[0]);
1827 break;
1828 case nir_op_fround_even:
1829 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
1830 ac_to_float_type(&ctx->ac, def_type), src[0]);
1831 break;
1832 case nir_op_ffract:
1833 result = emit_ffract(&ctx->ac, src[0]);
1834 break;
1835 case nir_op_fsin:
1836 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
1837 ac_to_float_type(&ctx->ac, def_type), src[0]);
1838 break;
1839 case nir_op_fcos:
1840 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
1841 ac_to_float_type(&ctx->ac, def_type), src[0]);
1842 break;
1843 case nir_op_fsqrt:
1844 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1845 ac_to_float_type(&ctx->ac, def_type), src[0]);
1846 break;
1847 case nir_op_fexp2:
1848 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
1849 ac_to_float_type(&ctx->ac, def_type), src[0]);
1850 break;
1851 case nir_op_flog2:
1852 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
1853 ac_to_float_type(&ctx->ac, def_type), src[0]);
1854 break;
1855 case nir_op_frsq:
1856 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1857 ac_to_float_type(&ctx->ac, def_type), src[0]);
1858 result = ac_build_fdiv(&ctx->ac, instr->dest.dest.ssa.bit_size == 32 ? ctx->ac.f32_1 : ctx->ac.f64_1,
1859 result);
1860 break;
1861 case nir_op_fpow:
1862 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
1863 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1864 break;
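/* For 32-bit fmin/fmax the result is additionally canonicalized,
 * presumably so that denormals/NaNs coming out of the hardware
 * min/max match the IEEE minnum/maxnum semantics GLSL expects. */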
1865 case nir_op_fmax:
1866 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1867 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1868 if (instr->dest.dest.ssa.bit_size == 32)
1869 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1870 ac_to_float_type(&ctx->ac, def_type),
1871 result);
1872 break;
1873 case nir_op_fmin:
1874 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1875 ac_to_float_type(&ctx->ac, def_type), src[0], src[1]);
1876 if (instr->dest.dest.ssa.bit_size == 32)
1877 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1878 ac_to_float_type(&ctx->ac, def_type),
1879 result);
1880 break;
1881 case nir_op_ffma:
1882 result = emit_intrin_3f_param(&ctx->ac, "llvm.fmuladd",
1883 ac_to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
1884 break;
1885 case nir_op_ibitfield_extract:
1886 result = emit_bitfield_extract(&ctx->ac, true, src);
1887 break;
1888 case nir_op_ubitfield_extract:
1889 result = emit_bitfield_extract(&ctx->ac, false, src);
1890 break;
1891 case nir_op_bitfield_insert:
1892 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
1893 break;
1894 case nir_op_bitfield_reverse:
1895 result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1896 break;
1897 case nir_op_bit_count:
1898 result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1899 break;
1900 case nir_op_vec2:
1901 case nir_op_vec3:
1902 case nir_op_vec4:
1903 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1904 src[i] = ac_to_integer(&ctx->ac, src[i]);
1905 result = ac_build_gather_values(&ctx->ac, src, num_components);
1906 break;
1907 case nir_op_f2i32:
1908 case nir_op_f2i64:
1909 src[0] = ac_to_float(&ctx->ac, src[0]);
1910 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
1911 break;
1912 case nir_op_f2u32:
1913 case nir_op_f2u64:
1914 src[0] = ac_to_float(&ctx->ac, src[0]);
1915 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
1916 break;
1917 case nir_op_i2f32:
1918 case nir_op_i2f64:
1919 src[0] = ac_to_integer(&ctx->ac, src[0]);
1920 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1921 break;
1922 case nir_op_u2f32:
1923 case nir_op_u2f64:
1924 src[0] = ac_to_integer(&ctx->ac, src[0]);
1925 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1926 break;
1927 case nir_op_f2f64:
1928 src[0] = ac_to_float(&ctx->ac, src[0]);
1929 result = LLVMBuildFPExt(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1930 break;
1931 case nir_op_f2f32:
1932 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], ac_to_float_type(&ctx->ac, def_type), "");
1933 break;
1934 case nir_op_u2u32:
1935 case nir_op_u2u64:
1936 src[0] = ac_to_integer(&ctx->ac, src[0]);
1937 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1938 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1939 else
1940 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1941 break;
1942 case nir_op_i2i32:
1943 case nir_op_i2i64:
1944 src[0] = ac_to_integer(&ctx->ac, src[0]);
1945 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1946 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
1947 else
1948 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1949 break;
1950 case nir_op_bcsel:
1951 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
1952 break;
1953 case nir_op_find_lsb:
1954 src[0] = ac_to_integer(&ctx->ac, src[0]);
1955 result = ac_find_lsb(&ctx->ac, ctx->ac.i32, src[0]);
1956 break;
1957 case nir_op_ufind_msb:
1958 src[0] = ac_to_integer(&ctx->ac, src[0]);
1959 result = ac_build_umsb(&ctx->ac, src[0], ctx->ac.i32);
1960 break;
1961 case nir_op_ifind_msb:
1962 src[0] = ac_to_integer(&ctx->ac, src[0]);
1963 result = ac_build_imsb(&ctx->ac, src[0], ctx->ac.i32);
1964 break;
1965 case nir_op_uadd_carry:
1966 src[0] = ac_to_integer(&ctx->ac, src[0]);
1967 src[1] = ac_to_integer(&ctx->ac, src[1]);
1968 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1969 break;
1970 case nir_op_usub_borrow:
1971 src[0] = ac_to_integer(&ctx->ac, src[0]);
1972 src[1] = ac_to_integer(&ctx->ac, src[1]);
1973 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1974 break;
1975 case nir_op_b2f:
1976 result = emit_b2f(&ctx->ac, src[0]);
1977 break;
1978 case nir_op_f2b:
1979 result = emit_f2b(&ctx->ac, src[0]);
1980 break;
1981 case nir_op_b2i:
1982 result = emit_b2i(&ctx->ac, src[0]);
1983 break;
1984 case nir_op_i2b:
1985 src[0] = ac_to_integer(&ctx->ac, src[0]);
1986 result = emit_i2b(&ctx->ac, src[0]);
1987 break;
1988 case nir_op_fquantize2f16:
1989 result = emit_f2f16(ctx->nctx, src[0]);
1990 break;
1991 case nir_op_umul_high:
1992 src[0] = ac_to_integer(&ctx->ac, src[0]);
1993 src[1] = ac_to_integer(&ctx->ac, src[1]);
1994 result = emit_umul_high(&ctx->ac, src[0], src[1]);
1995 break;
1996 case nir_op_imul_high:
1997 src[0] = ac_to_integer(&ctx->ac, src[0]);
1998 src[1] = ac_to_integer(&ctx->ac, src[1]);
1999 result = emit_imul_high(&ctx->ac, src[0], src[1]);
2000 break;
2001 case nir_op_pack_half_2x16:
2002 result = emit_pack_half_2x16(&ctx->ac, src[0]);
2003 break;
2004 case nir_op_unpack_half_2x16:
2005 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
2006 break;
2007 case nir_op_fddx:
2008 case nir_op_fddy:
2009 case nir_op_fddx_fine:
2010 case nir_op_fddy_fine:
2011 case nir_op_fddx_coarse:
2012 case nir_op_fddy_coarse:
2013 result = emit_ddxy(ctx, instr->op, src[0]);
2014 break;
2015
2016 case nir_op_unpack_64_2x32_split_x: {
2017 assert(instr->src[0].src.ssa->num_components == 1);
2018 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
2019 ctx->ac.v2i32,
2020 "");
2021 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
2022 ctx->ac.i32_0, "");
2023 break;
2024 }
2025
2026 case nir_op_unpack_64_2x32_split_y: {
2027 assert(instr->src[0].src.ssa->num_components == 1);
2028 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
2029 ctx->ac.v2i32,
2030 "");
2031 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
2032 ctx->ac.i32_1, "");
2033 break;
2034 }
2035
2036 case nir_op_pack_64_2x32_split: {
2037 LLVMValueRef tmp = LLVMGetUndef(ctx->ac.v2i32);
2038 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
2039 src[0], ctx->ac.i32_0, "");
2040 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
2041 src[1], ctx->ac.i32_1, "");
2042 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
2043 break;
2044 }
2045
2046 default:
2047 fprintf(stderr, "Unknown NIR alu instr: ");
2048 nir_print_instr(&instr->instr, stderr);
2049 fprintf(stderr, "\n");
2050 abort();
2051 }
2052
2053 if (result) {
2054 assert(instr->dest.dest.is_ssa);
2055 result = ac_to_integer(&ctx->ac, result);
2056 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
2057 result);
2058 }
2059 }
2060
2061 static void visit_load_const(struct ac_nir_context *ctx,
2062 const nir_load_const_instr *instr)
2063 {
2064 LLVMValueRef values[4], value = NULL;
2065 LLVMTypeRef element_type =
2066 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
2067
2068 for (unsigned i = 0; i < instr->def.num_components; ++i) {
2069 switch (instr->def.bit_size) {
2070 case 32:
2071 values[i] = LLVMConstInt(element_type,
2072 instr->value.u32[i], false);
2073 break;
2074 case 64:
2075 values[i] = LLVMConstInt(element_type,
2076 instr->value.u64[i], false);
2077 break;
2078 default:
2079 fprintf(stderr,
2080 "unsupported nir load_const bit_size: %d\n",
2081 instr->def.bit_size);
2082 abort();
2083 }
2084 }
2085 if (instr->def.num_components > 1) {
2086 value = LLVMConstVector(values, instr->def.num_components);
2087 } else
2088 value = values[0];
2089
2090 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
2091 }
2092
2093 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
2094 LLVMTypeRef type)
2095 {
2096 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
2097 return LLVMBuildBitCast(ctx->builder, ptr,
2098 LLVMPointerType(type, addr_space), "");
2099 }
2100
2101 static LLVMValueRef
2102 get_buffer_size(struct ac_nir_context *ctx, LLVMValueRef descriptor, bool in_elements)
2103 {
2104 LLVMValueRef size =
2105 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
2106 LLVMConstInt(ctx->ac.i32, 2, false), "");
2107
2108 /* VI only */
2109 if (ctx->ac.chip_class == VI && in_elements) {
2110 /* On VI, the descriptor contains the size in bytes,
2111 * but TXQ must return the size in elements.
2112 * The stride is always non-zero for resources using TXQ.
2113 */
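/* Worked example: a 64-element buffer with a 16-byte stride has
 * size = 1024 in dword 2 and stride = 16 in bits [29:16] of dword 1,
 * so this returns 1024 / 16 = 64 elements. */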
2114 LLVMValueRef stride =
2115 LLVMBuildExtractElement(ctx->ac.builder, descriptor,
2116 ctx->ac.i32_1, "");
2117 stride = LLVMBuildLShr(ctx->ac.builder, stride,
2118 LLVMConstInt(ctx->ac.i32, 16, false), "");
2119 stride = LLVMBuildAnd(ctx->ac.builder, stride,
2120 LLVMConstInt(ctx->ac.i32, 0x3fff, false), "");
2121
2122 size = LLVMBuildUDiv(ctx->ac.builder, size, stride, "");
2123 }
2124 return size;
2125 }
2126
2127 /**
2128 * Given the i32 or vNi32 \p type, generate the textual name (e.g. "i32" or
2129 * "v4i32") for use with intrinsic names.
2130 */
2131 static void build_int_type_name(
2132 LLVMTypeRef type,
2133 char *buf, unsigned bufsize)
2134 {
2135 assert(bufsize >= 6);
2136
2137 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
2138 snprintf(buf, bufsize, "v%ui32",
2139 LLVMGetVectorSize(type));
2140 else
2141 strcpy(buf, "i32");
2142 }
2143
2144 static LLVMValueRef radv_lower_gather4_integer(struct ac_llvm_context *ctx,
2145 struct ac_image_args *args,
2146 const nir_tex_instr *instr)
2147 {
2148 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2149 LLVMValueRef coord = args->addr;
2150 LLVMValueRef half_texel[2];
2151 LLVMValueRef compare_cube_wa = NULL;
2152 LLVMValueRef result;
2153 int c;
2154 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
2155
2156 //TODO Rect
2157 {
2158 struct ac_image_args txq_args = { 0 };
2159
2160 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
2161 txq_args.opcode = ac_image_get_resinfo;
2162 txq_args.dmask = 0xf;
2163 txq_args.addr = ctx->i32_0;
2164 txq_args.resource = args->resource;
2165 LLVMValueRef size = ac_build_image_opcode(ctx, &txq_args);
2166
2167 for (c = 0; c < 2; c++) {
2168 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
2169 LLVMConstInt(ctx->i32, c, false), "");
2170 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
2171 half_texel[c] = ac_build_fdiv(ctx, ctx->f32_1, half_texel[c]);
2172 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
2173 LLVMConstReal(ctx->f32, -0.5), "");
2174 }
2175 }
2176
2177 LLVMValueRef orig_coords = args->addr;
2178
2179 for (c = 0; c < 2; c++) {
2180 LLVMValueRef tmp;
2181 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
2182 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
2183 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2184 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
2185 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2186 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
2187 }
2188
2189
2190 /*
2191 * Apparently cube textures have an issue with integer types that the
2192 * workaround doesn't solve, so if the format is 8_8_8_8 and the type is
2193 * integer, do an alternate workaround: sample using a scaled type and
2194 * convert the result. This is taken from the amdgpu-pro shaders.
2195 */
2196 /* NOTE: this produces some ugly code compared to amdgpu-pro;
2197 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
2198 * and then reads them back. amdgpu-pro generates two selects:
2199 * one s_cmp for the descriptor rewrite and
2200 * one v_cmp for the coordinate and result changes.
2201 */
2202 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2203 LLVMValueRef tmp, tmp2;
2204
2205 /* workaround 8/8/8/8 uint/sint cube gather bug */
2206 /* first detect it, then change to a scaled read and f2i the result */
2207 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32_1, "");
2208 tmp2 = tmp;
2209
2210 /* extract the DATA_FORMAT */
2211 tmp = ac_build_bfe(ctx, tmp, LLVMConstInt(ctx->i32, 20, false),
2212 LLVMConstInt(ctx->i32, 6, false), false);
2213
2214 /* is the DATA_FORMAT == 8_8_8_8 */
2215 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
2216
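/* NUM_FORMAT occupies bits [29:26] of descriptor dword 1, so the
 * constants below are the format value shifted left by 26, e.g.
 * USCALED = 0x2 << 26 = 0x8000000. */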
2217 if (stype == GLSL_TYPE_UINT)
2218 /* Select the new NUM_FORMAT: 0x2 (USCALED) or 0x4 (UINT) */
2219 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
2220 LLVMConstInt(ctx->i32, 0x10000000, false), "");
2221 else
2222 /* Select the new NUM_FORMAT: 0x3 (SSCALED) or 0x5 (SINT) */
2223 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
2224 LLVMConstInt(ctx->i32, 0x14000000, false), "");
2225
2226 /* replace the NUM FORMAT in the descriptor */
2227 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
2228 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
2229
2230 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32_1, "");
2231
2232 /* don't modify the coordinates for this case */
2233 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
2234 }
2235 args->addr = coord;
2236 result = ac_build_image_opcode(ctx, args);
2237
2238 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2239 LLVMValueRef tmp, tmp2;
2240
2241 /* if the cube workaround is in place, f2i the result. */
2242 for (c = 0; c < 4; c++) {
2243 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
2244 if (stype == GLSL_TYPE_UINT)
2245 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
2246 else
2247 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
2248 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2249 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
2250 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
2251 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2252 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
2253 }
2254 }
2255 return result;
2256 }
2257
2258 static LLVMValueRef build_tex_intrinsic(struct ac_nir_context *ctx,
2259 const nir_tex_instr *instr,
2260 bool lod_is_zero,
2261 struct ac_image_args *args)
2262 {
2263 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
2264 return ac_build_buffer_load_format(&ctx->ac,
2265 args->resource,
2266 args->addr,
2267 ctx->ac.i32_0,
2268 true);
2269 }
2270
2271 args->opcode = ac_image_sample;
2272 args->compare = instr->is_shadow;
2273
2274 switch (instr->op) {
2275 case nir_texop_txf:
2276 case nir_texop_txf_ms:
2277 case nir_texop_samples_identical:
2278 args->opcode = lod_is_zero ||
2279 instr->sampler_dim == GLSL_SAMPLER_DIM_MS ?
2280 ac_image_load : ac_image_load_mip;
2281 args->compare = false;
2282 args->offset = false;
2283 break;
2284 case nir_texop_txb:
2285 args->bias = true;
2286 break;
2287 case nir_texop_txl:
2288 if (lod_is_zero)
2289 args->level_zero = true;
2290 else
2291 args->lod = true;
2292 break;
2293 case nir_texop_txs:
2294 case nir_texop_query_levels:
2295 args->opcode = ac_image_get_resinfo;
2296 break;
2297 case nir_texop_tex:
2298 if (ctx->stage != MESA_SHADER_FRAGMENT)
2299 args->level_zero = true;
2300 break;
2301 case nir_texop_txd:
2302 args->deriv = true;
2303 break;
2304 case nir_texop_tg4:
2305 args->opcode = ac_image_gather4;
2306 args->level_zero = true;
2307 break;
2308 case nir_texop_lod:
2309 args->opcode = ac_image_get_lod;
2310 args->compare = false;
2311 args->offset = false;
2312 break;
2313 default:
2314 break;
2315 }
2316
2317 if (instr->op == nir_texop_tg4 && ctx->ac.chip_class <= VI) {
2318 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2319 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
2320 return radv_lower_gather4_integer(&ctx->ac, args, instr);
2321 }
2322 }
2323 return ac_build_image_opcode(&ctx->ac, args);
2324 }
2325
2326 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
2327 nir_intrinsic_instr *instr)
2328 {
2329 LLVMValueRef index = get_src(ctx->nir, instr->src[0]);
2330 unsigned desc_set = nir_intrinsic_desc_set(instr);
2331 unsigned binding = nir_intrinsic_binding(instr);
2332 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
2333 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
2334 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
2335 unsigned base_offset = layout->binding[binding].offset;
2336 LLVMValueRef offset, stride;
2337
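/* Dynamic buffer descriptors live at the end of the push constant
 * space: one 16-byte (vec4) descriptor per dynamic offset, starting
 * at pipeline_layout->push_constant_size. */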
2338 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2339 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2340 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
2341 layout->binding[binding].dynamic_offset_offset;
2342 desc_ptr = ctx->push_constants;
2343 base_offset = pipeline_layout->push_constant_size + 16 * idx;
2344 stride = LLVMConstInt(ctx->ac.i32, 16, false);
2345 } else
2346 stride = LLVMConstInt(ctx->ac.i32, layout->binding[binding].size, false);
2347
2348 offset = LLVMConstInt(ctx->ac.i32, base_offset, false);
2349 index = LLVMBuildMul(ctx->builder, index, stride, "");
2350 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
2351
2352 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
2353 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->ac.v4i32);
2354 LLVMSetMetadata(desc_ptr, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
2355
2356 return desc_ptr;
2357 }
2358
2359 static LLVMValueRef visit_vulkan_resource_reindex(struct nir_to_llvm_context *ctx,
2360 nir_intrinsic_instr *instr)
2361 {
2362 LLVMValueRef ptr = get_src(ctx->nir, instr->src[0]);
2363 LLVMValueRef index = get_src(ctx->nir, instr->src[1]);
2364
2365 LLVMValueRef result = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2366 LLVMSetMetadata(result, ctx->ac.uniform_md_kind, ctx->ac.empty_md);
2367 return result;
2368 }
2369
2370 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
2371 nir_intrinsic_instr *instr)
2372 {
2373 LLVMValueRef ptr, addr;
2374
2375 addr = LLVMConstInt(ctx->ac.i32, nir_intrinsic_base(instr), 0);
2376 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), "");
2377
2378 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
2379 ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
2380
2381 return LLVMBuildLoad(ctx->builder, ptr, "");
2382 }
2383
2384 static LLVMValueRef visit_get_buffer_size(struct ac_nir_context *ctx,
2385 const nir_intrinsic_instr *instr)
2386 {
2387 LLVMValueRef ptr = get_src(ctx, instr->src[0]);
2388
2389 return get_buffer_size(ctx, LLVMBuildLoad(ctx->ac.builder, ptr, ""), false);
2390 }
2391 static void visit_store_ssbo(struct ac_nir_context *ctx,
2392 nir_intrinsic_instr *instr)
2393 {
2394 const char *store_name;
2395 LLVMValueRef src_data = get_src(ctx, instr->src[0]);
2396 LLVMTypeRef data_type = ctx->ac.f32;
2397 int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
2398 int components_32bit = elem_size_mult * instr->num_components;
2399 unsigned writemask = nir_intrinsic_write_mask(instr);
2400 LLVMValueRef base_data, base_offset;
2401 LLVMValueRef params[6];
2402
2403 params[1] = ctx->abi->load_ssbo(ctx->abi,
2404 get_src(ctx, instr->src[1]), true);
2405 params[2] = ctx->ac.i32_0; /* vindex */
2406 params[4] = ctx->ac.i1false; /* glc */
2407 params[5] = ctx->ac.i1false; /* slc */
2408
2409 if (components_32bit > 1)
2410 data_type = LLVMVectorType(ctx->ac.f32, components_32bit);
2411
2412 base_data = ac_to_float(&ctx->ac, src_data);
2413 base_data = trim_vector(&ctx->ac, base_data, instr->num_components);
2414 base_data = LLVMBuildBitCast(ctx->ac.builder, base_data,
2415 data_type, "");
2416 base_offset = get_src(ctx, instr->src[2]); /* voffset */
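/* Emit one buffer.store per consecutive run of enabled components;
 * e.g. a 32-bit writemask of 0x7 (xyz) becomes a v2f32 store of xy
 * plus an f32 store of z because of the 3-element split below. */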
2417 while (writemask) {
2418 int start, count;
2419 LLVMValueRef data;
2420 LLVMValueRef offset;
2421 LLVMValueRef tmp;
2422 u_bit_scan_consecutive_range(&writemask, &start, &count);
2423
2424 /* Due to an LLVM limitation, split 3-element writes
2425 * into a 2-element and a 1-element write. */
2426 if (count == 3) {
2427 writemask |= 1 << (start + 2);
2428 count = 2;
2429 }
2430
2431 start *= elem_size_mult;
2432 count *= elem_size_mult;
2433
2434 if (count > 4) {
2435 writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
2436 count = 4;
2437 }
2438
2439 if (count == 4) {
2440 store_name = "llvm.amdgcn.buffer.store.v4f32";
2441 data = base_data;
2442 } else if (count == 2) {
2443 tmp = LLVMBuildExtractElement(ctx->ac.builder,
2444 base_data, LLVMConstInt(ctx->ac.i32, start, false), "");
2445 data = LLVMBuildInsertElement(ctx->ac.builder, LLVMGetUndef(ctx->ac.v2f32), tmp,
2446 ctx->ac.i32_0, "");
2447
2448 tmp = LLVMBuildExtractElement(ctx->ac.builder,
2449 base_data, LLVMConstInt(ctx->ac.i32, start + 1, false), "");
2450 data = LLVMBuildInsertElement(ctx->ac.builder, data, tmp,
2451 ctx->ac.i32_1, "");
2452 store_name = "llvm.amdgcn.buffer.store.v2f32";
2453
2454 } else {
2455 assert(count == 1);
2456 if (ac_get_llvm_num_components(base_data) > 1)
2457 data = LLVMBuildExtractElement(ctx->ac.builder, base_data,
2458 LLVMConstInt(ctx->ac.i32, start, false), "");
2459 else
2460 data = base_data;
2461 store_name = "llvm.amdgcn.buffer.store.f32";
2462 }
2463
2464 offset = base_offset;
2465 if (start != 0) {
2466 offset = LLVMBuildAdd(ctx->ac.builder, offset, LLVMConstInt(ctx->ac.i32, start * 4, false), "");
2467 }
2468 params[0] = data;
2469 params[3] = offset;
2470 ac_build_intrinsic(&ctx->ac, store_name,
2471 ctx->ac.voidt, params, 6, 0);
2472 }
2473 }
2474
2475 static LLVMValueRef visit_atomic_ssbo(struct ac_nir_context *ctx,
2476 const nir_intrinsic_instr *instr)
2477 {
2478 const char *name;
2479 LLVMValueRef params[6];
2480 int arg_count = 0;
2481
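/* buffer.atomic.cmpswap takes the new value first and the comparison
 * value second; the remaining operands match the other atomics. */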
2482 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2483 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[3]), 0);
2484 }
2485 params[arg_count++] = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[2]), 0);
2486 params[arg_count++] = ctx->abi->load_ssbo(ctx->abi,
2487 get_src(ctx, instr->src[0]),
2488 true);
2489 params[arg_count++] = ctx->ac.i32_0; /* vindex */
2490 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
2491 params[arg_count++] = LLVMConstInt(ctx->ac.i1, 0, false); /* slc */
2492
2493 switch (instr->intrinsic) {
2494 case nir_intrinsic_ssbo_atomic_add:
2495 name = "llvm.amdgcn.buffer.atomic.add";
2496 break;
2497 case nir_intrinsic_ssbo_atomic_imin:
2498 name = "llvm.amdgcn.buffer.atomic.smin";
2499 break;
2500 case nir_intrinsic_ssbo_atomic_umin:
2501 name = "llvm.amdgcn.buffer.atomic.umin";
2502 break;
2503 case nir_intrinsic_ssbo_atomic_imax:
2504 name = "llvm.amdgcn.buffer.atomic.smax";
2505 break;
2506 case nir_intrinsic_ssbo_atomic_umax:
2507 name = "llvm.amdgcn.buffer.atomic.umax";
2508 break;
2509 case nir_intrinsic_ssbo_atomic_and:
2510 name = "llvm.amdgcn.buffer.atomic.and";
2511 break;
2512 case nir_intrinsic_ssbo_atomic_or:
2513 name = "llvm.amdgcn.buffer.atomic.or";
2514 break;
2515 case nir_intrinsic_ssbo_atomic_xor:
2516 name = "llvm.amdgcn.buffer.atomic.xor";
2517 break;
2518 case nir_intrinsic_ssbo_atomic_exchange:
2519 name = "llvm.amdgcn.buffer.atomic.swap";
2520 break;
2521 case nir_intrinsic_ssbo_atomic_comp_swap:
2522 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2523 break;
2524 default:
2525 abort();
2526 }
2527
2528 return ac_build_intrinsic(&ctx->ac, name, ctx->ac.i32, params, arg_count, 0);
2529 }
2530
2531 static LLVMValueRef visit_load_buffer(struct ac_nir_context *ctx,
2532 const nir_intrinsic_instr *instr)
2533 {
2534 LLVMValueRef results[2];
2535 int load_components;
2536 int num_components = instr->num_components;
2537 if (instr->dest.ssa.bit_size == 64)
2538 num_components *= 2;
2539
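/* Load in chunks of at most 4 dwords; e.g. a 64-bit vec3 load (6
 * dwords) becomes one v4f32 load plus one v2f32 load, re-joined by
 * the shuffle below. */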
2540 for (int i = 0; i < num_components; i += load_components) {
2541 load_components = MIN2(num_components - i, 4);
2542 const char *load_name;
2543 LLVMTypeRef data_type = ctx->ac.f32;
2544 LLVMValueRef offset = LLVMConstInt(ctx->ac.i32, i * 4, false);
2545 offset = LLVMBuildAdd(ctx->ac.builder, get_src(ctx, instr->src[1]), offset, "");
2546
2547 if (load_components == 3)
2548 data_type = LLVMVectorType(ctx->ac.f32, 4);
2549 else if (load_components > 1)
2550 data_type = LLVMVectorType(ctx->ac.f32, load_components);
2551
2552 if (load_components >= 3)
2553 load_name = "llvm.amdgcn.buffer.load.v4f32";
2554 else if (load_components == 2)
2555 load_name = "llvm.amdgcn.buffer.load.v2f32";
2556 else if (load_components == 1)
2557 load_name = "llvm.amdgcn.buffer.load.f32";
2558 else
2559 unreachable("unhandled number of components");
2560
2561 LLVMValueRef params[] = {
2562 ctx->abi->load_ssbo(ctx->abi,
2563 get_src(ctx, instr->src[0]),
2564 false),
2565 ctx->ac.i32_0,
2566 offset,
2567 ctx->ac.i1false,
2568 ctx->ac.i1false,
2569 };
2570
2571 results[i] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
2572
2573 }
2574
2575 assume(results[0]);
2576 LLVMValueRef ret = results[0];
2577 if (num_components > 4 || num_components == 3) {
2578 LLVMValueRef masks[] = {
2579 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
2580 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
2581 LLVMConstInt(ctx->ac.i32, 4, false), LLVMConstInt(ctx->ac.i32, 5, false),
2582 LLVMConstInt(ctx->ac.i32, 6, false), LLVMConstInt(ctx->ac.i32, 7, false)
2583 };
2584
2585 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
2586 ret = LLVMBuildShuffleVector(ctx->ac.builder, results[0],
2587 results[num_components > 4 ? 1 : 0], swizzle, "");
2588 }
2589
2590 return LLVMBuildBitCast(ctx->ac.builder, ret,
2591 get_def_type(ctx, &instr->dest.ssa), "");
2592 }
2593
2594 static LLVMValueRef visit_load_ubo_buffer(struct ac_nir_context *ctx,
2595 const nir_intrinsic_instr *instr)
2596 {
2597 LLVMValueRef ret;
2598 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
2599 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2600 int num_components = instr->num_components;
2601
2602 if (ctx->abi->load_ubo)
2603 rsrc = ctx->abi->load_ubo(ctx->abi, rsrc);
2604
2605 if (instr->dest.ssa.bit_size == 64)
2606 num_components *= 2;
2607
2608 ret = ac_build_buffer_load(&ctx->ac, rsrc, num_components, NULL, offset,
2609 NULL, 0, false, false, true, true);
2610
2611 return LLVMBuildBitCast(ctx->ac.builder, ret,
2612 get_def_type(ctx, &instr->dest.ssa), "");
2613 }
2614
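/* Walk a NIR deref chain and split it into a constant offset plus an
 * optional indirect (dynamic) offset, both counted in attribute slots;
 * optionally also return the outermost per-vertex array index.
 */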
2615 static void
2616 get_deref_offset(struct ac_nir_context *ctx, nir_deref_var *deref,
2617 bool vs_in, unsigned *vertex_index_out,
2618 LLVMValueRef *vertex_index_ref,
2619 unsigned *const_out, LLVMValueRef *indir_out)
2620 {
2621 unsigned const_offset = 0;
2622 nir_deref *tail = &deref->deref;
2623 LLVMValueRef offset = NULL;
2624
2625 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2626 tail = tail->child;
2627 nir_deref_array *deref_array = nir_deref_as_array(tail);
2628 if (vertex_index_out)
2629 *vertex_index_out = deref_array->base_offset;
2630
2631 if (vertex_index_ref) {
2632 LLVMValueRef vtx = LLVMConstInt(ctx->ac.i32, deref_array->base_offset, false);
2633 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
2634 vtx = LLVMBuildAdd(ctx->ac.builder, vtx, get_src(ctx, deref_array->indirect), "");
2635 }
2636 *vertex_index_ref = vtx;
2637 }
2638 }
2639
2640 if (deref->var->data.compact) {
2641 assert(tail->child->deref_type == nir_deref_type_array);
2642 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
2643 nir_deref_array *deref_array = nir_deref_as_array(tail->child);
2644 /* We always lower indirect dereferences for "compact" array vars. */
2645 assert(deref_array->deref_array_type == nir_deref_array_type_direct);
2646
2647 const_offset = deref_array->base_offset;
2648 goto out;
2649 }
2650
2651 while (tail->child != NULL) {
2652 const struct glsl_type *parent_type = tail->type;
2653 tail = tail->child;
2654
2655 if (tail->deref_type == nir_deref_type_array) {
2656 nir_deref_array *deref_array = nir_deref_as_array(tail);
2657 LLVMValueRef index, stride, local_offset;
2658 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2659
2660 const_offset += size * deref_array->base_offset;
2661 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2662 continue;
2663
2664 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2665 index = get_src(ctx, deref_array->indirect);
2666 stride = LLVMConstInt(ctx->ac.i32, size, 0);
2667 local_offset = LLVMBuildMul(ctx->ac.builder, stride, index, "");
2668
2669 if (offset)
2670 offset = LLVMBuildAdd(ctx->ac.builder, offset, local_offset, "");
2671 else
2672 offset = local_offset;
2673 } else if (tail->deref_type == nir_deref_type_struct) {
2674 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2675
2676 for (unsigned i = 0; i < deref_struct->index; i++) {
2677 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2678 const_offset += glsl_count_attribute_slots(ft, vs_in);
2679 }
2680 } else
2681 unreachable("unsupported deref type");
2682
2683 }
2684 out:
2685 if (const_offset && offset)
2686 offset = LLVMBuildAdd(ctx->ac.builder, offset,
2687 LLVMConstInt(ctx->ac.i32, const_offset, 0),
2688 "");
2689
2690 *const_out = const_offset;
2691 *indir_out = offset;
2692 }
2693
2694
2695 /* The offchip buffer layout for TCS->TES is
2696 *
2697 * - attribute 0 of patch 0 vertex 0
2698 * - attribute 0 of patch 0 vertex 1
2699 * - attribute 0 of patch 0 vertex 2
2700 * ...
2701 * - attribute 0 of patch 1 vertex 0
2702 * - attribute 0 of patch 1 vertex 1
2703 * ...
2704 * - attribute 1 of patch 0 vertex 0
2705 * - attribute 1 of patch 0 vertex 1
2706 * ...
2707 * - per patch attribute 0 of patch 0
2708 * - per patch attribute 0 of patch 1
2709 * ...
2710 *
2711 * Note that every attribute has 4 components.
2712 */
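/* Concretely, the byte address computed below for a per-vertex attribute is
 *
 *   (attr * num_patches * verts_per_patch + patch * verts_per_patch + vertex) * 16
 *
 * while per-patch attributes use num_patches as the attribute stride and add
 * the patch-data offset taken from tcs_offchip_layout.
 */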
2713 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
2714 LLVMValueRef vertex_index,
2715 LLVMValueRef param_index)
2716 {
2717 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
2718 LLVMValueRef param_stride, constant16;
2719 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
2720
2721 vertices_per_patch = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 9, 6);
2722 num_patches = unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 0, 9);
2723 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
2724 num_patches, "");
2725
2726 constant16 = LLVMConstInt(ctx->ac.i32, 16, false);
2727 if (vertex_index) {
2728 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id,
2729 vertices_per_patch, "");
2730
2731 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2732 vertex_index, "");
2733
2734 param_stride = total_vertices;
2735 } else {
2736 base_addr = rel_patch_id;
2737 param_stride = num_patches;
2738 }
2739
2740 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2741 LLVMBuildMul(ctx->builder, param_index,
2742 param_stride, ""), "");
2743
2744 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, "");
2745
2746 if (!vertex_index) {
2747 LLVMValueRef patch_data_offset =
2748 unpack_param(&ctx->ac, ctx->tcs_offchip_layout, 16, 16);
2749
2750 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2751 patch_data_offset, "");
2752 }
2753 return base_addr;
2754 }
2755
2756 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx,
2757 unsigned param,
2758 unsigned const_index,
2759 bool is_compact,
2760 LLVMValueRef vertex_index,
2761 LLVMValueRef indir_index)
2762 {
2763 LLVMValueRef param_index;
2764
2765 if (indir_index)
2766 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->ac.i32, param, false),
2767 indir_index, "");
2768 else {
2769 if (const_index && !is_compact)
2770 param += const_index;
2771 param_index = LLVMConstInt(ctx->ac.i32, param, false);
2772 }
2773 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
2774 }
2775
2776 static void
2777 mark_tess_output(struct nir_to_llvm_context *ctx,
2778 bool is_patch, uint32_t param)
2779
2780 {
2781 if (is_patch) {
2782 ctx->tess_patch_outputs_written |= (1ull << param);
2783 } else
2784 ctx->tess_outputs_written |= (1ull << param);
2785 }
2786
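/* Compute an LDS address in dwords: each attribute slot occupies 4 dwords,
 * the vertex index advances by the per-vertex stride, indirect indices
 * advance by whole slots, and compact const indices address single dwords.
 */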
2787 static LLVMValueRef
2788 get_dw_address(struct nir_to_llvm_context *ctx,
2789 LLVMValueRef dw_addr,
2790 unsigned param,
2791 unsigned const_index,
2792 bool compact_const_index,
2793 LLVMValueRef vertex_index,
2794 LLVMValueRef stride,
2795 LLVMValueRef indir_index)
2796
2797 {
2798
2799 if (vertex_index) {
2800 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2801 LLVMBuildMul(ctx->builder,
2802 vertex_index,
2803 stride, ""), "");
2804 }
2805
2806 if (indir_index)
2807 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2808 LLVMBuildMul(ctx->builder, indir_index,
2809 LLVMConstInt(ctx->ac.i32, 4, false), ""), "");
2810 else if (const_index && !compact_const_index)
2811 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2812 LLVMConstInt(ctx->ac.i32, const_index, false), "");
2813
2814 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2815 LLVMConstInt(ctx->ac.i32, param * 4, false), "");
2816
2817 if (const_index && compact_const_index)
2818 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2819 LLVMConstInt(ctx->ac.i32, const_index, false), "");
2820 return dw_addr;
2821 }
2822
2823 static LLVMValueRef
2824 load_tcs_input(struct ac_shader_abi *abi,
2825 LLVMValueRef vertex_index,
2826 LLVMValueRef indir_index,
2827 unsigned const_index,
2828 unsigned location,
2829 unsigned driver_location,
2830 unsigned component,
2831 unsigned num_components,
2832 bool is_patch,
2833 bool is_compact)
2834 {
2835 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2836 LLVMValueRef dw_addr, stride;
2837 LLVMValueRef value[4], result;
2838 unsigned param = shader_io_get_unique_index(location);
2839
2840 stride = unpack_param(&ctx->ac, ctx->tcs_in_layout, 13, 8);
2841 dw_addr = get_tcs_in_current_patch_offset(ctx);
2842 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2843 indir_index);
2844
2845 for (unsigned i = 0; i < num_components + component; i++) {
2846 value[i] = ac_lds_load(&ctx->ac, dw_addr);
2847 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2848 ctx->ac.i32_1, "");
2849 }
2850 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
2851 return result;
2852 }
2853
2854 static LLVMValueRef
2855 load_tcs_output(struct nir_to_llvm_context *ctx,
2856 nir_intrinsic_instr *instr)
2857 {
2858 LLVMValueRef dw_addr;
2859 LLVMValueRef stride = NULL;
2860 LLVMValueRef value[4], result;
2861 LLVMValueRef vertex_index = NULL;
2862 LLVMValueRef indir_index = NULL;
2863 unsigned const_index = 0;
2864 unsigned param;
2865 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2866 const bool is_compact = instr->variables[0]->var->data.compact;
2867 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2868 get_deref_offset(ctx->nir, instr->variables[0],
2869 false, NULL, per_vertex ? &vertex_index : NULL,
2870 &const_index, &indir_index);
2871
2872 if (!instr->variables[0]->var->data.patch) {
2873 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
2874 dw_addr = get_tcs_out_current_patch_offset(ctx);
2875 } else {
2876 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2877 }
2878
2879 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2880 indir_index);
2881
2882 unsigned comp = instr->variables[0]->var->data.location_frac;
2883 for (unsigned i = comp; i < instr->num_components + comp; i++) {
2884 value[i] = ac_lds_load(&ctx->ac, dw_addr);
2885 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2886 ctx->ac.i32_1, "");
2887 }
2888 result = ac_build_varying_gather_values(&ctx->ac, value, instr->num_components, comp);
2889 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
2890 return result;
2891 }
2892
2893 static void
2894 store_tcs_output(struct ac_shader_abi *abi,
2895 LLVMValueRef vertex_index,
2896 LLVMValueRef param_index,
2897 unsigned const_index,
2898 unsigned location,
2899 unsigned driver_location,
2900 LLVMValueRef src,
2901 unsigned component,
2902 bool is_patch,
2903 bool is_compact,
2904 unsigned writemask)
2905 {
2906 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2907 LLVMValueRef dw_addr;
2908 LLVMValueRef stride = NULL;
2909 LLVMValueRef buf_addr = NULL;
2910 unsigned param;
2911 bool store_lds = true;
2912
2913 if (is_patch) {
2914 if (!(ctx->tcs_patch_outputs_read & (1U << (location - VARYING_SLOT_PATCH0))))
2915 store_lds = false;
2916 } else {
2917 if (!(ctx->tcs_outputs_read & (1ULL << location)))
2918 store_lds = false;
2919 }
2920
2921 param = shader_io_get_unique_index(location);
2922 if (location == VARYING_SLOT_CLIP_DIST0 &&
2923 is_compact && const_index > 3) {
2924 const_index -= 3;
2925 param++;
2926 }
2927
2928 if (!is_patch) {
2929 stride = unpack_param(&ctx->ac, ctx->tcs_out_layout, 13, 8);
2930 dw_addr = get_tcs_out_current_patch_offset(ctx);
2931 } else {
2932 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2933 }
2934
2935 mark_tess_output(ctx, is_patch, param);
2936
2937 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2938 param_index);
2939 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
2940 vertex_index, param_index);
2941
2942 bool is_tess_factor = false;
2943 if (location == VARYING_SLOT_TESS_LEVEL_INNER ||
2944 location == VARYING_SLOT_TESS_LEVEL_OUTER)
2945 is_tess_factor = true;
2946
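/* Outputs the TCS reads back (and the tess factors) are stored to
 * LDS; everything except the tess factors is also written to the
 * offchip buffer that the TES reads. */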
2947 unsigned base = is_compact ? const_index : 0;
2948 for (unsigned chan = 0; chan < 8; chan++) {
2949 if (!(writemask & (1 << chan)))
2950 continue;
2951 LLVMValueRef value = ac_llvm_extract_elem(&ctx->ac, src, chan - component);
2952
2953 if (store_lds || is_tess_factor)
2954 ac_lds_store(&ctx->ac, dw_addr, value);
2955
2956 if (!is_tess_factor && writemask != 0xF)
2957 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
2958 buf_addr, ctx->oc_lds,
2959 4 * (base + chan), 1, 0, true, false);
2960
2961 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2962 ctx->ac.i32_1, "");
2963 }
2964
2965 if (writemask == 0xF) {
2966 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
2967 buf_addr, ctx->oc_lds,
2968 (base * 4), 1, 0, true, false);
2969 }
2970 }
2971
2972 static LLVMValueRef
2973 load_tes_input(struct ac_shader_abi *abi,
2974 LLVMValueRef vertex_index,
2975 LLVMValueRef param_index,
2976 unsigned const_index,
2977 unsigned location,
2978 unsigned driver_location,
2979 unsigned component,
2980 unsigned num_components,
2981 bool is_patch,
2982 bool is_compact)
2983 {
2984 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
2985 LLVMValueRef buf_addr;
2986 LLVMValueRef result;
2987 unsigned param = shader_io_get_unique_index(location);
2988
2989 if (location == VARYING_SLOT_CLIP_DIST0 && is_compact && const_index > 3) {
2990 const_index -= 3;
2991 param++;
2992 }
2993
2994 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
2995 is_compact, vertex_index, param_index);
2996
2997 LLVMValueRef comp_offset = LLVMConstInt(ctx->ac.i32, component * 4, false);
2998 buf_addr = LLVMBuildAdd(ctx->builder, buf_addr, comp_offset, "");
2999
3000 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, num_components, NULL,
3001 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
3002 result = trim_vector(&ctx->ac, result, num_components);
3003 return result;
3004 }
3005
3006 static LLVMValueRef
3007 load_gs_input(struct ac_shader_abi *abi,
3008 unsigned location,
3009 unsigned driver_location,
3010 unsigned component,
3011 unsigned num_components,
3012 unsigned vertex_index,
3013 unsigned const_index,
3014 LLVMTypeRef type)
3015 {
3016 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
3017 LLVMValueRef vtx_offset;
3018 LLVMValueRef args[9];
3019 unsigned param, vtx_offset_param;
3020 LLVMValueRef value[4], result;
3021
3022 vtx_offset_param = vertex_index;
3023 assert(vtx_offset_param < 6);
3024 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
3025 LLVMConstInt(ctx->ac.i32, 4, false), "");
3026
3027 param = shader_io_get_unique_index(location);
3028
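/* On GFX9 the ES and GS stages run merged, so ES outputs are read
 * straight from LDS; on older chips they go through the ESGS ring
 * buffer, where each output dword occupies 256 bytes (presumably
 * 64 lanes * 4 bytes). */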
3029 for (unsigned i = component; i < num_components + component; i++) {
3030 if (ctx->ac.chip_class >= GFX9) {
3031 LLVMValueRef dw_addr = ctx->gs_vtx_offset[vtx_offset_param];
3032 dw_addr = LLVMBuildAdd(ctx->ac.builder, dw_addr,
3033 LLVMConstInt(ctx->ac.i32, param * 4 + i + const_index, 0), "");
3034 value[i] = ac_lds_load(&ctx->ac, dw_addr);
3035 } else {
3036 args[0] = ctx->esgs_ring;
3037 args[1] = vtx_offset;
3038 args[2] = LLVMConstInt(ctx->ac.i32, (param * 4 + i + const_index) * 256, false);
3039 args[3] = ctx->ac.i32_0;
3040 args[4] = ctx->ac.i32_1; /* OFFEN */
3041 args[5] = ctx->ac.i32_0; /* IDXEN */
3042 args[6] = ctx->ac.i32_1; /* GLC */
3043 args[7] = ctx->ac.i32_0; /* SLC */
3044 args[8] = ctx->ac.i32_0; /* TFE */
3045
3046 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
3047 ctx->ac.i32, args, 9,
3048 AC_FUNC_ATTR_READONLY |
3049 AC_FUNC_ATTR_LEGACY);
3050 }
3051 }
3052 result = ac_build_varying_gather_values(&ctx->ac, value, num_components, component);
3053
3054 return result;
3055 }
3056
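/* Translate a NIR variable deref chain into an LLVM GEP on the variable's
 * storage, adding one index per array or struct level.
 */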
3057 static LLVMValueRef
3058 build_gep_for_deref(struct ac_nir_context *ctx,
3059 nir_deref_var *deref)
3060 {
3061 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
3062 assert(entry->data);
3063 LLVMValueRef val = entry->data;
3064 nir_deref *tail = deref->deref.child;
3065 while (tail != NULL) {
3066 LLVMValueRef offset;
3067 switch (tail->deref_type) {
3068 case nir_deref_type_array: {
3069 nir_deref_array *array = nir_deref_as_array(tail);
3070 offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0);
3071 if (array->deref_array_type ==
3072 nir_deref_array_type_indirect) {
3073 offset = LLVMBuildAdd(ctx->ac.builder, offset,
3074 get_src(ctx,
3075 array->indirect),
3076 "");
3077 }
3078 break;
3079 }
3080 case nir_deref_type_struct: {
3081 nir_deref_struct *deref_struct =
3082 nir_deref_as_struct(tail);
3083 offset = LLVMConstInt(ctx->ac.i32,
3084 deref_struct->index, 0);
3085 break;
3086 }
3087 default:
3088 unreachable("bad deref type");
3089 }
3090 val = ac_build_gep0(&ctx->ac, val, offset);
3091 tail = tail->child;
3092 }
3093 return val;
3094 }
3095
3096 static LLVMValueRef visit_load_var(struct ac_nir_context *ctx,
3097 nir_intrinsic_instr *instr)
3098 {
3099 LLVMValueRef values[8];
3100 int idx = instr->variables[0]->var->data.driver_location;
3101 int ve = instr->dest.ssa.num_components;
3102 unsigned comp = instr->variables[0]->var->data.location_frac;
3103 LLVMValueRef indir_index;
3104 LLVMValueRef ret;
3105 unsigned const_index;
3106 unsigned stride = instr->variables[0]->var->data.compact ? 1 : 4;
3107 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
3108 instr->variables[0]->var->data.mode == nir_var_shader_in;
3109 get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
3110 &const_index, &indir_index);
3111
3112 if (instr->dest.ssa.bit_size == 64)
3113 ve *= 2;
3114
3115 switch (instr->variables[0]->var->data.mode) {
3116 case nir_var_shader_in:
3117 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
3118 ctx->stage == MESA_SHADER_TESS_EVAL) {
3119 LLVMValueRef result;
3120 LLVMValueRef vertex_index = NULL;
3121 LLVMValueRef indir_index = NULL;
3122 unsigned const_index = 0;
3123 unsigned location = instr->variables[0]->var->data.location;
3124 unsigned driver_location = instr->variables[0]->var->data.driver_location;
3125 const bool is_patch = instr->variables[0]->var->data.patch;
3126 const bool is_compact = instr->variables[0]->var->data.compact;
3127
3128 get_deref_offset(ctx, instr->variables[0],
3129 false, NULL, is_patch ? NULL : &vertex_index,
3130 &const_index, &indir_index);
3131
3132 result = ctx->abi->load_tess_inputs(ctx->abi, vertex_index, indir_index,
3133 const_index, location, driver_location,
3134 instr->variables[0]->var->data.location_frac,
3135 instr->num_components,
3136 is_patch, is_compact);
3137 return LLVMBuildBitCast(ctx->ac.builder, result, get_def_type(ctx, &instr->dest.ssa), "");
3138 }
3139
3140 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3141 LLVMValueRef indir_index;
3142 unsigned const_index, vertex_index;
3143 get_deref_offset(ctx, instr->variables[0],
3144 false, &vertex_index, NULL,
3145 &const_index, &indir_index);
3146 return ctx->abi->load_inputs(ctx->abi, instr->variables[0]->var->data.location,
3147 instr->variables[0]->var->data.driver_location,
3148 instr->variables[0]->var->data.location_frac, ve,
3149 vertex_index, const_index,
3150 nir2llvmtype(ctx, instr->variables[0]->var->type));
3151 }
3152
3153 for (unsigned chan = comp; chan < ve + comp; chan++) {
3154 if (indir_index) {
3155 unsigned count = glsl_count_attribute_slots(
3156 instr->variables[0]->var->type,
3157 ctx->stage == MESA_SHADER_VERTEX);
3158 count -= chan / 4;
3159 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3160 &ctx->ac, ctx->abi->inputs + idx + chan, count,
3161 stride, false, true);
3162
3163 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3164 tmp_vec,
3165 indir_index, "");
3166 } else
3167 values[chan] = ctx->abi->inputs[idx + chan + const_index * stride];
3168 }
3169 break;
3170 case nir_var_local:
3171 for (unsigned chan = 0; chan < ve; chan++) {
3172 if (indir_index) {
3173 unsigned count = glsl_count_attribute_slots(
3174 instr->variables[0]->var->type, false);
3175 count -= chan / 4;
3176 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3177 &ctx->ac, ctx->locals + idx + chan, count,
3178 stride, true, true);
3179
3180 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3181 tmp_vec,
3182 indir_index, "");
3183 } else {
3184 values[chan] = LLVMBuildLoad(ctx->ac.builder, ctx->locals[idx + chan + const_index * stride], "");
3185 }
3186 }
3187 break;
3188 case nir_var_shared: {
3189 LLVMValueRef address = build_gep_for_deref(ctx,
3190 instr->variables[0]);
3191 LLVMValueRef val = LLVMBuildLoad(ctx->ac.builder, address, "");
3192 return LLVMBuildBitCast(ctx->ac.builder, val,
3193 get_def_type(ctx, &instr->dest.ssa),
3194 "");
3195 }
3196 case nir_var_shader_out:
3197 if (ctx->stage == MESA_SHADER_TESS_CTRL)
3198 return load_tcs_output(ctx->nctx, instr);
3199
3200 for (unsigned chan = comp; chan < ve + comp; chan++) {
3201 if (indir_index) {
3202 unsigned count = glsl_count_attribute_slots(
3203 instr->variables[0]->var->type, false);
3204 count -= chan / 4;
3205 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3206 &ctx->ac, ctx->outputs + idx + chan, count,
3207 stride, true, true);
3208
3209 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
3210 tmp_vec,
3211 indir_index, "");
3212 } else {
3213 values[chan] = LLVMBuildLoad(ctx->ac.builder,
3214 ctx->outputs[idx + chan + const_index * stride],
3215 "");
3216 }
3217 }
3218 break;
3219 default:
3220 unreachable("unhandled variable mode");
3221 }
3222 ret = ac_build_varying_gather_values(&ctx->ac, values, ve, comp);
3223 return LLVMBuildBitCast(ctx->ac.builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
3224 }
3225
3226 static void
3227 visit_store_var(struct ac_nir_context *ctx,
3228 nir_intrinsic_instr *instr)
3229 {
3230 LLVMValueRef temp_ptr, value;
3231 int idx = instr->variables[0]->var->data.driver_location;
3232 unsigned comp = instr->variables[0]->var->data.location_frac;
3233 LLVMValueRef src = ac_to_float(&ctx->ac, get_src(ctx, instr->src[0]));
3234 int writemask = instr->const_index[0] << comp;
3235 LLVMValueRef indir_index;
3236 unsigned const_index;
3237 get_deref_offset(ctx, instr->variables[0], false,
3238 NULL, NULL, &const_index, &indir_index);
3239
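/* 64-bit sources are stored as pairs of 32-bit channels, so each
 * writemask bit expands to two; e.g. 0x3 (xy) becomes 0xf. */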
3240 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
3241 int old_writemask = writemask;
3242
3243 src = LLVMBuildBitCast(ctx->ac.builder, src,
3244 LLVMVectorType(ctx->ac.f32, ac_get_llvm_num_components(src) * 2),
3245 "");
3246
3247 writemask = 0;
3248 for (unsigned chan = 0; chan < 4; chan++) {
3249 if (old_writemask & (1 << chan))
3250 writemask |= 3u << (2 * chan);
3251 }
3252 }
3253
3254 switch (instr->variables[0]->var->data.mode) {
3255 case nir_var_shader_out:
3256
3257 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3258 LLVMValueRef vertex_index = NULL;
3259 LLVMValueRef indir_index = NULL;
3260 unsigned const_index = 0;
3261 const unsigned location = instr->variables[0]->var->data.location;
3262 const unsigned driver_location = instr->variables[0]->var->data.driver_location;
3263 const unsigned comp = instr->variables[0]->var->data.location_frac;
3264 const bool is_patch = instr->variables[0]->var->data.patch;
3265 const bool is_compact = instr->variables[0]->var->data.compact;
3266
3267 get_deref_offset(ctx, instr->variables[0],
3268 false, NULL, is_patch ? NULL : &vertex_index,
3269 &const_index, &indir_index);
3270
3271 ctx->abi->store_tcs_outputs(ctx->abi, vertex_index, indir_index,
3272 const_index, location, driver_location,
3273 src, comp, is_patch, is_compact, writemask);
3274 return;
3275 }
3276
3277 for (unsigned chan = 0; chan < 8; chan++) {
3278 int stride = 4;
3279 if (!(writemask & (1 << chan)))
3280 continue;
3281
3282 value = ac_llvm_extract_elem(&ctx->ac, src, chan - comp);
3283
3284 if (instr->variables[0]->var->data.compact)
3285 stride = 1;
3286 if (indir_index) {
3287 unsigned count = glsl_count_attribute_slots(
3288 instr->variables[0]->var->type, false);
3289 count -= chan / 4;
3290 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3291 &ctx->ac, ctx->outputs + idx + chan, count,
3292 stride, true, true);
3293
3294 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
3295 value, indir_index, "");
3296 build_store_values_extended(&ctx->ac, ctx->outputs + idx + chan,
3297 count, stride, tmp_vec);
3298
3299 } else {
3300 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
3301
3302 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
3303 }
3304 }
3305 break;
3306 case nir_var_local:
3307 for (unsigned chan = 0; chan < 8; chan++) {
3308 if (!(writemask & (1 << chan)))
3309 continue;
3310
3311 value = ac_llvm_extract_elem(&ctx->ac, src, chan);
3312 if (indir_index) {
3313 unsigned count = glsl_count_attribute_slots(
3314 instr->variables[0]->var->type, false);
3315 count -= chan / 4;
3316 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3317 &ctx->ac, ctx->locals + idx + chan, count,
3318 4, true, true);
3319
3320 tmp_vec = LLVMBuildInsertElement(ctx->ac.builder, tmp_vec,
3321 value, indir_index, "");
3322 build_store_values_extended(&ctx->ac, ctx->locals + idx + chan,
3323 count, 4, tmp_vec);
3324 } else {
3325 temp_ptr = ctx->locals[idx + chan + const_index * 4];
3326
3327 LLVMBuildStore(ctx->ac.builder, value, temp_ptr);
3328 }
3329 }
3330 break;
3331 case nir_var_shared: {
3332 int writemask = instr->const_index[0];
3333 LLVMValueRef address = build_gep_for_deref(ctx,
3334 instr->variables[0]);
3335 LLVMValueRef val = get_src(ctx, instr->src[0]);
3336 unsigned components =
3337 glsl_get_vector_elements(
3338 nir_deref_tail(&instr->variables[0]->deref)->type);
3339 if (writemask == (1 << components) - 1) {
3340 val = LLVMBuildBitCast(
3341 ctx->ac.builder, val,
3342 LLVMGetElementType(LLVMTypeOf(address)), "");
3343 LLVMBuildStore(ctx->ac.builder, val, address);
3344 } else {
3345 for (unsigned chan = 0; chan < 4; chan++) {
3346 if (!(writemask & (1 << chan)))
3347 continue;
3348 LLVMValueRef ptr =
3349 LLVMBuildStructGEP(ctx->ac.builder,
3350 address, chan, "");
3351 LLVMValueRef src = ac_llvm_extract_elem(&ctx->ac, val,
3352 chan);
3353 src = LLVMBuildBitCast(
3354 ctx->ac.builder, src,
3355 LLVMGetElementType(LLVMTypeOf(ptr)), "");
3356 LLVMBuildStore(ctx->ac.builder, src, ptr);
3357 }
3358 }
3359 break;
3360 }
3361 default:
3362 break;
3363 }
3364 }
3365
3366 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
3367 {
3368 switch (dim) {
3369 case GLSL_SAMPLER_DIM_BUF:
3370 return 1;
3371 case GLSL_SAMPLER_DIM_1D:
3372 return array ? 2 : 1;
3373 case GLSL_SAMPLER_DIM_2D:
3374 return array ? 3 : 2;
3375 case GLSL_SAMPLER_DIM_MS:
3376 return array ? 4 : 3;
3377 case GLSL_SAMPLER_DIM_3D:
3378 case GLSL_SAMPLER_DIM_CUBE:
3379 return 3;
3380 case GLSL_SAMPLER_DIM_RECT:
3381 case GLSL_SAMPLER_DIM_SUBPASS:
3382 return 2;
3383 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3384 return 3;
3385 default:
3386 break;
3387 }
3388 return 0;
3389 }
3390
3393 /* Adjust the sample index according to FMASK.
3394 *
3395 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3396 * which is the identity mapping. Each nibble says which physical sample
3397 * should be fetched to get that sample.
3398 *
3399 * For example, 0x11111100 means there are only 2 samples stored and
3400 * the second sample covers 3/4 of the pixel. When reading samples 0
3401 * and 1, return physical sample 0 (determined by the first two 0s
3402 * in FMASK), otherwise return physical sample 1.
3403 *
3404 * The sample index should be adjusted as follows:
3405 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3406 */
3407 static LLVMValueRef adjust_sample_index_using_fmask(struct ac_llvm_context *ctx,
3408 LLVMValueRef coord_x, LLVMValueRef coord_y,
3409 LLVMValueRef coord_z,
3410 LLVMValueRef sample_index,
3411 LLVMValueRef fmask_desc_ptr)
3412 {
3413 LLVMValueRef fmask_load_address[4];
3414 LLVMValueRef res;
3415
3416 fmask_load_address[0] = coord_x;
3417 fmask_load_address[1] = coord_y;
3418 if (coord_z) {
3419 fmask_load_address[2] = coord_z;
3420 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
3421 }
3422
3423 struct ac_image_args args = {0};
3424
3425 args.opcode = ac_image_load;
3426 args.da = coord_z ? true : false;
3427 args.resource = fmask_desc_ptr;
3428 args.dmask = 0xf;
3429 args.addr = ac_build_gather_values(ctx, fmask_load_address, coord_z ? 4 : 2);
3430
3431 res = ac_build_image_opcode(ctx, &args);
3432
3433 res = ac_to_integer(ctx, res);
3434 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3435 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3436
3437 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3438 res,
3439 ctx->i32_0, "");
3440
3441 LLVMValueRef sample_index4 =
3442 LLVMBuildMul(ctx->builder, sample_index, four, "");
3443 LLVMValueRef shifted_fmask =
3444 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3445 LLVMValueRef final_sample =
3446 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3447
3448 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3449 * resource descriptor is 0 (invalid).
3450 */
3451 LLVMValueRef fmask_desc =
3452 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
3453 ctx->v8i32, "");
3454
3455 LLVMValueRef fmask_word1 =
3456 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3457 ctx->i32_1, "");
3458
3459 LLVMValueRef word1_is_nonzero =
3460 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3461 fmask_word1, ctx->i32_0, "");
3462
3463 /* Replace the MSAA sample index. */
3464 sample_index =
3465 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3466 final_sample, sample_index, "");
3467 return sample_index;
3468 }
3469
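/* Build the address vector for an image intrinsic: extract the coordinates
 * from the NIR source, remap the sample index through FMASK for MSAA
 * images, add the fragment position for subpass attachments, and pad 1D
 * images out to 2D on GFX9, which has no true 1D images.
 */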
3470 static LLVMValueRef get_image_coords(struct ac_nir_context *ctx,
3471 const nir_intrinsic_instr *instr)
3472 {
3473 const struct glsl_type *type = instr->variables[0]->var->type;
3474 if (instr->variables[0]->deref.child)
3475 type = instr->variables[0]->deref.child->type;
3476
3477 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
3478 LLVMValueRef coords[4];
3479 LLVMValueRef masks[] = {
3480 LLVMConstInt(ctx->ac.i32, 0, false), LLVMConstInt(ctx->ac.i32, 1, false),
3481 LLVMConstInt(ctx->ac.i32, 2, false), LLVMConstInt(ctx->ac.i32, 3, false),
3482 };
3483 LLVMValueRef res;
3484 LLVMValueRef sample_index = ac_llvm_extract_elem(&ctx->ac, get_src(ctx, instr->src[1]), 0);
3485
3486 int count;
3487 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
3488 bool is_array = glsl_sampler_type_is_array(type);
3489 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
3490 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3491 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
3492 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3493 bool gfx9_1d = ctx->ac.chip_class >= GFX9 && dim == GLSL_SAMPLER_DIM_1D;
3494 count = image_type_to_components_count(dim, is_array);
3495
3496 if (is_ms) {
3497 LLVMValueRef fmask_load_address[3];
3498 int chan;
3499
3500 fmask_load_address[0] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
3501 fmask_load_address[1] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[1], "");
3502 if (is_array)
3503 fmask_load_address[2] = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[2], "");
3504 else
3505 fmask_load_address[2] = NULL;
3506 if (add_frag_pos) {
3507 for (chan = 0; chan < 2; ++chan)
3508 fmask_load_address[chan] =
3509 LLVMBuildAdd(ctx->ac.builder, fmask_load_address[chan],
3510 LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
3511 ctx->ac.i32, ""), "");
3512 fmask_load_address[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
3513 }
3514 sample_index = adjust_sample_index_using_fmask(&ctx->ac,
3515 fmask_load_address[0],
3516 fmask_load_address[1],
3517 fmask_load_address[2],
3518 sample_index,
3519 get_sampler_desc(ctx, instr->variables[0], AC_DESC_FMASK, NULL, true, false));
3520 }
3521 if (count == 1 && !gfx9_1d) {
3522 if (instr->src[0].ssa->num_components)
3523 res = LLVMBuildExtractElement(ctx->ac.builder, src0, masks[0], "");
3524 else
3525 res = src0;
3526 } else {
3527 int chan;
3528 if (is_ms)
3529 count--;
3530 for (chan = 0; chan < count; ++chan) {
3531 coords[chan] = ac_llvm_extract_elem(&ctx->ac, src0, chan);
3532 }
3533 if (add_frag_pos) {
3534 for (chan = 0; chan < 2; ++chan)
3535 coords[chan] = LLVMBuildAdd(ctx->ac.builder, coords[chan], LLVMBuildFPToUI(ctx->ac.builder, ctx->abi->frag_pos[chan],
3536 ctx->ac.i32, ""), "");
3537 coords[2] = ac_to_integer(&ctx->ac, ctx->abi->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)]);
3538 count++;
3539 }
3540
3541 if (gfx9_1d) {
3542 if (is_array)
3543 coords[2] = coords[1];
3544 coords[1] = ctx->ac.i32_0;
3547 count++;
3548 }
3549
3550 if (is_ms) {
3551 coords[count] = sample_index;
3552 count++;
3553 }
3554
3555 if (count == 3) {
3556 coords[3] = LLVMGetUndef(ctx->ac.i32);
3557 count = 4;
3558 }
3559 res = ac_build_gather_values(&ctx->ac, coords, count);
3560 }
3561 return res;
3562 }
3563
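/* Image loads: buffer images go through llvm.amdgcn.buffer.load.format,
 * everything else through llvm.amdgcn.image.load (whose argument order
 * changed after LLVM 3.9); the result is trimmed to the number of
 * components the NIR destination expects.
 */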
3564 static LLVMValueRef visit_image_load(struct ac_nir_context *ctx,
3565 const nir_intrinsic_instr *instr)
3566 {
3567 LLVMValueRef params[7];
3568 LLVMValueRef res;
3569 char intrinsic_name[64];
3570 const nir_variable *var = instr->variables[0]->var;
3571 const struct glsl_type *type = var->type;
3572
3573 if (instr->variables[0]->deref.child)
3574 type = instr->variables[0]->deref.child->type;
3575
3576 type = glsl_without_array(type);
3577 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3578 params[0] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, false);
3579 params[1] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3580 ctx->ac.i32_0, ""); /* vindex */
3581 params[2] = ctx->ac.i32_0; /* voffset */
3582 params[3] = ctx->ac.i1false; /* glc */
3583 params[4] = ctx->ac.i1false; /* slc */
3584 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->ac.v4f32,
3585 params, 5, 0);
3586
3587 res = trim_vector(&ctx->ac, res, instr->dest.ssa.num_components);
3588 res = ac_to_integer(&ctx->ac, res);
3589 } else {
3590 bool is_da = glsl_sampler_type_is_array(type) ||
3591 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE ||
3592 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS ||
3593 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS_MS;
3594 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
3595 LLVMValueRef glc = ctx->ac.i1false;
3596 LLVMValueRef slc = ctx->ac.i1false;
3597
3598 params[0] = get_image_coords(ctx, instr);
3599 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
3600 params[2] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
3601 if (HAVE_LLVM <= 0x0309) {
3602 params[3] = ctx->ac.i1false; /* r128 */
3603 params[4] = da;
3604 params[5] = glc;
3605 params[6] = slc;
3606 } else {
3607 LLVMValueRef lwe = ctx->ac.i1false;
3608 params[3] = glc;
3609 params[4] = slc;
3610 params[5] = lwe;
3611 params[6] = da;
3612 }
3613
3614 ac_get_image_intr_name("llvm.amdgcn.image.load",
3615 ctx->ac.v4f32, /* vdata */
3616 LLVMTypeOf(params[0]), /* coords */
3617 LLVMTypeOf(params[1]), /* rsrc */
3618 intrinsic_name, sizeof(intrinsic_name));
3619
3620 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.v4f32,
3621 params, 7, AC_FUNC_ATTR_READONLY);
3622 }
3623 return ac_to_integer(&ctx->ac, res);
3624 }
3625
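/* Image stores mirror visit_image_load; on SI, glc=1 is forced so that the
 * store is globally coherent rather than sitting in the vector L1 cache.
 */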
3626 static void visit_image_store(struct ac_nir_context *ctx,
3627 nir_intrinsic_instr *instr)
3628 {
3629 LLVMValueRef params[8];
3630 char intrinsic_name[64];
3631 const nir_variable *var = instr->variables[0]->var;
3632 const struct glsl_type *type = glsl_without_array(var->type);
3633 LLVMValueRef glc = ctx->ac.i1false;
3634 bool force_glc = ctx->ac.chip_class == SI;
3635 if (force_glc)
3636 glc = ctx->ac.i1true;
3637
3638 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3639 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2])); /* data */
3640 params[1] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER, NULL, true, true);
3641 params[2] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3642 ctx->ac.i32_0, ""); /* vindex */
3643 params[3] = ctx->ac.i32_0; /* voffset */
3644 params[4] = glc; /* glc */
3645 params[5] = ctx->ac.i1false; /* slc */
3646 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->ac.voidt,
3647 params, 6, 0);
3648 } else {
3649 bool is_da = glsl_sampler_type_is_array(type) ||
3650 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3651 LLVMValueRef da = is_da ? ctx->ac.i1true : ctx->ac.i1false;
3652 LLVMValueRef slc = ctx->ac.i1false;
3653
3654 params[0] = ac_to_float(&ctx->ac, get_src(ctx, instr->src[2]));
3655 params[1] = get_image_coords(ctx, instr); /* coords */
3656 params[2] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, true);
3657 params[3] = LLVMConstInt(ctx->ac.i32, 15, false); /* dmask */
3658 if (HAVE_LLVM <= 0x0309) {
3659 params[4] = ctx->ac.i1false; /* r128 */
3660 params[5] = da;
3661 params[6] = glc;
3662 params[7] = slc;
3663 } else {
3664 LLVMValueRef lwe = ctx->ac.i1false;
3665 params[4] = glc;
3666 params[5] = slc;
3667 params[6] = lwe;
3668 params[7] = da;
3669 }
3670
3671 ac_get_image_intr_name("llvm.amdgcn.image.store",
3672 LLVMTypeOf(params[0]), /* vdata */
3673 LLVMTypeOf(params[1]), /* coords */
3674 LLVMTypeOf(params[2]), /* rsrc */
3675 intrinsic_name, sizeof(intrinsic_name));
3676
3677 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.voidt,
3678 params, 8, 0);
3679 }
3681 }
3682
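/* Map NIR image atomics onto llvm.amdgcn.{buffer,image}.atomic.*; for
 * non-buffer images the intrinsic name also encodes the type of the
 * coordinate vector.
 */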
3683 static LLVMValueRef visit_image_atomic(struct ac_nir_context *ctx,
3684 const nir_intrinsic_instr *instr)
3685 {
3686 LLVMValueRef params[7];
3687 int param_count = 0;
3688 const nir_variable *var = instr->variables[0]->var;
3689
3690 const char *atomic_name;
3691 char intrinsic_name[41];
3692 const struct glsl_type *type = glsl_without_array(var->type);
3693 MAYBE_UNUSED int length;
3694
3695 bool is_unsigned = glsl_get_sampler_result_type(type) == GLSL_TYPE_UINT;
3696
3697 switch (instr->intrinsic) {
3698 case nir_intrinsic_image_atomic_add:
3699 atomic_name = "add";
3700 break;
3701 case nir_intrinsic_image_atomic_min:
3702 atomic_name = is_unsigned ? "umin" : "smin";
3703 break;
3704 case nir_intrinsic_image_atomic_max:
3705 atomic_name = is_unsigned ? "umax" : "smax";
3706 break;
3707 case nir_intrinsic_image_atomic_and:
3708 atomic_name = "and";
3709 break;
3710 case nir_intrinsic_image_atomic_or:
3711 atomic_name = "or";
3712 break;
3713 case nir_intrinsic_image_atomic_xor:
3714 atomic_name = "xor";
3715 break;
3716 case nir_intrinsic_image_atomic_exchange:
3717 atomic_name = "swap";
3718 break;
3719 case nir_intrinsic_image_atomic_comp_swap:
3720 atomic_name = "cmpswap";
3721 break;
3722 default:
3723 abort();
3724 }
3725
3726 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
3727 params[param_count++] = get_src(ctx, instr->src[3]);
3728 params[param_count++] = get_src(ctx, instr->src[2]);
3729
3730 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3731 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_BUFFER,
3732 NULL, true, true);
3733 params[param_count++] = LLVMBuildExtractElement(ctx->ac.builder, get_src(ctx, instr->src[0]),
3734 ctx->ac.i32_0, ""); /* vindex */
3735 params[param_count++] = ctx->ac.i32_0; /* voffset */
3736 params[param_count++] = ctx->ac.i1false; /* slc */
3737
3738 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3739 "llvm.amdgcn.buffer.atomic.%s", atomic_name);
3740 } else {
3741 char coords_type[8];
3742
3743 bool da = glsl_sampler_type_is_array(type) ||
3744 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3745
3746 LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr);
3747 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE,
3748 NULL, true, true);
3749 params[param_count++] = ctx->ac.i1false; /* r128 */
3750 params[param_count++] = da ? ctx->ac.i1true : ctx->ac.i1false; /* da */
3751 params[param_count++] = ctx->ac.i1false; /* slc */
3752
3753 build_int_type_name(LLVMTypeOf(coords),
3754 coords_type, sizeof(coords_type));
3755
3756 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3757 "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type);
3758 }
3759
3760 assert(length < sizeof(intrinsic_name));
3761 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->ac.i32, params, param_count, 0);
3762 }
3763
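/* imageSize(): implemented with the resinfo opcode, plus two fixups: cube
 * arrays report layer-faces and must be divided by 6, and GFX9 1D arrays
 * report the layer count in the third component (they are addressed as 2D)
 * while NIR expects it in the second.
 */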
3764 static LLVMValueRef visit_image_size(struct ac_nir_context *ctx,
3765 const nir_intrinsic_instr *instr)
3766 {
3767 LLVMValueRef res;
3768 const nir_variable *var = instr->variables[0]->var;
3769 const struct glsl_type *type = instr->variables[0]->var->type;
3770 bool da = glsl_sampler_type_is_array(var->type) ||
3771 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
3772 if (instr->variables[0]->deref.child)
3773 type = instr->variables[0]->deref.child->type;
3774
3775 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
3776 return get_buffer_size(ctx,
3777 get_sampler_desc(ctx, instr->variables[0],
3778 AC_DESC_BUFFER, NULL, true, false), true);
3779
3780 struct ac_image_args args = { 0 };
3781
3782 args.da = da;
3783 args.dmask = 0xf;
3784 args.resource = get_sampler_desc(ctx, instr->variables[0], AC_DESC_IMAGE, NULL, true, false);
3785 args.opcode = ac_image_get_resinfo;
3786 args.addr = ctx->ac.i32_0;
3787
3788 res = ac_build_image_opcode(&ctx->ac, &args);
3789
3790 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
3791
3792 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
3793 glsl_sampler_type_is_array(type)) {
3794 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
3795 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3796 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
3797 res = LLVMBuildInsertElement(ctx->ac.builder, res, z, two, "");
3798 }
3799 if (ctx->ac.chip_class >= GFX9 &&
3800 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_1D &&
3801 glsl_sampler_type_is_array(type)) {
3802 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, res, two, "");
3803 res = LLVMBuildInsertElement(ctx->ac.builder, res, layers,
3804 ctx->ac.i32_1, "");
3806 }
3807 return res;
3808 }
3809
3810 #define NOOP_WAITCNT 0xf7f
3811 #define LGKM_CNT 0x07f
3812 #define VM_CNT 0xf70
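/* Fields of the s_waitcnt immediate: vmcnt in the low bits, expcnt in the
 * middle, lgkmcnt from bit 8 up. ANDing with one of the masks above zeroes
 * that counter's field, making s_waitcnt wait for the corresponding memory
 * operations; NOOP_WAITCNT keeps every field at its maximum (no wait).
 */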
3813
3814 static void emit_membar(struct nir_to_llvm_context *ctx,
3815 const nir_intrinsic_instr *instr)
3816 {
3817 unsigned waitcnt = NOOP_WAITCNT;
3818
3819 switch (instr->intrinsic) {
3820 case nir_intrinsic_memory_barrier:
3821 case nir_intrinsic_group_memory_barrier:
3822 waitcnt &= VM_CNT & LGKM_CNT;
3823 break;
3824 case nir_intrinsic_memory_barrier_atomic_counter:
3825 case nir_intrinsic_memory_barrier_buffer:
3826 case nir_intrinsic_memory_barrier_image:
3827 waitcnt &= VM_CNT;
3828 break;
3829 case nir_intrinsic_memory_barrier_shared:
3830 waitcnt &= LGKM_CNT;
3831 break;
3832 default:
3833 break;
3834 }
3835 if (waitcnt != NOOP_WAITCNT)
3836 ac_build_waitcnt(&ctx->ac, waitcnt);
3837 }
3838
3839 static void emit_barrier(struct ac_llvm_context *ac, gl_shader_stage stage)
3840 {
3841 /* SI only: thanks to a hw bug workaround elsewhere, an entire patch
3842 * always fits into a single wave, so the real barrier instruction is
3843 * not needed and waiting for outstanding LDS/memory ops is enough.
3844 */
3845 if (ac->chip_class == SI && stage == MESA_SHADER_TESS_CTRL) {
3846 ac_build_waitcnt(ac, LGKM_CNT & VM_CNT);
3847 return;
3848 }
3849 ac_build_intrinsic(ac, "llvm.amdgcn.s.barrier",
3850 ac->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3851 }
3852
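/* discard_if kills the lanes where the condition is true: build "cond == 0"
 * and let ac_build_kill_if_false kill every lane where that compare fails.
 */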
3853 static void emit_discard_if(struct ac_nir_context *ctx,
3854 const nir_intrinsic_instr *instr)
3855 {
3856 LLVMValueRef cond;
3857
3858 cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ,
3859 get_src(ctx, instr->src[0]),
3860 ctx->ac.i32_0, "");
3861 ac_build_kill_if_false(&ctx->ac, cond);
3862 }
3863
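/* gl_LocalInvocationIndex: masking tg_size with 0xfc0 keeps bits [11:6] in
 * place, yielding a multiple of 64 that is the index of the wave's first
 * invocation; adding the lane's thread id gives the flat index.
 */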
3864 static LLVMValueRef
3865 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
3866 {
3867 LLVMValueRef result;
3868 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
3869 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
3870 LLVMConstInt(ctx->ac.i32, 0xfc0, false), "");
3871
3872 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
3873 }
3874
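/* Atomics on shared (LDS) variables map directly onto LLVM's
 * atomicrmw/cmpxchg instructions; sequentially-consistent ordering is the
 * conservative choice here.
 */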
3875 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
3876 const nir_intrinsic_instr *instr)
3877 {
3878 LLVMValueRef ptr, result;
3879 LLVMValueRef src = get_src(ctx->nir, instr->src[0]);
3880 ptr = build_gep_for_deref(ctx->nir, instr->variables[0]);
3881
3882 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
3883 LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]);
3884 result = LLVMBuildAtomicCmpXchg(ctx->builder,
3885 ptr, src, src1,
3886 LLVMAtomicOrderingSequentiallyConsistent,
3887 LLVMAtomicOrderingSequentiallyConsistent,
3888 false);
3889 } else {
3890 LLVMAtomicRMWBinOp op;
3891 switch (instr->intrinsic) {
3892 case nir_intrinsic_var_atomic_add:
3893 op = LLVMAtomicRMWBinOpAdd;
3894 break;
3895 case nir_intrinsic_var_atomic_umin:
3896 op = LLVMAtomicRMWBinOpUMin;
3897 break;
3898 case nir_intrinsic_var_atomic_umax:
3899 op = LLVMAtomicRMWBinOpUMax;
3900 break;
3901 case nir_intrinsic_var_atomic_imin:
3902 op = LLVMAtomicRMWBinOpMin;
3903 break;
3904 case nir_intrinsic_var_atomic_imax:
3905 op = LLVMAtomicRMWBinOpMax;
3906 break;
3907 case nir_intrinsic_var_atomic_and:
3908 op = LLVMAtomicRMWBinOpAnd;
3909 break;
3910 case nir_intrinsic_var_atomic_or:
3911 op = LLVMAtomicRMWBinOpOr;
3912 break;
3913 case nir_intrinsic_var_atomic_xor:
3914 op = LLVMAtomicRMWBinOpXor;
3915 break;
3916 case nir_intrinsic_var_atomic_exchange:
3917 op = LLVMAtomicRMWBinOpXchg;
3918 break;
3919 default:
3920 return NULL;
3921 }
3922
3923 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, ac_to_integer(&ctx->ac, src),
3924 LLVMAtomicOrderingSequentiallyConsistent,
3925 false);
3926 }
3927 return result;
3928 }
3929
3930 #define INTERP_CENTER 0
3931 #define INTERP_CENTROID 1
3932 #define INTERP_SAMPLE 2
3933
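/* Pick the barycentric (i, j) pair the hardware precomputed for this
 * interpolation mode and location. Flat inputs return NULL; visit_interp
 * then fetches the attribute directly with ac_build_fs_interp_mov.
 */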
3934 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
3935 enum glsl_interp_mode interp, unsigned location)
3936 {
3937 switch (interp) {
3938 case INTERP_MODE_FLAT:
3939 default:
3940 return NULL;
3941 case INTERP_MODE_SMOOTH:
3942 case INTERP_MODE_NONE:
3943 if (location == INTERP_CENTER)
3944 return ctx->persp_center;
3945 else if (location == INTERP_CENTROID)
3946 return ctx->persp_centroid;
3947 else if (location == INTERP_SAMPLE)
3948 return ctx->persp_sample;
3949 break;
3950 case INTERP_MODE_NOPERSPECTIVE:
3951 if (location == INTERP_CENTER)
3952 return ctx->linear_center;
3953 else if (location == INTERP_CENTROID)
3954 return ctx->linear_centroid;
3955 else if (location == INTERP_SAMPLE)
3956 return ctx->linear_sample;
3957 break;
3958 }
3959 return NULL;
3960 }
3961
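/* Sample positions are not available in a hardware register; the driver
 * uploads them into a ring (RING_PS_SAMPLE_POSITIONS) and the shader loads
 * the requested entry from there.
 */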
3962 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
3963 LLVMValueRef sample_id)
3964 {
3965 LLVMValueRef result;
3966 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_PS_SAMPLE_POSITIONS, false));
3967
3968 ptr = LLVMBuildBitCast(ctx->builder, ptr,
3969 const_array(ctx->ac.v2f32, 64), "");
3970
3971 sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
3972 result = ac_build_load_invariant(&ctx->ac, ptr, sample_id);
3973
3974 return result;
3975 }
3976
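/* gl_SamplePosition: with per-sample shading the fragment position is
 * already at the sample location, so its fractional part is the position
 * within the pixel.
 */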
3977 static LLVMValueRef load_sample_pos(struct ac_nir_context *ctx)
3978 {
3979 LLVMValueRef values[2];
3980
3981 values[0] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[0]);
3982 values[1] = emit_ffract(&ctx->ac, ctx->abi->frag_pos[1]);
3983 return ac_build_gather_values(&ctx->ac, values, 2);
3984 }
3985
3986 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
3987 const nir_intrinsic_instr *instr)
3988 {
3989 LLVMValueRef result[4];
3990 LLVMValueRef interp_param, attr_number;
3991 unsigned location;
3992 unsigned chan;
3993 LLVMValueRef src_c0 = NULL;
3994 LLVMValueRef src_c1 = NULL;
3995 LLVMValueRef src0 = NULL;
3996 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
3997 switch (instr->intrinsic) {
3998 case nir_intrinsic_interp_var_at_centroid:
3999 location = INTERP_CENTROID;
4000 break;
4001 case nir_intrinsic_interp_var_at_sample:
4002 case nir_intrinsic_interp_var_at_offset:
4003 location = INTERP_CENTER;
4004 src0 = get_src(ctx->nir, instr->src[0]);
4005 break;
4006 default:
4007 break;
4008 }
4009
4010 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
4011 src_c0 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_0, ""));
4012 src_c1 = ac_to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->ac.i32_1, ""));
4013 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
4014 LLVMValueRef sample_position;
4015 LLVMValueRef halfval = LLVMConstReal(ctx->ac.f32, 0.5f);
4016
4017 /* fetch sample ID */
4018 sample_position = load_sample_position(ctx, src0);
4019
4020 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_0, "");
4021 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
4022 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->ac.i32_1, "");
4023 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
4024 }
4025 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
4026 attr_number = LLVMConstInt(ctx->ac.i32, input_index, false);
4027
4028 if (location == INTERP_CENTER) {
4029 LLVMValueRef ij_out[2];
4030 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx->nir, interp_param);
4031
4032 /*
4033 * Take the I and J parameters and their DDX/DDY derivatives, and
4034 * compute the adjusted I/J inputs for the interpolator:
4035 * temp1 = ddx * offset/sample.x + I;
4036 * interp_param.I = ddy * offset/sample.y + temp1;
4037 * temp1 = ddx * offset/sample.x + J;
4038 * interp_param.J = ddy * offset/sample.y + temp1;
4039 */
4040 for (unsigned i = 0; i < 2; i++) {
4041 LLVMValueRef ix_ll = LLVMConstInt(ctx->ac.i32, i, false);
4042 LLVMValueRef iy_ll = LLVMConstInt(ctx->ac.i32, i + 2, false);
4043 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
4044 ddxy_out, ix_ll, "");
4045 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
4046 ddxy_out, iy_ll, "");
4047 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
4048 interp_param, ix_ll, "");
4049 LLVMValueRef temp1, temp2;
4050
4051 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
4052 ctx->ac.f32, "");
4053
4054 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
4055 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
4056
4057 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
4058 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
4059
4060 ij_out[i] = LLVMBuildBitCast(ctx->builder,
4061 temp2, ctx->ac.i32, "");
4062 }
4063 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
4065 }
4066
4067 for (chan = 0; chan < 4; chan++) {
4068 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
4069
4070 if (interp_param) {
4071 interp_param = LLVMBuildBitCast(ctx->builder,
4072 interp_param, ctx->ac.v2f32, "");
4073 LLVMValueRef i = LLVMBuildExtractElement(
4074 ctx->builder, interp_param, ctx->ac.i32_0, "");
4075 LLVMValueRef j = LLVMBuildExtractElement(
4076 ctx->builder, interp_param, ctx->ac.i32_1, "");
4077
4078 result[chan] = ac_build_fs_interp(&ctx->ac,
4079 llvm_chan, attr_number,
4080 ctx->prim_mask, i, j);
4081 } else {
4082 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
4083 LLVMConstInt(ctx->ac.i32, 2, false),
4084 llvm_chan, attr_number,
4085 ctx->prim_mask);
4086 }
4087 }
4088 return ac_build_varying_gather_values(&ctx->ac, result, instr->num_components,
4089 instr->variables[0]->var->data.location_frac);
4090 }
4091
4092 static void
4093 visit_emit_vertex(struct ac_shader_abi *abi, unsigned stream, LLVMValueRef *addrs)
4094 {
4095 LLVMValueRef gs_next_vertex;
4096 LLVMValueRef can_emit;
4097 int idx;
4098 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4099
4100 /* Write vertex attribute values to GSVS ring */
4101 gs_next_vertex = LLVMBuildLoad(ctx->builder,
4102 ctx->gs_next_vertex,
4103 "");
4104
4105 /* If this thread has already emitted the declared maximum number of
4106 * vertices, kill it: excessive vertex emissions are not supposed to
4107 * have any effect, and GS threads have no externally observable
4108 * effects other than emitting vertices.
4109 */
4110 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
4111 LLVMConstInt(ctx->ac.i32, ctx->gs_max_out_vertices, false), "");
4112 ac_build_kill_if_false(&ctx->ac, can_emit);
4113
4114 /* Loop over all output slots and copy the enabled ones to the GSVS ring. */
4115 idx = 0;
4116 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4117 LLVMValueRef *out_ptr = &addrs[i * 4];
4118 int length = 4;
4119 int slot = idx;
4120 int slot_inc = 1;
4121
4122 if (!(ctx->output_mask & (1ull << i)))
4123 continue;
4124
4125 if (i == VARYING_SLOT_CLIP_DIST0) {
4126 /* pack clip and cull into a single set of slots */
4127 length = ctx->num_output_clips + ctx->num_output_culls;
4128 if (length > 4)
4129 slot_inc = 2;
4130 }
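/* The GSVS ring is laid out component-major: all vertices of output
 * component (slot * 4 + j) are contiguous, so the byte offset of a vertex
 * is ((slot * 4 + j) * gs_max_out_vertices + vertex) * 4.
 */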
4131 for (unsigned j = 0; j < length; j++) {
4132 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
4133 out_ptr[j], "");
4134 LLVMValueRef voffset = LLVMConstInt(ctx->ac.i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
4135 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
4136 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->ac.i32, 4, false), "");
4137
4138 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
4139
4140 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
4141 out_val, 1,
4142 voffset, ctx->gs2vs_offset, 0,
4143 1, 1, true, true);
4144 }
4145 idx += slot_inc;
4146 }
4147
4148 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
4149 ctx->ac.i32_1, "");
4150 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
4151
4152 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
4153 }
4154
4155 static void
4156 visit_end_primitive(struct nir_to_llvm_context *ctx,
4157 const nir_intrinsic_instr *instr)
4158 {
4159 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
4160 }
4161
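/* gl_TessCoord: u and v arrive as system values; for triangle domains the
 * third barycentric coordinate is reconstructed as 1 - u - v.
 */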
4162 static LLVMValueRef
4163 load_tess_coord(struct ac_shader_abi *abi, LLVMTypeRef type,
4164 unsigned num_components)
4165 {
4166 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4167
4168 LLVMValueRef coord[4] = {
4169 ctx->tes_u,
4170 ctx->tes_v,
4171 ctx->ac.f32_0,
4172 ctx->ac.f32_0,
4173 };
4174
4175 if (ctx->tes_primitive_mode == GL_TRIANGLES)
4176 coord[2] = LLVMBuildFSub(ctx->builder, ctx->ac.f32_1,
4177 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), "");
4178
4179 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, num_components);
4180 return LLVMBuildBitCast(ctx->builder, result, type, "");
4181 }
4182
4183 static LLVMValueRef
4184 load_patch_vertices_in(struct ac_shader_abi *abi)
4185 {
4186 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4187 return LLVMConstInt(ctx->ac.i32, ctx->options->key.tcs.input_vertices, false);
4188 }
4189
4190 static void visit_intrinsic(struct ac_nir_context *ctx,
4191 nir_intrinsic_instr *instr)
4192 {
4193 LLVMValueRef result = NULL;
4194
4195 switch (instr->intrinsic) {
4196 case nir_intrinsic_load_work_group_id: {
4197 LLVMValueRef values[3];
4198
4199 for (int i = 0; i < 3; i++) {
4200 values[i] = ctx->nctx->workgroup_ids[i] ?
4201 ctx->nctx->workgroup_ids[i] : ctx->ac.i32_0;
4202 }
4203
4204 result = ac_build_gather_values(&ctx->ac, values, 3);
4205 break;
4206 }
4207 case nir_intrinsic_load_base_vertex: {
4208 result = ctx->abi->base_vertex;
4209 break;
4210 }
4211 case nir_intrinsic_load_vertex_id_zero_base: {
4212 result = ctx->abi->vertex_id;
4213 break;
4214 }
4215 case nir_intrinsic_load_local_invocation_id: {
4216 result = ctx->nctx->local_invocation_ids;
4217 break;
4218 }
4219 case nir_intrinsic_load_base_instance:
4220 result = ctx->abi->start_instance;
4221 break;
4222 case nir_intrinsic_load_draw_id:
4223 result = ctx->abi->draw_id;
4224 break;
4225 case nir_intrinsic_load_view_index:
4226 result = ctx->nctx->view_index ? ctx->nctx->view_index : ctx->ac.i32_0;
4227 break;
4228 case nir_intrinsic_load_invocation_id:
4229 if (ctx->stage == MESA_SHADER_TESS_CTRL)
4230 result = unpack_param(&ctx->ac, ctx->abi->tcs_rel_ids, 8, 5);
4231 else
4232 result = ctx->abi->gs_invocation_id;
4233 break;
4234 case nir_intrinsic_load_primitive_id:
4235 if (ctx->stage == MESA_SHADER_GEOMETRY) {
4236 result = ctx->abi->gs_prim_id;
4237 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
4238 result = ctx->abi->tcs_patch_id;
4239 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
4240 result = ctx->abi->tes_patch_id;
4241 } else
4242 fprintf(stderr, "Unknown primitive id intrinsic: %d", ctx->stage);
4243 break;
4244 case nir_intrinsic_load_sample_id:
4245 result = unpack_param(&ctx->ac, ctx->abi->ancillary, 8, 4);
4246 break;
4247 case nir_intrinsic_load_sample_pos:
4248 result = load_sample_pos(ctx);
4249 break;
4250 case nir_intrinsic_load_sample_mask_in:
4251 result = ctx->abi->sample_coverage;
4252 break;
4253 case nir_intrinsic_load_frag_coord: {
4254 LLVMValueRef values[4] = {
4255 ctx->abi->frag_pos[0],
4256 ctx->abi->frag_pos[1],
4257 ctx->abi->frag_pos[2],
4258 ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, ctx->abi->frag_pos[3])
4259 };
4260 result = ac_build_gather_values(&ctx->ac, values, 4);
4261 break;
4262 }
4263 case nir_intrinsic_load_front_face:
4264 result = ctx->abi->front_face;
4265 break;
4266 case nir_intrinsic_load_instance_id:
4267 result = ctx->abi->instance_id;
4268 break;
4269 case nir_intrinsic_load_num_work_groups:
4270 result = ctx->nctx->num_work_groups;
4271 break;
4272 case nir_intrinsic_load_local_invocation_index:
4273 result = visit_load_local_invocation_index(ctx->nctx);
4274 break;
4275 case nir_intrinsic_load_push_constant:
4276 result = visit_load_push_constant(ctx->nctx, instr);
4277 break;
4278 case nir_intrinsic_vulkan_resource_index:
4279 result = visit_vulkan_resource_index(ctx->nctx, instr);
4280 break;
4281 case nir_intrinsic_vulkan_resource_reindex:
4282 result = visit_vulkan_resource_reindex(ctx->nctx, instr);
4283 break;
4284 case nir_intrinsic_store_ssbo:
4285 visit_store_ssbo(ctx, instr);
4286 break;
4287 case nir_intrinsic_load_ssbo:
4288 result = visit_load_buffer(ctx, instr);
4289 break;
4290 case nir_intrinsic_ssbo_atomic_add:
4291 case nir_intrinsic_ssbo_atomic_imin:
4292 case nir_intrinsic_ssbo_atomic_umin:
4293 case nir_intrinsic_ssbo_atomic_imax:
4294 case nir_intrinsic_ssbo_atomic_umax:
4295 case nir_intrinsic_ssbo_atomic_and:
4296 case nir_intrinsic_ssbo_atomic_or:
4297 case nir_intrinsic_ssbo_atomic_xor:
4298 case nir_intrinsic_ssbo_atomic_exchange:
4299 case nir_intrinsic_ssbo_atomic_comp_swap:
4300 result = visit_atomic_ssbo(ctx, instr);
4301 break;
4302 case nir_intrinsic_load_ubo:
4303 result = visit_load_ubo_buffer(ctx, instr);
4304 break;
4305 case nir_intrinsic_get_buffer_size:
4306 result = visit_get_buffer_size(ctx, instr);
4307 break;
4308 case nir_intrinsic_load_var:
4309 result = visit_load_var(ctx, instr);
4310 break;
4311 case nir_intrinsic_store_var:
4312 visit_store_var(ctx, instr);
4313 break;
4314 case nir_intrinsic_image_load:
4315 result = visit_image_load(ctx, instr);
4316 break;
4317 case nir_intrinsic_image_store:
4318 visit_image_store(ctx, instr);
4319 break;
4320 case nir_intrinsic_image_atomic_add:
4321 case nir_intrinsic_image_atomic_min:
4322 case nir_intrinsic_image_atomic_max:
4323 case nir_intrinsic_image_atomic_and:
4324 case nir_intrinsic_image_atomic_or:
4325 case nir_intrinsic_image_atomic_xor:
4326 case nir_intrinsic_image_atomic_exchange:
4327 case nir_intrinsic_image_atomic_comp_swap:
4328 result = visit_image_atomic(ctx, instr);
4329 break;
4330 case nir_intrinsic_image_size:
4331 result = visit_image_size(ctx, instr);
4332 break;
4333 case nir_intrinsic_discard:
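/* "kilp" is the legacy AMDGPU kill-pixel intrinsic; the name is not a typo. */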
4334 ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
4335 LLVMVoidTypeInContext(ctx->ac.context),
4336 NULL, 0, AC_FUNC_ATTR_LEGACY);
4337 break;
4338 case nir_intrinsic_discard_if:
4339 emit_discard_if(ctx, instr);
4340 break;
4341 case nir_intrinsic_memory_barrier:
4342 case nir_intrinsic_group_memory_barrier:
4343 case nir_intrinsic_memory_barrier_atomic_counter:
4344 case nir_intrinsic_memory_barrier_buffer:
4345 case nir_intrinsic_memory_barrier_image:
4346 case nir_intrinsic_memory_barrier_shared:
4347 emit_membar(ctx->nctx, instr);
4348 break;
4349 case nir_intrinsic_barrier:
4350 emit_barrier(&ctx->ac, ctx->stage);
4351 break;
4352 case nir_intrinsic_var_atomic_add:
4353 case nir_intrinsic_var_atomic_imin:
4354 case nir_intrinsic_var_atomic_umin:
4355 case nir_intrinsic_var_atomic_imax:
4356 case nir_intrinsic_var_atomic_umax:
4357 case nir_intrinsic_var_atomic_and:
4358 case nir_intrinsic_var_atomic_or:
4359 case nir_intrinsic_var_atomic_xor:
4360 case nir_intrinsic_var_atomic_exchange:
4361 case nir_intrinsic_var_atomic_comp_swap:
4362 result = visit_var_atomic(ctx->nctx, instr);
4363 break;
4364 case nir_intrinsic_interp_var_at_centroid:
4365 case nir_intrinsic_interp_var_at_sample:
4366 case nir_intrinsic_interp_var_at_offset:
4367 result = visit_interp(ctx->nctx, instr);
4368 break;
4369 case nir_intrinsic_emit_vertex:
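/* Only stream 0 is supported; const_index[0] is the stream id. */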
4370 assert(instr->const_index[0] == 0);
4371 ctx->abi->emit_vertex(ctx->abi, 0, ctx->outputs);
4372 break;
4373 case nir_intrinsic_end_primitive:
4374 visit_end_primitive(ctx->nctx, instr);
4375 break;
4376 case nir_intrinsic_load_tess_coord: {
4377 LLVMTypeRef type = ctx->nctx ?
4378 get_def_type(ctx->nctx->nir, &instr->dest.ssa) :
4379 NULL;
4380 result = ctx->abi->load_tess_coord(ctx->abi, type, instr->num_components);
4381 break;
4382 }
4383 case nir_intrinsic_load_tess_level_outer:
4384 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_OUTER);
4385 break;
4386 case nir_intrinsic_load_tess_level_inner:
4387 result = ctx->abi->load_tess_level(ctx->abi, VARYING_SLOT_TESS_LEVEL_INNER);
4388 break;
4389 case nir_intrinsic_load_patch_vertices_in:
4390 result = ctx->abi->load_patch_vertices_in(ctx->abi);
4391 break;
4392 default:
4393 fprintf(stderr, "Unknown intrinsic: ");
4394 nir_print_instr(&instr->instr, stderr);
4395 fprintf(stderr, "\n");
4396 break;
4397 }
4398 if (result) {
4399 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4400 }
4401 }
4402
4403 static LLVMValueRef radv_load_ssbo(struct ac_shader_abi *abi,
4404 LLVMValueRef buffer_ptr, bool write)
4405 {
4406 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4407
4408 if (write && ctx->stage == MESA_SHADER_FRAGMENT)
4409 ctx->shader_info->fs.writes_memory = true;
4410
4411 return LLVMBuildLoad(ctx->builder, buffer_ptr, "");
4412 }
4413
4414 static LLVMValueRef radv_load_ubo(struct ac_shader_abi *abi, LLVMValueRef buffer_ptr)
4415 {
4416 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4417
4418 return LLVMBuildLoad(ctx->builder, buffer_ptr, "");
4419 }
4420
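/* Resolve a (set, binding, index) triple to a descriptor loaded from the
 * descriptor set. FMASK descriptors live 32 bytes after the corresponding
 * image descriptor, and the sampler half of a combined image+sampler starts
 * at byte 64. Immutable samplers that are known at compile time are emitted
 * as constants instead of being loaded.
 */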
4421 static LLVMValueRef radv_get_sampler_desc(struct ac_shader_abi *abi,
4422 unsigned descriptor_set,
4423 unsigned base_index,
4424 unsigned constant_index,
4425 LLVMValueRef index,
4426 enum ac_descriptor_type desc_type,
4427 bool image, bool write)
4428 {
4429 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
4430 LLVMValueRef list = ctx->descriptor_sets[descriptor_set];
4431 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[descriptor_set].layout;
4432 struct radv_descriptor_set_binding_layout *binding = layout->binding + base_index;
4433 unsigned offset = binding->offset;
4434 unsigned stride = binding->size;
4435 unsigned type_size;
4436 LLVMBuilderRef builder = ctx->builder;
4437 LLVMTypeRef type;
4438
4439 assert(base_index < layout->binding_count);
4440
4441 if (write && ctx->stage == MESA_SHADER_FRAGMENT)
4442 ctx->shader_info->fs.writes_memory = true;
4443
4444 switch (desc_type) {
4445 case AC_DESC_IMAGE:
4446 type = ctx->ac.v8i32;
4447 type_size = 32;
4448 break;
4449 case AC_DESC_FMASK:
4450 type = ctx->ac.v8i32;
4451 offset += 32;
4452 type_size = 32;
4453 break;
4454 case AC_DESC_SAMPLER:
4455 type = ctx->ac.v4i32;
4456 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4457 offset += 64;
4458
4459 type_size = 16;
4460 break;
4461 case AC_DESC_BUFFER:
4462 type = ctx->ac.v4i32;
4463 type_size = 16;
4464 break;
4465 default:
4466 unreachable("invalid desc_type\n");
4467 }
4468
4469 offset += constant_index * stride;
4470
4471 if (desc_type == AC_DESC_SAMPLER && binding->immutable_samplers_offset &&
4472 (!index || binding->immutable_samplers_equal)) {
4473 if (binding->immutable_samplers_equal)
4474 constant_index = 0;
4475
4476 const uint32_t *samplers = radv_immutable_samplers(layout, binding);
4477
4478 LLVMValueRef constants[] = {
4479 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 0], 0),
4480 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 1], 0),
4481 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 2], 0),
4482 LLVMConstInt(ctx->ac.i32, samplers[constant_index * 4 + 3], 0),
4483 };
4484 return ac_build_gather_values(&ctx->ac, constants, 4);
4485 }
4486
4487 assert(stride % type_size == 0);
4488
4489 if (!index)
4490 index = ctx->ac.i32_0;
4491
4492 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->ac.i32, stride / type_size, 0), "");
4493
4494 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->ac.i32, offset, 0));
4495 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
4496
4497 return ac_build_load_to_sgpr(&ctx->ac, list, index);
4498 }
4499
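/* Walk a NIR deref chain and flatten it into a constant descriptor offset
 * plus an optional dynamic index, then let the ABI load the descriptor.
 */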
4500 static LLVMValueRef get_sampler_desc(struct ac_nir_context *ctx,
4501 const nir_deref_var *deref,
4502 enum ac_descriptor_type desc_type,
4503 const nir_tex_instr *tex_instr,
4504 bool image, bool write)
4505 {
4506 LLVMValueRef index = NULL;
4507 unsigned constant_index = 0;
4508 unsigned descriptor_set;
4509 unsigned base_index;
4510
4511 if (!deref) {
4512 assert(tex_instr && !image);
4513 descriptor_set = 0;
4514 base_index = tex_instr->sampler_index;
4515 } else {
4516 const nir_deref *tail = &deref->deref;
4517 while (tail->child) {
4518 const nir_deref_array *child = nir_deref_as_array(tail->child);
4519 unsigned array_size = glsl_get_aoa_size(tail->child->type);
4520
4521 if (!array_size)
4522 array_size = 1;
4523
4524 assert(child->deref_array_type != nir_deref_array_type_wildcard);
4525
4526 if (child->deref_array_type == nir_deref_array_type_indirect) {
4527 LLVMValueRef indirect = get_src(ctx, child->indirect);
4528
4529 indirect = LLVMBuildMul(ctx->ac.builder, indirect,
4530 LLVMConstInt(ctx->ac.i32, array_size, false), "");
4531
4532 if (!index)
4533 index = indirect;
4534 else
4535 index = LLVMBuildAdd(ctx->ac.builder, index, indirect, "");
4536 }
4537
4538 constant_index += child->base_offset * array_size;
4539
4540 tail = &child->deref;
4541 }
4542 descriptor_set = deref->var->data.descriptor_set;
4543 base_index = deref->var->data.binding;
4544 }
4545
4546 return ctx->abi->load_sampler_desc(ctx->abi,
4547 descriptor_set,
4548 base_index,
4549 constant_index, index,
4550 desc_type, image, write);
4551 }
4552
4553 static void set_tex_fetch_args(struct ac_llvm_context *ctx,
4554 struct ac_image_args *args,
4555 const nir_tex_instr *instr,
4556 nir_texop op,
4557 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
4558 LLVMValueRef *param, unsigned count,
4559 unsigned dmask)
4560 {
4561 unsigned is_rect = 0;
4562 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
4563
4564 if (op == nir_texop_lod)
4565 da = false;
4566 /* Pad to power of two vector */
4567 while (count < util_next_power_of_two(count))
4568 param[count++] = LLVMGetUndef(ctx->i32);
4569
4570 if (count > 1)
4571 args->addr = ac_build_gather_values(ctx, param, count);
4572 else
4573 args->addr = param[0];
4574
4575 args->resource = res_ptr;
4576 args->sampler = samp_ptr;
4577
4578 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
4579 args->addr = param[0];
4580 return;
4581 }
4582
4583 args->dmask = dmask;
4584 args->unorm = is_rect;
4585 args->da = da;
4586 }
4587
4588 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4589 *
4590 * SI-CI:
4591 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4592 * filtering manually. The driver sets img7 to a mask clearing
4593 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4594 * s_and_b32 samp0, samp0, img7
4595 *
4596 * VI:
4597 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4598 */
4599 static LLVMValueRef sici_fix_sampler_aniso(struct ac_nir_context *ctx,
4600 LLVMValueRef res, LLVMValueRef samp)
4601 {
4602 LLVMBuilderRef builder = ctx->ac.builder;
4603 LLVMValueRef img7, samp0;
4604
4605 if (ctx->ac.chip_class >= VI)
4606 return samp;
4607
4608 img7 = LLVMBuildExtractElement(builder, res,
4609 LLVMConstInt(ctx->ac.i32, 7, 0), "");
4610 samp0 = LLVMBuildExtractElement(builder, samp,
4611 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4612 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4613 return LLVMBuildInsertElement(builder, samp, samp0,
4614 LLVMConstInt(ctx->ac.i32, 0, 0), "");
4615 }
4616
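/* Fetch the resource, sampler and (for MSAA fetches) FMASK descriptors for
 * a texture instruction; GLSL-style combined samplers fall back to the
 * texture deref when no separate sampler deref is present.
 */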
4617 static void tex_fetch_ptrs(struct ac_nir_context *ctx,
4618 nir_tex_instr *instr,
4619 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4620 LLVMValueRef *fmask_ptr)
4621 {
4622 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4623 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_BUFFER, instr, false, false);
4624 else
4625 *res_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_IMAGE, instr, false, false);
4626 if (samp_ptr) {
4627 if (instr->sampler)
4628 *samp_ptr = get_sampler_desc(ctx, instr->sampler, AC_DESC_SAMPLER, instr, false, false);
4629 else
4630 *samp_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_SAMPLER, instr, false, false);
4631 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4632 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4633 }
4634 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
4635 instr->op == nir_texop_samples_identical))
4636 *fmask_ptr = get_sampler_desc(ctx, instr->texture, AC_DESC_FMASK, instr, false, false);
4637 }
4638
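/* Array layers are selected by rounding the floating-point slice coordinate
 * to the nearest integer (llvm.rint) before it is used as a layer index.
 */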
4639 static LLVMValueRef apply_round_slice(struct ac_llvm_context *ctx,
4640 LLVMValueRef coord)
4641 {
4642 coord = ac_to_float(ctx, coord);
4643 coord = ac_build_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
4644 coord = ac_to_integer(ctx, coord);
4645 return coord;
4646 }
4647
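/* Translate a NIR texture instruction: gather the sources, pack them into
 * the hardware address vector in the required order (offsets, bias, depth
 * compare, derivatives, coordinates, LOD/sample index), then emit the image
 * intrinsic and fix the result up where NIR and hardware semantics differ.
 */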
4648 static void visit_tex(struct ac_nir_context *ctx, nir_tex_instr *instr)
4649 {
4650 LLVMValueRef result = NULL;
4651 struct ac_image_args args = { 0 };
4652 unsigned dmask = 0xf;
4653 LLVMValueRef address[16];
4654 LLVMValueRef coords[5];
4655 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
4656 LLVMValueRef bias = NULL, offsets = NULL;
4657 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
4658 LLVMValueRef ddx = NULL, ddy = NULL;
4659 LLVMValueRef derivs[6];
4660 unsigned chan, count = 0;
4661 unsigned const_src = 0, num_deriv_comp = 0;
4662 bool lod_is_zero = false;
4663
4664 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
4665
4666 for (unsigned i = 0; i < instr->num_srcs; i++) {
4667 switch (instr->src[i].src_type) {
4668 case nir_tex_src_coord:
4669 coord = get_src(ctx, instr->src[i].src);
4670 break;
4671 case nir_tex_src_projector:
4672 break;
4673 case nir_tex_src_comparator:
4674 comparator = get_src(ctx, instr->src[i].src);
4675 break;
4676 case nir_tex_src_offset:
4677 offsets = get_src(ctx, instr->src[i].src);
4678 const_src = i;
4679 break;
4680 case nir_tex_src_bias:
4681 bias = get_src(ctx, instr->src[i].src);
4682 break;
4683 case nir_tex_src_lod: {
4684 nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
4685
4686 if (val && val->i32[0] == 0)
4687 lod_is_zero = true;
4688 lod = get_src(ctx, instr->src[i].src);
4689 break;
4690 }
4691 case nir_tex_src_ms_index:
4692 sample_index = get_src(ctx, instr->src[i].src);
4693 break;
4694 case nir_tex_src_ms_mcs:
4695 break;
4696 case nir_tex_src_ddx:
4697 ddx = get_src(ctx, instr->src[i].src);
4698 num_deriv_comp = instr->src[i].src.ssa->num_components;
4699 break;
4700 case nir_tex_src_ddy:
4701 ddy = get_src(ctx, instr->src[i].src);
4702 break;
4703 case nir_tex_src_texture_offset:
4704 case nir_tex_src_sampler_offset:
4705 case nir_tex_src_plane:
4706 default:
4707 break;
4708 }
4709 }
4710
4711 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4712 result = get_buffer_size(ctx, res_ptr, true);
4713 goto write_result;
4714 }
4715
4716 if (instr->op == nir_texop_texture_samples) {
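/* textureSamples(): decode dword 3 of the resource descriptor. Bits
 * [31:28] hold the resource type (0xe and 0xf are the MSAA types, hence
 * the mask with 0xe), and for MSAA resources bits [19:16] hold
 * log2(samples); non-MSAA views report a single sample.
 */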
4717 LLVMValueRef res, samples, is_msaa;
4718 res = LLVMBuildBitCast(ctx->ac.builder, res_ptr, ctx->ac.v8i32, "");
4719 samples = LLVMBuildExtractElement(ctx->ac.builder, res,
4720 LLVMConstInt(ctx->ac.i32, 3, false), "");
4721 is_msaa = LLVMBuildLShr(ctx->ac.builder, samples,
4722 LLVMConstInt(ctx->ac.i32, 28, false), "");
4723 is_msaa = LLVMBuildAnd(ctx->ac.builder, is_msaa,
4724 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4725 is_msaa = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, is_msaa,
4726 LLVMConstInt(ctx->ac.i32, 0xe, false), "");
4727
4728 samples = LLVMBuildLShr(ctx->ac.builder, samples,
4729 LLVMConstInt(ctx->ac.i32, 16, false), "");
4730 samples = LLVMBuildAnd(ctx->ac.builder, samples,
4731 LLVMConstInt(ctx->ac.i32, 0xf, false), "");
4732 samples = LLVMBuildShl(ctx->ac.builder, ctx->ac.i32_1,
4733 samples, "");
4734 samples = LLVMBuildSelect(ctx->ac.builder, is_msaa, samples,
4735 ctx->ac.i32_1, "");
4736 result = samples;
4737 goto write_result;
4738 }
4739
4740 if (coord)
4741 for (chan = 0; chan < instr->coord_components; chan++)
4742 coords[chan] = ac_llvm_extract_elem(&ctx->ac, coord, chan);
4743
4744 if (offsets && instr->op != nir_texop_txf) {
4745 LLVMValueRef offset[3], pack;
4746 for (chan = 0; chan < 3; ++chan)
4747 offset[chan] = ctx->ac.i32_0;
4748
4749 args.offset = true;
4750 for (chan = 0; chan < ac_get_llvm_num_components(offsets); chan++) {
4751 offset[chan] = ac_llvm_extract_elem(&ctx->ac, offsets, chan);
4752 offset[chan] = LLVMBuildAnd(ctx->ac.builder, offset[chan],
4753 LLVMConstInt(ctx->ac.i32, 0x3f, false), "");
4754 if (chan)
4755 offset[chan] = LLVMBuildShl(ctx->ac.builder, offset[chan],
4756 LLVMConstInt(ctx->ac.i32, chan * 8, false), "");
4757 }
4758 pack = LLVMBuildOr(ctx->ac.builder, offset[0], offset[1], "");
4759 pack = LLVMBuildOr(ctx->ac.builder, pack, offset[2], "");
4760 address[count++] = pack;
4762 }
4763 /* pack LOD bias value */
4764 if (instr->op == nir_texop_txb && bias) {
4765 address[count++] = bias;
4766 }
4767
4768 /* Pack depth comparison value */
4769 if (instr->is_shadow && comparator) {
4770 LLVMValueRef z = ac_to_float(&ctx->ac,
4771 ac_llvm_extract_elem(&ctx->ac, comparator, 0));
4772
4773 /* TC-compatible HTILE on radeonsi promotes Z16 and Z24 to Z32_FLOAT,
4774 * so the depth comparison value isn't clamped for Z16 and
4775 * Z24 anymore. Do it manually here.
4776 *
4777 * It's unnecessary if the original texture format was
4778 * Z32_FLOAT, but we don't know that here.
4779 */
4780 if (ctx->ac.chip_class == VI && ctx->abi->clamp_shadow_reference)
4781 z = ac_build_clamp(&ctx->ac, z);
4782
4783 address[count++] = z;
4784 }
4785
4786 /* pack derivatives */
4787 if (ddx || ddy) {
4788 int num_src_deriv_channels, num_dest_deriv_channels;
4789 switch (instr->sampler_dim) {
4790 case GLSL_SAMPLER_DIM_3D:
4791 case GLSL_SAMPLER_DIM_CUBE:
4792 num_deriv_comp = 3;
4793 num_src_deriv_channels = 3;
4794 num_dest_deriv_channels = 3;
4795 break;
4796 case GLSL_SAMPLER_DIM_2D:
4797 default:
4798 num_src_deriv_channels = 2;
4799 num_dest_deriv_channels = 2;
4800 num_deriv_comp = 2;
4801 break;
4802 case GLSL_SAMPLER_DIM_1D:
4803 num_src_deriv_channels = 1;
4804 if (ctx->ac.chip_class >= GFX9) {
4805 num_dest_deriv_channels = 2;
4806 num_deriv_comp = 2;
4807 } else {
4808 num_dest_deriv_channels = 1;
4809 num_deriv_comp = 1;
4810 }
4811 break;
4812 }
4813
4814 for (unsigned i = 0; i < num_src_deriv_channels; i++) {
4815 derivs[i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddx, i));
4816 derivs[num_dest_deriv_channels + i] = ac_to_float(&ctx->ac, ac_llvm_extract_elem(&ctx->ac, ddy, i));
4817 }
4818 for (unsigned i = num_src_deriv_channels; i < num_dest_deriv_channels; i++) {
4819 derivs[i] = ctx->ac.f32_0;
4820 derivs[num_dest_deriv_channels + i] = ctx->ac.f32_0;
4821 }
4822 }
4823
4824 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
4825 for (chan = 0; chan < instr->coord_components; chan++)
4826 coords[chan] = ac_to_float(&ctx->ac, coords[chan]);
4827 if (instr->coord_components == 3)
4828 coords[3] = LLVMGetUndef(ctx->ac.f32);
4829 ac_prepare_cube_coords(&ctx->ac,
4830 instr->op == nir_texop_txd, instr->is_array,
4831 instr->op == nir_texop_lod, coords, derivs);
4832 if (num_deriv_comp)
4833 num_deriv_comp--;
4834 }
4835
4836 if (ddx || ddy) {
4837 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
4838 address[count++] = derivs[i];
4839 }
4840
4841 /* Pack texture coordinates */
4842 if (coord) {
4843 address[count++] = coords[0];
4844 if (instr->coord_components > 1) {
4845 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
4846 coords[1] = apply_round_slice(&ctx->ac, coords[1]);
4847 }
4848 address[count++] = coords[1];
4849 }
4850 if (instr->coord_components > 2) {
4851 /* This seems like a bit of a hack, but it passes the Vulkan CTS. */
4852 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
4853 instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
4854 instr->op != nir_texop_txf) {
4855 coords[2] = apply_round_slice(&ctx->ac, coords[2]);
4856 }
4857 address[count++] = coords[2];
4858 }
4859
4860 if (ctx->ac.chip_class >= GFX9) {
4861 LLVMValueRef filler;
4862 if (instr->op == nir_texop_txf)
4863 filler = ctx->ac.i32_0;
4864 else
4865 filler = LLVMConstReal(ctx->ac.f32, 0.5);
4866
4867 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D) {
4868 /* nir_texop_lod is excluded because it does not take a slice,
4869 * even for array textures. */
4870 if (instr->is_array && instr->op != nir_texop_lod) {
4871 address[count] = address[count - 1];
4872 address[count - 1] = filler;
4873 count++;
4874 } else
4875 address[count++] = filler;
4876 }
4877 }
4878 }
4879
4880 /* Pack LOD */
4881 if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) ||
4882 instr->op == nir_texop_txf)) {
4883 address[count++] = lod;
4884 } else if (instr->op == nir_texop_txf_ms && sample_index) {
4885 address[count++] = sample_index;
4886 } else if (instr->op == nir_texop_txs) {
4887 count = 0;
4888 if (lod)
4889 address[count++] = lod;
4890 else
4891 address[count++] = ctx->ac.i32_0;
4892 }
4893
4894 for (chan = 0; chan < count; chan++) {
4895 address[chan] = LLVMBuildBitCast(ctx->ac.builder,
4896 address[chan], ctx->ac.i32, "");
4897 }
4898
4899 if (instr->op == nir_texop_samples_identical) {
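/* samples_identical: fetch the FMASK word for the pixel; if it is 0, every
 * sample maps to physical sample 0, i.e. all samples are identical.
 */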
4900 LLVMValueRef txf_address[4];
4901 struct ac_image_args txf_args = { 0 };
4902 unsigned txf_count = count;
4903 memcpy(txf_address, address, sizeof(txf_address));
4904
4905 if (!instr->is_array)
4906 txf_address[2] = ctx->ac.i32_0;
4907 txf_address[3] = ctx->ac.i32_0;
4908
4909 set_tex_fetch_args(&ctx->ac, &txf_args, instr, nir_texop_txf,
4910 fmask_ptr, NULL,
4911 txf_address, txf_count, 0xf);
4912
4913 result = build_tex_intrinsic(ctx, instr, false, &txf_args);
4914
4915 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4916 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->ac.i32_0);
4917 goto write_result;
4918 }
4919
4920 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
4921 instr->op != nir_texop_txs) {
4922 unsigned sample_chan = instr->is_array ? 3 : 2;
4923 address[sample_chan] = adjust_sample_index_using_fmask(&ctx->ac,
4924 address[0],
4925 address[1],
4926 instr->is_array ? address[2] : NULL,
4927 address[sample_chan],
4928 fmask_ptr);
4929 }
4930
4931 if (offsets && instr->op == nir_texop_txf) {
4932 nir_const_value *const_offset =
4933 nir_src_as_const_value(instr->src[const_src].src);
4934 int num_offsets = instr->src[const_src].src.ssa->num_components;
4935 assert(const_offset);
4936 num_offsets = MIN2(num_offsets, instr->coord_components);
4937 if (num_offsets > 2)
4938 address[2] = LLVMBuildAdd(ctx->ac.builder,
4939 address[2], LLVMConstInt(ctx->ac.i32, const_offset->i32[2], false), "");
4940 if (num_offsets > 1)
4941 address[1] = LLVMBuildAdd(ctx->ac.builder,
4942 address[1], LLVMConstInt(ctx->ac.i32, const_offset->i32[1], false), "");
4943 address[0] = LLVMBuildAdd(ctx->ac.builder,
4944 address[0], LLVMConstInt(ctx->ac.i32, const_offset->i32[0], false), "");
4946 }
4947
4948 /* TODO TG4 support */
4949 if (instr->op == nir_texop_tg4) {
4950 if (instr->is_shadow)
4951 dmask = 1;
4952 else
4953 dmask = 1 << instr->component;
4954 }
4955 set_tex_fetch_args(&ctx->ac, &args, instr, instr->op,
4956 res_ptr, samp_ptr, address, count, dmask);
4957
4958 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
4959
4960 if (instr->op == nir_texop_query_levels)
4961 result = LLVMBuildExtractElement(ctx->ac.builder, result, LLVMConstInt(ctx->ac.i32, 3, false), "");
4962 else if (instr->is_shadow && instr->is_new_style_shadow &&
4963 instr->op != nir_texop_txs && instr->op != nir_texop_lod &&
4964 instr->op != nir_texop_tg4)
4965 result = LLVMBuildExtractElement(ctx->ac.builder, result, ctx->ac.i32_0, "");
4966 else if (instr->op == nir_texop_txs &&
4967 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4968 instr->is_array) {
4969 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4970 LLVMValueRef six = LLVMConstInt(ctx->ac.i32, 6, false);
4971 LLVMValueRef z = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4972 z = LLVMBuildSDiv(ctx->ac.builder, z, six, "");
4973 result = LLVMBuildInsertElement(ctx->ac.builder, result, z, two, "");
4974 } else if (ctx->ac.chip_class >= GFX9 &&
4975 instr->op == nir_texop_txs &&
4976 instr->sampler_dim == GLSL_SAMPLER_DIM_1D &&
4977 instr->is_array) {
4978 LLVMValueRef two = LLVMConstInt(ctx->ac.i32, 2, false);
4979 LLVMValueRef layers = LLVMBuildExtractElement(ctx->ac.builder, result, two, "");
4980 result = LLVMBuildInsertElement(ctx->ac.builder, result, layers,
4981 ctx->ac.i32_1, "");
4982 } else if (instr->dest.ssa.num_components != 4)
4983 result = trim_vector(&ctx->ac, result, instr->dest.ssa.num_components);
4984
4985 write_result:
4986 if (result) {
4987 assert(instr->dest.is_ssa);
4988 result = ac_to_integer(&ctx->ac, result);
4989 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4990 }
4991 }
4993
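/* Phi nodes are created empty here; their incoming values are added by
 * phi_post_pass once all blocks have been emitted, since a phi may refer to
 * values defined later in source order.
 */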
4994 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
4995 {
4996 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4997 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
4998
4999 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
5000 _mesa_hash_table_insert(ctx->phis, instr, result);
5001 }
5002
5003 static void visit_post_phi(struct ac_nir_context *ctx,
5004 nir_phi_instr *instr,
5005 LLVMValueRef llvm_phi)
5006 {
5007 nir_foreach_phi_src(src, instr) {
5008 LLVMBasicBlockRef block = get_block(ctx, src->pred);
5009 LLVMValueRef llvm_src = get_src(ctx, src->src);
5010
5011 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
5012 }
5013 }
5014
5015 static void phi_post_pass(struct ac_nir_context *ctx)
5016 {
5017 struct hash_entry *entry;
5018 hash_table_foreach(ctx->phis, entry) {
5019 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
5020 (LLVMValueRef)entry->data);
5021 }
5022 }
5023
5024
5025 static void visit_ssa_undef(struct ac_nir_context *ctx,
5026 const nir_ssa_undef_instr *instr)
5027 {
5028 unsigned num_components = instr->def.num_components;
5029 LLVMValueRef undef;
5030
5031 if (num_components == 1) {
5032 undef = LLVMGetUndef(ctx->ac.i32);
5033 } else {
5034 undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, num_components));
5035 }
5036 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
5037 }
5038
5039 static void visit_jump(struct ac_nir_context *ctx,
5040 const nir_jump_instr *instr)
5041 {
5042 switch (instr->type) {
5043 case nir_jump_break:
5044 LLVMBuildBr(ctx->ac.builder, ctx->break_block);
5045 LLVMClearInsertionPosition(ctx->ac.builder);
5046 break;
5047 case nir_jump_continue:
5048 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5049 LLVMClearInsertionPosition(ctx->ac.builder);
5050 break;
5051 default:
5052 fprintf(stderr, "Unknown NIR jump instr: ");
5053 nir_print_instr(&instr->instr, stderr);
5054 fprintf(stderr, "\n");
5055 abort();
5056 }
5057 }
5058
5059 static void visit_cf_list(struct ac_nir_context *ctx,
5060 struct exec_list *list);
5061
5062 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
5063 {
5064 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
5065 nir_foreach_instr(instr, block)
5066 {
5067 switch (instr->type) {
5068 case nir_instr_type_alu:
5069 visit_alu(ctx, nir_instr_as_alu(instr));
5070 break;
5071 case nir_instr_type_load_const:
5072 visit_load_const(ctx, nir_instr_as_load_const(instr));
5073 break;
5074 case nir_instr_type_intrinsic:
5075 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
5076 break;
5077 case nir_instr_type_tex:
5078 visit_tex(ctx, nir_instr_as_tex(instr));
5079 break;
5080 case nir_instr_type_phi:
5081 visit_phi(ctx, nir_instr_as_phi(instr));
5082 break;
5083 case nir_instr_type_ssa_undef:
5084 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
5085 break;
5086 case nir_instr_type_jump:
5087 visit_jump(ctx, nir_instr_as_jump(instr));
5088 break;
5089 default:
5090 fprintf(stderr, "Unknown NIR instr type: ");
5091 nir_print_instr(instr, stderr);
5092 fprintf(stderr, "\n");
5093 abort();
5094 }
5095 }
5096
5097 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
5098 }
5099
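/* Translate a NIR if: branch on condition != 0 into freshly appended
 * then/else blocks that reconverge at a merge block. The else block is
 * only created when the else list is non-empty.
 */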
5100 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
5101 {
5102 LLVMValueRef value = get_src(ctx, if_stmt->condition);
5103
5104 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
5105 LLVMBasicBlockRef merge_block =
5106 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5107 LLVMBasicBlockRef if_block =
5108 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5109 LLVMBasicBlockRef else_block = merge_block;
5110 if (!exec_list_is_empty(&if_stmt->else_list))
5111 else_block = LLVMAppendBasicBlockInContext(
5112 ctx->ac.context, fn, "");
5113
5114 LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
5115 ctx->ac.i32_0, "");
5116 LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
5117
5118 LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
5119 visit_cf_list(ctx, &if_stmt->then_list);
5120 if (LLVMGetInsertBlock(ctx->ac.builder))
5121 LLVMBuildBr(ctx->ac.builder, merge_block);
5122
5123 if (!exec_list_is_empty(&if_stmt->else_list)) {
5124 LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
5125 visit_cf_list(ctx, &if_stmt->else_list);
5126 if (LLVMGetInsertBlock(ctx->ac.builder))
5127 LLVMBuildBr(ctx->ac.builder, merge_block);
5128 }
5129
5130 LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
5131 }
5132
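/* Translate a NIR loop. The continue block doubles as the loop header;
 * the parent break/continue targets are saved and restored so that
 * nested loops branch to the right blocks.
 */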
5133 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
5134 {
5135 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
5136 LLVMBasicBlockRef continue_parent = ctx->continue_block;
5137 LLVMBasicBlockRef break_parent = ctx->break_block;
5138
5139 ctx->continue_block =
5140 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5141 ctx->break_block =
5142 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
5143
5144 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5145 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
5146 visit_cf_list(ctx, &loop->body);
5147
5148 if (LLVMGetInsertBlock(ctx->ac.builder))
5149 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
5150 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
5151
5152 ctx->continue_block = continue_parent;
5153 ctx->break_block = break_parent;
5154 }
5155
5156 static void visit_cf_list(struct ac_nir_context *ctx,
5157 struct exec_list *list)
5158 {
5159 foreach_list_typed(nir_cf_node, node, node, list)
5160 {
5161 switch (node->type) {
5162 case nir_cf_node_block:
5163 visit_block(ctx, nir_cf_node_as_block(node));
5164 break;
5165
5166 case nir_cf_node_if:
5167 visit_if(ctx, nir_cf_node_as_if(node));
5168 break;
5169
5170 case nir_cf_node_loop:
5171 visit_loop(ctx, nir_cf_node_as_loop(node));
5172 break;
5173
5174 default:
5175 assert(0);
5176 }
5177 }
5178 }
5179
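/* Load every channel of each vertex attribute from its vertex buffer
 * descriptor into ctx->inputs. Instance-rate attributes are indexed
 * with instance_id + start_instance, the rest with
 * vertex_id + base_vertex.
 */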
5180 static void
5181 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
5182 struct nir_variable *variable)
5183 {
5184 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
5185 LLVMValueRef t_offset;
5186 LLVMValueRef t_list;
5187 LLVMValueRef input;
5188 LLVMValueRef buffer_index;
5189 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
5190 int idx = variable->data.location;
5191 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
5192
5193 variable->data.driver_location = idx * 4;
5194
5195 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
5196 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
5197 ctx->abi.start_instance, "");
5198 if (ctx->options->key.vs.as_ls) {
5199 ctx->shader_info->vs.vgpr_comp_cnt =
5200 MAX2(2, ctx->shader_info->vs.vgpr_comp_cnt);
5201 } else {
5202 ctx->shader_info->vs.vgpr_comp_cnt =
5203 MAX2(1, ctx->shader_info->vs.vgpr_comp_cnt);
5204 }
5205 } else
5206 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
5207 ctx->abi.base_vertex, "");
5208
5209 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
5210 t_offset = LLVMConstInt(ctx->ac.i32, index + i, false);
5211
5212 t_list = ac_build_load_to_sgpr(&ctx->ac, t_list_ptr, t_offset);
5213
5214 input = ac_build_buffer_load_format(&ctx->ac, t_list,
5215 buffer_index,
5216 ctx->ac.i32_0,
5217 true);
5218
5219 for (unsigned chan = 0; chan < 4; chan++) {
5220 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
5221 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
5222 ac_to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
5223 input, llvm_chan, ""));
5224 }
5225 }
5226 }
5227
5228 static void interp_fs_input(struct nir_to_llvm_context *ctx,
5229 unsigned attr,
5230 LLVMValueRef interp_param,
5231 LLVMValueRef prim_mask,
5232 LLVMValueRef result[4])
5233 {
5234 LLVMValueRef attr_number;
5235 unsigned chan;
5236 LLVMValueRef i, j;
5237 bool interp = interp_param != NULL;
5238
5239 attr_number = LLVMConstInt(ctx->ac.i32, attr, false);
5240
5241 /* fs.constant returns the param from the middle vertex, so it's not
5242 * really useful for flat shading. It's meant to be used for custom
5243 * interpolation (but the intrinsic can't fetch from the other two
5244 * vertices).
5245 *
5246 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
5247 * to do the right thing. The only reason we use fs.constant is that
5248 * fs.interp cannot be used on integers, because they can be equal
5249 * to NaN.
5250 */
5251 if (interp) {
5252 interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
5253 ctx->ac.v2f32, "");
5254
5255 i = LLVMBuildExtractElement(ctx->builder, interp_param,
5256 ctx->ac.i32_0, "");
5257 j = LLVMBuildExtractElement(ctx->builder, interp_param,
5258 ctx->ac.i32_1, "");
5259 }
5260
5261 for (chan = 0; chan < 4; chan++) {
5262 LLVMValueRef llvm_chan = LLVMConstInt(ctx->ac.i32, chan, false);
5263
5264 if (interp) {
5265 result[chan] = ac_build_fs_interp(&ctx->ac,
5266 llvm_chan,
5267 attr_number,
5268 prim_mask, i, j);
5269 } else {
5270 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
5271 LLVMConstInt(ctx->ac.i32, 2, false),
5272 llvm_chan,
5273 attr_number,
5274 prim_mask);
5275 }
5276 }
5277 }
5278
5279 static void
5280 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
5281 struct nir_variable *variable)
5282 {
5283 int idx = variable->data.location;
5284 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5285 LLVMValueRef interp;
5286
5287 variable->data.driver_location = idx * 4;
5288 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
5289
5290 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
5291 unsigned interp_type;
5292 if (variable->data.sample) {
5293 interp_type = INTERP_SAMPLE;
5294 ctx->shader_info->info.ps.force_persample = true;
5295 } else if (variable->data.centroid)
5296 interp_type = INTERP_CENTROID;
5297 else
5298 interp_type = INTERP_CENTER;
5299
5300 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
5301 } else
5302 interp = NULL;
5303
5304 for (unsigned i = 0; i < attrib_count; ++i)
5305 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
5306
5307 }
5308
5309 static void
5310 handle_vs_inputs(struct nir_to_llvm_context *ctx,
5311 struct nir_shader *nir) {
5312 nir_foreach_variable(variable, &nir->inputs)
5313 handle_vs_input_decl(ctx, variable);
5314 }
5315
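/* If a multisampled fragment shader uses both center and centroid
 * interpolation, replace the centroid barycentrics with the center
 * ones whenever the sign bit of prim_mask is set.
 */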
5316 static void
5317 prepare_interp_optimize(struct nir_to_llvm_context *ctx,
5318 struct nir_shader *nir)
5319 {
5320 if (!ctx->options->key.fs.multisample)
5321 return;
5322
5323 bool uses_center = false;
5324 bool uses_centroid = false;
5325 nir_foreach_variable(variable, &nir->inputs) {
5326 if (glsl_get_base_type(glsl_without_array(variable->type)) != GLSL_TYPE_FLOAT ||
5327 variable->data.sample)
5328 continue;
5329
5330 if (variable->data.centroid)
5331 uses_centroid = true;
5332 else
5333 uses_center = true;
5334 }
5335
5336 if (uses_center && uses_centroid) {
5337 LLVMValueRef sel = LLVMBuildICmp(ctx->builder, LLVMIntSLT, ctx->prim_mask, ctx->ac.i32_0, "");
5338 ctx->persp_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->persp_center, ctx->persp_centroid, "");
5339 ctx->linear_centroid = LLVMBuildSelect(ctx->builder, sel, ctx->linear_center, ctx->linear_centroid, "");
5340 }
5341 }
5342
5343 static void
5344 handle_fs_inputs(struct nir_to_llvm_context *ctx,
5345 struct nir_shader *nir)
5346 {
5347 prepare_interp_optimize(ctx, nir);
5348
5349 nir_foreach_variable(variable, &nir->inputs)
5350 handle_fs_input_decl(ctx, variable);
5351
5352 unsigned index = 0;
5353
5354 if (ctx->shader_info->info.ps.uses_input_attachments ||
5355 ctx->shader_info->info.needs_multiview_view_index)
5356 ctx->input_mask |= 1ull << VARYING_SLOT_LAYER;
5357
5358 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
5359 LLVMValueRef interp_param;
5360 LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
5361
5362 if (!(ctx->input_mask & (1ull << i)))
5363 continue;
5364
5365 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
5366 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
5367 interp_param = *inputs;
5368 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
5369 inputs);
5370
5371 if (!interp_param)
5372 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
5373 ++index;
5374 } else if (i == VARYING_SLOT_POS) {
5375 for (int j = 0; j < 3; ++j)
5376 inputs[j] = ctx->abi.frag_pos[j];
5377
5378 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1,
5379 ctx->abi.frag_pos[3]);
5380 }
5381 }
5382 ctx->shader_info->fs.num_interp = index;
5383 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
5384 ctx->shader_info->fs.has_pcoord = true;
5385 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID))
5386 ctx->shader_info->fs.prim_id_input = true;
5387 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
5388 ctx->shader_info->fs.layer_input = true;
5389 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
5390
5391 if (ctx->shader_info->info.needs_multiview_view_index)
5392 ctx->view_index = ctx->inputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
5393 }
5394
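/* Emit an alloca in the function's entry block, as LLVM's mem2reg pass
 * only promotes entry-block allocas, then zero-initialize it at the
 * current insertion point.
 */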
5395 static LLVMValueRef
5396 ac_build_alloca(struct ac_llvm_context *ac,
5397 LLVMTypeRef type,
5398 const char *name)
5399 {
5400 LLVMBuilderRef builder = ac->builder;
5401 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
5402 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
5403 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
5404 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
5405 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
5406 LLVMValueRef res;
5407
5408 if (first_instr) {
5409 LLVMPositionBuilderBefore(first_builder, first_instr);
5410 } else {
5411 LLVMPositionBuilderAtEnd(first_builder, first_block);
5412 }
5413
5414 res = LLVMBuildAlloca(first_builder, type, name);
5415 LLVMBuildStore(builder, LLVMConstNull(type), res);
5416
5417 LLVMDisposeBuilder(first_builder);
5418
5419 return res;
5420 }
5421
5422 static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac,
5423 LLVMTypeRef type,
5424 const char *name)
5425 {
5426 LLVMValueRef ptr = ac_build_alloca(ac, type, name);
5427 LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr);
5428 return ptr;
5429 }
5430
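/* Record the output slots written by this shader in ctx->output_mask
 * and fill in the clip/cull distance masks. The combined clip/cull
 * distances take one vec4 slot, or two when more than 4 are written.
 */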
5431 static void
5432 scan_shader_output_decl(struct nir_to_llvm_context *ctx,
5433 struct nir_variable *variable,
5434 struct nir_shader *shader,
5435 gl_shader_stage stage)
5436 {
5437 int idx = variable->data.location + variable->data.index;
5438 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5439 uint64_t mask_attribs;
5440
5441 variable->data.driver_location = idx * 4;
5442
5443 /* tess ctrl has its own load/store paths for outputs */
5444 if (stage == MESA_SHADER_TESS_CTRL)
5445 return;
5446
5447 mask_attribs = ((1ull << attrib_count) - 1) << idx;
5448 if (stage == MESA_SHADER_VERTEX ||
5449 stage == MESA_SHADER_TESS_EVAL ||
5450 stage == MESA_SHADER_GEOMETRY) {
5451 if (idx == VARYING_SLOT_CLIP_DIST0) {
5452 int length = shader->info.clip_distance_array_size +
5453 shader->info.cull_distance_array_size;
5454 if (stage == MESA_SHADER_VERTEX) {
5455 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
5456 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
5457 }
5458 if (stage == MESA_SHADER_TESS_EVAL) {
5459 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << shader->info.clip_distance_array_size) - 1;
5460 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << shader->info.cull_distance_array_size) - 1;
5461 }
5462
5463 if (length > 4)
5464 attrib_count = 2;
5465 else
5466 attrib_count = 1;
5467 mask_attribs = 1ull << idx;
5468 }
5469 }
5470
5471 ctx->output_mask |= mask_attribs;
5472 }
5473
5474 static void
5475 handle_shader_output_decl(struct ac_nir_context *ctx,
5476 struct nir_shader *nir,
5477 struct nir_variable *variable)
5478 {
5479 unsigned output_loc = variable->data.driver_location / 4;
5480 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5481
5482 /* tess ctrl has its own load/store paths for outputs */
5483 if (ctx->stage == MESA_SHADER_TESS_CTRL)
5484 return;
5485
5486 if (ctx->stage == MESA_SHADER_VERTEX ||
5487 ctx->stage == MESA_SHADER_TESS_EVAL ||
5488 ctx->stage == MESA_SHADER_GEOMETRY) {
5489 int idx = variable->data.location + variable->data.index;
5490 if (idx == VARYING_SLOT_CLIP_DIST0) {
5491 int length = nir->info.clip_distance_array_size +
5492 nir->info.cull_distance_array_size;
5493
5494 if (length > 4)
5495 attrib_count = 2;
5496 else
5497 attrib_count = 1;
5498 }
5499 }
5500
5501 for (unsigned i = 0; i < attrib_count; ++i) {
5502 for (unsigned chan = 0; chan < 4; chan++) {
5503 ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] =
5504 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5505 }
5506 }
5507 }
5508
5509 static LLVMTypeRef
5510 glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
5511 enum glsl_base_type type)
5512 {
5513 switch (type) {
5514 case GLSL_TYPE_INT:
5515 case GLSL_TYPE_UINT:
5516 case GLSL_TYPE_BOOL:
5517 case GLSL_TYPE_SUBROUTINE:
5518 return ctx->ac.i32;
5519 case GLSL_TYPE_FLOAT: /* TODO handle mediump */
5520 return ctx->ac.f32;
5521 case GLSL_TYPE_INT64:
5522 case GLSL_TYPE_UINT64:
5523 return ctx->ac.i64;
5524 case GLSL_TYPE_DOUBLE:
5525 return ctx->ac.f64;
5526 default:
5527 unreachable("unknown GLSL type");
5528 }
5529 }
5530
5531 static LLVMTypeRef
5532 glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
5533 const struct glsl_type *type)
5534 {
5535 if (glsl_type_is_scalar(type)) {
5536 return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
5537 }
5538
5539 if (glsl_type_is_vector(type)) {
5540 return LLVMVectorType(
5541 glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
5542 glsl_get_vector_elements(type));
5543 }
5544
5545 if (glsl_type_is_matrix(type)) {
5546 return LLVMArrayType(
5547 glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
5548 glsl_get_matrix_columns(type));
5549 }
5550
5551 if (glsl_type_is_array(type)) {
5552 return LLVMArrayType(
5553 glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
5554 glsl_get_length(type));
5555 }
5556
5557 assert(glsl_type_is_struct(type));
5558
5559 LLVMTypeRef member_types[glsl_get_length(type)];
5560
5561 for (unsigned i = 0; i < glsl_get_length(type); i++) {
5562 member_types[i] =
5563 glsl_to_llvm_type(ctx,
5564 glsl_get_struct_field(type, i));
5565 }
5566
5567 return LLVMStructTypeInContext(ctx->context, member_types,
5568 glsl_get_length(type), false);
5569 }
5570
5571 static void
5572 setup_locals(struct ac_nir_context *ctx,
5573 struct nir_function *func)
5574 {
5575 int i, j;
5576 ctx->num_locals = 0;
5577 nir_foreach_variable(variable, &func->impl->locals) {
5578 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5579 variable->data.driver_location = ctx->num_locals * 4;
5580 variable->data.location_frac = 0;
5581 ctx->num_locals += attrib_count;
5582 }
5583 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
5584 if (!ctx->locals)
5585 return;
5586
5587 for (i = 0; i < ctx->num_locals; i++) {
5588 for (j = 0; j < 4; j++) {
5589 ctx->locals[i * 4 + j] =
5590 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
5591 }
5592 }
5593 }
5594
5595 static void
5596 setup_shared(struct ac_nir_context *ctx,
5597 struct nir_shader *nir)
5598 {
5599 nir_foreach_variable(variable, &nir->shared) {
5600 LLVMValueRef shared =
5601 LLVMAddGlobalInAddressSpace(
5602 ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type),
5603 variable->name ? variable->name : "",
5604 LOCAL_ADDR_SPACE);
5605 _mesa_hash_table_insert(ctx->vars, variable, shared);
5606 }
5607 }
5608
5609 static LLVMValueRef
5610 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
5611 {
5612 v = ac_to_float(ctx, v);
5613 v = emit_intrin_2f_param(ctx, "llvm.maxnum", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
5614 return emit_intrin_2f_param(ctx, "llvm.minnum", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
5615 }
5616
5617
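/* Pack the low 16 bits of two i32 values into one i32, src0 in the low
 * half and src1 in the high half.
 */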
5618 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
5619 LLVMValueRef src0, LLVMValueRef src1)
5620 {
5621 LLVMValueRef const16 = LLVMConstInt(ctx->ac.i32, 16, false);
5622 LLVMValueRef comp[2];
5623
5624 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->ac.i32, 65535, 0), "");
5625 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->ac.i32, 65535, 0), "");
5626 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
5627 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
5628 }
5629
5630 /* Initialize arguments for the shader export intrinsic */
5631 static void
5632 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
5633 LLVMValueRef *values,
5634 unsigned target,
5635 struct ac_export_args *args)
5636 {
5637 /* Default is 0xf. Adjusted below depending on the format. */
5638 args->enabled_channels = 0xf;
5639
5640 /* Specify whether the EXEC mask represents the valid mask */
5641 args->valid_mask = 0;
5642
5643 /* Specify whether this is the last export */
5644 args->done = 0;
5645
5646 /* Specify the target we are exporting */
5647 args->target = target;
5648
5649 args->compr = false;
5650 args->out[0] = LLVMGetUndef(ctx->ac.f32);
5651 args->out[1] = LLVMGetUndef(ctx->ac.f32);
5652 args->out[2] = LLVMGetUndef(ctx->ac.f32);
5653 args->out[3] = LLVMGetUndef(ctx->ac.f32);
5654
5655 if (!values)
5656 return;
5657
5658 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
5659 LLVMValueRef val[4];
5660 unsigned index = target - V_008DFC_SQ_EXP_MRT;
5661 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
5662 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
5663 bool is_int10 = (ctx->options->key.fs.is_int10 >> index) & 1;
5664
5665 switch(col_format) {
5666 case V_028714_SPI_SHADER_ZERO:
5667 args->enabled_channels = 0; /* writemask */
5668 args->target = V_008DFC_SQ_EXP_NULL;
5669 break;
5670
5671 case V_028714_SPI_SHADER_32_R:
5672 args->enabled_channels = 1;
5673 args->out[0] = values[0];
5674 break;
5675
5676 case V_028714_SPI_SHADER_32_GR:
5677 args->enabled_channels = 0x3;
5678 args->out[0] = values[0];
5679 args->out[1] = values[1];
5680 break;
5681
5682 case V_028714_SPI_SHADER_32_AR:
5683 args->enabled_channels = 0x9;
5684 args->out[0] = values[0];
5685 args->out[3] = values[3];
5686 break;
5687
5688 case V_028714_SPI_SHADER_FP16_ABGR:
5689 args->compr = 1;
5690
5691 for (unsigned chan = 0; chan < 2; chan++) {
5692 LLVMValueRef pack_args[2] = {
5693 values[2 * chan],
5694 values[2 * chan + 1]
5695 };
5696 LLVMValueRef packed;
5697
5698 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
5699 args->out[chan] = packed;
5700 }
5701 break;
5702
5703 case V_028714_SPI_SHADER_UNORM16_ABGR:
5704 for (unsigned chan = 0; chan < 4; chan++) {
5705 val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
5706 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5707 LLVMConstReal(ctx->ac.f32, 65535), "");
5708 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5709 LLVMConstReal(ctx->ac.f32, 0.5), "");
5710 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
5711 ctx->ac.i32, "");
5712 }
5713
5714 args->compr = 1;
5715 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5716 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5717 break;
5718
5719 case V_028714_SPI_SHADER_SNORM16_ABGR:
5720 for (unsigned chan = 0; chan < 4; chan++) {
5721 val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1);
5722 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5723 LLVMConstReal(ctx->ac.f32, 32767), "");
5724
5725 /* If positive, add 0.5, else add -0.5. */
5726 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5727 LLVMBuildSelect(ctx->builder,
5728 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
5729 val[chan], ctx->ac.f32_0, ""),
5730 LLVMConstReal(ctx->ac.f32, 0.5),
5731 LLVMConstReal(ctx->ac.f32, -0.5), ""), "");
5732 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->ac.i32, "");
5733 }
5734
5735 args->compr = 1;
5736 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5737 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5738 break;
5739
5740 case V_028714_SPI_SHADER_UINT16_ABGR: {
5741 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32,
5742 is_int8 ? 255 : is_int10 ? 1023 : 65535, 0);
5743 LLVMValueRef max_alpha = !is_int10 ? max_rgb : LLVMConstInt(ctx->ac.i32, 3, 0);
5744
5745 for (unsigned chan = 0; chan < 4; chan++) {
5746 val[chan] = ac_to_integer(&ctx->ac, values[chan]);
5747 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], chan == 3 ? max_alpha : max_rgb);
5748 }
5749
5750 args->compr = 1;
5751 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5752 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5753 break;
5754 }
5755
5756 case V_028714_SPI_SHADER_SINT16_ABGR: {
5757 LLVMValueRef max_rgb = LLVMConstInt(ctx->ac.i32,
5758 is_int8 ? 127 : is_int10 ? 511 : 32767, 0);
5759 LLVMValueRef min_rgb = LLVMConstInt(ctx->ac.i32,
5760 is_int8 ? -128 : is_int10 ? -512 : -32768, 0);
5761 LLVMValueRef max_alpha = !is_int10 ? max_rgb : ctx->ac.i32_1;
5762 LLVMValueRef min_alpha = !is_int10 ? min_rgb : LLVMConstInt(ctx->ac.i32, -2, 0);
5763
5764 /* Clamp. */
5765 for (unsigned chan = 0; chan < 4; chan++) {
5766 val[chan] = ac_to_integer(&ctx->ac, values[chan]);
5767 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], chan == 3 ? max_alpha : max_rgb);
5768 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], chan == 3 ? min_alpha : min_rgb);
5769 }
5770
5771 args->compr = 1;
5772 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5773 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5774 break;
5775 }
5776
5777 default:
5778 case V_028714_SPI_SHADER_32_ABGR:
5779 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5780 break;
5781 }
5782 } else
5783 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5784
5785 for (unsigned i = 0; i < 4; ++i)
5786 args->out[i] = ac_to_float(&ctx->ac, args->out[i]);
5787 }
5788
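/* Emit the position exports (position, point size/layer/viewport index,
 * clip/cull distances) and the parameter exports for a hardware VS.
 * Position exports are buffered so the last one can be marked done.
 */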
5789 static void
5790 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
5791 bool export_prim_id,
5792 struct ac_vs_output_info *outinfo)
5793 {
5794 uint32_t param_count = 0;
5795 unsigned target;
5796 unsigned pos_idx, num_pos_exports = 0;
5797 struct ac_export_args args, pos_args[4] = {};
5798 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
5799 int i;
5800
5801 if (ctx->options->key.has_multiview_view_index) {
5802 LLVMValueRef* tmp_out = &ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)];
5803 if (!*tmp_out) {
5804 for (unsigned i = 0; i < 4; ++i)
5805 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, i)] =
5806 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5807 }
5808
5809 LLVMBuildStore(ctx->builder, ac_to_float(&ctx->ac, ctx->view_index), *tmp_out);
5810 ctx->output_mask |= 1ull << VARYING_SLOT_LAYER;
5811 }
5812
5813 memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
5814 sizeof(outinfo->vs_output_param_offset));
5815
5816 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
5817 LLVMValueRef slots[8];
5818 unsigned j;
5819
5820 if (outinfo->cull_dist_mask)
5821 outinfo->cull_dist_mask <<= ctx->num_output_clips;
5822
5823 i = VARYING_SLOT_CLIP_DIST0;
5824 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
5825 slots[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5826 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5827
5828 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
5829 slots[i] = LLVMGetUndef(ctx->ac.f32);
5830
5831 if (ctx->num_output_clips + ctx->num_output_culls > 4) {
5832 target = V_008DFC_SQ_EXP_POS + 3;
5833 si_llvm_init_export_args(ctx, &slots[4], target, &args);
5834 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5835 &args, sizeof(args));
5836 }
5837
5838 target = V_008DFC_SQ_EXP_POS + 2;
5839 si_llvm_init_export_args(ctx, &slots[0], target, &args);
5840 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5841 &args, sizeof(args));
5842
5843 }
5844
5845 LLVMValueRef pos_values[4] = {ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_0, ctx->ac.f32_1};
5846 if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
5847 for (unsigned j = 0; j < 4; j++)
5848 pos_values[j] = LLVMBuildLoad(ctx->builder,
5849 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
5850 }
5851 si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
5852
5853 if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
5854 outinfo->writes_pointsize = true;
5855 psize_value = LLVMBuildLoad(ctx->builder,
5856 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
5857 }
5858
5859 if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
5860 outinfo->writes_layer = true;
5861 layer_value = LLVMBuildLoad(ctx->builder,
5862 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
5863 }
5864
5865 if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
5866 outinfo->writes_viewport_index = true;
5867 viewport_index_value = LLVMBuildLoad(ctx->builder,
5868 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
5869 }
5870
5871 if (outinfo->writes_pointsize ||
5872 outinfo->writes_layer ||
5873 outinfo->writes_viewport_index) {
5874 pos_args[1].enabled_channels = ((outinfo->writes_pointsize ? 1 : 0) |
5875 (outinfo->writes_layer ? 4 : 0));
5876 pos_args[1].valid_mask = 0;
5877 pos_args[1].done = 0;
5878 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
5879 pos_args[1].compr = 0;
5880 pos_args[1].out[0] = ctx->ac.f32_0; /* X */
5881 pos_args[1].out[1] = ctx->ac.f32_0; /* Y */
5882 pos_args[1].out[2] = ctx->ac.f32_0; /* Z */
5883 pos_args[1].out[3] = ctx->ac.f32_0; /* W */
5884
5885 if (outinfo->writes_pointsize)
5886 pos_args[1].out[0] = psize_value;
5887 if (outinfo->writes_layer)
5888 pos_args[1].out[2] = layer_value;
5889 if (outinfo->writes_viewport_index) {
5890 if (ctx->options->chip_class >= GFX9) {
5891 /* GFX9 has the layer in out.z[10:0] and the viewport
5892 * index in out.z[19:16].
5893 */
5894 LLVMValueRef v = viewport_index_value;
5895 v = ac_to_integer(&ctx->ac, v);
5896 v = LLVMBuildShl(ctx->builder, v,
5897 LLVMConstInt(ctx->ac.i32, 16, false),
5898 "");
5899 v = LLVMBuildOr(ctx->builder, v,
5900 ac_to_integer(&ctx->ac, pos_args[1].out[2]), "");
5901
5902 pos_args[1].out[2] = ac_to_float(&ctx->ac, v);
5903 pos_args[1].enabled_channels |= 1 << 2;
5904 } else {
5905 pos_args[1].out[3] = viewport_index_value;
5906 pos_args[1].enabled_channels |= 1 << 3;
5907 }
5908 }
5909 }
5910 for (i = 0; i < 4; i++) {
5911 if (pos_args[i].out[0])
5912 num_pos_exports++;
5913 }
5914
5915 pos_idx = 0;
5916 for (i = 0; i < 4; i++) {
5917 if (!pos_args[i].out[0])
5918 continue;
5919
5920 /* Specify the target we are exporting */
5921 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
5922 if (pos_idx == num_pos_exports)
5923 pos_args[i].done = 1;
5924 ac_build_export(&ctx->ac, &pos_args[i]);
5925 }
5926
5927 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5928 LLVMValueRef values[4];
5929 if (!(ctx->output_mask & (1ull << i)))
5930 continue;
5931
5932 for (unsigned j = 0; j < 4; j++)
5933 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5934 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5935
5936 if (i == VARYING_SLOT_LAYER) {
5937 target = V_008DFC_SQ_EXP_PARAM + param_count;
5938 outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
5939 param_count++;
5940 } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
5941 target = V_008DFC_SQ_EXP_PARAM + param_count;
5942 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
5943 param_count++;
5944 } else if (i >= VARYING_SLOT_VAR0) {
5945 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
5946 target = V_008DFC_SQ_EXP_PARAM + param_count;
5947 outinfo->vs_output_param_offset[i] = param_count;
5948 param_count++;
5949 } else
5950 continue;
5951
5952 si_llvm_init_export_args(ctx, values, target, &args);
5953
5954 if (target >= V_008DFC_SQ_EXP_POS &&
5955 target <= (V_008DFC_SQ_EXP_POS + 3)) {
5956 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5957 &args, sizeof(args));
5958 } else {
5959 ac_build_export(&ctx->ac, &args);
5960 }
5961 }
5962
5963 if (export_prim_id) {
5964 LLVMValueRef values[4];
5965 target = V_008DFC_SQ_EXP_PARAM + param_count;
5966 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
5967 param_count++;
5968
5969 values[0] = ctx->vs_prim_id;
5970 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2,
5971 ctx->shader_info->vs.vgpr_comp_cnt);
5972 for (unsigned j = 1; j < 4; j++)
5973 values[j] = ctx->ac.f32_0;
5974 si_llvm_init_export_args(ctx, values, target, &args);
5975 ac_build_export(&ctx->ac, &args);
5976 outinfo->export_prim_id = true;
5977 }
5978
5979 outinfo->pos_exports = num_pos_exports;
5980 outinfo->param_exports = param_count;
5981 }
5982
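/* Store ES (VS or TES) outputs for the GS to read: via LDS on GFX9,
 * where ES and GS run as one merged shader, and via the ESGS ring
 * buffer on older chips.
 */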
5983 static void
5984 handle_es_outputs_post(struct nir_to_llvm_context *ctx,
5985 struct ac_es_output_info *outinfo)
5986 {
5987 int j;
5988 uint64_t max_output_written = 0;
5989 LLVMValueRef lds_base = NULL;
5990
5991 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5992 int param_index;
5993 int length = 4;
5994
5995 if (!(ctx->output_mask & (1ull << i)))
5996 continue;
5997
5998 if (i == VARYING_SLOT_CLIP_DIST0)
5999 length = ctx->num_output_clips + ctx->num_output_culls;
6000
6001 param_index = shader_io_get_unique_index(i);
6002
6003 max_output_written = MAX2(param_index + (length > 4), max_output_written);
6004 }
6005
6006 outinfo->esgs_itemsize = (max_output_written + 1) * 16;
6007
6008 if (ctx->ac.chip_class >= GFX9) {
6009 unsigned itemsize_dw = outinfo->esgs_itemsize / 4;
6010 LLVMValueRef vertex_idx = ac_get_thread_id(&ctx->ac);
6011 LLVMValueRef wave_idx = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6012 LLVMConstInt(ctx->ac.i32, 24, false),
6013 LLVMConstInt(ctx->ac.i32, 4, false), false);
6014 vertex_idx = LLVMBuildOr(ctx->ac.builder, vertex_idx,
6015 LLVMBuildMul(ctx->ac.builder, wave_idx,
6016 LLVMConstInt(ctx->ac.i32, 64, false), ""), "");
6017 lds_base = LLVMBuildMul(ctx->ac.builder, vertex_idx,
6018 LLVMConstInt(ctx->ac.i32, itemsize_dw, 0), "");
6019 }
6020
6021 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6022 LLVMValueRef dw_addr;
6023 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
6024 int param_index;
6025 int length = 4;
6026
6027 if (!(ctx->output_mask & (1ull << i)))
6028 continue;
6029
6030 if (i == VARYING_SLOT_CLIP_DIST0)
6031 length = ctx->num_output_clips + ctx->num_output_culls;
6032
6033 param_index = shader_io_get_unique_index(i);
6034
6035 if (lds_base) {
6036 dw_addr = LLVMBuildAdd(ctx->builder, lds_base,
6037 LLVMConstInt(ctx->ac.i32, param_index * 4, false),
6038 "");
6039 }
6040 for (j = 0; j < length; j++) {
6041 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
6042 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->ac.i32, "");
6043
6044 if (ctx->ac.chip_class >= GFX9) {
6045 ac_lds_store(&ctx->ac, dw_addr,
6046 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
6047 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
6048 } else {
6049 ac_build_buffer_store_dword(&ctx->ac,
6050 ctx->esgs_ring,
6051 out_val, 1,
6052 NULL, ctx->es2gs_offset,
6053 (4 * param_index + j) * 4,
6054 1, 1, true, true);
6055 }
6056 }
6057 }
6058 }
6059
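/* Store LS (VS before TCS) outputs to LDS at this vertex's slot so the
 * TCS can read them; the per-vertex dword stride is unpacked from
 * ls_out_layout.
 */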
6060 static void
6061 handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
6062 {
6063 LLVMValueRef vertex_id = ctx->rel_auto_id;
6064 LLVMValueRef vertex_dw_stride = unpack_param(&ctx->ac, ctx->ls_out_layout, 13, 8);
6065 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
6066 vertex_dw_stride, "");
6067
6068 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6069 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
6070 int length = 4;
6071
6072 if (!(ctx->output_mask & (1ull << i)))
6073 continue;
6074
6075 if (i == VARYING_SLOT_CLIP_DIST0)
6076 length = ctx->num_output_clips + ctx->num_output_culls;
6077 int param = shader_io_get_unique_index(i);
6078 mark_tess_output(ctx, false, param);
6079 if (length > 4)
6080 mark_tess_output(ctx, false, param + 1);
6081 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr,
6082 LLVMConstInt(ctx->ac.i32, param * 4, false),
6083 "");
6084 for (unsigned j = 0; j < length; j++) {
6085 ac_lds_store(&ctx->ac, dw_addr,
6086 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
6087 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->ac.i32_1, "");
6088 }
6089 }
6090 }
6091
6092 struct ac_build_if_state
6093 {
6094 struct nir_to_llvm_context *ctx;
6095 LLVMValueRef condition;
6096 LLVMBasicBlockRef entry_block;
6097 LLVMBasicBlockRef true_block;
6098 LLVMBasicBlockRef false_block;
6099 LLVMBasicBlockRef merge_block;
6100 };
6101
6102 static LLVMBasicBlockRef
6103 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name)
6104 {
6105 LLVMBasicBlockRef current_block;
6106 LLVMBasicBlockRef next_block;
6107 LLVMBasicBlockRef new_block;
6108
6109 /* get current basic block */
6110 current_block = LLVMGetInsertBlock(ctx->builder);
6111
6112 /* check if there's another block after this one */
6113 next_block = LLVMGetNextBasicBlock(current_block);
6114 if (next_block) {
6115 /* insert the new block before the next block */
6116 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
6117 }
6118 else {
6119 /* append new block after current block */
6120 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
6121 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
6122 }
6123 return new_block;
6124 }
6125
6126 static void
6127 ac_nir_build_if(struct ac_build_if_state *ifthen,
6128 struct nir_to_llvm_context *ctx,
6129 LLVMValueRef condition)
6130 {
6131 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder);
6132
6133 memset(ifthen, 0, sizeof *ifthen);
6134 ifthen->ctx = ctx;
6135 ifthen->condition = condition;
6136 ifthen->entry_block = block;
6137
6138 /* create endif/merge basic block for the phi functions */
6139 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
6140
6141 /* create/insert true_block before merge_block */
6142 ifthen->true_block =
6143 LLVMInsertBasicBlockInContext(ctx->context,
6144 ifthen->merge_block,
6145 "if-true-block");
6146
6147 /* subsequent code goes into the true block */
6148 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
6149 }
6150
6151 /**
6152 * End a conditional.
6153 */
6154 static void
6155 ac_nir_build_endif(struct ac_build_if_state *ifthen)
6156 {
6157 LLVMBuilderRef builder = ifthen->ctx->builder;
6158
6159 /* Insert branch to the merge block from current block */
6160 LLVMBuildBr(builder, ifthen->merge_block);
6161
6162 /*
6163 * Now patch in the various branch instructions.
6164 */
6165
6166 /* Insert the conditional branch instruction at the end of entry_block */
6167 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
6168 if (ifthen->false_block) {
6169 /* we have an else clause */
6170 LLVMBuildCondBr(builder, ifthen->condition,
6171 ifthen->true_block, ifthen->false_block);
6172 }
6173 else {
6174 /* no else clause */
6175 LLVMBuildCondBr(builder, ifthen->condition,
6176 ifthen->true_block, ifthen->merge_block);
6177 }
6178
6179 /* Resume building code at end of the ifthen->merge_block */
6180 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
6181 }
6182
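/* Gather the tessellation factors written by TCS invocation 0 from LDS
 * and store them to the tess factor ring buffer, plus the offchip
 * buffer when the TES reads them. On chips up to VI the first patch
 * also writes the dynamic HS control word.
 */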
6183 static void
6184 write_tess_factors(struct nir_to_llvm_context *ctx)
6185 {
6186 unsigned stride, outer_comps, inner_comps;
6187 struct ac_build_if_state if_ctx, inner_if_ctx;
6188 LLVMValueRef invocation_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 8, 5);
6189 LLVMValueRef rel_patch_id = unpack_param(&ctx->ac, ctx->abi.tcs_rel_ids, 0, 8);
6190 unsigned tess_inner_index, tess_outer_index;
6191 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
6192 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
6193 int i;
6194 emit_barrier(&ctx->ac, ctx->stage);
6195
6196 switch (ctx->options->key.tcs.primitive_mode) {
6197 case GL_ISOLINES:
6198 stride = 2;
6199 outer_comps = 2;
6200 inner_comps = 0;
6201 break;
6202 case GL_TRIANGLES:
6203 stride = 4;
6204 outer_comps = 3;
6205 inner_comps = 1;
6206 break;
6207 case GL_QUADS:
6208 stride = 6;
6209 outer_comps = 4;
6210 inner_comps = 2;
6211 break;
6212 default:
6213 return;
6214 }
6215
6216 ac_nir_build_if(&if_ctx, ctx,
6217 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
6218 invocation_id, ctx->ac.i32_0, ""));
6219
6220 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
6221 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
6222
6223 mark_tess_output(ctx, true, tess_inner_index);
6224 mark_tess_output(ctx, true, tess_outer_index);
6225 lds_base = get_tcs_out_current_patch_data_offset(ctx);
6226 lds_inner = LLVMBuildAdd(ctx->builder, lds_base,
6227 LLVMConstInt(ctx->ac.i32, tess_inner_index * 4, false), "");
6228 lds_outer = LLVMBuildAdd(ctx->builder, lds_base,
6229 LLVMConstInt(ctx->ac.i32, tess_outer_index * 4, false), "");
6230
6231 for (i = 0; i < 4; i++) {
6232 inner[i] = LLVMGetUndef(ctx->ac.i32);
6233 outer[i] = LLVMGetUndef(ctx->ac.i32);
6234 }
6235
6236 /* LINES reversal: isolines store the two outer factors in reverse order. */
6237 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
6238 outer[0] = out[1] = ac_lds_load(&ctx->ac, lds_outer);
6239 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
6240 ctx->ac.i32_1, "");
6241 outer[1] = out[0] = ac_lds_load(&ctx->ac, lds_outer);
6242 } else {
6243 for (i = 0; i < outer_comps; i++) {
6244 outer[i] = out[i] =
6245 ac_lds_load(&ctx->ac, lds_outer);
6246 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
6247 ctx->ac.i32_1, "");
6248 }
6249 for (i = 0; i < inner_comps; i++) {
6250 inner[i] = out[outer_comps+i] =
6251 ac_lds_load(&ctx->ac, lds_inner);
6252 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
6253 ctx->ac.i32_1, "");
6254 }
6255 }
6256
6257 /* Convert the outputs to vectors for stores. */
6258 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
6259 vec1 = NULL;
6260
6261 if (stride > 4)
6262 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
6263
6264
6265 buffer = ctx->hs_ring_tess_factor;
6266 tf_base = ctx->tess_factor_offset;
6267 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
6268 LLVMConstInt(ctx->ac.i32, 4 * stride, false), "");
6269 unsigned tf_offset = 0;
6270
6271 if (ctx->options->chip_class <= VI) {
6272 ac_nir_build_if(&inner_if_ctx, ctx,
6273 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
6274 rel_patch_id, ctx->ac.i32_0, ""));
6275
6276 /* Store the dynamic HS control word. */
6277 ac_build_buffer_store_dword(&ctx->ac, buffer,
6278 LLVMConstInt(ctx->ac.i32, 0x80000000, false),
6279 1, ctx->ac.i32_0, tf_base,
6280 0, 1, 0, true, false);
6281 tf_offset += 4;
6282
6283 ac_nir_build_endif(&inner_if_ctx);
6284 }
6285
6286 /* Store the tessellation factors. */
6287 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
6288 MIN2(stride, 4), byteoffset, tf_base,
6289 tf_offset, 1, 0, true, false);
6290 if (vec1)
6291 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
6292 stride - 4, byteoffset, tf_base,
6293 16 + tf_offset, 1, 0, true, false);
6294
6295 /* Store to offchip for TES to read - only if TES reads them. */
6296 if (ctx->options->key.tcs.tes_reads_tess_factors) {
6297 LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
6298 LLVMValueRef tf_inner_offset;
6299 unsigned param_outer, param_inner;
6300
6301 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
6302 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
6303 LLVMConstInt(ctx->ac.i32, param_outer, 0));
6304
6305 outer_vec = ac_build_gather_values(&ctx->ac, outer,
6306 util_next_power_of_two(outer_comps));
6307
6308 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
6309 outer_comps, tf_outer_offset,
6310 ctx->oc_lds, 0, 1, 0, true, false);
6311 if (inner_comps) {
6312 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
6313 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
6314 LLVMConstInt(ctx->ac.i32, param_inner, 0));
6315
6316 inner_vec = inner_comps == 1 ? inner[0] :
6317 ac_build_gather_values(&ctx->ac, inner, inner_comps);
6318 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
6319 inner_comps, tf_inner_offset,
6320 ctx->oc_lds, 0, 1, 0, true, false);
6321 }
6322 }
6323 ac_nir_build_endif(&if_ctx);
6324 }
6325
6326 static void
6327 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
6328 {
6329 write_tess_factors(ctx);
6330 }
6331
6332 static bool
6333 si_export_mrt_color(struct nir_to_llvm_context *ctx,
6334 LLVMValueRef *color, unsigned param, bool is_last,
6335 struct ac_export_args *args)
6336 {
6337 /* Export */
6338 si_llvm_init_export_args(ctx, color, param,
6339 args);
6340
6341 if (is_last) {
6342 args->valid_mask = 1; /* whether the EXEC mask is valid */
6343 args->done = 1; /* DONE bit */
6344 } else if (!args->enabled_channels)
6345 return false; /* unnecessary NULL export */
6346
6347 return true;
6348 }
6349
6350 static void
6351 radv_export_mrt_z(struct nir_to_llvm_context *ctx,
6352 LLVMValueRef depth, LLVMValueRef stencil,
6353 LLVMValueRef samplemask)
6354 {
6355 struct ac_export_args args;
6356
6357 ac_export_mrt_z(&ctx->ac, depth, stencil, samplemask, &args);
6358
6359 ac_build_export(&ctx->ac, &args);
6360 }
6361
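/* Export all fragment shader outputs: depth/stencil/sample mask through
 * the MRTZ export, colors through MRT exports, and a NULL export when
 * nothing else was written.
 */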
6362 static void
6363 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
6364 {
6365 unsigned index = 0;
6366 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
6367 struct ac_export_args color_args[8];
6368
6369 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6370 LLVMValueRef values[4];
6371
6372 if (!(ctx->output_mask & (1ull << i)))
6373 continue;
6374
6375 if (i == FRAG_RESULT_DEPTH) {
6376 ctx->shader_info->fs.writes_z = true;
6377 depth = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6378 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6379 } else if (i == FRAG_RESULT_STENCIL) {
6380 ctx->shader_info->fs.writes_stencil = true;
6381 stencil = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6382 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6383 } else if (i == FRAG_RESULT_SAMPLE_MASK) {
6384 ctx->shader_info->fs.writes_sample_mask = true;
6385 samplemask = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6386 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
6387 } else {
6388 bool last = false;
6389 for (unsigned j = 0; j < 4; j++)
6390 values[j] = ac_to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
6391 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
6392
6393 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
6394 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
6395
6396 bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]);
6397 if (ret)
6398 index++;
6399 }
6400 }
6401
6402 for (unsigned i = 0; i < index; i++)
6403 ac_build_export(&ctx->ac, &color_args[i]);
6404 if (depth || stencil || samplemask)
6405 radv_export_mrt_z(ctx, depth, stencil, samplemask);
6406 else if (!index) {
6407 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
6408 ac_build_export(&ctx->ac, &color_args[0]);
6409 }
6410
6411 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
6412 }
6413
6414 static void
6415 emit_gs_epilogue(struct nir_to_llvm_context *ctx)
6416 {
6417 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
6418 }
6419
6420 static void
6421 handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
6422 LLVMValueRef *addrs)
6423 {
6424 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
6425
6426 switch (ctx->stage) {
6427 case MESA_SHADER_VERTEX:
6428 if (ctx->options->key.vs.as_ls)
6429 handle_ls_outputs_post(ctx);
6430 else if (ctx->options->key.vs.as_es)
6431 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
6432 else
6433 handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
6434 &ctx->shader_info->vs.outinfo);
6435 break;
6436 case MESA_SHADER_FRAGMENT:
6437 handle_fs_outputs_post(ctx);
6438 break;
6439 case MESA_SHADER_GEOMETRY:
6440 emit_gs_epilogue(ctx);
6441 break;
6442 case MESA_SHADER_TESS_CTRL:
6443 handle_tcs_outputs_post(ctx);
6444 break;
6445 case MESA_SHADER_TESS_EVAL:
6446 if (ctx->options->key.tes.as_es)
6447 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
6448 else
6449 handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
6450 &ctx->shader_info->tes.outinfo);
6451 break;
6452 default:
6453 break;
6454 }
6455 }
6456
6457 static void ac_llvm_finalize_module(struct nir_to_llvm_context *ctx)
6458 {
6459 LLVMPassManagerRef passmgr;
6460 /* Create the pass manager */
6461 passmgr = LLVMCreateFunctionPassManagerForModule(
6462 ctx->module);
6463
6464 /* This pass should eliminate all the load and store instructions */
6465 LLVMAddPromoteMemoryToRegisterPass(passmgr);
6466
6467 /* Add some optimization passes */
6468 LLVMAddScalarReplAggregatesPass(passmgr);
6469 LLVMAddLICMPass(passmgr);
6470 LLVMAddAggressiveDCEPass(passmgr);
6471 LLVMAddCFGSimplificationPass(passmgr);
6472 LLVMAddInstructionCombiningPass(passmgr);
6473
6474 /* Run the pass */
6475 LLVMInitializeFunctionPassManager(passmgr);
6476 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
6477 LLVMFinalizeFunctionPassManager(passmgr);
6478
6479 LLVMDisposeBuilder(ctx->builder);
6480 LLVMDisposePassManager(passmgr);
6481 }
6482
6483 static void
6484 ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
6485 {
6486 struct ac_vs_output_info *outinfo;
6487
6488 switch (ctx->stage) {
6489 case MESA_SHADER_FRAGMENT:
6490 case MESA_SHADER_COMPUTE:
6491 case MESA_SHADER_TESS_CTRL:
6492 case MESA_SHADER_GEOMETRY:
6493 return;
6494 case MESA_SHADER_VERTEX:
6495 if (ctx->options->key.vs.as_ls ||
6496 ctx->options->key.vs.as_es)
6497 return;
6498 outinfo = &ctx->shader_info->vs.outinfo;
6499 break;
6500 case MESA_SHADER_TESS_EVAL:
6501 if (ctx->options->key.tes.as_es)
6502 return;
6503 outinfo = &ctx->shader_info->tes.outinfo;
6504 break;
6505 default:
6506 unreachable("Unhandled shader type");
6507 }
6508
6509 ac_optimize_vs_outputs(&ctx->ac,
6510 ctx->main_function,
6511 outinfo->vs_output_param_offset,
6512 VARYING_SLOT_MAX,
6513 &outinfo->param_exports);
6514 }
6515
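/* Load the ring buffer descriptors this stage needs (ESGS, GSVS, tess
 * factor and tess offchip rings) from ring_offsets. For GS, the GSVS
 * descriptor is patched with the ring stride and number of entries.
 */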
6516 static void
6517 ac_setup_rings(struct nir_to_llvm_context *ctx)
6518 {
6519 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
6520 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
6521 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_VS, false));
6522 }
6523
6524 if (ctx->is_gs_copy_shader) {
6525 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_VS, false));
6526 }
6527 if (ctx->stage == MESA_SHADER_GEOMETRY) {
6528 LLVMValueRef tmp;
6529 ctx->esgs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_ESGS_GS, false));
6530 ctx->gsvs_ring = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_GSVS_GS, false));
6531
6532 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->ac.v4i32, "");
6533
6534 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->ac.i32, 2, false), "");
6535 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->ac.i32_1, "");
6536 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
6537 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->ac.i32_1, "");
6538 }
6539
6540 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
6541 ctx->stage == MESA_SHADER_TESS_EVAL) {
6542 ctx->hs_ring_tess_offchip = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_OFFCHIP, false));
6543 ctx->hs_ring_tess_factor = ac_build_load_to_sgpr(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->ac.i32, RING_HS_TESS_FACTOR, false));
6544 }
6545 }
6546
6547 static unsigned
6548 ac_nir_get_max_workgroup_size(enum chip_class chip_class,
6549 const struct nir_shader *nir)
6550 {
6551 switch (nir->info.stage) {
6552 case MESA_SHADER_TESS_CTRL:
6553 return chip_class >= CIK ? 128 : 64;
6554 case MESA_SHADER_GEOMETRY:
6555 return chip_class >= GFX9 ? 128 : 64;
6556 case MESA_SHADER_COMPUTE:
6557 break;
6558 default:
6559 return 0;
6560 }
6561
6562 unsigned max_workgroup_size = nir->info.cs.local_size[0] *
6563 nir->info.cs.local_size[1] *
6564 nir->info.cs.local_size[2];
6565 return max_workgroup_size;
6566 }
6567
6568 /* Fixup the HW not emitting the TCS regs if there are no HS threads. */
6569 static void ac_nir_fixup_ls_hs_input_vgprs(struct nir_to_llvm_context *ctx)
6570 {
6571 LLVMValueRef count = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6572 LLVMConstInt(ctx->ac.i32, 8, false),
6573 LLVMConstInt(ctx->ac.i32, 8, false), false);
6574 LLVMValueRef hs_empty = LLVMBuildICmp(ctx->ac.builder, LLVMIntEQ, count,
6575 ctx->ac.i32_0, "");
6576 ctx->abi.instance_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->rel_auto_id, ctx->abi.instance_id, "");
6577 ctx->vs_prim_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.vertex_id, ctx->vs_prim_id, "");
6578 ctx->rel_auto_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_rel_ids, ctx->rel_auto_id, "");
6579 ctx->abi.vertex_id = LLVMBuildSelect(ctx->ac.builder, hs_empty, ctx->abi.tcs_patch_id, ctx->abi.vertex_id, "");
6580 }
6581
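/* The six GS vertex offsets arrive packed as 16-bit pairs in three
 * VGPRs when ES and GS are merged; unpack them and extract gs_wave_id
 * from merged_wave_info.
 */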
6582 static void prepare_gs_input_vgprs(struct nir_to_llvm_context *ctx)
6583 {
6584 for (int i = 5; i >= 0; --i) {
6585 ctx->gs_vtx_offset[i] = ac_build_bfe(&ctx->ac, ctx->gs_vtx_offset[i & ~1],
6586 LLVMConstInt(ctx->ac.i32, (i & 1) * 16, false),
6587 LLVMConstInt(ctx->ac.i32, 16, false), false);
6588 }
6589
6590 ctx->gs_wave_id = ac_build_bfe(&ctx->ac, ctx->merged_wave_info,
6591 LLVMConstInt(ctx->ac.i32, 16, false),
6592 LLVMConstInt(ctx->ac.i32, 8, false), false);
6593 }
6594
6595 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
6596 struct nir_shader *nir, struct nir_to_llvm_context *nctx)
6597 {
6598 struct ac_nir_context ctx = {};
6599 struct nir_function *func;
6600
6601 ctx.ac = *ac;
6602 ctx.abi = abi;
6603
6604 ctx.nctx = nctx;
6605 if (nctx)
6606 nctx->nir = &ctx;
6607
6608 ctx.stage = nir->info.stage;
6609
6610 ctx.main_function = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
6611
6612 nir_foreach_variable(variable, &nir->outputs)
6613 handle_shader_output_decl(&ctx, nir, variable);
6614
6615 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6616 _mesa_key_pointer_equal);
6617 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6618 _mesa_key_pointer_equal);
6619 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6620 _mesa_key_pointer_equal);
6621
6622 func = (struct nir_function *)exec_list_get_head(&nir->functions);
6623
6624 setup_locals(&ctx, func);
6625
6626 if (nir->info.stage == MESA_SHADER_COMPUTE)
6627 setup_shared(&ctx, nir);
6628
6629 visit_cf_list(&ctx, &func->impl->body);
6630 phi_post_pass(&ctx);
6631
6632 ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS,
6633 ctx.outputs);
6634
6635 free(ctx.locals);
6636 ralloc_free(ctx.defs);
6637 ralloc_free(ctx.phis);
6638 ralloc_free(ctx.vars);
6639
6640 if (nctx)
6641 nctx->nir = NULL;
6642 }
6643
6644 static
6645 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
6646 struct nir_shader *const *shaders,
6647 int shader_count,
6648 struct ac_shader_variant_info *shader_info,
6649 const struct ac_nir_compiler_options *options)
6650 {
6651 struct nir_to_llvm_context ctx = {0};
6652 unsigned i;
6653 ctx.options = options;
6654 ctx.shader_info = shader_info;
6655 ctx.context = LLVMContextCreate();
6656 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
6657
6658 ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
6659 options->family);
6660 ctx.ac.module = ctx.module;
6661 LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
6662
6663 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
6664 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
6665 LLVMSetDataLayout(ctx.module, data_layout_str);
6666 LLVMDisposeTargetData(data_layout);
6667 LLVMDisposeMessage(data_layout_str);
6668
6669 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
6670 ctx.ac.builder = ctx.builder;
6671
6672 memset(shader_info, 0, sizeof(*shader_info));
6673
6674 for (int i = 0; i < shader_count; ++i)
6675 ac_nir_shader_info_pass(shaders[i], options, &shader_info->info);
6676
6677 for (i = 0; i < AC_UD_MAX_SETS; i++)
6678 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
6679 for (i = 0; i < AC_UD_MAX_UD; i++)
6680 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
6681
6682 ctx.max_workgroup_size = 0;
6683 for (int i = 0; i < shader_count; ++i) {
6684 ctx.max_workgroup_size = MAX2(ctx.max_workgroup_size,
6685 ac_nir_get_max_workgroup_size(ctx.options->chip_class,
6686 shaders[i]));
6687 }
6688
6689 create_function(&ctx, shaders[shader_count - 1]->info.stage, shader_count >= 2,
6690 shader_count >= 2 ? shaders[shader_count - 2]->info.stage : MESA_SHADER_VERTEX);
6691
6692 ctx.abi.inputs = &ctx.inputs[0];
6693 ctx.abi.emit_outputs = handle_shader_outputs_post;
6694 ctx.abi.emit_vertex = visit_emit_vertex;
6695 ctx.abi.load_ubo = radv_load_ubo;
6696 ctx.abi.load_ssbo = radv_load_ssbo;
6697 ctx.abi.load_sampler_desc = radv_get_sampler_desc;
6698 ctx.abi.clamp_shadow_reference = false;
6699
6700 if (shader_count >= 2)
6701 ac_init_exec_full_mask(&ctx.ac);
6702
6703 if (ctx.ac.chip_class == GFX9 &&
6704 shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL)
6705 ac_nir_fixup_ls_hs_input_vgprs(&ctx);
6706
6707 for (int i = 0; i < shader_count; ++i) {
6708 ctx.stage = shaders[i]->info.stage;
6709 ctx.output_mask = 0;
6710 ctx.tess_outputs_written = 0;
6711 ctx.num_output_clips = shaders[i]->info.clip_distance_array_size;
6712 ctx.num_output_culls = shaders[i]->info.cull_distance_array_size;
6713
6714 if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
6715 ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.ac.i32, "gs_next_vertex");
6716 ctx.gs_max_out_vertices = shaders[i]->info.gs.vertices_out;
6717 ctx.abi.load_inputs = load_gs_input;
6718 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
6719 ctx.tcs_outputs_read = shaders[i]->info.outputs_read;
6720 ctx.tcs_patch_outputs_read = shaders[i]->info.patch_outputs_read;
6721 ctx.abi.load_tess_inputs = load_tcs_input;
6722 ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
6723 ctx.abi.store_tcs_outputs = store_tcs_output;
6724 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_EVAL) {
6725 ctx.tes_primitive_mode = shaders[i]->info.tess.primitive_mode;
6726 ctx.abi.load_tess_inputs = load_tes_input;
6727 ctx.abi.load_tess_coord = load_tess_coord;
6728 ctx.abi.load_patch_vertices_in = load_patch_vertices_in;
6729 } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX) {
6730 if (shader_info->info.vs.needs_instance_id) {
6731 if (ctx.ac.chip_class == GFX9 &&
6732 shaders[shader_count - 1]->info.stage == MESA_SHADER_TESS_CTRL) {
6733 ctx.shader_info->vs.vgpr_comp_cnt =
6734 MAX2(2, ctx.shader_info->vs.vgpr_comp_cnt);
6735 } else {
6736 ctx.shader_info->vs.vgpr_comp_cnt =
6737 MAX2(1, ctx.shader_info->vs.vgpr_comp_cnt);
6738 }
6739 }
6740 } else if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT) {
6741 shader_info->fs.can_discard = shaders[i]->info.fs.uses_discard;
6742 }
6743
6744 if (i)
6745 emit_barrier(&ctx.ac, ctx.stage);
6746
6747 ac_setup_rings(&ctx);
6748
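/* With merged stages, only the first 'count' threads of the wave belong
 * to the current stage; merged_wave_info packs an 8-bit thread count per
 * stage, so the stage body is branched over for the remaining threads.
 */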
6749 LLVMBasicBlockRef merge_block;
6750 if (shader_count >= 2) {
6751 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx.ac.builder));
6752 LLVMBasicBlockRef then_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
6753 merge_block = LLVMAppendBasicBlockInContext(ctx.ac.context, fn, "");
6754
6755 LLVMValueRef count = ac_build_bfe(&ctx.ac, ctx.merged_wave_info,
6756 LLVMConstInt(ctx.ac.i32, 8 * i, false),
6757 LLVMConstInt(ctx.ac.i32, 8, false), false);
6758 LLVMValueRef thread_id = ac_get_thread_id(&ctx.ac);
6759 LLVMValueRef cond = LLVMBuildICmp(ctx.ac.builder, LLVMIntULT,
6760 thread_id, count, "");
6761 LLVMBuildCondBr(ctx.ac.builder, cond, then_block, merge_block);
6762
6763 LLVMPositionBuilderAtEnd(ctx.ac.builder, then_block);
6764 }
6765
6766 if (shaders[i]->info.stage == MESA_SHADER_FRAGMENT)
6767 handle_fs_inputs(&ctx, shaders[i]);
6768 else if (shaders[i]->info.stage == MESA_SHADER_VERTEX)
6769 handle_vs_inputs(&ctx, shaders[i]);
6770 else if (shader_count >= 2 && shaders[i]->info.stage == MESA_SHADER_GEOMETRY)
6771 prepare_gs_input_vgprs(&ctx);
6772
6773 nir_foreach_variable(variable, &shaders[i]->outputs)
6774 scan_shader_output_decl(&ctx, variable, shaders[i], shaders[i]->info.stage);
6775
6776 ac_nir_translate(&ctx.ac, &ctx.abi, shaders[i], &ctx);
6777
6778 if (shader_count >= 2) {
6779 LLVMBuildBr(ctx.ac.builder, merge_block);
6780 LLVMPositionBuilderAtEnd(ctx.ac.builder, merge_block);
6781 }
6782
6783 if (shaders[i]->info.stage == MESA_SHADER_GEOMETRY) {
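/* Each output slot occupies a vec4 (16 bytes) per vertex in the GSVS
 * ring; when clip + cull distances together exceed four components they
 * take a second slot, accounted for by addclip below.
 */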
6784 unsigned addclip = (shaders[i]->info.clip_distance_array_size +
6785 shaders[i]->info.cull_distance_array_size) > 4;
6786 shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
6787 shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
6788 shaders[i]->info.gs.vertices_out;
6789 } else if (shaders[i]->info.stage == MESA_SHADER_TESS_CTRL) {
6790 shader_info->tcs.outputs_written = ctx.tess_outputs_written;
6791 shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
6792 } else if (shaders[i]->info.stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
6793 shader_info->vs.outputs_written = ctx.tess_outputs_written;
6794 }
6795 }
6796
6797 LLVMBuildRetVoid(ctx.builder);
6798
6799 ac_llvm_finalize_module(&ctx);
6800
6801 if (shader_count == 1)
6802 ac_nir_eliminate_const_vs_outputs(&ctx);
6803
6804 return ctx.module;
6805 }
6806
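/* LLVM diagnostic callback: on an error, flag failure through the context
 * pointer (the caller's retval) and print the description; all other
 * severities are ignored.
 */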
6807 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
6808 {
6809 unsigned *retval = (unsigned *)context;
6810 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
6811 char *description = LLVMGetDiagInfoDescription(di);
6812
6813 if (severity == LLVMDSError) {
6814 *retval = 1;
6815 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
6816 description);
6817 }
6818
6819 LLVMDisposeMessage(description);
6820 }
6821
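/* Run LLVM codegen on the module and parse the resulting ELF object into
 * 'binary'.  Returns 0 on success, non-zero if emission failed or the
 * diagnostic handler reported an error.
 */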
6822 static unsigned ac_llvm_compile(LLVMModuleRef M,
6823 struct ac_shader_binary *binary,
6824 LLVMTargetMachineRef tm)
6825 {
6826 unsigned retval = 0;
6827 char *err;
6828 LLVMContextRef llvm_ctx;
6829 LLVMMemoryBufferRef out_buffer;
6830 unsigned buffer_size;
6831 const char *buffer_data;
6832 LLVMBool mem_err;
6833
6834 /* Set up the diagnostic handler */
6835 llvm_ctx = LLVMGetModuleContext(M);
6836
6837 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
6838 &retval);
6839
6840 /* Compile IR */
6841 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
6842 &err, &out_buffer);
6843
6844 /* Process Errors/Warnings */
6845 if (mem_err) {
6846 fprintf(stderr, "%s: %s\n", __func__, err);
6847 free(err);
6848 retval = 1;
6849 goto out;
6850 }
6851
6852 /* Extract Shader Code */
6853 buffer_size = LLVMGetBufferSize(out_buffer);
6854 buffer_data = LLVMGetBufferStart(out_buffer);
6855
6856 ac_elf_read(buffer_data, buffer_size, binary);
6857
6858 /* Clean up */
6859 LLVMDisposeMemoryBuffer(out_buffer);
6860
6861 out:
6862 return retval;
6863 }
6864
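/* Compile the finished module to machine code, read the hardware shader
 * config back from the binary, and derive the register counts the driver
 * needs.  The module and its LLVM context are destroyed here.
 */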
6865 static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
6866 LLVMModuleRef llvm_module,
6867 struct ac_shader_binary *binary,
6868 struct ac_shader_config *config,
6869 struct ac_shader_variant_info *shader_info,
6870 gl_shader_stage stage,
6871 bool dump_shader, bool supports_spill)
6872 {
6873 if (dump_shader)
6874 ac_dump_module(llvm_module);
6875
6876 memset(binary, 0, sizeof(*binary));
6877 int v = ac_llvm_compile(llvm_module, binary, tm);
6878 if (v) {
6879 fprintf(stderr, "compile failed\n");
6880 }
6881
6882 if (dump_shader)
6883 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
6884
6885 ac_shader_binary_read_config(binary, config, 0, supports_spill);
6886
6887 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
6888 LLVMDisposeModule(llvm_module);
6889 LLVMContextDispose(ctx);
6890
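/* Derive the fragment shader's input VGPR count from SPI_PS_INPUT_ADDR:
 * each enabled barycentric (I,J) pair costs two VGPRs, the pull-model set
 * costs three, and every other enabled field costs one.
 */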
6891 if (stage == MESA_SHADER_FRAGMENT) {
6892 shader_info->num_input_vgprs = 0;
6893 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
6894 shader_info->num_input_vgprs += 2;
6895 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
6896 shader_info->num_input_vgprs += 2;
6897 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
6898 shader_info->num_input_vgprs += 2;
6899 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
6900 shader_info->num_input_vgprs += 3;
6901 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
6902 shader_info->num_input_vgprs += 2;
6903 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
6904 shader_info->num_input_vgprs += 2;
6905 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
6906 shader_info->num_input_vgprs += 2;
6907 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
6908 shader_info->num_input_vgprs += 1;
6909 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
6910 shader_info->num_input_vgprs += 1;
6911 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
6912 shader_info->num_input_vgprs += 1;
6913 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
6914 shader_info->num_input_vgprs += 1;
6915 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
6916 shader_info->num_input_vgprs += 1;
6917 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
6918 shader_info->num_input_vgprs += 1;
6919 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
6920 shader_info->num_input_vgprs += 1;
6921 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
6922 shader_info->num_input_vgprs += 1;
6923 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
6924 shader_info->num_input_vgprs += 1;
6925 }
6926 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
6927
6928 /* +3 for scratch wave offset and VCC */
6929 config->num_sgprs = MAX2(config->num_sgprs,
6930 shader_info->num_input_sgprs + 3);
6931
6932 /* Enable 64-bit and 16-bit denormals, because there is no performance
6933 * cost.
6934 *
6935 * If denormals are enabled, all floating-point output modifiers are
6936 * ignored.
6937 *
6938 * Don't enable denormals for 32-bit floats, because:
6939 * - Floating-point output modifiers would be ignored by the hw.
6940 * - Some opcodes don't support denormals, such as v_mad_f32. We would
6941 * have to stop using those.
6942 * - SI & CI would be very slow.
6943 */
6944 config->float_mode |= V_00B028_FP_64_DENORMS;
6945 }
6946
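/* Copy stage-specific execution parameters from the NIR shader info into
 * the variant info consumed by the driver.
 */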
6947 static void
6948 ac_fill_shader_info(struct ac_shader_variant_info *shader_info, struct nir_shader *nir, const struct ac_nir_compiler_options *options)
6949 {
6950 switch (nir->info.stage) {
6951 case MESA_SHADER_COMPUTE:
6952 for (int i = 0; i < 3; ++i)
6953 shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
6954 break;
6955 case MESA_SHADER_FRAGMENT:
6956 shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
6957 break;
6958 case MESA_SHADER_GEOMETRY:
6959 shader_info->gs.vertices_in = nir->info.gs.vertices_in;
6960 shader_info->gs.vertices_out = nir->info.gs.vertices_out;
6961 shader_info->gs.output_prim = nir->info.gs.output_primitive;
6962 shader_info->gs.invocations = nir->info.gs.invocations;
6963 break;
6964 case MESA_SHADER_TESS_EVAL:
6965 shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
6966 shader_info->tes.spacing = nir->info.tess.spacing;
6967 shader_info->tes.ccw = nir->info.tess.ccw;
6968 shader_info->tes.point_mode = nir->info.tess.point_mode;
6969 shader_info->tes.as_es = options->key.tes.as_es;
6970 break;
6971 case MESA_SHADER_TESS_CTRL:
6972 shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
6973 break;
6974 case MESA_SHADER_VERTEX:
6975 shader_info->vs.as_es = options->key.vs.as_es;
6976 shader_info->vs.as_ls = options->key.vs.as_ls;
6977 /* in LS mode we need at least 1, invocation id needs 2, handled elsewhere */
6978 if (options->key.vs.as_ls)
6979 shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
6980 break;
6981 default:
6982 break;
6983 }
6984 }
6985
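/* Main entry point for NIR shader compilation: translate the shaders into
 * a single LLVM module, compile it, and fill in the per-stage variant
 * info.  A minimal call sketch (the shader and option objects here are
 * illustrative, not part of this file):
 *
 *   struct ac_shader_binary binary;
 *   struct ac_shader_config config;
 *   struct ac_shader_variant_info info;
 *   struct nir_shader *shaders[] = { vs_nir };  // hypothetical VS
 *   ac_compile_nir_shader(tm, &binary, &config, &info,
 *                         shaders, 1, &options, false);
 */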
6986 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
6987 struct ac_shader_binary *binary,
6988 struct ac_shader_config *config,
6989 struct ac_shader_variant_info *shader_info,
6990 struct nir_shader *const *nir,
6991 int nir_count,
6992 const struct ac_nir_compiler_options *options,
6993 bool dump_shader)
6994 {
6995
6996 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, nir_count, shader_info,
6997 options);
6998
6999 ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir[0]->info.stage, dump_shader, options->supports_spill);
7000 for (int i = 0; i < nir_count; ++i)
7001 ac_fill_shader_info(shader_info, nir[i], options);
7002
7003 /* Determine the ES type (VS or TES) for the GS on GFX9. */
7004 if (options->chip_class == GFX9) {
7005 if (nir_count == 2 &&
7006 nir[1]->info.stage == MESA_SHADER_GEOMETRY) {
7007 shader_info->gs.es_type = nir[0]->info.stage;
7008 }
7009 }
7010 }
7011
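/* Emit the body of the GS copy shader: for every output slot the geometry
 * shader wrote, load the components back from the GSVS ring (indexed by
 * vertex id) and export them through the regular VS output path.
 */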
7012 static void
7013 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
7014 {
7015 LLVMValueRef args[9];
7016 args[0] = ctx->gsvs_ring;
7017 args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->ac.i32, 4, false), "");
7018 args[3] = ctx->ac.i32_0;
7019 args[4] = ctx->ac.i32_1; /* OFFEN */
7020 args[5] = ctx->ac.i32_0; /* IDXEN */
7021 args[6] = ctx->ac.i32_1; /* GLC */
7022 args[7] = ctx->ac.i32_1; /* SLC */
7023 args[8] = ctx->ac.i32_0; /* TFE */
7024
7025 int idx = 0;
7026
7027 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
7028 unsigned length = 4;
7029 int slot = idx;
7030 int slot_inc = 1;
7031 if (!(ctx->output_mask & (1ull << i)))
7032 continue;
7033
7034 if (i == VARYING_SLOT_CLIP_DIST0) {
7035 /* unpack clip and cull from a single set of slots */
7036 length = ctx->num_output_clips + ctx->num_output_culls;
7037 if (length > 4)
7038 slot_inc = 2;
7039 }
7040
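/* Constant ring offset for component j of this slot; components are read
 * at a stride of gs_max_out_vertices * 16 * 4 bytes, mirroring the layout
 * used when the GS stored its outputs to the GSVS ring.
 */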
7041 for (unsigned j = 0; j < length; j++) {
7042 LLVMValueRef value;
7043 args[2] = LLVMConstInt(ctx->ac.i32,
7044 (slot * 4 + j) *
7045 ctx->gs_max_out_vertices * 16 * 4, false);
7046
7047 value = ac_build_intrinsic(&ctx->ac,
7048 "llvm.SI.buffer.load.dword.i32.i32",
7049 ctx->ac.i32, args, 9,
7050 AC_FUNC_ATTR_READONLY |
7051 AC_FUNC_ATTR_LEGACY);
7052
7053 LLVMBuildStore(ctx->builder,
7054 ac_to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
7055 }
7056 idx += slot_inc;
7057 }
7058 handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
7059 }
7060
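/* Build and compile the standalone GS copy shader: a hardware vertex
 * shader that reads the geometry shader's results from the GSVS ring and
 * performs the actual parameter/position exports on its behalf.
 */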
7061 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
7062 struct nir_shader *geom_shader,
7063 struct ac_shader_binary *binary,
7064 struct ac_shader_config *config,
7065 struct ac_shader_variant_info *shader_info,
7066 const struct ac_nir_compiler_options *options,
7067 bool dump_shader)
7068 {
7069 struct nir_to_llvm_context ctx = {0};
7070 ctx.context = LLVMContextCreate();
7071 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
7072 ctx.options = options;
7073 ctx.shader_info = shader_info;
7074
7075 ac_llvm_context_init(&ctx.ac, ctx.context, options->chip_class,
7076 options->family);
7077 ctx.ac.module = ctx.module;
7078
7079 ctx.is_gs_copy_shader = true;
7080 LLVMSetTarget(ctx.module, "amdgcn--");
7081
7082 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
7083 ctx.ac.builder = ctx.builder;
7084 ctx.stage = MESA_SHADER_VERTEX;
7085
7086 create_function(&ctx, MESA_SHADER_VERTEX, false, MESA_SHADER_VERTEX);
7087
7088 ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
7089 ac_setup_rings(&ctx);
7090
7091 ctx.num_output_clips = geom_shader->info.clip_distance_array_size;
7092 ctx.num_output_culls = geom_shader->info.cull_distance_array_size;
7093
7094 struct ac_nir_context nir_ctx = {0};
7095 nir_ctx.ac = ctx.ac;
7096 nir_ctx.abi = &ctx.abi;
7097
7098 nir_ctx.nctx = &ctx;
7099 ctx.nir = &nir_ctx;
7100
7101 nir_foreach_variable(variable, &geom_shader->outputs) {
7102 scan_shader_output_decl(&ctx, variable, geom_shader, MESA_SHADER_VERTEX);
7103 handle_shader_output_decl(&nir_ctx, geom_shader, variable);
7104 }
7105
7106 ac_gs_copy_shader_emit(&ctx);
7107
7108 ctx.nir = NULL;
7109
7110 LLVMBuildRetVoid(ctx.builder);
7111
7112 ac_llvm_finalize_module(&ctx);
7113
7114 ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info,
7115 MESA_SHADER_VERTEX,
7116 dump_shader, options->supports_spill);
7117 }