ac/nir: split scanning outputs from setting up output allocas
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_build.h"
26 #include "ac_llvm_util.h"
27 #include "ac_binary.h"
28 #include "sid.h"
29 #include "nir/nir.h"
30 #include "../vulkan/radv_descriptor_set.h"
31 #include "util/bitscan.h"
32 #include <llvm-c/Transforms/Scalar.h>
33 #include "ac_shader_abi.h"
34 #include "ac_shader_info.h"
35 #include "ac_exp_param.h"
36
37 enum radeon_llvm_calling_convention {
38 RADEON_LLVM_AMDGPU_VS = 87,
39 RADEON_LLVM_AMDGPU_GS = 88,
40 RADEON_LLVM_AMDGPU_PS = 89,
41 RADEON_LLVM_AMDGPU_CS = 90,
42 };
43
44 #define CONST_ADDR_SPACE 2
45 #define LOCAL_ADDR_SPACE 3
46
47 #define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
48 #define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
49
50 enum desc_type {
51 DESC_IMAGE,
52 DESC_FMASK,
53 DESC_SAMPLER,
54 DESC_BUFFER,
55 };
56
57 struct nir_to_llvm_context;
58
59 struct ac_nir_context {
60 struct ac_llvm_context ac;
61 struct ac_shader_abi *abi;
62
63 gl_shader_stage stage;
64
65 struct hash_table *defs;
66 struct hash_table *phis;
67 struct hash_table *vars;
68
69 LLVMBasicBlockRef continue_block;
70 LLVMBasicBlockRef break_block;
71
72 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
73
74 int num_locals;
75 LLVMValueRef *locals;
76
77 struct nir_to_llvm_context *nctx; /* TODO get rid of this */
78 };
79
80 struct nir_to_llvm_context {
81 struct ac_llvm_context ac;
82 const struct ac_nir_compiler_options *options;
83 struct ac_shader_variant_info *shader_info;
84 struct ac_shader_abi abi;
85 struct ac_nir_context *nir;
86
87 unsigned max_workgroup_size;
88 LLVMContextRef context;
89 LLVMModuleRef module;
90 LLVMBuilderRef builder;
91 LLVMValueRef main_function;
92
93 struct hash_table *defs;
94 struct hash_table *phis;
95
96 LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
97 LLVMValueRef ring_offsets;
98 LLVMValueRef push_constants;
99 LLVMValueRef num_work_groups;
100 LLVMValueRef workgroup_ids;
101 LLVMValueRef local_invocation_ids;
102 LLVMValueRef tg_size;
103
104 LLVMValueRef vertex_buffers;
105 LLVMValueRef rel_auto_id;
106 LLVMValueRef vs_prim_id;
107 LLVMValueRef ls_out_layout;
108 LLVMValueRef es2gs_offset;
109
110 LLVMValueRef tcs_offchip_layout;
111 LLVMValueRef tcs_out_offsets;
112 LLVMValueRef tcs_out_layout;
113 LLVMValueRef tcs_in_layout;
114 LLVMValueRef oc_lds;
115 LLVMValueRef tess_factor_offset;
116 LLVMValueRef tcs_patch_id;
117 LLVMValueRef tcs_rel_ids;
118 LLVMValueRef tes_rel_patch_id;
119 LLVMValueRef tes_patch_id;
120 LLVMValueRef tes_u;
121 LLVMValueRef tes_v;
122
123 LLVMValueRef gsvs_ring_stride;
124 LLVMValueRef gsvs_num_entries;
125 LLVMValueRef gs2vs_offset;
126 LLVMValueRef gs_wave_id;
127 LLVMValueRef gs_vtx_offset[6];
128 LLVMValueRef gs_prim_id, gs_invocation_id;
129
130 LLVMValueRef esgs_ring;
131 LLVMValueRef gsvs_ring;
132 LLVMValueRef hs_ring_tess_offchip;
133 LLVMValueRef hs_ring_tess_factor;
134
135 LLVMValueRef prim_mask;
136 LLVMValueRef sample_pos_offset;
137 LLVMValueRef persp_sample, persp_center, persp_centroid;
138 LLVMValueRef linear_sample, linear_center, linear_centroid;
139 LLVMValueRef front_face;
140 LLVMValueRef ancillary;
141 LLVMValueRef sample_coverage;
142 LLVMValueRef frag_pos[4];
143
144 LLVMTypeRef i1;
145 LLVMTypeRef i8;
146 LLVMTypeRef i16;
147 LLVMTypeRef i32;
148 LLVMTypeRef i64;
149 LLVMTypeRef v2i32;
150 LLVMTypeRef v3i32;
151 LLVMTypeRef v4i32;
152 LLVMTypeRef v8i32;
153 LLVMTypeRef f64;
154 LLVMTypeRef f32;
155 LLVMTypeRef f16;
156 LLVMTypeRef v2f32;
157 LLVMTypeRef v4f32;
158 LLVMTypeRef voidt;
159
160 LLVMValueRef i1true;
161 LLVMValueRef i1false;
162 LLVMValueRef i32zero;
163 LLVMValueRef i32one;
164 LLVMValueRef f32zero;
165 LLVMValueRef f32one;
166 LLVMValueRef v4f32empty;
167
168 unsigned uniform_md_kind;
169 LLVMValueRef empty_md;
170 gl_shader_stage stage;
171
172 LLVMValueRef lds;
173 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
174
175 uint64_t input_mask;
176 uint64_t output_mask;
177 uint8_t num_output_clips;
178 uint8_t num_output_culls;
179
180 bool has_ds_bpermute;
181
182 bool is_gs_copy_shader;
183 LLVMValueRef gs_next_vertex;
184 unsigned gs_max_out_vertices;
185
186 unsigned tes_primitive_mode;
187 uint64_t tess_outputs_written;
188 uint64_t tess_patch_outputs_written;
189 };
190
191 static inline struct nir_to_llvm_context *
192 nir_to_llvm_context_from_abi(struct ac_shader_abi *abi)
193 {
194 struct nir_to_llvm_context *ctx = NULL;
195 return container_of(abi, ctx, abi);
196 }
197
198 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
199 const nir_deref_var *deref,
200 enum desc_type desc_type);
201 static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
202 {
203 return (index * 4) + chan;
204 }
205
206 static unsigned shader_io_get_unique_index(gl_varying_slot slot)
207 {
208 /* handle patch indices separately */
209 if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
210 return 0;
211 if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
212 return 1;
213 if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
214 return 2 + (slot - VARYING_SLOT_PATCH0);
215
216 if (slot == VARYING_SLOT_POS)
217 return 0;
218 if (slot == VARYING_SLOT_PSIZ)
219 return 1;
220 if (slot == VARYING_SLOT_CLIP_DIST0)
221 return 2;
222 /* 3 is reserved for the second clip dist slot as well */
223 if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
224 return 4 + (slot - VARYING_SLOT_VAR0);
225 unreachable("illegal slot in get unique index\n");
226 }
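/* Worked example (illustrative): a per-patch output at
 * VARYING_SLOT_PATCH3 maps to 2 + 3 = 5, while a per-vertex varying at
 * VARYING_SLOT_VAR5 maps to 4 + 5 = 9. The two ranges may overlap
 * because patch and per-vertex outputs are laid out in separate LDS
 * regions (see the LDS layout comment further down).
 */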
227
228 static unsigned llvm_get_type_size(LLVMTypeRef type)
229 {
230 LLVMTypeKind kind = LLVMGetTypeKind(type);
231
232 switch (kind) {
233 case LLVMIntegerTypeKind:
234 return LLVMGetIntTypeWidth(type) / 8;
235 case LLVMFloatTypeKind:
236 return 4;
237 case LLVMPointerTypeKind:
238 return 8;
239 case LLVMVectorTypeKind:
240 return LLVMGetVectorSize(type) *
241 llvm_get_type_size(LLVMGetElementType(type));
242 default:
243 assert(0);
244 return 0;
245 }
246 }
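/* Example: llvm_get_type_size(ctx->v4i32) = 4 * (32 / 8) = 16 bytes.
 * Pointers are assumed to be 64-bit (8 bytes) here.
 */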
247
248 static void set_llvm_calling_convention(LLVMValueRef func,
249 gl_shader_stage stage)
250 {
251 enum radeon_llvm_calling_convention calling_conv;
252
253 switch (stage) {
254 case MESA_SHADER_VERTEX:
255 case MESA_SHADER_TESS_CTRL:
256 case MESA_SHADER_TESS_EVAL:
257 calling_conv = RADEON_LLVM_AMDGPU_VS;
258 break;
259 case MESA_SHADER_GEOMETRY:
260 calling_conv = RADEON_LLVM_AMDGPU_GS;
261 break;
262 case MESA_SHADER_FRAGMENT:
263 calling_conv = RADEON_LLVM_AMDGPU_PS;
264 break;
265 case MESA_SHADER_COMPUTE:
266 calling_conv = RADEON_LLVM_AMDGPU_CS;
267 break;
268 default:
269 unreachable("Unhandled shader type");
270 }
271
272 LLVMSetFunctionCallConv(func, calling_conv);
273 }
274
275 #define MAX_ARGS 23
276 struct arg_info {
277 LLVMTypeRef types[MAX_ARGS];
278 LLVMValueRef *assign[MAX_ARGS];
279 unsigned array_params_mask;
280 uint8_t count;
281 uint8_t user_sgpr_count;
282 uint8_t sgpr_count;
283 uint8_t num_user_sgprs_used;
284 uint8_t num_sgprs_used;
285 uint8_t num_vgprs_used;
286 };
287
288 static inline void
289 add_argument(struct arg_info *info,
290 LLVMTypeRef type, LLVMValueRef *param_ptr)
291 {
292 assert(info->count < MAX_ARGS);
293 info->assign[info->count] = param_ptr;
294 info->types[info->count] = type;
295 info->count++;
296 }
297
298 static inline void
299 add_sgpr_argument(struct arg_info *info,
300 LLVMTypeRef type, LLVMValueRef *param_ptr)
301 {
302 add_argument(info, type, param_ptr);
303 info->num_sgprs_used += llvm_get_type_size(type) / 4;
304 info->sgpr_count++;
305 }
306
307 static inline void
308 add_user_sgpr_argument(struct arg_info *info,
309 LLVMTypeRef type,
310 LLVMValueRef *param_ptr)
311 {
312 add_sgpr_argument(info, type, param_ptr);
313 info->num_user_sgprs_used += llvm_get_type_size(type) / 4;
314 info->user_sgpr_count++;
315 }
316
317 static inline void
318 add_vgpr_argument(struct arg_info *info,
319 LLVMTypeRef type,
320 LLVMValueRef *param_ptr)
321 {
322 add_argument(info, type, param_ptr);
323 info->num_vgprs_used += llvm_get_type_size(type) / 4;
324 }
325
326 static inline void
327 add_user_sgpr_array_argument(struct arg_info *info,
328 LLVMTypeRef type,
329 LLVMValueRef *param_ptr)
330 {
331 info->array_params_mask |= (1 << info->count);
332 add_user_sgpr_argument(info, type, param_ptr);
333 }
334
335 static void assign_arguments(LLVMValueRef main_function,
336 struct arg_info *info)
337 {
338 unsigned i;
339 for (i = 0; i < info->count; i++) {
340 if (info->assign[i])
341 *info->assign[i] = LLVMGetParam(main_function, i);
342 }
343 }
344
345 static LLVMValueRef
346 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
347 LLVMBuilderRef builder, LLVMTypeRef *return_types,
348 unsigned num_return_elems,
349 struct arg_info *args,
350 unsigned max_workgroup_size,
351 bool unsafe_math)
352 {
353 LLVMTypeRef main_function_type, ret_type;
354 LLVMBasicBlockRef main_function_body;
355
356 if (num_return_elems)
357 ret_type = LLVMStructTypeInContext(ctx, return_types,
358 num_return_elems, true);
359 else
360 ret_type = LLVMVoidTypeInContext(ctx);
361
362 /* Set up the function */
363 main_function_type =
364 LLVMFunctionType(ret_type, args->types, args->count, 0);
365 LLVMValueRef main_function =
366 LLVMAddFunction(module, "main", main_function_type);
367 main_function_body =
368 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
369 LLVMPositionBuilderAtEnd(builder, main_function_body);
370
371 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
372 for (unsigned i = 0; i < args->sgpr_count; ++i) {
373 if (args->array_params_mask & (1 << i)) {
374 LLVMValueRef P = LLVMGetParam(main_function, i);
375 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
376 ac_add_attr_dereferenceable(P, UINT64_MAX);
377 }
378 else {
379 ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
380 }
381 }
382
383 if (max_workgroup_size) {
384 ac_llvm_add_target_dep_function_attr(main_function,
385 "amdgpu-max-work-group-size",
386 max_workgroup_size);
387 }
388 if (unsafe_math) {
389 /* These were copied from some LLVM test. */
390 LLVMAddTargetDependentFunctionAttr(main_function,
391 "less-precise-fpmad",
392 "true");
393 LLVMAddTargetDependentFunctionAttr(main_function,
394 "no-infs-fp-math",
395 "true");
396 LLVMAddTargetDependentFunctionAttr(main_function,
397 "no-nans-fp-math",
398 "true");
399 LLVMAddTargetDependentFunctionAttr(main_function,
400 "unsafe-fp-math",
401 "true");
402 }
403 return main_function;
404 }
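/* Typical use of the arg_info helpers (a minimal illustrative sketch;
 * some_layout/some_id are made-up fields, not part of this file):
 *
 *   struct arg_info args = {};
 *   add_user_sgpr_argument(&args, ctx->i32, &ctx->some_layout);
 *   add_vgpr_argument(&args, ctx->i32, &ctx->some_id);
 *   func = create_llvm_function(ctx->context, ctx->module, ctx->builder,
 *                               NULL, 0, &args, max_workgroup_size,
 *                               unsafe_math);
 *   assign_arguments(func, &args);
 *
 * After assign_arguments(), ctx->some_layout and ctx->some_id hold the
 * corresponding LLVM function parameters.
 */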
405
406 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
407 {
408 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
409 CONST_ADDR_SPACE);
410 }
411
412 static LLVMTypeRef to_integer_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
413 {
414 if (t == ctx->f16 || t == ctx->i16)
415 return ctx->i16;
416 else if (t == ctx->f32 || t == ctx->i32)
417 return ctx->i32;
418 else if (t == ctx->f64 || t == ctx->i64)
419 return ctx->i64;
420 else
421 unreachable("Unhandled integer size");
422 }
423
424 static LLVMTypeRef to_integer_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
425 {
426 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
427 LLVMTypeRef elem_type = LLVMGetElementType(t);
428 return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
429 LLVMGetVectorSize(t));
430 }
431 return to_integer_type_scalar(ctx, t);
432 }
433
434 static LLVMValueRef to_integer(struct ac_llvm_context *ctx, LLVMValueRef v)
435 {
436 LLVMTypeRef type = LLVMTypeOf(v);
437 return LLVMBuildBitCast(ctx->builder, v, to_integer_type(ctx, type), "");
438 }
439
440 static LLVMTypeRef to_float_type_scalar(struct ac_llvm_context *ctx, LLVMTypeRef t)
441 {
442 if (t == ctx->i16 || t == ctx->f16)
443 return ctx->f16;
444 else if (t == ctx->i32 || t == ctx->f32)
445 return ctx->f32;
446 else if (t == ctx->i64 || t == ctx->f64)
447 return ctx->f64;
448 else
449 unreachable("Unhandled float size");
450 }
451
452 static LLVMTypeRef to_float_type(struct ac_llvm_context *ctx, LLVMTypeRef t)
453 {
454 if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
455 LLVMTypeRef elem_type = LLVMGetElementType(t);
456 return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
457 LLVMGetVectorSize(t));
458 }
459 return to_float_type_scalar(ctx, t);
460 }
461
462 static LLVMValueRef to_float(struct ac_llvm_context *ctx, LLVMValueRef v)
463 {
464 LLVMTypeRef type = LLVMTypeOf(v);
465 return LLVMBuildBitCast(ctx->builder, v, to_float_type(ctx, type), "");
466 }
467
468 static int get_elem_bits(struct ac_llvm_context *ctx, LLVMTypeRef type)
469 {
470 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
471 type = LLVMGetElementType(type);
472
473 if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
474 return LLVMGetIntTypeWidth(type);
475
476 if (type == ctx->f16)
477 return 16;
478 if (type == ctx->f32)
479 return 32;
480 if (type == ctx->f64)
481 return 64;
482
483 unreachable("Unhandled type kind in get_elem_bits");
484 }
485
486 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
487 LLVMValueRef param, unsigned rshift,
488 unsigned bitwidth)
489 {
490 LLVMValueRef value = param;
491 if (rshift)
492 value = LLVMBuildLShr(ctx->builder, value,
493 LLVMConstInt(ctx->i32, rshift, false), "");
494
495 if (rshift + bitwidth < 32) {
496 unsigned mask = (1 << bitwidth) - 1;
497 value = LLVMBuildAnd(ctx->builder, value,
498 LLVMConstInt(ctx->i32, mask, false), "");
499 }
500 return value;
501 }
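/* Example: unpack_param(ctx, ctx->tcs_rel_ids, 0, 8) masks off bits
 * [7:0] (the relative patch id), while unpack_param(ctx, param, 16, 16)
 * shifts right by 16 and keeps the upper half-word. When
 * rshift + bitwidth == 32 the AND is skipped, since the shift alone
 * already isolates the field.
 */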
502
503 static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
504 {
505 switch (ctx->stage) {
506 case MESA_SHADER_TESS_CTRL:
507 return unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
508 case MESA_SHADER_TESS_EVAL:
509 return ctx->tes_rel_patch_id;
511 default:
512 unreachable("Illegal stage");
513 }
514 }
515
516 /* Tessellation shaders pass outputs to the next shader using LDS.
517 *
518 * LS outputs = TCS inputs
519 * TCS outputs = TES inputs
520 *
521 * The LDS layout is:
522 * - TCS inputs for patch 0
523 * - TCS inputs for patch 1
524 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
525 * - ...
526 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
527 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
528 * - TCS outputs for patch 1
529 * - Per-patch TCS outputs for patch 1
530 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
531 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
532 * - ...
533 *
534 * All three shaders VS(LS), TCS, TES share the same LDS space.
535 */
536 static LLVMValueRef
537 get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
538 {
539 if (ctx->stage == MESA_SHADER_VERTEX)
540 return unpack_param(ctx, ctx->ls_out_layout, 0, 13);
541 else if (ctx->stage == MESA_SHADER_TESS_CTRL)
542 return unpack_param(ctx, ctx->tcs_in_layout, 0, 13);
543 else {
544 assert(0);
545 return NULL;
546 }
547 }
548
549 static LLVMValueRef
550 get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
551 {
552 return unpack_param(ctx, ctx->tcs_out_layout, 0, 13);
553 }
554
555 static LLVMValueRef
556 get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
557 {
558 return LLVMBuildMul(ctx->builder,
559 unpack_param(ctx, ctx->tcs_out_offsets, 0, 16),
560 LLVMConstInt(ctx->i32, 4, false), "");
561 }
562
563 static LLVMValueRef
564 get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
565 {
566 return LLVMBuildMul(ctx->builder,
567 unpack_param(ctx, ctx->tcs_out_offsets, 16, 16),
568 LLVMConstInt(ctx->i32, 4, false), "");
569 }
570
571 static LLVMValueRef
572 get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
573 {
574 LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
575 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
576
577 return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
578 }
579
580 static LLVMValueRef
581 get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx)
582 {
583 LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
584 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
585 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
586
587 return LLVMBuildAdd(ctx->builder, patch0_offset,
588 LLVMBuildMul(ctx->builder, patch_stride,
589 rel_patch_id, ""),
590 "");
591 }
592
593 static LLVMValueRef
594 get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx)
595 {
596 LLVMValueRef patch0_patch_data_offset =
597 get_tcs_out_patch0_patch_data_offset(ctx);
598 LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
599 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
600
601 return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset,
602 LLVMBuildMul(ctx->builder, patch_stride,
603 rel_patch_id, ""),
604 "");
605 }
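/* Illustrative (values assumed): with rel_patch_id == 2 and an output
 * patch stride of S dwords, the current patch's outputs start at
 * patch0_offset + 2 * S, matching the LDS layout diagram above.
 */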
606
607 static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t *sgpr_idx, uint8_t num_sgprs)
608 {
609 ud_info->sgpr_idx = *sgpr_idx;
610 ud_info->num_sgprs = num_sgprs;
611 ud_info->indirect = false;
612 ud_info->indirect_offset = 0;
613 *sgpr_idx += num_sgprs;
614 }
615
616 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
617 int idx, uint8_t *sgpr_idx, uint8_t num_sgprs)
618 {
619 set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
620 }
621
622
623 static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
624 uint32_t indirect_offset)
625 {
626 ud_info->sgpr_idx = sgpr_idx;
627 ud_info->num_sgprs = num_sgprs;
628 ud_info->indirect = true;
629 ud_info->indirect_offset = indirect_offset;
630 }
631
632 static void declare_tess_lds(struct nir_to_llvm_context *ctx)
633 {
634 unsigned lds_size = ctx->options->chip_class >= CIK ? 65536 : 32768;
635 ctx->lds = LLVMBuildIntToPtr(ctx->builder, ctx->i32zero,
636 LLVMPointerType(LLVMArrayType(ctx->i32, lds_size / 4), LOCAL_ADDR_SPACE),
637 "tess_lds");
638 }
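/* The IntToPtr of 0 yields a pointer to the base of LDS (address space
 * 3); the array type merely bounds it at the 64KB (CIK+) or 32KB limit.
 */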
639
640 struct user_sgpr_info {
641 bool need_ring_offsets;
642 uint8_t sgpr_count;
643 bool indirect_all_descriptor_sets;
644 };
645
646 static void allocate_user_sgprs(struct nir_to_llvm_context *ctx,
647 struct user_sgpr_info *user_sgpr_info)
648 {
649 memset(user_sgpr_info, 0, sizeof(struct user_sgpr_info));
650
651 /* until we sort out scratch/global buffers, always assign ring offsets for gs/vs/es */
652 if (ctx->stage == MESA_SHADER_GEOMETRY ||
653 ctx->stage == MESA_SHADER_VERTEX ||
654 ctx->stage == MESA_SHADER_TESS_CTRL ||
655 ctx->stage == MESA_SHADER_TESS_EVAL ||
656 ctx->is_gs_copy_shader)
657 user_sgpr_info->need_ring_offsets = true;
658
659 if (ctx->stage == MESA_SHADER_FRAGMENT &&
660 ctx->shader_info->info.ps.needs_sample_positions)
661 user_sgpr_info->need_ring_offsets = true;
662
663 /* 2 user sgprs will nearly always be allocated for scratch/rings */
664 if (ctx->options->supports_spill || user_sgpr_info->need_ring_offsets) {
665 user_sgpr_info->sgpr_count += 2;
666 }
667
668 switch (ctx->stage) {
669 case MESA_SHADER_COMPUTE:
670 user_sgpr_info->sgpr_count += ctx->shader_info->info.cs.grid_components_used;
671 break;
672 case MESA_SHADER_FRAGMENT:
673 user_sgpr_info->sgpr_count += ctx->shader_info->info.ps.needs_sample_positions;
674 break;
675 case MESA_SHADER_VERTEX:
676 if (!ctx->is_gs_copy_shader) {
677 user_sgpr_info->sgpr_count += ctx->shader_info->info.vs.has_vertex_buffers ? 2 : 0;
678 if (ctx->shader_info->info.vs.needs_draw_id) {
679 user_sgpr_info->sgpr_count += 3;
680 } else {
681 user_sgpr_info->sgpr_count += 2;
682 }
683 }
684 if (ctx->options->key.vs.as_ls)
685 user_sgpr_info->sgpr_count++;
686 break;
687 case MESA_SHADER_TESS_CTRL:
688 user_sgpr_info->sgpr_count += 4;
689 break;
690 case MESA_SHADER_TESS_EVAL:
691 user_sgpr_info->sgpr_count += 1;
692 break;
693 case MESA_SHADER_GEOMETRY:
694 user_sgpr_info->sgpr_count += 2;
695 break;
696 default:
697 break;
698 }
699
700 if (ctx->shader_info->info.needs_push_constants)
701 user_sgpr_info->sgpr_count += 2;
702
703 uint32_t remaining_sgprs = 16 - user_sgpr_info->sgpr_count;
704 if (remaining_sgprs / 2 < util_bitcount(ctx->shader_info->info.desc_set_used_mask)) {
705 user_sgpr_info->sgpr_count += 2;
706 user_sgpr_info->indirect_all_descriptor_sets = true;
707 } else {
708 user_sgpr_info->sgpr_count += util_bitcount(ctx->shader_info->info.desc_set_used_mask) * 2;
709 }
710 }
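/* Budget example (hypothetical vertex shader with vertex buffers, draw
 * id and push constants): 2 (rings/spill) + 2 (vertex buffers) + 3
 * (base vertex/start instance/draw id) + 2 (push constants) = 9 SGPRs,
 * leaving 16 - 9 = 7; 7 / 2 = 3 descriptor sets fit directly before
 * the indirect path is taken.
 */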
711
712 static void create_function(struct nir_to_llvm_context *ctx)
713 {
714 unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
715 uint8_t user_sgpr_idx;
716 struct user_sgpr_info user_sgpr_info;
717 struct arg_info args = {};
718 LLVMValueRef desc_sets;
719
720 allocate_user_sgprs(ctx, &user_sgpr_info);
721 if (user_sgpr_info.need_ring_offsets && !ctx->options->supports_spill) {
722 add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->ring_offsets); /* address of rings */
723 }
724
725 /* 1 for each descriptor set */
726 if (!user_sgpr_info.indirect_all_descriptor_sets) {
727 for (unsigned i = 0; i < num_sets; ++i) {
728 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
729 add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->descriptor_sets[i]);
730 }
731 }
732 } else
733 add_user_sgpr_array_argument(&args, const_array(const_array(ctx->i8, 1024 * 1024), 32), &desc_sets);
734
735 if (ctx->shader_info->info.needs_push_constants) {
736 /* 1 for push constants and dynamic descriptors */
737 add_user_sgpr_array_argument(&args, const_array(ctx->i8, 1024 * 1024), &ctx->push_constants);
738 }
739
740 switch (ctx->stage) {
741 case MESA_SHADER_COMPUTE:
742 if (ctx->shader_info->info.cs.grid_components_used)
743 add_user_sgpr_argument(&args, LLVMVectorType(ctx->i32, ctx->shader_info->info.cs.grid_components_used), &ctx->num_work_groups); /* grid size */
744 add_sgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->workgroup_ids);
745 add_sgpr_argument(&args, ctx->i32, &ctx->tg_size);
746 add_vgpr_argument(&args, LLVMVectorType(ctx->i32, 3), &ctx->local_invocation_ids);
747 break;
748 case MESA_SHADER_VERTEX:
749 if (!ctx->is_gs_copy_shader) {
750 if (ctx->shader_info->info.vs.has_vertex_buffers)
751 add_user_sgpr_argument(&args, const_array(ctx->v4i32, 16), &ctx->vertex_buffers); /* vertex buffers */
752 add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.base_vertex); // base vertex
753 add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.start_instance); // start instance
754 if (ctx->shader_info->info.vs.needs_draw_id)
755 add_user_sgpr_argument(&args, ctx->i32, &ctx->abi.draw_id); // draw id
756 }
757 if (ctx->options->key.vs.as_es)
758 add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset
759 else if (ctx->options->key.vs.as_ls)
760 add_user_sgpr_argument(&args, ctx->i32, &ctx->ls_out_layout); // ls out layout
761 add_vgpr_argument(&args, ctx->i32, &ctx->abi.vertex_id); // vertex id
762 if (!ctx->is_gs_copy_shader) {
763 add_vgpr_argument(&args, ctx->i32, &ctx->rel_auto_id); // rel auto id
764 add_vgpr_argument(&args, ctx->i32, &ctx->vs_prim_id); // vs prim id
765 add_vgpr_argument(&args, ctx->i32, &ctx->abi.instance_id); // instance id
766 }
767 break;
768 case MESA_SHADER_TESS_CTRL:
769 add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
770 add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_offsets); // tcs out offsets
771 add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_out_layout); // tcs out layout
772 add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_in_layout); // tcs in layout
773 add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // param oc lds
774 add_sgpr_argument(&args, ctx->i32, &ctx->tess_factor_offset); // tess factor offset
775 add_vgpr_argument(&args, ctx->i32, &ctx->tcs_patch_id); // patch id
776 add_vgpr_argument(&args, ctx->i32, &ctx->tcs_rel_ids); // rel ids
777 break;
778 case MESA_SHADER_TESS_EVAL:
779 add_user_sgpr_argument(&args, ctx->i32, &ctx->tcs_offchip_layout); // tcs offchip layout
780 if (ctx->options->key.tes.as_es) {
781 add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS
782 add_sgpr_argument(&args, ctx->i32, NULL); // unused
783 add_sgpr_argument(&args, ctx->i32, &ctx->es2gs_offset); // es2gs offset
784 } else {
785 add_sgpr_argument(&args, ctx->i32, NULL); // unused
786 add_sgpr_argument(&args, ctx->i32, &ctx->oc_lds); // OC LDS
787 }
788 add_vgpr_argument(&args, ctx->f32, &ctx->tes_u); // tes_u
789 add_vgpr_argument(&args, ctx->f32, &ctx->tes_v); // tes_v
790 add_vgpr_argument(&args, ctx->i32, &ctx->tes_rel_patch_id); // tes rel patch id
791 add_vgpr_argument(&args, ctx->i32, &ctx->tes_patch_id); // tes patch id
792 break;
793 case MESA_SHADER_GEOMETRY:
794 add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_ring_stride); // gsvs stride
795 add_user_sgpr_argument(&args, ctx->i32, &ctx->gsvs_num_entries); // gsvs num entries
796 add_sgpr_argument(&args, ctx->i32, &ctx->gs2vs_offset); // gs2vs offset
797 add_sgpr_argument(&args, ctx->i32, &ctx->gs_wave_id); // wave id
798 add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[0]); // vtx0
799 add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[1]); // vtx1
800 add_vgpr_argument(&args, ctx->i32, &ctx->gs_prim_id); // prim id
801 add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[2]);
802 add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[3]);
803 add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[4]);
804 add_vgpr_argument(&args, ctx->i32, &ctx->gs_vtx_offset[5]);
805 add_vgpr_argument(&args, ctx->i32, &ctx->gs_invocation_id);
806 break;
807 case MESA_SHADER_FRAGMENT:
808 if (ctx->shader_info->info.ps.needs_sample_positions)
809 add_user_sgpr_argument(&args, ctx->i32, &ctx->sample_pos_offset); /* sample position offset */
810 add_sgpr_argument(&args, ctx->i32, &ctx->prim_mask); /* prim mask */
811 add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_sample); /* persp sample */
812 add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_center); /* persp center */
813 add_vgpr_argument(&args, ctx->v2i32, &ctx->persp_centroid); /* persp centroid */
814 add_vgpr_argument(&args, ctx->v3i32, NULL); /* persp pull model */
815 add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_sample); /* linear sample */
816 add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_center); /* linear center */
817 add_vgpr_argument(&args, ctx->v2i32, &ctx->linear_centroid); /* linear centroid */
818 add_vgpr_argument(&args, ctx->f32, NULL); /* line stipple tex */
819 add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[0]); /* pos x float */
820 add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[1]); /* pos y float */
821 add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[2]); /* pos z float */
822 add_vgpr_argument(&args, ctx->f32, &ctx->frag_pos[3]); /* pos w float */
823 add_vgpr_argument(&args, ctx->i32, &ctx->front_face); /* front face */
824 add_vgpr_argument(&args, ctx->i32, &ctx->ancillary); /* ancillary */
825 add_vgpr_argument(&args, ctx->i32, &ctx->sample_coverage); /* sample coverage */
826 add_vgpr_argument(&args, ctx->i32, NULL); /* fixed pt */
827 break;
828 default:
829 unreachable("Shader stage not implemented");
830 }
831
832 ctx->main_function = create_llvm_function(
833 ctx->context, ctx->module, ctx->builder, NULL, 0, &args,
834 ctx->max_workgroup_size,
835 ctx->options->unsafe_math);
836 set_llvm_calling_convention(ctx->main_function, ctx->stage);
837
838
839 ctx->shader_info->num_input_vgprs = 0;
840 ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs =
841 ctx->options->supports_spill ? 2 : 0;
842
843 ctx->shader_info->num_user_sgprs += args.num_user_sgprs_used;
844 ctx->shader_info->num_input_sgprs += args.num_sgprs_used;
845
846 if (ctx->stage != MESA_SHADER_FRAGMENT)
847 ctx->shader_info->num_input_vgprs = args.num_vgprs_used;
848
849 assign_arguments(ctx->main_function, &args);
850
851 user_sgpr_idx = 0;
852
853 if (ctx->options->supports_spill || user_sgpr_info.need_ring_offsets) {
854 set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, &user_sgpr_idx, 2);
855 if (ctx->options->supports_spill) {
856 ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
857 LLVMPointerType(ctx->i8, CONST_ADDR_SPACE),
858 NULL, 0, AC_FUNC_ATTR_READNONE);
859 ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
860 const_array(ctx->v4i32, 16), "");
861 }
862 }
863
864 if (!user_sgpr_info.indirect_all_descriptor_sets) {
865 for (unsigned i = 0; i < num_sets; ++i) {
866 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
867 set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], &user_sgpr_idx, 2);
868 } else
869 ctx->descriptor_sets[i] = NULL;
870 }
871 } else {
872 uint32_t desc_sgpr_idx = user_sgpr_idx;
873 set_userdata_location_shader(ctx, AC_UD_INDIRECT_DESCRIPTOR_SETS, &user_sgpr_idx, 2);
874
875 for (unsigned i = 0; i < num_sets; ++i) {
876 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
877 set_userdata_location_indirect(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], desc_sgpr_idx, 2, i * 8);
878 ctx->descriptor_sets[i] = ac_build_indexed_load_const(&ctx->ac, desc_sets, LLVMConstInt(ctx->i32, i, false));
879
880 } else
881 ctx->descriptor_sets[i] = NULL;
882 }
883 ctx->shader_info->need_indirect_descriptor_sets = true;
884 }
885
886 if (ctx->shader_info->info.needs_push_constants) {
887 set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, &user_sgpr_idx, 2);
888 }
889
890 switch (ctx->stage) {
891 case MESA_SHADER_COMPUTE:
892 if (ctx->shader_info->info.cs.grid_components_used) {
893 set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, &user_sgpr_idx, ctx->shader_info->info.cs.grid_components_used);
894 }
895 break;
896 case MESA_SHADER_VERTEX:
897 if (!ctx->is_gs_copy_shader) {
898 if (ctx->shader_info->info.vs.has_vertex_buffers) {
899 set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, &user_sgpr_idx, 2);
900 }
901 unsigned vs_num = 2;
902 if (ctx->shader_info->info.vs.needs_draw_id)
903 vs_num++;
904
905 set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, &user_sgpr_idx, vs_num);
906 }
907 if (ctx->options->key.vs.as_ls) {
908 set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, &user_sgpr_idx, 1);
909 }
910 if (ctx->options->key.vs.as_ls)
911 declare_tess_lds(ctx);
912 break;
913 case MESA_SHADER_TESS_CTRL:
914 set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, &user_sgpr_idx, 4);
915 declare_tess_lds(ctx);
916 break;
917 case MESA_SHADER_TESS_EVAL:
918 set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, &user_sgpr_idx, 1);
919 break;
920 case MESA_SHADER_GEOMETRY:
921 set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, &user_sgpr_idx, 2);
922 break;
923 case MESA_SHADER_FRAGMENT:
924 if (ctx->shader_info->info.ps.needs_sample_positions) {
925 set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS_OFFSET, &user_sgpr_idx, 1);
926 }
927 break;
928 default:
929 unreachable("Shader stage not implemented");
930 }
931 }
932
933 static void setup_types(struct nir_to_llvm_context *ctx)
934 {
935 LLVMValueRef args[4];
936
937 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
938 ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
939 ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
940 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
941 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
942 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
943 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
944 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
945 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
946 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
947 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
948 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
949 ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
950 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
951 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
952
953 ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
954 ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
955 ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
956 ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
957 ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
958 ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
959
960 args[0] = ctx->f32zero;
961 args[1] = ctx->f32zero;
962 args[2] = ctx->f32zero;
963 args[3] = ctx->f32one;
964 ctx->v4f32empty = LLVMConstVector(args, 4);
965
966 ctx->uniform_md_kind =
967 LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
968 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
969
971 }
972
973 static int get_llvm_num_components(LLVMValueRef value)
974 {
975 LLVMTypeRef type = LLVMTypeOf(value);
976 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
977 ? LLVMGetVectorSize(type)
978 : 1;
979 return num_components;
980 }
981
982 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
983 LLVMValueRef value,
984 int index)
985 {
986 int count = get_llvm_num_components(value);
987
988 assert(index < count);
989 if (count == 1)
990 return value;
991
992 return LLVMBuildExtractElement(ctx->builder, value,
993 LLVMConstInt(ctx->i32, index, false), "");
994 }
995
996 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
997 LLVMValueRef value, unsigned count)
998 {
999 unsigned num_components = get_llvm_num_components(value);
1000 if (count == num_components)
1001 return value;
1002
1003 LLVMValueRef masks[] = {
1004 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
1005 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
1006
1007 if (count == 1)
1008 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
1009 "");
1010
1011 LLVMValueRef swizzle = LLVMConstVector(masks, count);
1012 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
1013 }
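/* Example: trim_vector(ctx, v4, 2) shuffles <x,y,z,w> down to <x,y>,
 * while trim_vector(ctx, v4, 1) extracts the scalar x rather than
 * building a 1-wide vector.
 */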
1014
1015 static void
1016 build_store_values_extended(struct nir_to_llvm_context *ctx,
1017 LLVMValueRef *values,
1018 unsigned value_count,
1019 unsigned value_stride,
1020 LLVMValueRef vec)
1021 {
1022 LLVMBuilderRef builder = ctx->builder;
1023 unsigned i;
1024
1025 if (value_count == 1) {
1026 LLVMBuildStore(builder, vec, values[0]);
1027 return;
1028 }
1029
1030 for (i = 0; i < value_count; i++) {
1031 LLVMValueRef ptr = values[i * value_stride];
1032 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
1033 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
1034 LLVMBuildStore(builder, value, ptr);
1035 }
1036 }
1037
1038 static LLVMTypeRef get_def_type(struct ac_nir_context *ctx,
1039 const nir_ssa_def *def)
1040 {
1041 LLVMTypeRef type = LLVMIntTypeInContext(ctx->ac.context, def->bit_size);
1042 if (def->num_components > 1) {
1043 type = LLVMVectorType(type, def->num_components);
1044 }
1045 return type;
1046 }
1047
1048 static LLVMValueRef get_src(struct ac_nir_context *nir, nir_src src)
1049 {
1050 assert(src.is_ssa);
1051 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, src.ssa);
1052 return (LLVMValueRef)entry->data;
1053 }
1054
1055
1056 static LLVMBasicBlockRef get_block(struct ac_nir_context *nir,
1057 const struct nir_block *b)
1058 {
1059 struct hash_entry *entry = _mesa_hash_table_search(nir->defs, b);
1060 return (LLVMBasicBlockRef)entry->data;
1061 }
1062
1063 static LLVMValueRef get_alu_src(struct ac_nir_context *ctx,
1064 nir_alu_src src,
1065 unsigned num_components)
1066 {
1067 LLVMValueRef value = get_src(ctx, src.src);
1068 bool need_swizzle = false;
1069
1070 assert(value);
1071 LLVMTypeRef type = LLVMTypeOf(value);
1072 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
1073 ? LLVMGetVectorSize(type)
1074 : 1;
1075
1076 for (unsigned i = 0; i < num_components; ++i) {
1077 assert(src.swizzle[i] < src_components);
1078 if (src.swizzle[i] != i)
1079 need_swizzle = true;
1080 }
1081
1082 if (need_swizzle || num_components != src_components) {
1083 LLVMValueRef masks[] = {
1084 LLVMConstInt(ctx->ac.i32, src.swizzle[0], false),
1085 LLVMConstInt(ctx->ac.i32, src.swizzle[1], false),
1086 LLVMConstInt(ctx->ac.i32, src.swizzle[2], false),
1087 LLVMConstInt(ctx->ac.i32, src.swizzle[3], false)};
1088
1089 if (src_components > 1 && num_components == 1) {
1090 value = LLVMBuildExtractElement(ctx->ac.builder, value,
1091 masks[0], "");
1092 } else if (src_components == 1 && num_components > 1) {
1093 LLVMValueRef values[] = {value, value, value, value};
1094 value = ac_build_gather_values(&ctx->ac, values, num_components);
1095 } else {
1096 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
1097 value = LLVMBuildShuffleVector(ctx->ac.builder, value, value,
1098 swizzle, "");
1099 }
1100 }
1101 assert(!src.negate);
1102 assert(!src.abs);
1103 return value;
1104 }
1105
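/* NIR booleans are 32-bit here: comparisons must yield ~0u for true
 * and 0 for false, so the i1 result of the LLVM compare is widened
 * with a select rather than a plain zero-extension.
 */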
1106 static LLVMValueRef emit_int_cmp(struct ac_llvm_context *ctx,
1107 LLVMIntPredicate pred, LLVMValueRef src0,
1108 LLVMValueRef src1)
1109 {
1110 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
1111 return LLVMBuildSelect(ctx->builder, result,
1112 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
1113 LLVMConstInt(ctx->i32, 0, false), "");
1114 }
1115
1116 static LLVMValueRef emit_float_cmp(struct ac_llvm_context *ctx,
1117 LLVMRealPredicate pred, LLVMValueRef src0,
1118 LLVMValueRef src1)
1119 {
1120 LLVMValueRef result;
1121 src0 = to_float(ctx, src0);
1122 src1 = to_float(ctx, src1);
1123 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
1124 return LLVMBuildSelect(ctx->builder, result,
1125 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
1126 LLVMConstInt(ctx->i32, 0, false), "");
1127 }
1128
1129 static LLVMValueRef emit_intrin_1f_param(struct ac_llvm_context *ctx,
1130 const char *intrin,
1131 LLVMTypeRef result_type,
1132 LLVMValueRef src0)
1133 {
1134 char name[64];
1135 LLVMValueRef params[] = {
1136 to_float(ctx, src0),
1137 };
1138
1139 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1140 get_elem_bits(ctx, result_type));
1141 assert(length < sizeof(name));
1142 return ac_build_intrinsic(ctx, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
1143 }
1144
1145 static LLVMValueRef emit_intrin_2f_param(struct ac_llvm_context *ctx,
1146 const char *intrin,
1147 LLVMTypeRef result_type,
1148 LLVMValueRef src0, LLVMValueRef src1)
1149 {
1150 char name[64];
1151 LLVMValueRef params[] = {
1152 to_float(ctx, src0),
1153 to_float(ctx, src1),
1154 };
1155
1156 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1157 get_elem_bits(ctx, result_type));
1158 assert(length < sizeof(name));
1159 return ac_build_intrinsic(ctx, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
1160 }
1161
1162 static LLVMValueRef emit_intrin_3f_param(struct ac_llvm_context *ctx,
1163 const char *intrin,
1164 LLVMTypeRef result_type,
1165 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
1166 {
1167 char name[64];
1168 LLVMValueRef params[] = {
1169 to_float(ctx, src0),
1170 to_float(ctx, src1),
1171 to_float(ctx, src2),
1172 };
1173
1174 MAYBE_UNUSED const int length = snprintf(name, sizeof(name), "%s.f%d", intrin,
1175 get_elem_bits(ctx, result_type));
1176 assert(length < sizeof(name));
1177 return ac_build_intrinsic(ctx, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
1178 }
1179
1180 static LLVMValueRef emit_bcsel(struct ac_llvm_context *ctx,
1181 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
1182 {
1183 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
1184 ctx->i32_0, "");
1185 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
1186 }
1187
1188 static LLVMValueRef emit_find_lsb(struct ac_llvm_context *ctx,
1189 LLVMValueRef src0)
1190 {
1191 LLVMValueRef params[2] = {
1192 src0,
1193
1194 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
1195 * add special code to check for x=0. The reason is that
1196 * the LLVM behavior for x=0 is different from what we
1197 * need here.
1198 *
1199 * The hardware already implements the correct behavior.
1200 */
1201 LLVMConstInt(ctx->i1, 1, false),
1202 };
1203 return ac_build_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
1204 }
1205
1206 static LLVMValueRef emit_ifind_msb(struct ac_llvm_context *ctx,
1207 LLVMValueRef src0)
1208 {
1209 return ac_build_imsb(ctx, src0, ctx->i32);
1210 }
1211
1212 static LLVMValueRef emit_ufind_msb(struct ac_llvm_context *ctx,
1213 LLVMValueRef src0)
1214 {
1215 return ac_build_umsb(ctx, src0, ctx->i32);
1216 }
1217
1218 static LLVMValueRef emit_minmax_int(struct ac_llvm_context *ctx,
1219 LLVMIntPredicate pred,
1220 LLVMValueRef src0, LLVMValueRef src1)
1221 {
1222 return LLVMBuildSelect(ctx->builder,
1223 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
1224 src0,
1225 src1, "");
1226
1227 }
1228 static LLVMValueRef emit_iabs(struct ac_llvm_context *ctx,
1229 LLVMValueRef src0)
1230 {
1231 return emit_minmax_int(ctx, LLVMIntSGT, src0,
1232 LLVMBuildNeg(ctx->builder, src0, ""));
1233 }
1234
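/* fsign/isign below use two chained selects: values > 0 collapse to 1
 * first, then anything still negative collapses to -1, so 0 falls
 * through both selects unchanged.
 */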
1235 static LLVMValueRef emit_fsign(struct ac_llvm_context *ctx,
1236 LLVMValueRef src0)
1237 {
1238 LLVMValueRef cmp, val;
1239
1240 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32_0, "");
1241 val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32_1, src0, "");
1242 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32_0, "");
1243 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
1244 return val;
1245 }
1246
1247 static LLVMValueRef emit_isign(struct ac_llvm_context *ctx,
1248 LLVMValueRef src0)
1249 {
1250 LLVMValueRef cmp, val;
1251
1252 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32_0, "");
1253 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32_1, src0, "");
1254 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32_0, "");
1255 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
1256 return val;
1257 }
1258
1259 static LLVMValueRef emit_ffract(struct ac_llvm_context *ctx,
1260 LLVMValueRef src0)
1261 {
1262 const char *intr = "llvm.floor.f32";
1263 LLVMValueRef fsrc0 = to_float(ctx, src0);
1264 LLVMValueRef params[] = {
1265 fsrc0,
1266 };
1267 LLVMValueRef floor = ac_build_intrinsic(ctx, intr,
1268 ctx->f32, params, 1,
1269 AC_FUNC_ATTR_READNONE);
1270 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
1271 }
1272
1273 static LLVMValueRef emit_uint_carry(struct ac_llvm_context *ctx,
1274 const char *intrin,
1275 LLVMValueRef src0, LLVMValueRef src1)
1276 {
1277 LLVMTypeRef ret_type;
1278 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
1279 LLVMValueRef res;
1280 LLVMValueRef params[] = { src0, src1 };
1281 ret_type = LLVMStructTypeInContext(ctx->context, types,
1282 2, true);
1283
1284 res = ac_build_intrinsic(ctx, intrin, ret_type,
1285 params, 2, AC_FUNC_ATTR_READNONE);
1286
1287 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
1288 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
1289 return res;
1290 }
1291
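/* emit_b2f below ANDs the 0/~0 boolean with the bit pattern of 1.0f
 * (0x3f800000), producing the bits of 0.0f or 1.0f without a select.
 */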
1292 static LLVMValueRef emit_b2f(struct ac_llvm_context *ctx,
1293 LLVMValueRef src0)
1294 {
1295 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
1296 }
1297
1298 static LLVMValueRef emit_f2b(struct ac_llvm_context *ctx,
1299 LLVMValueRef src0)
1300 {
1301 src0 = to_float(ctx, src0);
1302 return LLVMBuildSExt(ctx->builder,
1303 LLVMBuildFCmp(ctx->builder, LLVMRealUNE, src0, ctx->f32_0, ""),
1304 ctx->i32, "");
1305 }
1306
1307 static LLVMValueRef emit_b2i(struct ac_llvm_context *ctx,
1308 LLVMValueRef src0)
1309 {
1310 return LLVMBuildAnd(ctx->builder, src0, ctx->i32_1, "");
1311 }
1312
1313 static LLVMValueRef emit_i2b(struct ac_llvm_context *ctx,
1314 LLVMValueRef src0)
1315 {
1316 return LLVMBuildSExt(ctx->builder,
1317 LLVMBuildICmp(ctx->builder, LLVMIntNE, src0, ctx->i32_0, ""),
1318 ctx->i32, "");
1319 }
1320
1321 static LLVMValueRef emit_f2f16(struct nir_to_llvm_context *ctx,
1322 LLVMValueRef src0)
1323 {
1324 LLVMValueRef result;
1325 LLVMValueRef cond = NULL;
1326
1327 src0 = to_float(&ctx->ac, src0);
1328 result = LLVMBuildFPTrunc(ctx->builder, src0, ctx->f16, "");
1329
1330 /* TODO SI/CIK options here */
1331 if (ctx->options->chip_class >= VI) {
1332 LLVMValueRef args[2];
1333 /* Check if the result is a denormal and flush to 0 if so. */
1334 args[0] = result;
1335 args[1] = LLVMConstInt(ctx->i32, N_SUBNORMAL | P_SUBNORMAL, false);
1336 cond = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.class.f16", ctx->i1, args, 2, AC_FUNC_ATTR_READNONE);
1337 }
1338
1339 /* need to convert back up to f32 */
1340 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1341
1342 if (ctx->options->chip_class >= VI)
1343 result = LLVMBuildSelect(ctx->builder, cond, ctx->f32zero, result, "");
1344
1345 return result;
1346 }
1347
1348 static LLVMValueRef emit_umul_high(struct ac_llvm_context *ctx,
1349 LLVMValueRef src0, LLVMValueRef src1)
1350 {
1351 LLVMValueRef dst64, result;
1352 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
1353 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
1354
1355 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1356 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1357 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1358 return result;
1359 }
1360
1361 static LLVMValueRef emit_imul_high(struct ac_llvm_context *ctx,
1362 LLVMValueRef src0, LLVMValueRef src1)
1363 {
1364 LLVMValueRef dst64, result;
1365 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
1366 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
1367
1368 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1369 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1370 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1371 return result;
1372 }
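/* Example: emit_umul_high(0x80000000, 4) forms the 64-bit product
 * 0x200000000 and returns its high dword, 2; the signed variant uses
 * sign extension and an arithmetic shift instead.
 */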
1373
1374 static LLVMValueRef emit_bitfield_extract(struct ac_llvm_context *ctx,
1375 bool is_signed,
1376 const LLVMValueRef srcs[3])
1377 {
1378 LLVMValueRef result;
1379 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1380
1381 result = ac_build_bfe(ctx, srcs[0], srcs[1], srcs[2], is_signed);
1382 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1383 return result;
1384 }
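/* The srcs[2] == 32 case is special-cased because the hardware BFE
 * encodes the width in only 5 bits, so a full 32-bit extract cannot be
 * expressed and must simply return srcs[0] unchanged.
 */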
1385
1386 static LLVMValueRef emit_bitfield_insert(struct ac_llvm_context *ctx,
1387 LLVMValueRef src0, LLVMValueRef src1,
1388 LLVMValueRef src2, LLVMValueRef src3)
1389 {
1390 LLVMValueRef bfi_args[3], result;
1391
1392 bfi_args[0] = LLVMBuildShl(ctx->builder,
1393 LLVMBuildSub(ctx->builder,
1394 LLVMBuildShl(ctx->builder,
1395 ctx->i32_1,
1396 src3, ""),
1397 ctx->i32_1, ""),
1398 src2, "");
1399 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1400 bfi_args[2] = src0;
1401
1402 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1403
1404 /* Calculate:
1405 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
1406 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1407 */
1408 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1409 LLVMBuildAnd(ctx->builder, bfi_args[0],
1410 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1411
1412 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1413 return result;
1414 }
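/* Example (illustrative): with offset src2 = 4 and width src3 = 8,
 * bfi_args[0] = ((1 << 8) - 1) << 4 = 0xff0, the mask selecting which
 * bits of src0 are replaced by the shifted insert value.
 */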
1415
1416 static LLVMValueRef emit_pack_half_2x16(struct ac_llvm_context *ctx,
1417 LLVMValueRef src0)
1418 {
1419 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1420 int i;
1421 LLVMValueRef comp[2];
1422
1423 src0 = to_float(ctx, src0);
1424 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_0, "");
1425 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32_1, "");
1426 for (i = 0; i < 2; i++) {
1427 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1428 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1429 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1430 }
1431
1432 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1433 comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1434
1435 return comp[0];
1436 }
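/* Example (illustrative): packing (1.0f, 2.0f) converts the lanes to
 * f16 (0x3c00 and 0x4000) and returns 0x4000 << 16 | 0x3c00 =
 * 0x40003c00 in a single i32.
 */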
1437
1438 static LLVMValueRef emit_unpack_half_2x16(struct ac_llvm_context *ctx,
1439 LLVMValueRef src0)
1440 {
1441 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1442 LLVMValueRef temps[2], result, val;
1443 int i;
1444
1445 for (i = 0; i < 2; i++) {
1446 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1447 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1448 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1449 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1450 }
1451
1452 LLVMTypeRef v2f32 = LLVMVectorType(ctx->f32, 2);
1453 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(v2f32), temps[0],
1454 ctx->i32_0, "");
1455 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1456 ctx->i32_1, "");
1457 return result;
1458 }
1459
1460 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1461 nir_op op,
1462 LLVMValueRef src0)
1463 {
1464 unsigned mask;
1465 int idx;
1466 LLVMValueRef result;
1467
1468 if (!ctx->lds && !ctx->has_ds_bpermute)
1469 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1470 LLVMArrayType(ctx->i32, 64),
1471 "ddxy_lds", LOCAL_ADDR_SPACE);
1472
1473 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1474 mask = AC_TID_MASK_LEFT;
1475 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1476 mask = AC_TID_MASK_TOP;
1477 else
1478 mask = AC_TID_MASK_TOP_LEFT;
1479
1480 /* for DDX we want the next X pixel, for DDY the next Y pixel. */
1481 if (op == nir_op_fddx_fine ||
1482 op == nir_op_fddx_coarse ||
1483 op == nir_op_fddx)
1484 idx = 1;
1485 else
1486 idx = 2;
1487
1488 result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
1489 mask, idx, ctx->lds,
1490 src0);
1491 return result;
1492 }
1493
1494 /*
1495 * This takes an I,J coordinate pair and works out the X and Y
1496 * derivatives.
1497 * It returns DDX(I), DDX(J), DDY(I), DDY(J).
1498 */
1499 static LLVMValueRef emit_ddxy_interp(
1500 struct nir_to_llvm_context *ctx,
1501 LLVMValueRef interp_ij)
1502 {
1503 LLVMValueRef result[4], a;
1504 unsigned i;
1505
1506 for (i = 0; i < 2; i++) {
1507 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1508 LLVMConstInt(ctx->i32, i, false), "");
1509 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1510 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1511 }
1512 return ac_build_gather_values(&ctx->ac, result, 4);
1513 }
1514
1515 static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
1516 {
1517 LLVMValueRef src[4], result = NULL;
1518 unsigned num_components = instr->dest.dest.ssa.num_components;
1519 unsigned src_components;
1520 LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);
1521
1522 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1523 switch (instr->op) {
1524 case nir_op_vec2:
1525 case nir_op_vec3:
1526 case nir_op_vec4:
1527 src_components = 1;
1528 break;
1529 case nir_op_pack_half_2x16:
1530 src_components = 2;
1531 break;
1532 case nir_op_unpack_half_2x16:
1533 src_components = 1;
1534 break;
1535 default:
1536 src_components = num_components;
1537 break;
1538 }
1539 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1540 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1541
1542 switch (instr->op) {
1543 case nir_op_fmov:
1544 case nir_op_imov:
1545 result = src[0];
1546 break;
1547 case nir_op_fneg:
1548 src[0] = to_float(&ctx->ac, src[0]);
1549 result = LLVMBuildFNeg(ctx->ac.builder, src[0], "");
1550 break;
1551 case nir_op_ineg:
1552 result = LLVMBuildNeg(ctx->ac.builder, src[0], "");
1553 break;
1554 case nir_op_inot:
1555 result = LLVMBuildNot(ctx->ac.builder, src[0], "");
1556 break;
1557 case nir_op_iadd:
1558 result = LLVMBuildAdd(ctx->ac.builder, src[0], src[1], "");
1559 break;
1560 case nir_op_fadd:
1561 src[0] = to_float(&ctx->ac, src[0]);
1562 src[1] = to_float(&ctx->ac, src[1]);
1563 result = LLVMBuildFAdd(ctx->ac.builder, src[0], src[1], "");
1564 break;
1565 case nir_op_fsub:
1566 src[0] = to_float(&ctx->ac, src[0]);
1567 src[1] = to_float(&ctx->ac, src[1]);
1568 result = LLVMBuildFSub(ctx->ac.builder, src[0], src[1], "");
1569 break;
1570 case nir_op_isub:
1571 result = LLVMBuildSub(ctx->ac.builder, src[0], src[1], "");
1572 break;
1573 case nir_op_imul:
1574 result = LLVMBuildMul(ctx->ac.builder, src[0], src[1], "");
1575 break;
1576 case nir_op_imod:
1577 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
1578 break;
1579 case nir_op_umod:
1580 result = LLVMBuildURem(ctx->ac.builder, src[0], src[1], "");
1581 break;
1582 case nir_op_fmod:
1583 src[0] = to_float(&ctx->ac, src[0]);
1584 src[1] = to_float(&ctx->ac, src[1]);
1585 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1586 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1587 to_float_type(&ctx->ac, def_type), result);
1588 result = LLVMBuildFMul(ctx->ac.builder, src[1], result, "");
1589 result = LLVMBuildFSub(ctx->ac.builder, src[0], result, "");
1590 break;
1591 case nir_op_frem:
1592 src[0] = to_float(&ctx->ac, src[0]);
1593 src[1] = to_float(&ctx->ac, src[1]);
1594 result = LLVMBuildFRem(ctx->ac.builder, src[0], src[1], "");
1595 break;
1596 case nir_op_irem:
1597 result = LLVMBuildSRem(ctx->ac.builder, src[0], src[1], "");
1598 break;
1599 case nir_op_idiv:
1600 result = LLVMBuildSDiv(ctx->ac.builder, src[0], src[1], "");
1601 break;
1602 case nir_op_udiv:
1603 result = LLVMBuildUDiv(ctx->ac.builder, src[0], src[1], "");
1604 break;
1605 case nir_op_fmul:
1606 src[0] = to_float(&ctx->ac, src[0]);
1607 src[1] = to_float(&ctx->ac, src[1]);
1608 result = LLVMBuildFMul(ctx->ac.builder, src[0], src[1], "");
1609 break;
1610 case nir_op_fdiv:
1611 src[0] = to_float(&ctx->ac, src[0]);
1612 src[1] = to_float(&ctx->ac, src[1]);
1613 result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
1614 break;
1615 case nir_op_frcp:
1616 src[0] = to_float(&ctx->ac, src[0]);
1617 result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, src[0]);
1618 break;
1619 case nir_op_iand:
1620 result = LLVMBuildAnd(ctx->ac.builder, src[0], src[1], "");
1621 break;
1622 case nir_op_ior:
1623 result = LLVMBuildOr(ctx->ac.builder, src[0], src[1], "");
1624 break;
1625 case nir_op_ixor:
1626 result = LLVMBuildXor(ctx->ac.builder, src[0], src[1], "");
1627 break;
1628 case nir_op_ishl:
1629 result = LLVMBuildShl(ctx->ac.builder, src[0],
1630 LLVMBuildZExt(ctx->ac.builder, src[1],
1631 LLVMTypeOf(src[0]), ""),
1632 "");
1633 break;
1634 case nir_op_ishr:
1635 result = LLVMBuildAShr(ctx->ac.builder, src[0],
1636 LLVMBuildZExt(ctx->ac.builder, src[1],
1637 LLVMTypeOf(src[0]), ""),
1638 "");
1639 break;
1640 case nir_op_ushr:
1641 result = LLVMBuildLShr(ctx->ac.builder, src[0],
1642 LLVMBuildZExt(ctx->ac.builder, src[1],
1643 LLVMTypeOf(src[0]), ""),
1644 "");
1645 break;
1646 case nir_op_ilt:
1647 result = emit_int_cmp(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1648 break;
1649 case nir_op_ine:
1650 result = emit_int_cmp(&ctx->ac, LLVMIntNE, src[0], src[1]);
1651 break;
1652 case nir_op_ieq:
1653 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, src[0], src[1]);
1654 break;
1655 case nir_op_ige:
1656 result = emit_int_cmp(&ctx->ac, LLVMIntSGE, src[0], src[1]);
1657 break;
1658 case nir_op_ult:
1659 result = emit_int_cmp(&ctx->ac, LLVMIntULT, src[0], src[1]);
1660 break;
1661 case nir_op_uge:
1662 result = emit_int_cmp(&ctx->ac, LLVMIntUGE, src[0], src[1]);
1663 break;
1664 case nir_op_feq:
1665 result = emit_float_cmp(&ctx->ac, LLVMRealUEQ, src[0], src[1]);
1666 break;
1667 case nir_op_fne:
1668 result = emit_float_cmp(&ctx->ac, LLVMRealUNE, src[0], src[1]);
1669 break;
1670 case nir_op_flt:
1671 result = emit_float_cmp(&ctx->ac, LLVMRealULT, src[0], src[1]);
1672 break;
1673 case nir_op_fge:
1674 result = emit_float_cmp(&ctx->ac, LLVMRealOGE, src[0], src[1]);
1675 break;
1676 case nir_op_fabs:
1677 result = emit_intrin_1f_param(&ctx->ac, "llvm.fabs",
1678 to_float_type(&ctx->ac, def_type), src[0]);
1679 break;
1680 case nir_op_iabs:
1681 result = emit_iabs(&ctx->ac, src[0]);
1682 break;
1683 case nir_op_imax:
1684 result = emit_minmax_int(&ctx->ac, LLVMIntSGT, src[0], src[1]);
1685 break;
1686 case nir_op_imin:
1687 result = emit_minmax_int(&ctx->ac, LLVMIntSLT, src[0], src[1]);
1688 break;
1689 case nir_op_umax:
1690 result = emit_minmax_int(&ctx->ac, LLVMIntUGT, src[0], src[1]);
1691 break;
1692 case nir_op_umin:
1693 result = emit_minmax_int(&ctx->ac, LLVMIntULT, src[0], src[1]);
1694 break;
1695 case nir_op_isign:
1696 result = emit_isign(&ctx->ac, src[0]);
1697 break;
1698 case nir_op_fsign:
1699 src[0] = to_float(&ctx->ac, src[0]);
1700 result = emit_fsign(&ctx->ac, src[0]);
1701 break;
1702 case nir_op_ffloor:
1703 result = emit_intrin_1f_param(&ctx->ac, "llvm.floor",
1704 to_float_type(&ctx->ac, def_type), src[0]);
1705 break;
1706 case nir_op_ftrunc:
1707 result = emit_intrin_1f_param(&ctx->ac, "llvm.trunc",
1708 to_float_type(&ctx->ac, def_type), src[0]);
1709 break;
1710 case nir_op_fceil:
1711 result = emit_intrin_1f_param(&ctx->ac, "llvm.ceil",
1712 to_float_type(&ctx->ac, def_type), src[0]);
1713 break;
1714 case nir_op_fround_even:
1715 result = emit_intrin_1f_param(&ctx->ac, "llvm.rint",
1716 to_float_type(&ctx->ac, def_type), src[0]);
1717 break;
1718 case nir_op_ffract:
1719 result = emit_ffract(&ctx->ac, src[0]);
1720 break;
1721 case nir_op_fsin:
1722 result = emit_intrin_1f_param(&ctx->ac, "llvm.sin",
1723 to_float_type(&ctx->ac, def_type), src[0]);
1724 break;
1725 case nir_op_fcos:
1726 result = emit_intrin_1f_param(&ctx->ac, "llvm.cos",
1727 to_float_type(&ctx->ac, def_type), src[0]);
1728 break;
1729 case nir_op_fsqrt:
1730 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1731 to_float_type(&ctx->ac, def_type), src[0]);
1732 break;
1733 case nir_op_fexp2:
1734 result = emit_intrin_1f_param(&ctx->ac, "llvm.exp2",
1735 to_float_type(&ctx->ac, def_type), src[0]);
1736 break;
1737 case nir_op_flog2:
1738 result = emit_intrin_1f_param(&ctx->ac, "llvm.log2",
1739 to_float_type(&ctx->ac, def_type), src[0]);
1740 break;
1741 case nir_op_frsq:
1742 result = emit_intrin_1f_param(&ctx->ac, "llvm.sqrt",
1743 to_float_type(&ctx->ac, def_type), src[0]);
1744 result = ac_build_fdiv(&ctx->ac, ctx->ac.f32_1, result);
1745 break;
1746 case nir_op_fpow:
1747 result = emit_intrin_2f_param(&ctx->ac, "llvm.pow",
1748 to_float_type(&ctx->ac, def_type), src[0], src[1]);
1749 break;
1750 case nir_op_fmax:
1751 result = emit_intrin_2f_param(&ctx->ac, "llvm.maxnum",
1752 to_float_type(&ctx->ac, def_type), src[0], src[1]);
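/* llvm.canonicalize quiets signaling NaNs and, depending on the FP mode,
 * flushes denormals, giving the 32-bit min/max result a canonical encoding. */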
1753 if (instr->dest.dest.ssa.bit_size == 32)
1754 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1755 to_float_type(&ctx->ac, def_type),
1756 result);
1757 break;
1758 case nir_op_fmin:
1759 result = emit_intrin_2f_param(&ctx->ac, "llvm.minnum",
1760 to_float_type(&ctx->ac, def_type), src[0], src[1]);
1761 if (instr->dest.dest.ssa.bit_size == 32)
1762 result = emit_intrin_1f_param(&ctx->ac, "llvm.canonicalize",
1763 to_float_type(&ctx->ac, def_type),
1764 result);
1765 break;
1766 case nir_op_ffma:
1767 result = emit_intrin_3f_param(&ctx->ac, "llvm.fma",
1768 to_float_type(&ctx->ac, def_type), src[0], src[1], src[2]);
1769 break;
1770 case nir_op_ibitfield_extract:
1771 result = emit_bitfield_extract(&ctx->ac, true, src);
1772 break;
1773 case nir_op_ubitfield_extract:
1774 result = emit_bitfield_extract(&ctx->ac, false, src);
1775 break;
1776 case nir_op_bitfield_insert:
1777 result = emit_bitfield_insert(&ctx->ac, src[0], src[1], src[2], src[3]);
1778 break;
1779 case nir_op_bitfield_reverse:
1780 result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1781 break;
1782 case nir_op_bit_count:
1783 result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->ac.i32, src, 1, AC_FUNC_ATTR_READNONE);
1784 break;
1785 case nir_op_vec2:
1786 case nir_op_vec3:
1787 case nir_op_vec4:
1788 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1789 src[i] = to_integer(&ctx->ac, src[i]);
1790 result = ac_build_gather_values(&ctx->ac, src, num_components);
1791 break;
1792 case nir_op_f2i32:
1793 case nir_op_f2i64:
1794 src[0] = to_float(&ctx->ac, src[0]);
1795 result = LLVMBuildFPToSI(ctx->ac.builder, src[0], def_type, "");
1796 break;
1797 case nir_op_f2u32:
1798 case nir_op_f2u64:
1799 src[0] = to_float(&ctx->ac, src[0]);
1800 result = LLVMBuildFPToUI(ctx->ac.builder, src[0], def_type, "");
1801 break;
1802 case nir_op_i2f32:
1803 case nir_op_i2f64:
1804 result = LLVMBuildSIToFP(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), "");
1805 break;
1806 case nir_op_u2f32:
1807 case nir_op_u2f64:
1808 result = LLVMBuildUIToFP(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), "");
1809 break;
1810 case nir_op_f2f64:
1811 result = LLVMBuildFPExt(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), "");
1812 break;
1813 case nir_op_f2f32:
1814 result = LLVMBuildFPTrunc(ctx->ac.builder, src[0], to_float_type(&ctx->ac, def_type), "");
1815 break;
1816 case nir_op_u2u32:
1817 case nir_op_u2u64:
1818 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1819 result = LLVMBuildZExt(ctx->ac.builder, src[0], def_type, "");
1820 else
1821 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1822 break;
1823 case nir_op_i2i32:
1824 case nir_op_i2i64:
1825 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < get_elem_bits(&ctx->ac, def_type))
1826 result = LLVMBuildSExt(ctx->ac.builder, src[0], def_type, "");
1827 else
1828 result = LLVMBuildTrunc(ctx->ac.builder, src[0], def_type, "");
1829 break;
1830 case nir_op_bcsel:
1831 result = emit_bcsel(&ctx->ac, src[0], src[1], src[2]);
1832 break;
1833 case nir_op_find_lsb:
1834 result = emit_find_lsb(&ctx->ac, src[0]);
1835 break;
1836 case nir_op_ufind_msb:
1837 result = emit_ufind_msb(&ctx->ac, src[0]);
1838 break;
1839 case nir_op_ifind_msb:
1840 result = emit_ifind_msb(&ctx->ac, src[0]);
1841 break;
1842 case nir_op_uadd_carry:
1843 result = emit_uint_carry(&ctx->ac, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1844 break;
1845 case nir_op_usub_borrow:
1846 result = emit_uint_carry(&ctx->ac, "llvm.usub.with.overflow.i32", src[0], src[1]);
1847 break;
1848 case nir_op_b2f:
1849 result = emit_b2f(&ctx->ac, src[0]);
1850 break;
1851 case nir_op_f2b:
1852 result = emit_f2b(&ctx->ac, src[0]);
1853 break;
1854 case nir_op_b2i:
1855 result = emit_b2i(&ctx->ac, src[0]);
1856 break;
1857 case nir_op_i2b:
1858 result = emit_i2b(&ctx->ac, src[0]);
1859 break;
1860 case nir_op_fquantize2f16:
1861 result = emit_f2f16(ctx->nctx, src[0]);
1862 break;
1863 case nir_op_umul_high:
1864 result = emit_umul_high(&ctx->ac, src[0], src[1]);
1865 break;
1866 case nir_op_imul_high:
1867 result = emit_imul_high(&ctx->ac, src[0], src[1]);
1868 break;
1869 case nir_op_pack_half_2x16:
1870 result = emit_pack_half_2x16(&ctx->ac, src[0]);
1871 break;
1872 case nir_op_unpack_half_2x16:
1873 result = emit_unpack_half_2x16(&ctx->ac, src[0]);
1874 break;
1875 case nir_op_fddx:
1876 case nir_op_fddy:
1877 case nir_op_fddx_fine:
1878 case nir_op_fddy_fine:
1879 case nir_op_fddx_coarse:
1880 case nir_op_fddy_coarse:
1881 result = emit_ddxy(ctx->nctx, instr->op, src[0]);
1882 break;
1883
1884 case nir_op_unpack_64_2x32_split_x: {
1885 assert(instr->src[0].src.ssa->num_components == 1);
1886 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1887 LLVMVectorType(ctx->ac.i32, 2),
1888 "");
1889 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1890 ctx->ac.i32_0, "");
1891 break;
1892 }
1893
1894 case nir_op_unpack_64_2x32_split_y: {
1895 assert(instr->src[0].src.ssa->num_components == 1);
1896 LLVMValueRef tmp = LLVMBuildBitCast(ctx->ac.builder, src[0],
1897 LLVMVectorType(ctx->ac.i32, 2),
1898 "");
1899 result = LLVMBuildExtractElement(ctx->ac.builder, tmp,
1900 ctx->ac.i32_1, "");
1901 break;
1902 }
1903
1904 case nir_op_pack_64_2x32_split: {
1905 LLVMValueRef tmp = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, 2));
1906 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
1907 src[0], ctx->ac.i32_0, "");
1908 tmp = LLVMBuildInsertElement(ctx->ac.builder, tmp,
1909 src[1], ctx->ac.i32_1, "");
1910 result = LLVMBuildBitCast(ctx->ac.builder, tmp, ctx->ac.i64, "");
1911 break;
1912 }
1913
1914 default:
1915 fprintf(stderr, "Unknown NIR alu instr: ");
1916 nir_print_instr(&instr->instr, stderr);
1917 fprintf(stderr, "\n");
1918 abort();
1919 }
1920
1921 if (result) {
1922 assert(instr->dest.dest.is_ssa);
1923 result = to_integer(&ctx->ac, result);
1924 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1925 result);
1926 }
1927 }
1928
1929 static void visit_load_const(struct ac_nir_context *ctx,
1930 const nir_load_const_instr *instr)
1931 {
1932 LLVMValueRef values[4], value = NULL;
1933 LLVMTypeRef element_type =
1934 LLVMIntTypeInContext(ctx->ac.context, instr->def.bit_size);
1935
1936 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1937 switch (instr->def.bit_size) {
1938 case 32:
1939 values[i] = LLVMConstInt(element_type,
1940 instr->value.u32[i], false);
1941 break;
1942 case 64:
1943 values[i] = LLVMConstInt(element_type,
1944 instr->value.u64[i], false);
1945 break;
1946 default:
1947 fprintf(stderr,
1948 "unsupported nir load_const bit_size: %d\n",
1949 instr->def.bit_size);
1950 abort();
1951 }
1952 }
1953 if (instr->def.num_components > 1) {
1954 value = LLVMConstVector(values, instr->def.num_components);
1955 } else
1956 value = values[0];
1957
1958 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1959 }
1960
1961 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1962 LLVMTypeRef type)
1963 {
1964 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1965 return LLVMBuildBitCast(ctx->builder, ptr,
1966 LLVMPointerType(type, addr_space), "");
1967 }
1968
1969 static LLVMValueRef
1970 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1971 {
1972 LLVMValueRef size =
1973 LLVMBuildExtractElement(ctx->builder, descriptor,
1974 LLVMConstInt(ctx->i32, 2, false), "");
1975
1976 /* VI only */
1977 if (ctx->options->chip_class >= VI && in_elements) {
1978 /* On VI, the descriptor contains the size in bytes,
1979 * but TXQ must return the size in elements.
1980 * The stride is always non-zero for resources using TXQ.
1981 */
1982 LLVMValueRef stride =
1983 LLVMBuildExtractElement(ctx->builder, descriptor,
1984 LLVMConstInt(ctx->i32, 1, false), "");
1985 stride = LLVMBuildLShr(ctx->builder, stride,
1986 LLVMConstInt(ctx->i32, 16, false), "");
1987 stride = LLVMBuildAnd(ctx->builder, stride,
1988 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1989
1990 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1991 }
1992 return size;
1993 }
1994
1995 /**
1996 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1997 * intrinsic names).
1998 */
1999 static void build_int_type_name(
2000 LLVMTypeRef type,
2001 char *buf, unsigned bufsize)
2002 {
2003 assert(bufsize >= 6);
2004
2005 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
2006 snprintf(buf, bufsize, "v%ui32",
2007 LLVMGetVectorSize(type));
2008 else
2009 strcpy(buf, "i32");
2010 }
2011
2012 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
2013 struct ac_image_args *args,
2014 const nir_tex_instr *instr)
2015 {
2016 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2017 LLVMValueRef coord = args->addr;
2018 LLVMValueRef half_texel[2];
2019 LLVMValueRef compare_cube_wa;
2020 LLVMValueRef result;
2021 int c;
2022 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
2023
2024 //TODO Rect
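/* Query the texture size via resinfo and derive a half-texel offset per
 * coordinate axis: half_texel[c] = (1 / size[c]) * -0.5. */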
2025 {
2026 struct ac_image_args txq_args = { 0 };
2027
2028 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
2029 txq_args.opcode = ac_image_get_resinfo;
2030 txq_args.dmask = 0xf;
2031 txq_args.addr = ctx->i32zero;
2032 txq_args.resource = args->resource;
2033 LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &txq_args);
2034
2035 for (c = 0; c < 2; c++) {
2036 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
2037 LLVMConstInt(ctx->i32, c, false), "");
2038 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
2039 half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
2040 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
2041 LLVMConstReal(ctx->f32, -0.5), "");
2042 }
2043 }
2044
2045 LLVMValueRef orig_coords = args->addr;
2046
2047 for (c = 0; c < 2; c++) {
2048 LLVMValueRef tmp;
2049 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
2050 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
2051 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2052 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
2053 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2054 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
2055 }
2056
2058 /*
2059 * Apparently cube gather has an issue with integer types that the workaround
2060 * doesn't solve, so if the format is 8_8_8_8 and the type is integer, do an
2061 * alternate workaround: sample using a scaled format and convert the result.
2062 * This is taken from amdgpu-pro shaders.
2063 */
2064 /* NOTE this produces some ugly code compared to amdgpu-pro,
2065 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
2066 * and then reads them back. amdgpu-pro generates two selects:
2067 * one s_cmp for the descriptor rewrite and
2068 * one v_cmp for the coordinate and result changes.
2069 */
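/* The constants below are the IMG NUM_FORMAT values shifted into bits 29:26
 * of descriptor word 1, e.g. USCALED (0x2) << 26 == 0x8000000. */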
2070 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2071 LLVMValueRef tmp, tmp2;
2072
2073 /* workaround 8/8/8/8 uint/sint cube gather bug */
2074 /* first detect it then change to a scaled read and f2i */
2075 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, "");
2076 tmp2 = tmp;
2077
2078 /* extract the DATA_FORMAT */
2079 tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false),
2080 LLVMConstInt(ctx->i32, 6, false), false);
2081
2082 /* is the DATA_FORMAT == 8_8_8_8 */
2083 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
2084
2085 if (stype == GLSL_TYPE_UINT)
2086 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
2087 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
2088 LLVMConstInt(ctx->i32, 0x10000000, false), "");
2089 else
2090 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
2091 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
2092 LLVMConstInt(ctx->i32, 0x14000000, false), "");
2093
2094 /* replace the NUM FORMAT in the descriptor */
2095 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
2096 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
2097
2098 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, "");
2099
2100 /* don't modify the coordinates for this case */
2101 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
2102 }
2103 args->addr = coord;
2104 result = ac_build_image_opcode(&ctx->ac, args);
2105
2106 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
2107 LLVMValueRef tmp, tmp2;
2108
2109 /* if the cube workaround is in place, f2i the result. */
2110 for (c = 0; c < 4; c++) {
2111 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
2112 if (stype == GLSL_TYPE_UINT)
2113 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
2114 else
2115 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
2116 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
2117 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
2118 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
2119 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
2120 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
2121 }
2122 }
2123 return result;
2124 }
2125
2126 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
2127 const nir_tex_instr *instr,
2128 bool lod_is_zero,
2129 struct ac_image_args *args)
2130 {
2131 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
2132 return ac_build_buffer_load_format(&ctx->ac,
2133 args->resource,
2134 args->addr,
2135 LLVMConstInt(ctx->i32, 0, false),
2136 true);
2137 }
2138
2139 args->opcode = ac_image_sample;
2140 args->compare = instr->is_shadow;
2141
2142 switch (instr->op) {
2143 case nir_texop_txf:
2144 case nir_texop_txf_ms:
2145 case nir_texop_samples_identical:
2146 args->opcode = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip;
2147 args->compare = false;
2148 args->offset = false;
2149 break;
2150 case nir_texop_txb:
2151 args->bias = true;
2152 break;
2153 case nir_texop_txl:
2154 if (lod_is_zero)
2155 args->level_zero = true;
2156 else
2157 args->lod = true;
2158 break;
2159 case nir_texop_txs:
2160 case nir_texop_query_levels:
2161 args->opcode = ac_image_get_resinfo;
2162 break;
2163 case nir_texop_tex:
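/* Implicit derivatives are only defined in fragment shaders, so all
 * other stages must sample level 0 explicitly. */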
2164 if (ctx->stage != MESA_SHADER_FRAGMENT)
2165 args->level_zero = true;
2166 break;
2167 case nir_texop_txd:
2168 args->deriv = true;
2169 break;
2170 case nir_texop_tg4:
2171 args->opcode = ac_image_gather4;
2172 args->level_zero = true;
2173 break;
2174 case nir_texop_lod:
2175 args->opcode = ac_image_get_lod;
2176 args->compare = false;
2177 args->offset = false;
2178 break;
2179 default:
2180 break;
2181 }
2182
2183 if (instr->op == nir_texop_tg4) {
2184 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
2185 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
2186 return radv_lower_gather4_integer(ctx, args, instr);
2187 }
2188 }
2189 return ac_build_image_opcode(&ctx->ac, args);
2190 }
2191
2192 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
2193 nir_intrinsic_instr *instr)
2194 {
2195 LLVMValueRef index = get_src(ctx->nir, instr->src[0]);
2196 unsigned desc_set = nir_intrinsic_desc_set(instr);
2197 unsigned binding = nir_intrinsic_binding(instr);
2198 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
2199 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
2200 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
2201 unsigned base_offset = layout->binding[binding].offset;
2202 LLVMValueRef offset, stride;
2203
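/* Dynamic uniform/storage buffer descriptors live in the push constant
 * buffer, after the push constants themselves, 16 bytes per binding. */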
2204 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2205 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2206 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
2207 layout->binding[binding].dynamic_offset_offset;
2208 desc_ptr = ctx->push_constants;
2209 base_offset = pipeline_layout->push_constant_size + 16 * idx;
2210 stride = LLVMConstInt(ctx->i32, 16, false);
2211 } else
2212 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
2213
2214 offset = LLVMConstInt(ctx->i32, base_offset, false);
2215 index = LLVMBuildMul(ctx->builder, index, stride, "");
2216 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
2217
2218 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
2219 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
2220 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
2221
2222 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
2223 }
2224
2225 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
2226 nir_intrinsic_instr *instr)
2227 {
2228 LLVMValueRef ptr, addr;
2229
2230 addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
2231 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx->nir, instr->src[0]), "");
2232
2233 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
2234 ptr = cast_ptr(ctx, ptr, get_def_type(ctx->nir, &instr->dest.ssa));
2235
2236 return LLVMBuildLoad(ctx->builder, ptr, "");
2237 }
2238
2239 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
2240 const nir_intrinsic_instr *instr)
2241 {
2242 LLVMValueRef desc = get_src(ctx->nir, instr->src[0]);
2243
2244 return get_buffer_size(ctx, desc, false);
2245 }

2246 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
2247 nir_intrinsic_instr *instr)
2248 {
2249 const char *store_name;
2250 LLVMValueRef src_data = get_src(ctx->nir, instr->src[0]);
2251 LLVMTypeRef data_type = ctx->f32;
2252 int elem_size_mult = get_elem_bits(&ctx->ac, LLVMTypeOf(src_data)) / 32;
2253 int components_32bit = elem_size_mult * instr->num_components;
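/* 64-bit sources are stored as twice as many 32-bit components. */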
2254 unsigned writemask = nir_intrinsic_write_mask(instr);
2255 LLVMValueRef base_data, base_offset;
2256 LLVMValueRef params[6];
2257
2258 if (ctx->stage == MESA_SHADER_FRAGMENT)
2259 ctx->shader_info->fs.writes_memory = true;
2260
2261 params[1] = get_src(ctx->nir, instr->src[1]);
2262 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2263 params[4] = ctx->i1false; /* glc */
2264 params[5] = ctx->i1false; /* slc */
2265
2266 if (components_32bit > 1)
2267 data_type = LLVMVectorType(ctx->f32, components_32bit);
2268
2269 base_data = to_float(&ctx->ac, src_data);
2270 base_data = trim_vector(ctx, base_data, instr->num_components);
2271 base_data = LLVMBuildBitCast(ctx->builder, base_data,
2272 data_type, "");
2273 base_offset = get_src(ctx->nir, instr->src[2]); /* voffset */
2274 while (writemask) {
2275 int start, count;
2276 LLVMValueRef data;
2277 LLVMValueRef offset;
2278 LLVMValueRef tmp;
2279 u_bit_scan_consecutive_range(&writemask, &start, &count);
2280
2281 /* Due to an LLVM limitation, split 3-element writes
2282 * into a 2-element and a 1-element write. */
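/* E.g. a vec3 store (writemask 0x7) is emitted as a v2f32 store of
 * components 0-1 now; component 2 is re-queued for the next scan. */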
2283 if (count == 3) {
2284 writemask |= 1 << (start + 2);
2285 count = 2;
2286 }
2287
2288 start *= elem_size_mult;
2289 count *= elem_size_mult;
2290
2291 if (count > 4) {
2292 writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
2293 count = 4;
2294 }
2295
2296 if (count == 4) {
2297 store_name = "llvm.amdgcn.buffer.store.v4f32";
2298 data = base_data;
2299 } else if (count == 2) {
2300 tmp = LLVMBuildExtractElement(ctx->builder,
2301 base_data, LLVMConstInt(ctx->i32, start, false), "");
2302 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
2303 ctx->i32zero, "");
2304
2305 tmp = LLVMBuildExtractElement(ctx->builder,
2306 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
2307 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
2308 ctx->i32one, "");
2309 store_name = "llvm.amdgcn.buffer.store.v2f32";
2310
2311 } else {
2312 assert(count == 1);
2313 if (get_llvm_num_components(base_data) > 1)
2314 data = LLVMBuildExtractElement(ctx->builder, base_data,
2315 LLVMConstInt(ctx->i32, start, false), "");
2316 else
2317 data = base_data;
2318 store_name = "llvm.amdgcn.buffer.store.f32";
2319 }
2320
2321 offset = base_offset;
2322 if (start != 0) {
2323 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
2324 }
2325 params[0] = data;
2326 params[3] = offset;
2327 ac_build_intrinsic(&ctx->ac, store_name,
2328 ctx->voidt, params, 6, 0);
2329 }
2330 }
2331
2332 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
2333 const nir_intrinsic_instr *instr)
2334 {
2335 const char *name;
2336 LLVMValueRef params[6];
2337 int arg_count = 0;
2338 if (ctx->stage == MESA_SHADER_FRAGMENT)
2339 ctx->shader_info->fs.writes_memory = true;
2340
2341 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2342 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx->nir, instr->src[3]), 0);
2343 }
2344 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx->nir, instr->src[2]), 0);
2345 params[arg_count++] = get_src(ctx->nir, instr->src[0]);
2346 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2347 params[arg_count++] = get_src(ctx->nir, instr->src[1]); /* voffset */
2348 params[arg_count++] = ctx->i1false; /* slc */
2349
2350 switch (instr->intrinsic) {
2351 case nir_intrinsic_ssbo_atomic_add:
2352 name = "llvm.amdgcn.buffer.atomic.add";
2353 break;
2354 case nir_intrinsic_ssbo_atomic_imin:
2355 name = "llvm.amdgcn.buffer.atomic.smin";
2356 break;
2357 case nir_intrinsic_ssbo_atomic_umin:
2358 name = "llvm.amdgcn.buffer.atomic.umin";
2359 break;
2360 case nir_intrinsic_ssbo_atomic_imax:
2361 name = "llvm.amdgcn.buffer.atomic.smax";
2362 break;
2363 case nir_intrinsic_ssbo_atomic_umax:
2364 name = "llvm.amdgcn.buffer.atomic.umax";
2365 break;
2366 case nir_intrinsic_ssbo_atomic_and:
2367 name = "llvm.amdgcn.buffer.atomic.and";
2368 break;
2369 case nir_intrinsic_ssbo_atomic_or:
2370 name = "llvm.amdgcn.buffer.atomic.or";
2371 break;
2372 case nir_intrinsic_ssbo_atomic_xor:
2373 name = "llvm.amdgcn.buffer.atomic.xor";
2374 break;
2375 case nir_intrinsic_ssbo_atomic_exchange:
2376 name = "llvm.amdgcn.buffer.atomic.swap";
2377 break;
2378 case nir_intrinsic_ssbo_atomic_comp_swap:
2379 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2380 break;
2381 default:
2382 abort();
2383 }
2384
2385 return ac_build_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
2386 }
2387
2388 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
2389 const nir_intrinsic_instr *instr)
2390 {
2391 LLVMValueRef results[2];
2392 int load_components;
2393 int num_components = instr->num_components;
2394 if (instr->dest.ssa.bit_size == 64)
2395 num_components *= 2;
2396
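/* Load in chunks of up to 4 dwords; a 64-bit vec4 (8 dwords) therefore
 * takes two loads whose results are stitched together below. */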
2397 for (int i = 0; i < num_components; i += load_components) {
2398 load_components = MIN2(num_components - i, 4);
2399 const char *load_name;
2400 LLVMTypeRef data_type = ctx->f32;
2401 LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false);
2402 offset = LLVMBuildAdd(ctx->builder, get_src(ctx->nir, instr->src[1]), offset, "");
2403
2404 if (load_components == 3)
2405 data_type = LLVMVectorType(ctx->f32, 4);
2406 else if (load_components > 1)
2407 data_type = LLVMVectorType(ctx->f32, load_components);
2408
2409 if (load_components >= 3)
2410 load_name = "llvm.amdgcn.buffer.load.v4f32";
2411 else if (load_components == 2)
2412 load_name = "llvm.amdgcn.buffer.load.v2f32";
2413 else if (load_components == 1)
2414 load_name = "llvm.amdgcn.buffer.load.f32";
2415 else
2416 unreachable("unhandled number of components");
2417
2418 LLVMValueRef params[] = {
2419 get_src(ctx->nir, instr->src[0]),
2420 LLVMConstInt(ctx->i32, 0, false),
2421 offset,
2422 ctx->i1false,
2423 ctx->i1false,
2424 };
2425
2426 results[i > 0 ? 1 : 0] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
2427
2428 }
2429
2430 LLVMValueRef ret = results[0];
2431 if (num_components > 4 || num_components == 3) {
2432 LLVMValueRef masks[] = {
2433 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2434 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2435 LLVMConstInt(ctx->i32, 4, false), LLVMConstInt(ctx->i32, 5, false),
2436 LLVMConstInt(ctx->i32, 6, false), LLVMConstInt(ctx->i32, 7, false)
2437 };
2438
2439 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
2440 ret = LLVMBuildShuffleVector(ctx->builder, results[0],
2441 results[num_components > 4 ? 1 : 0], swizzle, "");
2442 }
2443
2444 return LLVMBuildBitCast(ctx->builder, ret,
2445 get_def_type(ctx->nir, &instr->dest.ssa), "");
2446 }
2447
2448 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
2449 const nir_intrinsic_instr *instr)
2450 {
2451 LLVMValueRef results[8], ret;
2452 LLVMValueRef rsrc = get_src(ctx->nir, instr->src[0]);
2453 LLVMValueRef offset = get_src(ctx->nir, instr->src[1]);
2454 int num_components = instr->num_components;
2455
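/* The load.const intrinsic returns a single dword, so UBO loads are
 * emitted one 32-bit component at a time. */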
2456 if (instr->dest.ssa.bit_size == 64)
2457 num_components *= 2;
2458
2459 for (unsigned i = 0; i < num_components; ++i) {
2460 LLVMValueRef params[] = {
2461 rsrc,
2462 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2463 offset, "")
2464 };
2465 results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const.v4i32", ctx->f32,
2466 params, 2,
2467 AC_FUNC_ATTR_READNONE |
2468 AC_FUNC_ATTR_LEGACY);
2469 }
2470
2472 ret = ac_build_gather_values(&ctx->ac, results, instr->num_components);
2473 return LLVMBuildBitCast(ctx->builder, ret,
2474 get_def_type(ctx->nir, &instr->dest.ssa), "");
2475 }
2476
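/* Split a variable dereference into a constant offset (*const_out) and an
 * optional LLVM value for the indirect part (*indir_out), counted in
 * attribute slots (components for compact variables). For per-vertex IO the
 * leading array index is returned via vertex_index_out/vertex_index_ref. */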
2477 static void
2478 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref,
2479 bool vs_in, unsigned *vertex_index_out,
2480 LLVMValueRef *vertex_index_ref,
2481 unsigned *const_out, LLVMValueRef *indir_out)
2482 {
2483 unsigned const_offset = 0;
2484 nir_deref *tail = &deref->deref;
2485 LLVMValueRef offset = NULL;
2486
2487 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2488 tail = tail->child;
2489 nir_deref_array *deref_array = nir_deref_as_array(tail);
2490 if (vertex_index_out)
2491 *vertex_index_out = deref_array->base_offset;
2492
2493 if (vertex_index_ref) {
2494 LLVMValueRef vtx = LLVMConstInt(ctx->i32, deref_array->base_offset, false);
2495 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
2496 vtx = LLVMBuildAdd(ctx->builder, vtx, get_src(ctx->nir, deref_array->indirect), "");
2497 }
2498 *vertex_index_ref = vtx;
2499 }
2500 }
2501
2502 if (deref->var->data.compact) {
2503 assert(tail->child->deref_type == nir_deref_type_array);
2504 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
2505 nir_deref_array *deref_array = nir_deref_as_array(tail->child);
2506 /* We always lower indirect dereferences for "compact" array vars. */
2507 assert(deref_array->deref_array_type == nir_deref_array_type_direct);
2508
2509 const_offset = deref_array->base_offset;
2510 goto out;
2511 }
2512
2513 while (tail->child != NULL) {
2514 const struct glsl_type *parent_type = tail->type;
2515 tail = tail->child;
2516
2517 if (tail->deref_type == nir_deref_type_array) {
2518 nir_deref_array *deref_array = nir_deref_as_array(tail);
2519 LLVMValueRef index, stride, local_offset;
2520 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2521
2522 const_offset += size * deref_array->base_offset;
2523 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2524 continue;
2525
2526 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2527 index = get_src(ctx->nir, deref_array->indirect);
2528 stride = LLVMConstInt(ctx->i32, size, 0);
2529 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2530
2531 if (offset)
2532 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2533 else
2534 offset = local_offset;
2535 } else if (tail->deref_type == nir_deref_type_struct) {
2536 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2537
2538 for (unsigned i = 0; i < deref_struct->index; i++) {
2539 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2540 const_offset += glsl_count_attribute_slots(ft, vs_in);
2541 }
2542 } else
2543 unreachable("unsupported deref type");
2544
2545 }
2546 out:
2547 if (const_offset && offset)
2548 offset = LLVMBuildAdd(ctx->builder, offset,
2549 LLVMConstInt(ctx->i32, const_offset, 0),
2550 "");
2551
2552 *const_out = const_offset;
2553 *indir_out = offset;
2554 }
2555
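/* Helpers for the TCS LDS scratch area: dw_addr is a dword index into
 * ctx->lds and values are bitcast to i32 before being stored. */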
2556 static LLVMValueRef
2557 lds_load(struct nir_to_llvm_context *ctx,
2558 LLVMValueRef dw_addr)
2559 {
2560 LLVMValueRef value;
2561 value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
2562 return value;
2563 }
2564
2565 static void
2566 lds_store(struct nir_to_llvm_context *ctx,
2567 LLVMValueRef dw_addr, LLVMValueRef value)
2568 {
2569 value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
2570 ac_build_indexed_store(&ctx->ac, ctx->lds,
2571 dw_addr, value);
2572 }
2573
2574 /* The offchip buffer layout for TCS->TES is
2575 *
2576 * - attribute 0 of patch 0 vertex 0
2577 * - attribute 0 of patch 0 vertex 1
2578 * - attribute 0 of patch 0 vertex 2
2579 * ...
2580 * - attribute 0 of patch 1 vertex 0
2581 * - attribute 0 of patch 1 vertex 1
2582 * ...
2583 * - attribute 1 of patch 0 vertex 0
2584 * - attribute 1 of patch 0 vertex 1
2585 * ...
2586 * - per patch attribute 0 of patch 0
2587 * - per patch attribute 0 of patch 1
2588 * ...
2589 *
2590 * Note that every attribute has 4 components.
2591 */
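/* In bytes, the code below computes
 *    per-vertex: (rel_patch_id * vertices_per_patch + vertex_index
 *                 + param_index * vertices_per_patch * num_patches) * 16
 *    per-patch:  (rel_patch_id + param_index * num_patches) * 16
 *                + patch_data_offset
 */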
2592 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
2593 LLVMValueRef vertex_index,
2594 LLVMValueRef param_index)
2595 {
2596 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
2597 LLVMValueRef param_stride, constant16;
2598 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
2599
2600 vertices_per_patch = unpack_param(ctx, ctx->tcs_offchip_layout, 9, 6);
2601 num_patches = unpack_param(ctx, ctx->tcs_offchip_layout, 0, 9);
2602 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
2603 num_patches, "");
2604
2605 constant16 = LLVMConstInt(ctx->i32, 16, false);
2606 if (vertex_index) {
2607 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id,
2608 vertices_per_patch, "");
2609
2610 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2611 vertex_index, "");
2612
2613 param_stride = total_vertices;
2614 } else {
2615 base_addr = rel_patch_id;
2616 param_stride = num_patches;
2617 }
2618
2619 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2620 LLVMBuildMul(ctx->builder, param_index,
2621 param_stride, ""), "");
2622
2623 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, "");
2624
2625 if (!vertex_index) {
2626 LLVMValueRef patch_data_offset =
2627 unpack_param(ctx, ctx->tcs_offchip_layout, 16, 16);
2628
2629 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2630 patch_data_offset, "");
2631 }
2632 return base_addr;
2633 }
2634
2635 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx,
2636 unsigned param,
2637 unsigned const_index,
2638 bool is_compact,
2639 LLVMValueRef vertex_index,
2640 LLVMValueRef indir_index)
2641 {
2642 LLVMValueRef param_index;
2643
2644 if (indir_index)
2645 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, param, false),
2646 indir_index, "");
2647 else {
2648 if (const_index && !is_compact)
2649 param += const_index;
2650 param_index = LLVMConstInt(ctx->i32, param, false);
2651 }
2652 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
2653 }
2654
2655 static void
2656 mark_tess_output(struct nir_to_llvm_context *ctx,
2657 bool is_patch, uint32_t param)
2659 {
2660 if (is_patch) {
2661 ctx->tess_patch_outputs_written |= (1ull << param);
2662 } else
2663 ctx->tess_outputs_written |= (1ull << param);
2664 }
2665
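/* Compute an LDS dword address for a TCS input/output:
 *    dw_addr += vertex_index * stride     (per-vertex attributes)
 *    dw_addr += indir_index * 4           (indirect array index, in slots)
 *               or const_index            (direct, non-compact)
 *    dw_addr += param * 4
 *    dw_addr += const_index               (compact: component index)
 */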
2666 static LLVMValueRef
2667 get_dw_address(struct nir_to_llvm_context *ctx,
2668 LLVMValueRef dw_addr,
2669 unsigned param,
2670 unsigned const_index,
2671 bool compact_const_index,
2672 LLVMValueRef vertex_index,
2673 LLVMValueRef stride,
2674 LLVMValueRef indir_index)
2676 {
2678 if (vertex_index) {
2679 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2680 LLVMBuildMul(ctx->builder,
2681 vertex_index,
2682 stride, ""), "");
2683 }
2684
2685 if (indir_index)
2686 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2687 LLVMBuildMul(ctx->builder, indir_index,
2688 LLVMConstInt(ctx->i32, 4, false), ""), "");
2689 else if (const_index && !compact_const_index)
2690 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2691 LLVMConstInt(ctx->i32, const_index, false), "");
2692
2693 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2694 LLVMConstInt(ctx->i32, param * 4, false), "");
2695
2696 if (const_index && compact_const_index)
2697 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2698 LLVMConstInt(ctx->i32, const_index, false), "");
2699 return dw_addr;
2700 }
2701
2702 static LLVMValueRef
2703 load_tcs_input(struct nir_to_llvm_context *ctx,
2704 nir_intrinsic_instr *instr)
2705 {
2706 LLVMValueRef dw_addr, stride;
2707 unsigned const_index;
2708 LLVMValueRef vertex_index;
2709 LLVMValueRef indir_index;
2710 unsigned param;
2711 LLVMValueRef value[4], result;
2712 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2713 const bool is_compact = instr->variables[0]->var->data.compact;
2714 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2715 radv_get_deref_offset(ctx, instr->variables[0],
2716 false, NULL, per_vertex ? &vertex_index : NULL,
2717 &const_index, &indir_index);
2718
2719 stride = unpack_param(ctx, ctx->tcs_in_layout, 13, 8);
2720 dw_addr = get_tcs_in_current_patch_offset(ctx);
2721 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2722 indir_index);
2723
2724 for (unsigned i = 0; i < instr->num_components; i++) {
2725 value[i] = lds_load(ctx, dw_addr);
2726 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2727 ctx->i32one, "");
2728 }
2729 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2730 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
2731 return result;
2732 }
2733
2734 static LLVMValueRef
2735 load_tcs_output(struct nir_to_llvm_context *ctx,
2736 nir_intrinsic_instr *instr)
2737 {
2738 LLVMValueRef dw_addr, stride;
2739 LLVMValueRef value[4], result;
2740 LLVMValueRef vertex_index = NULL;
2741 LLVMValueRef indir_index = NULL;
2742 unsigned const_index = 0;
2743 unsigned param;
2744 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2745 const bool is_compact = instr->variables[0]->var->data.compact;
2746 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2747 radv_get_deref_offset(ctx, instr->variables[0],
2748 false, NULL, per_vertex ? &vertex_index : NULL,
2749 &const_index, &indir_index);
2750
2751 if (!instr->variables[0]->var->data.patch) {
2752 stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
2753 dw_addr = get_tcs_out_current_patch_offset(ctx);
2754 } else {
2755 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2756 }
2757
2758 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2759 indir_index);
2760
2761 for (unsigned i = 0; i < instr->num_components; i++) {
2762 value[i] = lds_load(ctx, dw_addr);
2763 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2764 ctx->i32one, "");
2765 }
2766 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2767 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
2768 return result;
2769 }
2770
2771 static void
2772 store_tcs_output(struct nir_to_llvm_context *ctx,
2773 nir_intrinsic_instr *instr,
2774 LLVMValueRef src,
2775 unsigned writemask)
2776 {
2777 LLVMValueRef stride, dw_addr;
2778 LLVMValueRef buf_addr = NULL;
2779 LLVMValueRef vertex_index = NULL;
2780 LLVMValueRef indir_index = NULL;
2781 unsigned const_index = 0;
2782 unsigned param;
2783 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2784 const bool is_compact = instr->variables[0]->var->data.compact;
2785
2786 radv_get_deref_offset(ctx, instr->variables[0],
2787 false, NULL, per_vertex ? &vertex_index : NULL,
2788 &const_index, &indir_index);
2789
2790 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2791 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
2792 is_compact && const_index > 3) {
2793 const_index -= 3;
2794 param++;
2795 }
2796
2797 if (!instr->variables[0]->var->data.patch) {
2798 stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
2799 dw_addr = get_tcs_out_current_patch_offset(ctx);
2800 } else {
2801 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2802 }
2803
2804 mark_tess_output(ctx, instr->variables[0]->var->data.patch, param);
2805
2806 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2807 indir_index);
2808 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
2809 vertex_index, indir_index);
2810
2811 unsigned base = is_compact ? const_index : 0;
2812 for (unsigned chan = 0; chan < 8; chan++) {
2813 bool is_tess_factor = false;
2814 if (!(writemask & (1 << chan)))
2815 continue;
2816 LLVMValueRef value = llvm_extract_elem(ctx, src, chan);
2817
2818 lds_store(ctx, dw_addr, value);
2819
2820 if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
2821 instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
2822 is_tess_factor = true;
2823
2824 if (!is_tess_factor && writemask != 0xF)
2825 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
2826 buf_addr, ctx->oc_lds,
2827 4 * (base + chan), 1, 0, true, false);
2828
2829 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2830 ctx->i32one, "");
2831 }
2832
2833 if (writemask == 0xF) {
2834 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
2835 buf_addr, ctx->oc_lds,
2836 (base * 4), 1, 0, true, false);
2837 }
2838 }
2839
2840 static LLVMValueRef
2841 load_tes_input(struct nir_to_llvm_context *ctx,
2842 const nir_intrinsic_instr *instr)
2843 {
2844 LLVMValueRef buf_addr;
2845 LLVMValueRef result;
2846 LLVMValueRef vertex_index = NULL;
2847 LLVMValueRef indir_index = NULL;
2848 unsigned const_index = 0;
2849 unsigned param;
2850 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2851 const bool is_compact = instr->variables[0]->var->data.compact;
2852
2853 radv_get_deref_offset(ctx, instr->variables[0],
2854 false, NULL, per_vertex ? &vertex_index : NULL,
2855 &const_index, &indir_index);
2856 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2857 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
2858 is_compact && const_index > 3) {
2859 const_index -= 3;
2860 param++;
2861 }
2862 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
2863 is_compact, vertex_index, indir_index);
2864
2865 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
2866 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true, false);
2867 result = trim_vector(ctx, result, instr->num_components);
2868 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx->nir, &instr->dest.ssa), "");
2869 return result;
2870 }
2871
2872 static LLVMValueRef
2873 load_gs_input(struct nir_to_llvm_context *ctx,
2874 nir_intrinsic_instr *instr)
2875 {
2876 LLVMValueRef indir_index, vtx_offset;
2877 unsigned const_index;
2878 LLVMValueRef args[9];
2879 unsigned param, vtx_offset_param;
2880 LLVMValueRef value[4], result;
2881 unsigned vertex_index;
2882 radv_get_deref_offset(ctx, instr->variables[0],
2883 false, &vertex_index, NULL,
2884 &const_index, &indir_index);
2885 vtx_offset_param = vertex_index;
2886 assert(vtx_offset_param < 6);
2887 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
2888 LLVMConstInt(ctx->i32, 4, false), "");
2889
2890 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
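/* The constant offset is scaled by 256: the ESGS ring is written swizzled,
 * so consecutive dwords of one vertex appear to be one 64-lane wave apart,
 * i.e. 64 * 4 bytes. */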
2891 for (unsigned i = 0; i < instr->num_components; i++) {
2892
2893 args[0] = ctx->esgs_ring;
2894 args[1] = vtx_offset;
2895 args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
2896 args[3] = ctx->i32zero;
2897 args[4] = ctx->i32one; /* OFFEN */
2898 args[5] = ctx->i32zero; /* IDXEN */
2899 args[6] = ctx->i32one; /* GLC */
2900 args[7] = ctx->i32zero; /* SLC */
2901 args[8] = ctx->i32zero; /* TFE */
2902
2903 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
2904 ctx->i32, args, 9,
2905 AC_FUNC_ATTR_READONLY |
2906 AC_FUNC_ATTR_LEGACY);
2907 }
2908 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2909
2910 return result;
2911 }
2912
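/* Walk a NIR deref chain and turn it into a GEP over the LLVM pointer
 * backing the variable (used here for shared variables). */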
2913 static LLVMValueRef
2914 build_gep_for_deref(struct ac_nir_context *ctx,
2915 nir_deref_var *deref)
2916 {
2917 struct hash_entry *entry = _mesa_hash_table_search(ctx->vars, deref->var);
2918 assert(entry->data);
2919 LLVMValueRef val = entry->data;
2920 nir_deref *tail = deref->deref.child;
2921 while (tail != NULL) {
2922 LLVMValueRef offset;
2923 switch (tail->deref_type) {
2924 case nir_deref_type_array: {
2925 nir_deref_array *array = nir_deref_as_array(tail);
2926 offset = LLVMConstInt(ctx->ac.i32, array->base_offset, 0);
2927 if (array->deref_array_type ==
2928 nir_deref_array_type_indirect) {
2929 offset = LLVMBuildAdd(ctx->ac.builder, offset,
2930 get_src(ctx,
2931 array->indirect),
2932 "");
2933 }
2934 break;
2935 }
2936 case nir_deref_type_struct: {
2937 nir_deref_struct *deref_struct =
2938 nir_deref_as_struct(tail);
2939 offset = LLVMConstInt(ctx->ac.i32,
2940 deref_struct->index, 0);
2941 break;
2942 }
2943 default:
2944 unreachable("bad deref type");
2945 }
2946 val = ac_build_gep0(&ctx->ac, val, offset);
2947 tail = tail->child;
2948 }
2949 return val;
2950 }
2951
2952 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2953 nir_intrinsic_instr *instr)
2954 {
2955 LLVMValueRef values[8];
2956 int idx = instr->variables[0]->var->data.driver_location;
2957 int ve = instr->dest.ssa.num_components;
2958 LLVMValueRef indir_index;
2959 LLVMValueRef ret;
2960 unsigned const_index;
2961 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
2962 instr->variables[0]->var->data.mode == nir_var_shader_in;
2963 radv_get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
2964 &const_index, &indir_index);
2965
2966 if (instr->dest.ssa.bit_size == 64)
2967 ve *= 2;
2968
2969 switch (instr->variables[0]->var->data.mode) {
2970 case nir_var_shader_in:
2971 if (ctx->stage == MESA_SHADER_TESS_CTRL)
2972 return load_tcs_input(ctx, instr);
2973 if (ctx->stage == MESA_SHADER_TESS_EVAL)
2974 return load_tes_input(ctx, instr);
2975 if (ctx->stage == MESA_SHADER_GEOMETRY) {
2976 return load_gs_input(ctx, instr);
2977 }
2978 for (unsigned chan = 0; chan < ve; chan++) {
2979 if (indir_index) {
2980 unsigned count = glsl_count_attribute_slots(
2981 instr->variables[0]->var->type,
2982 ctx->stage == MESA_SHADER_VERTEX);
2983 count -= chan / 4;
2984 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2985 &ctx->ac, ctx->inputs + idx + chan, count,
2986 4, false);
2987
2988 values[chan] = LLVMBuildExtractElement(ctx->builder,
2989 tmp_vec,
2990 indir_index, "");
2991 } else
2992 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2993 }
2994 break;
2995 case nir_var_local:
2996 for (unsigned chan = 0; chan < ve; chan++) {
2997 if (indir_index) {
2998 unsigned count = glsl_count_attribute_slots(
2999 instr->variables[0]->var->type, false);
3000 count -= chan / 4;
3001 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3002 &ctx->ac, ctx->nir->locals + idx + chan, count,
3003 4, true);
3004
3005 values[chan] = LLVMBuildExtractElement(ctx->builder,
3006 tmp_vec,
3007 indir_index, "");
3008 } else {
3009 values[chan] = LLVMBuildLoad(ctx->builder, ctx->nir->locals[idx + chan + const_index * 4], "");
3010 }
3011 }
3012 break;
3013 case nir_var_shared: {
3014 LLVMValueRef address = build_gep_for_deref(ctx->nir,
3015 instr->variables[0]);
3016 LLVMValueRef val = LLVMBuildLoad(ctx->builder, address, "");
3017 return LLVMBuildBitCast(ctx->builder, val,
3018 get_def_type(ctx->nir, &instr->dest.ssa),
3019 "");
3020 }
3021 case nir_var_shader_out:
3022 if (ctx->stage == MESA_SHADER_TESS_CTRL)
3023 return load_tcs_output(ctx, instr);
3024 for (unsigned chan = 0; chan < ve; chan++) {
3025 if (indir_index) {
3026 unsigned count = glsl_count_attribute_slots(
3027 instr->variables[0]->var->type, false);
3028 count -= chan / 4;
3029 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3030 &ctx->ac, ctx->nir->outputs + idx + chan, count,
3031 4, true);
3032
3033 values[chan] = LLVMBuildExtractElement(ctx->builder,
3034 tmp_vec,
3035 indir_index, "");
3036 } else {
3037 values[chan] = LLVMBuildLoad(ctx->builder,
3038 ctx->nir->outputs[idx + chan + const_index * 4],
3039 "");
3040 }
3041 }
3042 break;
3043 default:
3044 unreachable("unhandled variable mode");
3045 }
3046 ret = ac_build_gather_values(&ctx->ac, values, ve);
3047 return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx->nir, &instr->dest.ssa), "");
3048 }
3049
3050 static void
3051 visit_store_var(struct nir_to_llvm_context *ctx,
3052 nir_intrinsic_instr *instr)
3053 {
3054 LLVMValueRef temp_ptr, value;
3055 int idx = instr->variables[0]->var->data.driver_location;
3056 LLVMValueRef src = to_float(&ctx->ac, get_src(ctx->nir, instr->src[0]));
3057 int writemask = instr->const_index[0];
3058 LLVMValueRef indir_index;
3059 unsigned const_index;
3060 radv_get_deref_offset(ctx, instr->variables[0], false,
3061 NULL, NULL, &const_index, &indir_index);
3062
3063 if (get_elem_bits(&ctx->ac, LLVMTypeOf(src)) == 64) {
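/* Split each 64-bit component into two 32-bit channels and widen the
 * writemask accordingly, e.g. 0x5 (xz) becomes 0x33. */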
3064 int old_writemask = writemask;
3065
3066 src = LLVMBuildBitCast(ctx->builder, src,
3067 LLVMVectorType(ctx->f32, get_llvm_num_components(src) * 2),
3068 "");
3069
3070 writemask = 0;
3071 for (unsigned chan = 0; chan < 4; chan++) {
3072 if (old_writemask & (1 << chan))
3073 writemask |= 3u << (2 * chan);
3074 }
3075 }
3076
3077 switch (instr->variables[0]->var->data.mode) {
3078 case nir_var_shader_out:
3079
3080 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3081 store_tcs_output(ctx, instr, src, writemask);
3082 return;
3083 }
3084
3085 for (unsigned chan = 0; chan < 8; chan++) {
3086 int stride = 4;
3087 if (!(writemask & (1 << chan)))
3088 continue;
3089
3090 value = llvm_extract_elem(ctx, src, chan);
3091
3092 if (instr->variables[0]->var->data.compact)
3093 stride = 1;
3094 if (indir_index) {
3095 unsigned count = glsl_count_attribute_slots(
3096 instr->variables[0]->var->type, false);
3097 count -= chan / 4;
3098 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3099 &ctx->ac, ctx->nir->outputs + idx + chan, count,
3100 stride, true);
3101
3102 if (get_llvm_num_components(tmp_vec) > 1) {
3103 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
3104 value, indir_index, "");
3105 } else
3106 tmp_vec = value;
3107 build_store_values_extended(ctx, ctx->nir->outputs + idx + chan,
3108 count, stride, tmp_vec);
3109
3110 } else {
3111 temp_ptr = ctx->nir->outputs[idx + chan + const_index * stride];
3112
3113 LLVMBuildStore(ctx->builder, value, temp_ptr);
3114 }
3115 }
3116 break;
3117 case nir_var_local:
3118 for (unsigned chan = 0; chan < 8; chan++) {
3119 if (!(writemask & (1 << chan)))
3120 continue;
3121
3122 value = llvm_extract_elem(ctx, src, chan);
3123 if (indir_index) {
3124 unsigned count = glsl_count_attribute_slots(
3125 instr->variables[0]->var->type, false);
3126 count -= chan / 4;
3127 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
3128 &ctx->ac, ctx->nir->locals + idx + chan, count,
3129 4, true);
3130
3131 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
3132 value, indir_index, "");
3133 build_store_values_extended(ctx, ctx->nir->locals + idx + chan,
3134 count, 4, tmp_vec);
3135 } else {
3136 temp_ptr = ctx->nir->locals[idx + chan + const_index * 4];
3137
3138 LLVMBuildStore(ctx->builder, value, temp_ptr);
3139 }
3140 }
3141 break;
3142 case nir_var_shared: {
3143 int writemask = instr->const_index[0];
3144 LLVMValueRef address = build_gep_for_deref(ctx->nir,
3145 instr->variables[0]);
3146 LLVMValueRef val = get_src(ctx->nir, instr->src[0]);
3147 unsigned components =
3148 glsl_get_vector_elements(
3149 nir_deref_tail(&instr->variables[0]->deref)->type);
3150 if (writemask == (1 << components) - 1) {
3151 val = LLVMBuildBitCast(
3152 ctx->builder, val,
3153 LLVMGetElementType(LLVMTypeOf(address)), "");
3154 LLVMBuildStore(ctx->builder, val, address);
3155 } else {
3156 for (unsigned chan = 0; chan < 4; chan++) {
3157 if (!(writemask & (1 << chan)))
3158 continue;
3159 LLVMValueRef ptr =
3160 LLVMBuildStructGEP(ctx->builder,
3161 address, chan, "");
3162 LLVMValueRef src = llvm_extract_elem(ctx, val,
3163 chan);
3164 src = LLVMBuildBitCast(
3165 ctx->builder, src,
3166 LLVMGetElementType(LLVMTypeOf(ptr)), "");
3167 LLVMBuildStore(ctx->builder, src, ptr);
3168 }
3169 }
3170 break;
3171 }
3172 default:
3173 break;
3174 }
3175 }
3176
3177 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
3178 {
3179 switch (dim) {
3180 case GLSL_SAMPLER_DIM_BUF:
3181 return 1;
3182 case GLSL_SAMPLER_DIM_1D:
3183 return array ? 2 : 1;
3184 case GLSL_SAMPLER_DIM_2D:
3185 return array ? 3 : 2;
3186 case GLSL_SAMPLER_DIM_MS:
3187 return array ? 4 : 3;
3188 case GLSL_SAMPLER_DIM_3D:
3189 case GLSL_SAMPLER_DIM_CUBE:
3190 return 3;
3191 case GLSL_SAMPLER_DIM_RECT:
3192 case GLSL_SAMPLER_DIM_SUBPASS:
3193 return 2;
3194 case GLSL_SAMPLER_DIM_SUBPASS_MS:
3195 return 3;
3196 default:
3197 break;
3198 }
3199 return 0;
3200 }
3201
3204 /* Adjust the sample index according to FMASK.
3205 *
3206 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3207 * which is the identity mapping. Each nibble says which physical sample
3208 * should be fetched to get that sample.
3209 *
3210 * For example, 0x11111100 means there are only 2 samples stored and
3211 * the second sample covers 3/4 of the pixel. When reading samples 0
3212 * and 1, return physical sample 0 (determined by the first two 0s
3213 * in FMASK), otherwise return physical sample 1.
3214 *
3215 * The sample index should be adjusted as follows:
3216 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3217 */
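/* E.g. with the 0x11111100 example above and sample_index = 1:
 * (0x11111100 >> 4) & 0xF = 0, so physical sample 0 is fetched. */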
3218 static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx,
3219 LLVMValueRef coord_x, LLVMValueRef coord_y,
3220 LLVMValueRef coord_z,
3221 LLVMValueRef sample_index,
3222 LLVMValueRef fmask_desc_ptr)
3223 {
3224 LLVMValueRef fmask_load_address[4];
3225 LLVMValueRef res;
3226
3227 fmask_load_address[0] = coord_x;
3228 fmask_load_address[1] = coord_y;
3229 if (coord_z) {
3230 fmask_load_address[2] = coord_z;
3231 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
3232 }
3233
3234 struct ac_image_args args = {0};
3235
3236 args.opcode = ac_image_load;
3237 args.da = coord_z ? true : false;
3238 args.resource = fmask_desc_ptr;
3239 args.dmask = 0xf;
3240 args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2);
3241
3242 res = ac_build_image_opcode(&ctx->ac, &args);
3243
3244 res = to_integer(&ctx->ac, res);
3245 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3246 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3247
3248 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3249 res,
3250 ctx->i32zero, "");
3251
3252 LLVMValueRef sample_index4 =
3253 LLVMBuildMul(ctx->builder, sample_index, four, "");
3254 LLVMValueRef shifted_fmask =
3255 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3256 LLVMValueRef final_sample =
3257 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3258
3259 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3260 * resource descriptor is 0 (invalid).
3261 */
3262 LLVMValueRef fmask_desc =
3263 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
3264 ctx->v8i32, "");
3265
3266 LLVMValueRef fmask_word1 =
3267 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3268 ctx->i32one, "");
3269
3270 LLVMValueRef word1_is_nonzero =
3271 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3272 fmask_word1, ctx->i32zero, "");
3273
3274 /* Replace the MSAA sample index. */
3275 sample_index =
3276 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3277 final_sample, sample_index, "");
3278 return sample_index;
3279 }
3280
3281 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
3282 const nir_intrinsic_instr *instr)
3283 {
3284 const struct glsl_type *type = instr->variables[0]->var->type;
3285 if (instr->variables[0]->deref.child)
3286 type = instr->variables[0]->deref.child->type;
3287
3288 LLVMValueRef src0 = get_src(ctx->nir, instr->src[0]);
3289 LLVMValueRef coords[4];
3290 LLVMValueRef masks[] = {
3291 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
3292 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
3293 };
3294 LLVMValueRef res;
3295 LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx->nir, instr->src[1]), 0);
3296
3297 int count;
3298 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
3299 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
3300 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
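/* Subpass input coordinates are relative to the current fragment, so the
 * integer fragment position is added to them below. */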
3301 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
3302 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3303
3304 count = image_type_to_components_count(dim,
3305 glsl_sampler_type_is_array(type));
3306
3307 if (is_ms) {
3308 LLVMValueRef fmask_load_address[3];
3309 int chan;
3310
3311 fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
3312 fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
3313 if (glsl_sampler_type_is_array(type))
3314 fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
3315 else
3316 fmask_load_address[2] = NULL;
3317 if (add_frag_pos) {
3318 for (chan = 0; chan < 2; ++chan)
3319 fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
3320 }
3321 sample_index = adjust_sample_index_using_fmask(ctx,
3322 fmask_load_address[0],
3323 fmask_load_address[1],
3324 fmask_load_address[2],
3325 sample_index,
3326 get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
3327 }
3328 if (count == 1) {
3329 if (instr->src[0].ssa->num_components)
3330 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
3331 else
3332 res = src0;
3333 } else {
3334 int chan;
3335 if (is_ms)
3336 count--;
3337 for (chan = 0; chan < count; ++chan) {
3338 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
3339 }
3340
3341 if (add_frag_pos) {
3342 for (chan = 0; chan < count; ++chan)
3343 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
3344 }
3345 if (is_ms) {
3346 coords[count] = sample_index;
3347 count++;
3348 }
3349
3350 if (count == 3) {
3351 coords[3] = LLVMGetUndef(ctx->i32);
3352 count = 4;
3353 }
3354 res = ac_build_gather_values(&ctx->ac, coords, count);
3355 }
3356 return res;
3357 }
3358
3359 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
3360 const nir_intrinsic_instr *instr)
3361 {
3362 LLVMValueRef params[7];
3363 LLVMValueRef res;
3364 char intrinsic_name[64];
3365 const nir_variable *var = instr->variables[0]->var;
3366 const struct glsl_type *type = var->type;
3367 	if (instr->variables[0]->deref.child)
3368 type = instr->variables[0]->deref.child->type;
3369
3370 type = glsl_without_array(type);
3371 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3372 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3373 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx->nir, instr->src[0]),
3374 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3375 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
3376 params[3] = ctx->i1false; /* glc */
3377 params[4] = ctx->i1false; /* slc */
3378 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
3379 params, 5, 0);
3380
3381 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
3382 res = to_integer(&ctx->ac, res);
3383 } else {
3384 bool is_da = glsl_sampler_type_is_array(type) ||
3385 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3386 LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
3387 LLVMValueRef glc = ctx->i1false;
3388 LLVMValueRef slc = ctx->i1false;
3389
3390 params[0] = get_image_coords(ctx, instr);
3391 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3392 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
3393 if (HAVE_LLVM <= 0x0309) {
3394 params[3] = ctx->i1false; /* r128 */
3395 params[4] = da;
3396 params[5] = glc;
3397 params[6] = slc;
3398 } else {
3399 LLVMValueRef lwe = ctx->i1false;
3400 params[3] = glc;
3401 params[4] = slc;
3402 params[5] = lwe;
3403 params[6] = da;
3404 }
3405
3406 ac_get_image_intr_name("llvm.amdgcn.image.load",
3407 ctx->v4f32, /* vdata */
3408 LLVMTypeOf(params[0]), /* coords */
3409 LLVMTypeOf(params[1]), /* rsrc */
3410 intrinsic_name, sizeof(intrinsic_name));
3411
3412 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
3413 params, 7, AC_FUNC_ATTR_READONLY);
3414 }
3415 return to_integer(&ctx->ac, res);
3416 }
3417
3418 static void visit_image_store(struct nir_to_llvm_context *ctx,
3419 nir_intrinsic_instr *instr)
3420 {
3421 LLVMValueRef params[8];
3422 char intrinsic_name[64];
3423 const nir_variable *var = instr->variables[0]->var;
3424 const struct glsl_type *type = glsl_without_array(var->type);
3425 LLVMValueRef glc = ctx->i1false;
3426 bool force_glc = ctx->options->chip_class == SI;
3427 if (force_glc)
3428 glc = ctx->i1true;
3429 if (ctx->stage == MESA_SHADER_FRAGMENT)
3430 ctx->shader_info->fs.writes_memory = true;
3431
3432 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3433 params[0] = to_float(&ctx->ac, get_src(ctx->nir, instr->src[2])); /* data */
3434 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3435 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx->nir, instr->src[0]),
3436 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3437 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
3438 params[4] = glc; /* glc */
3439 params[5] = ctx->i1false; /* slc */
3440 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
3441 params, 6, 0);
3442 } else {
3443 bool is_da = glsl_sampler_type_is_array(type) ||
3444 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3445 LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
3446 LLVMValueRef slc = ctx->i1false;
3447
3448 params[0] = to_float(&ctx->ac, get_src(ctx->nir, instr->src[2]));
3449 params[1] = get_image_coords(ctx, instr); /* coords */
3450 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3451 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
3452 if (HAVE_LLVM <= 0x0309) {
3453 params[4] = ctx->i1false; /* r128 */
3454 params[5] = da;
3455 params[6] = glc;
3456 params[7] = slc;
3457 } else {
3458 LLVMValueRef lwe = ctx->i1false;
3459 params[4] = glc;
3460 params[5] = slc;
3461 params[6] = lwe;
3462 params[7] = da;
3463 }
3464
3465 ac_get_image_intr_name("llvm.amdgcn.image.store",
3466 LLVMTypeOf(params[0]), /* vdata */
3467 LLVMTypeOf(params[1]), /* coords */
3468 LLVMTypeOf(params[2]), /* rsrc */
3469 intrinsic_name, sizeof(intrinsic_name));
3470
3471 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
3472 params, 8, 0);
3473 }
3474
3475 }
3476
3477 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
3478 const nir_intrinsic_instr *instr)
3479 {
3480 LLVMValueRef params[6];
3481 int param_count = 0;
3482 const nir_variable *var = instr->variables[0]->var;
3483
3484 const char *atomic_name;
3485 char intrinsic_name[41];
3486 const struct glsl_type *type = glsl_without_array(var->type);
3487 MAYBE_UNUSED int length;
3488
3489 if (ctx->stage == MESA_SHADER_FRAGMENT)
3490 ctx->shader_info->fs.writes_memory = true;
3491
3492 switch (instr->intrinsic) {
3493 case nir_intrinsic_image_atomic_add:
3494 atomic_name = "add";
3495 break;
3496 case nir_intrinsic_image_atomic_min:
3497 atomic_name = "smin";
3498 break;
3499 case nir_intrinsic_image_atomic_max:
3500 atomic_name = "smax";
3501 break;
3502 case nir_intrinsic_image_atomic_and:
3503 atomic_name = "and";
3504 break;
3505 case nir_intrinsic_image_atomic_or:
3506 atomic_name = "or";
3507 break;
3508 case nir_intrinsic_image_atomic_xor:
3509 atomic_name = "xor";
3510 break;
3511 case nir_intrinsic_image_atomic_exchange:
3512 atomic_name = "swap";
3513 break;
3514 case nir_intrinsic_image_atomic_comp_swap:
3515 atomic_name = "cmpswap";
3516 break;
3517 default:
3518 abort();
3519 }
3520
3521 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
3522 params[param_count++] = get_src(ctx->nir, instr->src[3]);
3523 params[param_count++] = get_src(ctx->nir, instr->src[2]);
3524
3525 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3526 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3527 params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx->nir, instr->src[0]),
3528 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3529 params[param_count++] = ctx->i32zero; /* voffset */
3530 params[param_count++] = ctx->i1false; /* slc */
3531
3532 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3533 "llvm.amdgcn.buffer.atomic.%s", atomic_name);
3534 } else {
3535 char coords_type[8];
3536
3537 bool da = glsl_sampler_type_is_array(type) ||
3538 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3539
3540 LLVMValueRef coords = params[param_count++] = get_image_coords(ctx, instr);
3541 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3542 params[param_count++] = ctx->i1false; /* r128 */
3543 params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */
3544 params[param_count++] = ctx->i1false; /* slc */
3545
3546 build_int_type_name(LLVMTypeOf(coords),
3547 coords_type, sizeof(coords_type));
3548
3549 length = snprintf(intrinsic_name, sizeof(intrinsic_name),
3550 "llvm.amdgcn.image.atomic.%s.%s", atomic_name, coords_type);
3551 }
3552
3553 assert(length < sizeof(intrinsic_name));
3554 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
3555 }
3556
3557 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
3558 const nir_intrinsic_instr *instr)
3559 {
3560 LLVMValueRef res;
3561 const nir_variable *var = instr->variables[0]->var;
3562 const struct glsl_type *type = instr->variables[0]->var->type;
3563 bool da = glsl_sampler_type_is_array(var->type) ||
3564 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
3565 	if (instr->variables[0]->deref.child)
3566 type = instr->variables[0]->deref.child->type;
3567
3568 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
3569 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
3570
3571 struct ac_image_args args = { 0 };
3572
3573 args.da = da;
3574 args.dmask = 0xf;
3575 args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3576 args.opcode = ac_image_get_resinfo;
3577 args.addr = ctx->i32zero;
3578
3579 res = ac_build_image_opcode(&ctx->ac, &args);
3580
3581 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
3582 glsl_sampler_type_is_array(type)) {
3583 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3584 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3585 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
3586 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3587 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
3588 }
3589 return res;
3590 }
3591
3592 #define NOOP_WAITCNT 0xf7f
3593 #define LGKM_CNT 0x07f
3594 #define VM_CNT 0xf70
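/* These values follow the s_waitcnt simm16 encoding (vm_cnt in bits [3:0],
 * exp_cnt in [6:4], lgkm_cnt in [12:8]); a field of all ones means "don't
 * wait on that counter".  Masks therefore combine by AND-ing, e.g. to wait
 * for both LGKM and VM traffic to drain:
 *
 *   emit_waitcnt(ctx, LGKM_CNT & VM_CNT);   (0x07f & 0xf70 = 0x070)
 *
 * which is what emit_barrier() below uses for the SI workaround.
 */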
3595
3596 static void emit_waitcnt(struct nir_to_llvm_context *ctx,
3597 unsigned simm16)
3598 {
3599 LLVMValueRef args[1] = {
3600 LLVMConstInt(ctx->i32, simm16, false),
3601 };
3602 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
3603 ctx->voidt, args, 1, 0);
3604 }
3605
3606 static void emit_barrier(struct nir_to_llvm_context *ctx)
3607 {
3608 /* SI only (thanks to a hw bug workaround):
3609 	 * The real barrier instruction isn't needed, because an entire patch
3610 * always fits into a single wave.
3611 */
3612 if (ctx->options->chip_class == SI &&
3613 ctx->stage == MESA_SHADER_TESS_CTRL) {
3614 emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
3615 return;
3616 }
3617 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
3618 ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3619 }
3620
3621 static void emit_discard_if(struct nir_to_llvm_context *ctx,
3622 const nir_intrinsic_instr *instr)
3623 {
3624 LLVMValueRef cond;
3625 ctx->shader_info->fs.can_discard = true;
3626
3627 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3628 get_src(ctx->nir, instr->src[0]),
3629 ctx->i32zero, "");
3630
3631 cond = LLVMBuildSelect(ctx->builder, cond,
3632 LLVMConstReal(ctx->f32, -1.0f),
3633 ctx->f32zero, "");
3634 ac_build_kill(&ctx->ac, cond);
3635 }
3636
3637 static LLVMValueRef
3638 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
3639 {
3640 LLVMValueRef result;
3641 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
3642 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
3643 LLVMConstInt(ctx->i32, 0xfc0, false), "");
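	/* tg_size is assumed to carry the wave's index within the
	 * threadgroup in bits [11:6], which is what the 0xfc0 mask
	 * extracts, already scaled by the 64-lane wave size:
	 *
	 *   index = wave_id * 64 + lane_id
	 */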
3644
3645 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
3646 }
3647
3648 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
3649 const nir_intrinsic_instr *instr)
3650 {
3651 LLVMValueRef ptr, result;
3652 LLVMValueRef src = get_src(ctx->nir, instr->src[0]);
3653 ptr = build_gep_for_deref(ctx->nir, instr->variables[0]);
3654
3655 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
3656 LLVMValueRef src1 = get_src(ctx->nir, instr->src[1]);
3657 result = LLVMBuildAtomicCmpXchg(ctx->builder,
3658 ptr, src, src1,
3659 LLVMAtomicOrderingSequentiallyConsistent,
3660 LLVMAtomicOrderingSequentiallyConsistent,
3661 false);
3662 } else {
3663 LLVMAtomicRMWBinOp op;
3664 switch (instr->intrinsic) {
3665 case nir_intrinsic_var_atomic_add:
3666 op = LLVMAtomicRMWBinOpAdd;
3667 break;
3668 case nir_intrinsic_var_atomic_umin:
3669 op = LLVMAtomicRMWBinOpUMin;
3670 break;
3671 case nir_intrinsic_var_atomic_umax:
3672 op = LLVMAtomicRMWBinOpUMax;
3673 break;
3674 case nir_intrinsic_var_atomic_imin:
3675 op = LLVMAtomicRMWBinOpMin;
3676 break;
3677 case nir_intrinsic_var_atomic_imax:
3678 op = LLVMAtomicRMWBinOpMax;
3679 break;
3680 case nir_intrinsic_var_atomic_and:
3681 op = LLVMAtomicRMWBinOpAnd;
3682 break;
3683 case nir_intrinsic_var_atomic_or:
3684 op = LLVMAtomicRMWBinOpOr;
3685 break;
3686 case nir_intrinsic_var_atomic_xor:
3687 op = LLVMAtomicRMWBinOpXor;
3688 break;
3689 case nir_intrinsic_var_atomic_exchange:
3690 op = LLVMAtomicRMWBinOpXchg;
3691 break;
3692 default:
3693 return NULL;
3694 }
3695
3696 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(&ctx->ac, src),
3697 LLVMAtomicOrderingSequentiallyConsistent,
3698 false);
3699 }
3700 return result;
3701 }
3702
3703 #define INTERP_CENTER 0
3704 #define INTERP_CENTROID 1
3705 #define INTERP_SAMPLE 2
3706
3707 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
3708 enum glsl_interp_mode interp, unsigned location)
3709 {
3710 switch (interp) {
3711 case INTERP_MODE_FLAT:
3712 default:
3713 return NULL;
3714 case INTERP_MODE_SMOOTH:
3715 case INTERP_MODE_NONE:
3716 if (location == INTERP_CENTER)
3717 return ctx->persp_center;
3718 else if (location == INTERP_CENTROID)
3719 return ctx->persp_centroid;
3720 else if (location == INTERP_SAMPLE)
3721 return ctx->persp_sample;
3722 break;
3723 case INTERP_MODE_NOPERSPECTIVE:
3724 if (location == INTERP_CENTER)
3725 return ctx->linear_center;
3726 else if (location == INTERP_CENTROID)
3727 return ctx->linear_centroid;
3728 else if (location == INTERP_SAMPLE)
3729 return ctx->linear_sample;
3730 break;
3731 }
3732 return NULL;
3733 }
3734
3735 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
3736 LLVMValueRef sample_id)
3737 {
3738 LLVMValueRef result;
3739 LLVMValueRef ptr = ac_build_gep0(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_PS_SAMPLE_POSITIONS, false));
3740
3741 ptr = LLVMBuildBitCast(ctx->builder, ptr,
3742 const_array(ctx->v2f32, 64), "");
3743
3744 sample_id = LLVMBuildAdd(ctx->builder, sample_id, ctx->sample_pos_offset, "");
3745 result = ac_build_indexed_load(&ctx->ac, ptr, sample_id, false);
3746
3747 return result;
3748 }
3749
3750 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
3751 {
3752 LLVMValueRef values[2];
3753
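	/* Since using gl_SamplePosition forces per-sample shading (see the
	 * load_sample_pos case in visit_intrinsic), frag_pos is evaluated at
	 * the sample location, so its fractional part is the sub-pixel sample
	 * position, e.g. frag_pos.x = 12.4375 yields a sample position x of
	 * 0.4375.
	 */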
3754 values[0] = emit_ffract(&ctx->ac, ctx->frag_pos[0]);
3755 values[1] = emit_ffract(&ctx->ac, ctx->frag_pos[1]);
3756 return ac_build_gather_values(&ctx->ac, values, 2);
3757 }
3758
3759 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
3760 const nir_intrinsic_instr *instr)
3761 {
3762 LLVMValueRef result[2];
3763 LLVMValueRef interp_param, attr_number;
3764 unsigned location;
3765 unsigned chan;
3766 LLVMValueRef src_c0, src_c1;
3767 LLVMValueRef src0;
3768 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
3769 switch (instr->intrinsic) {
3770 case nir_intrinsic_interp_var_at_centroid:
3771 location = INTERP_CENTROID;
3772 break;
3773 case nir_intrinsic_interp_var_at_sample:
3774 case nir_intrinsic_interp_var_at_offset:
3775 location = INTERP_CENTER;
3776 src0 = get_src(ctx->nir, instr->src[0]);
3777 break;
3778 default:
3779 break;
3780 }
3781
3782 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
3783 src_c0 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
3784 src_c1 = to_float(&ctx->ac, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
3785 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
3786 LLVMValueRef sample_position;
3787 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
3788
3789 		/* fetch the sample position for this sample ID */
3790 sample_position = load_sample_position(ctx, src0);
3791
3792 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
3793 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
3794 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
3795 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
3796 }
3797 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
3798 attr_number = LLVMConstInt(ctx->i32, input_index, false);
3799
3800 if (location == INTERP_SAMPLE || location == INTERP_CENTER) {
3801 LLVMValueRef ij_out[2];
3802 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
3803
3804 /*
3805 		 * Take the I and J parameters and their DDX/DDY derivatives,
3806 		 * and compute the adjusted IJ inputs for the interpolator:
3807 * temp1 = ddx * offset/sample.x + I;
3808 * interp_param.I = ddy * offset/sample.y + temp1;
3809 * temp1 = ddx * offset/sample.x + J;
3810 * interp_param.J = ddy * offset/sample.y + temp1;
3811 */
3812 for (unsigned i = 0; i < 2; i++) {
3813 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
3814 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
3815 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
3816 ddxy_out, ix_ll, "");
3817 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
3818 ddxy_out, iy_ll, "");
3819 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
3820 interp_param, ix_ll, "");
3821 LLVMValueRef temp1, temp2;
3822
3823 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
3824 ctx->f32, "");
3825
3826 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
3827 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
3828
3829 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
3830 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
3831
3832 ij_out[i] = LLVMBuildBitCast(ctx->builder,
3833 temp2, ctx->i32, "");
3834 }
3835 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
3836
3837 }
3838
3839 for (chan = 0; chan < 2; chan++) {
3840 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3841
3842 if (interp_param) {
3843 interp_param = LLVMBuildBitCast(ctx->builder,
3844 interp_param, LLVMVectorType(ctx->f32, 2), "");
3845 LLVMValueRef i = LLVMBuildExtractElement(
3846 ctx->builder, interp_param, ctx->i32zero, "");
3847 LLVMValueRef j = LLVMBuildExtractElement(
3848 ctx->builder, interp_param, ctx->i32one, "");
3849
3850 result[chan] = ac_build_fs_interp(&ctx->ac,
3851 llvm_chan, attr_number,
3852 ctx->prim_mask, i, j);
3853 } else {
3854 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
3855 LLVMConstInt(ctx->i32, 2, false),
3856 llvm_chan, attr_number,
3857 ctx->prim_mask);
3858 }
3859 }
3860 return ac_build_gather_values(&ctx->ac, result, 2);
3861 }
3862
3863 static void
3864 visit_emit_vertex(struct nir_to_llvm_context *ctx,
3865 const nir_intrinsic_instr *instr)
3866 {
3867 LLVMValueRef gs_next_vertex;
3868 LLVMValueRef can_emit, kill;
3869 int idx;
3870
3871 assert(instr->const_index[0] == 0);
3872 /* Write vertex attribute values to GSVS ring */
3873 gs_next_vertex = LLVMBuildLoad(ctx->builder,
3874 ctx->gs_next_vertex,
3875 "");
3876
3877 /* If this thread has already emitted the declared maximum number of
3878 * vertices, kill it: excessive vertex emissions are not supposed to
3879 * have any effect, and GS threads have no externally observable
3880 * effects other than emitting vertices.
3881 */
3882 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
3883 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
3884
3885 kill = LLVMBuildSelect(ctx->builder, can_emit,
3886 LLVMConstReal(ctx->f32, 1.0f),
3887 LLVMConstReal(ctx->f32, -1.0f), "");
3888 ac_build_kill(&ctx->ac, kill);
3889
3890 	/* Loop over all enabled outputs and write them to the GSVS ring. */
3891 idx = 0;
3892 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
3893 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
3894 int length = 4;
3895 int slot = idx;
3896 int slot_inc = 1;
3897
3898 if (!(ctx->output_mask & (1ull << i)))
3899 continue;
3900
3901 if (i == VARYING_SLOT_CLIP_DIST0) {
3902 /* pack clip and cull into a single set of slots */
3903 length = ctx->num_output_clips + ctx->num_output_culls;
3904 if (length > 4)
3905 slot_inc = 2;
3906 }
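		/* GSVS ring addressing (as computed below): all
		 * gs_max_out_vertices values of component j of a slot are
		 * stored contiguously, so this vertex's dword lives at
		 *
		 *   byte offset = ((slot * 4 + j) * gs_max_out_vertices +
		 *                  gs_next_vertex) * 4
		 *
		 * e.g. with gs_max_out_vertices = 4, slot 0 / component 1
		 * of vertex 2 lands at ((0 * 4 + 1) * 4 + 2) * 4 = 24.
		 */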
3907 for (unsigned j = 0; j < length; j++) {
3908 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
3909 out_ptr[j], "");
3910 LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
3911 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
3912 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), "");
3913
3914 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
3915
3916 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
3917 out_val, 1,
3918 voffset, ctx->gs2vs_offset, 0,
3919 1, 1, true, true);
3920 }
3921 idx += slot_inc;
3922 }
3923
3924 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
3925 ctx->i32one, "");
3926 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
3927
3928 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
3929 }
3930
3931 static void
3932 visit_end_primitive(struct nir_to_llvm_context *ctx,
3933 const nir_intrinsic_instr *instr)
3934 {
3935 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
3936 }
3937
3938 static LLVMValueRef
3939 visit_load_tess_coord(struct nir_to_llvm_context *ctx,
3940 const nir_intrinsic_instr *instr)
3941 {
3942 LLVMValueRef coord[4] = {
3943 ctx->tes_u,
3944 ctx->tes_v,
3945 ctx->f32zero,
3946 ctx->f32zero,
3947 };
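	/* For triangle domains the tess coordinate is barycentric, so the
	 * third component is implied by the first two: w = 1 - u - v.
	 */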
3948
3949 if (ctx->tes_primitive_mode == GL_TRIANGLES)
3950 coord[2] = LLVMBuildFSub(ctx->builder, ctx->f32one,
3951 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), "");
3952
3953 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components);
3954 return LLVMBuildBitCast(ctx->builder, result,
3955 get_def_type(ctx->nir, &instr->dest.ssa), "");
3956 }
3957
3958 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
3959 nir_intrinsic_instr *instr)
3960 {
3961 LLVMValueRef result = NULL;
3962
3963 switch (instr->intrinsic) {
3964 case nir_intrinsic_load_work_group_id: {
3965 result = ctx->workgroup_ids;
3966 break;
3967 }
3968 case nir_intrinsic_load_base_vertex: {
3969 result = ctx->abi.base_vertex;
3970 break;
3971 }
3972 case nir_intrinsic_load_vertex_id_zero_base: {
3973 result = ctx->abi.vertex_id;
3974 break;
3975 }
3976 case nir_intrinsic_load_local_invocation_id: {
3977 result = ctx->local_invocation_ids;
3978 break;
3979 }
3980 case nir_intrinsic_load_base_instance:
3981 result = ctx->abi.start_instance;
3982 break;
3983 case nir_intrinsic_load_draw_id:
3984 result = ctx->abi.draw_id;
3985 break;
3986 case nir_intrinsic_load_invocation_id:
3987 if (ctx->stage == MESA_SHADER_TESS_CTRL)
3988 result = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
3989 else
3990 result = ctx->gs_invocation_id;
3991 break;
3992 case nir_intrinsic_load_primitive_id:
3993 if (ctx->stage == MESA_SHADER_GEOMETRY) {
3994 ctx->shader_info->gs.uses_prim_id = true;
3995 result = ctx->gs_prim_id;
3996 } else if (ctx->stage == MESA_SHADER_TESS_CTRL) {
3997 ctx->shader_info->tcs.uses_prim_id = true;
3998 result = ctx->tcs_patch_id;
3999 } else if (ctx->stage == MESA_SHADER_TESS_EVAL) {
4000 			ctx->shader_info->tes.uses_prim_id = true;
4001 result = ctx->tes_patch_id;
4002 } else
4003 			fprintf(stderr, "Unknown primitive id intrinsic: %d\n", ctx->stage);
4004 break;
4005 case nir_intrinsic_load_sample_id:
4006 ctx->shader_info->fs.force_persample = true;
4007 result = unpack_param(ctx, ctx->ancillary, 8, 4);
4008 break;
4009 case nir_intrinsic_load_sample_pos:
4010 ctx->shader_info->fs.force_persample = true;
4011 result = load_sample_pos(ctx);
4012 break;
4013 case nir_intrinsic_load_sample_mask_in:
4014 result = ctx->sample_coverage;
4015 break;
4016 case nir_intrinsic_load_front_face:
4017 result = ctx->front_face;
4018 break;
4019 case nir_intrinsic_load_instance_id:
4020 result = ctx->abi.instance_id;
4021 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
4022 ctx->shader_info->vs.vgpr_comp_cnt);
4023 break;
4024 case nir_intrinsic_load_num_work_groups:
4025 result = ctx->num_work_groups;
4026 break;
4027 case nir_intrinsic_load_local_invocation_index:
4028 result = visit_load_local_invocation_index(ctx);
4029 break;
4030 case nir_intrinsic_load_push_constant:
4031 result = visit_load_push_constant(ctx, instr);
4032 break;
4033 case nir_intrinsic_vulkan_resource_index:
4034 result = visit_vulkan_resource_index(ctx, instr);
4035 break;
4036 case nir_intrinsic_store_ssbo:
4037 visit_store_ssbo(ctx, instr);
4038 break;
4039 case nir_intrinsic_load_ssbo:
4040 result = visit_load_buffer(ctx, instr);
4041 break;
4042 case nir_intrinsic_ssbo_atomic_add:
4043 case nir_intrinsic_ssbo_atomic_imin:
4044 case nir_intrinsic_ssbo_atomic_umin:
4045 case nir_intrinsic_ssbo_atomic_imax:
4046 case nir_intrinsic_ssbo_atomic_umax:
4047 case nir_intrinsic_ssbo_atomic_and:
4048 case nir_intrinsic_ssbo_atomic_or:
4049 case nir_intrinsic_ssbo_atomic_xor:
4050 case nir_intrinsic_ssbo_atomic_exchange:
4051 case nir_intrinsic_ssbo_atomic_comp_swap:
4052 result = visit_atomic_ssbo(ctx, instr);
4053 break;
4054 case nir_intrinsic_load_ubo:
4055 result = visit_load_ubo_buffer(ctx, instr);
4056 break;
4057 case nir_intrinsic_get_buffer_size:
4058 result = visit_get_buffer_size(ctx, instr);
4059 break;
4060 case nir_intrinsic_load_var:
4061 result = visit_load_var(ctx, instr);
4062 break;
4063 case nir_intrinsic_store_var:
4064 visit_store_var(ctx, instr);
4065 break;
4066 case nir_intrinsic_image_load:
4067 result = visit_image_load(ctx, instr);
4068 break;
4069 case nir_intrinsic_image_store:
4070 visit_image_store(ctx, instr);
4071 break;
4072 case nir_intrinsic_image_atomic_add:
4073 case nir_intrinsic_image_atomic_min:
4074 case nir_intrinsic_image_atomic_max:
4075 case nir_intrinsic_image_atomic_and:
4076 case nir_intrinsic_image_atomic_or:
4077 case nir_intrinsic_image_atomic_xor:
4078 case nir_intrinsic_image_atomic_exchange:
4079 case nir_intrinsic_image_atomic_comp_swap:
4080 result = visit_image_atomic(ctx, instr);
4081 break;
4082 case nir_intrinsic_image_size:
4083 result = visit_image_size(ctx, instr);
4084 break;
4085 case nir_intrinsic_discard:
4086 ctx->shader_info->fs.can_discard = true;
4087 ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
4088 ctx->voidt,
4089 NULL, 0, AC_FUNC_ATTR_LEGACY);
4090 break;
4091 case nir_intrinsic_discard_if:
4092 emit_discard_if(ctx, instr);
4093 break;
4094 case nir_intrinsic_memory_barrier:
4095 emit_waitcnt(ctx, VM_CNT);
4096 break;
4097 case nir_intrinsic_barrier:
4098 emit_barrier(ctx);
4099 break;
4100 case nir_intrinsic_var_atomic_add:
4101 case nir_intrinsic_var_atomic_imin:
4102 case nir_intrinsic_var_atomic_umin:
4103 case nir_intrinsic_var_atomic_imax:
4104 case nir_intrinsic_var_atomic_umax:
4105 case nir_intrinsic_var_atomic_and:
4106 case nir_intrinsic_var_atomic_or:
4107 case nir_intrinsic_var_atomic_xor:
4108 case nir_intrinsic_var_atomic_exchange:
4109 case nir_intrinsic_var_atomic_comp_swap:
4110 result = visit_var_atomic(ctx, instr);
4111 break;
4112 case nir_intrinsic_interp_var_at_centroid:
4113 case nir_intrinsic_interp_var_at_sample:
4114 case nir_intrinsic_interp_var_at_offset:
4115 result = visit_interp(ctx, instr);
4116 break;
4117 case nir_intrinsic_emit_vertex:
4118 visit_emit_vertex(ctx, instr);
4119 break;
4120 case nir_intrinsic_end_primitive:
4121 visit_end_primitive(ctx, instr);
4122 break;
4123 case nir_intrinsic_load_tess_coord:
4124 result = visit_load_tess_coord(ctx, instr);
4125 break;
4126 case nir_intrinsic_load_patch_vertices_in:
4127 result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
4128 break;
4129 default:
4130 fprintf(stderr, "Unknown intrinsic: ");
4131 nir_print_instr(&instr->instr, stderr);
4132 fprintf(stderr, "\n");
4133 break;
4134 }
4135 if (result) {
4136 _mesa_hash_table_insert(ctx->nir->defs, &instr->dest.ssa, result);
4137 }
4138 }
4139
4140 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
4141 const nir_deref_var *deref,
4142 enum desc_type desc_type)
4143 {
4144 unsigned desc_set = deref->var->data.descriptor_set;
4145 LLVMValueRef list = ctx->descriptor_sets[desc_set];
4146 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
4147 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
4148 unsigned offset = binding->offset;
4149 unsigned stride = binding->size;
4150 unsigned type_size;
4151 LLVMBuilderRef builder = ctx->builder;
4152 LLVMTypeRef type;
4153 LLVMValueRef index = NULL;
4154 unsigned constant_index = 0;
4155
4156 assert(deref->var->data.binding < layout->binding_count);
4157
4158 switch (desc_type) {
4159 case DESC_IMAGE:
4160 type = ctx->v8i32;
4161 type_size = 32;
4162 break;
4163 case DESC_FMASK:
4164 type = ctx->v8i32;
4165 offset += 32;
4166 type_size = 32;
4167 break;
4168 case DESC_SAMPLER:
4169 type = ctx->v4i32;
4170 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
4171 offset += 64;
4172
4173 type_size = 16;
4174 break;
4175 case DESC_BUFFER:
4176 type = ctx->v4i32;
4177 type_size = 16;
4178 break;
4179 default:
4180 unreachable("invalid desc_type\n");
4181 }
4182
4183 if (deref->deref.child) {
4184 const nir_deref_array *child =
4185 (const nir_deref_array *)deref->deref.child;
4186
4187 assert(child->deref_array_type != nir_deref_array_type_wildcard);
4188 offset += child->base_offset * stride;
4189 if (child->deref_array_type == nir_deref_array_type_indirect) {
4190 index = get_src(ctx->nir, child->indirect);
4191 }
4192
4193 constant_index = child->base_offset;
4194 }
4195 if (desc_type == DESC_SAMPLER && binding->immutable_samplers_offset &&
4196 (!index || binding->immutable_samplers_equal)) {
4197 if (binding->immutable_samplers_equal)
4198 constant_index = 0;
4199
4200 const uint32_t *samplers = radv_immutable_samplers(layout, binding);
4201
4202 LLVMValueRef constants[] = {
4203 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 0], 0),
4204 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 1], 0),
4205 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 2], 0),
4206 LLVMConstInt(ctx->i32, samplers[constant_index * 4 + 3], 0),
4207 };
4208 return ac_build_gather_values(&ctx->ac, constants, 4);
4209 }
4210
4211 assert(stride % type_size == 0);
4212
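	/* At this point the set pointer has been advanced by "offset" to the
	 * binding's first descriptor, and is reinterpreted below as an array
	 * of type_size-byte descriptors, so an array index into the binding
	 * scales by stride / type_size.  E.g. for a hypothetical binding with
	 * a 64-byte stride read as a v4i32 descriptor (type_size 16), element
	 * i loads from index i * 4 of that array.
	 */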
4213 if (!index)
4214 index = ctx->i32zero;
4215
4216 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
4217
4218 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
4219 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
4220
4221 return ac_build_indexed_load_const(&ctx->ac, list, index);
4222 }
4223
4224 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
4225 struct ac_image_args *args,
4226 const nir_tex_instr *instr,
4227 nir_texop op,
4228 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
4229 LLVMValueRef *param, unsigned count,
4230 unsigned dmask)
4231 {
4232 unsigned is_rect = 0;
4233 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
4234
4235 if (op == nir_texop_lod)
4236 da = false;
4237 /* Pad to power of two vector */
4238 while (count < util_next_power_of_two(count))
4239 param[count++] = LLVMGetUndef(ctx->i32);
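	/* e.g. a 3-component address (x, y, slice) is padded to
	 * (x, y, slice, undef), since the image intrinsics only accept
	 * power-of-two address vector widths.
	 */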
4240
4241 if (count > 1)
4242 args->addr = ac_build_gather_values(&ctx->ac, param, count);
4243 else
4244 args->addr = param[0];
4245
4246 args->resource = res_ptr;
4247 args->sampler = samp_ptr;
4248
4249 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
4250 args->addr = param[0];
4251 return;
4252 }
4253
4254 args->dmask = dmask;
4255 args->unorm = is_rect;
4256 args->da = da;
4257 }
4258
4259 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4260 *
4261 * SI-CI:
4262 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4263 * filtering manually. The driver sets img7 to a mask clearing
4264 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4265 * s_and_b32 samp0, samp0, img7
4266 *
4267 * VI:
4268 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4269 */
4270 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
4271 LLVMValueRef res, LLVMValueRef samp)
4272 {
4273 LLVMBuilderRef builder = ctx->builder;
4274 LLVMValueRef img7, samp0;
4275
4276 if (ctx->options->chip_class >= VI)
4277 return samp;
4278
4279 img7 = LLVMBuildExtractElement(builder, res,
4280 LLVMConstInt(ctx->i32, 7, 0), "");
4281 samp0 = LLVMBuildExtractElement(builder, samp,
4282 LLVMConstInt(ctx->i32, 0, 0), "");
4283 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4284 return LLVMBuildInsertElement(builder, samp, samp0,
4285 LLVMConstInt(ctx->i32, 0, 0), "");
4286 }
4287
4288 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
4289 nir_tex_instr *instr,
4290 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4291 LLVMValueRef *fmask_ptr)
4292 {
4293 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4294 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
4295 else
4296 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
4297 if (samp_ptr) {
4298 if (instr->sampler)
4299 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
4300 else
4301 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
4302 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4303 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4304 }
4305 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
4306 instr->op == nir_texop_samples_identical))
4307 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
4308 }
4309
4310 static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
4311 LLVMValueRef coord)
4312 {
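	/* Array layer selection rounds to the nearest integer, with ties to
	 * even (llvm.rint), matching e.g. Vulkan's roundEven() layer
	 * selection rule.
	 */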
4313 coord = to_float(&ctx->ac, coord);
4314 coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
4315 coord = to_integer(&ctx->ac, coord);
4316 return coord;
4317 }
4318
4319 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
4320 {
4321 LLVMValueRef result = NULL;
4322 struct ac_image_args args = { 0 };
4323 unsigned dmask = 0xf;
4324 LLVMValueRef address[16];
4325 LLVMValueRef coords[5];
4326 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
4327 LLVMValueRef bias = NULL, offsets = NULL;
4328 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
4329 LLVMValueRef ddx = NULL, ddy = NULL;
4330 LLVMValueRef derivs[6];
4331 unsigned chan, count = 0;
4332 unsigned const_src = 0, num_deriv_comp = 0;
4333 bool lod_is_zero = false;
4334 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
4335
4336 for (unsigned i = 0; i < instr->num_srcs; i++) {
4337 switch (instr->src[i].src_type) {
4338 case nir_tex_src_coord:
4339 coord = get_src(ctx->nir, instr->src[i].src);
4340 break;
4341 case nir_tex_src_projector:
4342 break;
4343 case nir_tex_src_comparator:
4344 comparator = get_src(ctx->nir, instr->src[i].src);
4345 break;
4346 case nir_tex_src_offset:
4347 offsets = get_src(ctx->nir, instr->src[i].src);
4348 const_src = i;
4349 break;
4350 case nir_tex_src_bias:
4351 bias = get_src(ctx->nir, instr->src[i].src);
4352 break;
4353 case nir_tex_src_lod: {
4354 nir_const_value *val = nir_src_as_const_value(instr->src[i].src);
4355
4356 if (val && val->i32[0] == 0)
4357 lod_is_zero = true;
4358 lod = get_src(ctx->nir, instr->src[i].src);
4359 break;
4360 }
4361 case nir_tex_src_ms_index:
4362 sample_index = get_src(ctx->nir, instr->src[i].src);
4363 break;
4364 case nir_tex_src_ms_mcs:
4365 break;
4366 case nir_tex_src_ddx:
4367 ddx = get_src(ctx->nir, instr->src[i].src);
4368 num_deriv_comp = instr->src[i].src.ssa->num_components;
4369 break;
4370 case nir_tex_src_ddy:
4371 ddy = get_src(ctx->nir, instr->src[i].src);
4372 break;
4373 case nir_tex_src_texture_offset:
4374 case nir_tex_src_sampler_offset:
4375 case nir_tex_src_plane:
4376 default:
4377 break;
4378 }
4379 }
4380
4381 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4382 result = get_buffer_size(ctx, res_ptr, true);
4383 goto write_result;
4384 }
4385
4386 if (instr->op == nir_texop_texture_samples) {
4387 LLVMValueRef res, samples, is_msaa;
4388 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
4389 samples = LLVMBuildExtractElement(ctx->builder, res,
4390 LLVMConstInt(ctx->i32, 3, false), "");
4391 is_msaa = LLVMBuildLShr(ctx->builder, samples,
4392 LLVMConstInt(ctx->i32, 28, false), "");
4393 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
4394 LLVMConstInt(ctx->i32, 0xe, false), "");
4395 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
4396 LLVMConstInt(ctx->i32, 0xe, false), "");
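		/* This decodes resource word 3 of the GCN image descriptor:
		 * TYPE lives in bits [31:28], and masking with 0xe matches
		 * both SQ_RSRC_IMG_2D_MSAA (14) and SQ_RSRC_IMG_2D_MSAA_ARRAY
		 * (15).  For MSAA resources, the LAST_LEVEL field in bits
		 * [19:16] holds log2(samples), hence the 1 << samples shift
		 * below.
		 */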
4397
4398 samples = LLVMBuildLShr(ctx->builder, samples,
4399 LLVMConstInt(ctx->i32, 16, false), "");
4400 samples = LLVMBuildAnd(ctx->builder, samples,
4401 LLVMConstInt(ctx->i32, 0xf, false), "");
4402 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
4403 samples, "");
4404 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
4405 ctx->i32one, "");
4406 result = samples;
4407 goto write_result;
4408 }
4409
4410 if (coord)
4411 for (chan = 0; chan < instr->coord_components; chan++)
4412 coords[chan] = llvm_extract_elem(ctx, coord, chan);
4413
4414 if (offsets && instr->op != nir_texop_txf) {
4415 LLVMValueRef offset[3], pack;
4416 for (chan = 0; chan < 3; ++chan)
4417 offset[chan] = ctx->i32zero;
4418
4419 args.offset = true;
4420 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
4421 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
4422 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
4423 LLVMConstInt(ctx->i32, 0x3f, false), "");
4424 if (chan)
4425 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
4426 LLVMConstInt(ctx->i32, chan * 8, false), "");
4427 }
4428 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
4429 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
4430 address[count++] = pack;
4431
4432 }
4433 /* pack LOD bias value */
4434 if (instr->op == nir_texop_txb && bias) {
4435 address[count++] = bias;
4436 }
4437
4438 /* Pack depth comparison value */
4439 if (instr->is_shadow && comparator) {
4440 address[count++] = llvm_extract_elem(ctx, comparator, 0);
4441 }
4442
4443 /* pack derivatives */
4444 if (ddx || ddy) {
4445 switch (instr->sampler_dim) {
4446 case GLSL_SAMPLER_DIM_3D:
4447 case GLSL_SAMPLER_DIM_CUBE:
4448 num_deriv_comp = 3;
4449 break;
4450 case GLSL_SAMPLER_DIM_2D:
4451 default:
4452 num_deriv_comp = 2;
4453 break;
4454 case GLSL_SAMPLER_DIM_1D:
4455 num_deriv_comp = 1;
4456 break;
4457 }
4458
4459 for (unsigned i = 0; i < num_deriv_comp; i++) {
4460 derivs[i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddx, i));
4461 derivs[num_deriv_comp + i] = to_float(&ctx->ac, llvm_extract_elem(ctx, ddy, i));
4462 }
4463 }
4464
4465 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
4466 if (instr->is_array && instr->op != nir_texop_lod)
4467 coords[3] = apply_round_slice(ctx, coords[3]);
4468 for (chan = 0; chan < instr->coord_components; chan++)
4469 coords[chan] = to_float(&ctx->ac, coords[chan]);
4470 if (instr->coord_components == 3)
4471 coords[3] = LLVMGetUndef(ctx->f32);
4472 ac_prepare_cube_coords(&ctx->ac,
4473 instr->op == nir_texop_txd, instr->is_array,
4474 coords, derivs);
4475 if (num_deriv_comp)
4476 num_deriv_comp--;
4477 }
4478
4479 if (ddx || ddy) {
4480 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
4481 address[count++] = derivs[i];
4482 }
4483
4484 /* Pack texture coordinates */
4485 if (coord) {
4486 address[count++] = coords[0];
4487 if (instr->coord_components > 1) {
4488 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
4489 coords[1] = apply_round_slice(ctx, coords[1]);
4490 }
4491 address[count++] = coords[1];
4492 }
4493 if (instr->coord_components > 2) {
4494 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
4495 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D &&
4496 instr->sampler_dim != GLSL_SAMPLER_DIM_CUBE &&
4497 instr->op != nir_texop_txf) {
4498 coords[2] = apply_round_slice(ctx, coords[2]);
4499 }
4500 address[count++] = coords[2];
4501 }
4502 }
4503
4504 /* Pack LOD */
4505 if (lod && ((instr->op == nir_texop_txl && !lod_is_zero) ||
4506 instr->op == nir_texop_txf)) {
4507 address[count++] = lod;
4508 } else if (instr->op == nir_texop_txf_ms && sample_index) {
4509 address[count++] = sample_index;
4510 	} else if (instr->op == nir_texop_txs) {
4511 count = 0;
4512 if (lod)
4513 address[count++] = lod;
4514 else
4515 address[count++] = ctx->i32zero;
4516 }
4517
4518 for (chan = 0; chan < count; chan++) {
4519 address[chan] = LLVMBuildBitCast(ctx->builder,
4520 address[chan], ctx->i32, "");
4521 }
4522
4523 if (instr->op == nir_texop_samples_identical) {
4524 LLVMValueRef txf_address[4];
4525 struct ac_image_args txf_args = { 0 };
4526 unsigned txf_count = count;
4527 memcpy(txf_address, address, sizeof(txf_address));
4528
4529 if (!instr->is_array)
4530 txf_address[2] = ctx->i32zero;
4531 txf_address[3] = ctx->i32zero;
4532
4533 set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf,
4534 fmask_ptr, NULL,
4535 txf_address, txf_count, 0xf);
4536
4537 result = build_tex_intrinsic(ctx, instr, false, &txf_args);
4538
4539 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
4540 result = emit_int_cmp(&ctx->ac, LLVMIntEQ, result, ctx->i32zero);
4541 goto write_result;
4542 }
4543
4544 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
4545 instr->op != nir_texop_txs) {
4546 unsigned sample_chan = instr->is_array ? 3 : 2;
4547 address[sample_chan] = adjust_sample_index_using_fmask(ctx,
4548 address[0],
4549 address[1],
4550 instr->is_array ? address[2] : NULL,
4551 address[sample_chan],
4552 fmask_ptr);
4553 }
4554
4555 if (offsets && instr->op == nir_texop_txf) {
4556 nir_const_value *const_offset =
4557 nir_src_as_const_value(instr->src[const_src].src);
4558 int num_offsets = instr->src[const_src].src.ssa->num_components;
4559 assert(const_offset);
4560 num_offsets = MIN2(num_offsets, instr->coord_components);
4561 if (num_offsets > 2)
4562 address[2] = LLVMBuildAdd(ctx->builder,
4563 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
4564 if (num_offsets > 1)
4565 address[1] = LLVMBuildAdd(ctx->builder,
4566 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
4567 address[0] = LLVMBuildAdd(ctx->builder,
4568 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
4569
4570 }
4571
4572 /* TODO TG4 support */
4573 if (instr->op == nir_texop_tg4) {
4574 if (instr->is_shadow)
4575 dmask = 1;
4576 else
4577 dmask = 1 << instr->component;
4578 }
4579 set_tex_fetch_args(ctx, &args, instr, instr->op,
4580 res_ptr, samp_ptr, address, count, dmask);
4581
4582 result = build_tex_intrinsic(ctx, instr, lod_is_zero, &args);
4583
4584 if (instr->op == nir_texop_query_levels)
4585 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
4586 else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
4587 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
4588 else if (instr->op == nir_texop_txs &&
4589 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4590 instr->is_array) {
4591 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
4592 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
4593 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
4594 z = LLVMBuildSDiv(ctx->builder, z, six, "");
4595 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
4596 } else if (instr->dest.ssa.num_components != 4)
4597 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
4598
4599 write_result:
4600 if (result) {
4601 assert(instr->dest.is_ssa);
4602 result = to_integer(&ctx->ac, result);
4603 _mesa_hash_table_insert(ctx->nir->defs, &instr->dest.ssa, result);
4604 }
4605 }
4606
4607
4608 static void visit_phi(struct ac_nir_context *ctx, nir_phi_instr *instr)
4609 {
4610 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4611 LLVMValueRef result = LLVMBuildPhi(ctx->ac.builder, type, "");
4612
4613 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4614 _mesa_hash_table_insert(ctx->phis, instr, result);
4615 }
4616
4617 static void visit_post_phi(struct ac_nir_context *ctx,
4618 nir_phi_instr *instr,
4619 LLVMValueRef llvm_phi)
4620 {
4621 nir_foreach_phi_src(src, instr) {
4622 LLVMBasicBlockRef block = get_block(ctx, src->pred);
4623 LLVMValueRef llvm_src = get_src(ctx, src->src);
4624
4625 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
4626 }
4627 }
4628
4629 static void phi_post_pass(struct ac_nir_context *ctx)
4630 {
4631 struct hash_entry *entry;
4632 hash_table_foreach(ctx->phis, entry) {
4633 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
4634 (LLVMValueRef)entry->data);
4635 }
4636 }
4637
4638
4639 static void visit_ssa_undef(struct ac_nir_context *ctx,
4640 const nir_ssa_undef_instr *instr)
4641 {
4642 unsigned num_components = instr->def.num_components;
4643 LLVMValueRef undef;
4644
4645 if (num_components == 1)
4646 undef = LLVMGetUndef(ctx->ac.i32);
4647 else {
4648 undef = LLVMGetUndef(LLVMVectorType(ctx->ac.i32, num_components));
4649 }
4650 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
4651 }
4652
4653 static void visit_jump(struct ac_nir_context *ctx,
4654 const nir_jump_instr *instr)
4655 {
4656 switch (instr->type) {
4657 case nir_jump_break:
4658 LLVMBuildBr(ctx->ac.builder, ctx->break_block);
4659 LLVMClearInsertionPosition(ctx->ac.builder);
4660 break;
4661 case nir_jump_continue:
4662 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
4663 LLVMClearInsertionPosition(ctx->ac.builder);
4664 break;
4665 default:
4666 fprintf(stderr, "Unknown NIR jump instr: ");
4667 nir_print_instr(&instr->instr, stderr);
4668 fprintf(stderr, "\n");
4669 abort();
4670 }
4671 }
4672
4673 static void visit_cf_list(struct ac_nir_context *ctx,
4674 struct exec_list *list);
4675
4676 static void visit_block(struct ac_nir_context *ctx, nir_block *block)
4677 {
4678 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->ac.builder);
4679 nir_foreach_instr(instr, block)
4680 {
4681 switch (instr->type) {
4682 case nir_instr_type_alu:
4683 visit_alu(ctx, nir_instr_as_alu(instr));
4684 break;
4685 case nir_instr_type_load_const:
4686 visit_load_const(ctx, nir_instr_as_load_const(instr));
4687 break;
4688 case nir_instr_type_intrinsic:
4689 visit_intrinsic(ctx->nctx, nir_instr_as_intrinsic(instr));
4690 break;
4691 case nir_instr_type_tex:
4692 visit_tex(ctx->nctx, nir_instr_as_tex(instr));
4693 break;
4694 case nir_instr_type_phi:
4695 visit_phi(ctx, nir_instr_as_phi(instr));
4696 break;
4697 case nir_instr_type_ssa_undef:
4698 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
4699 break;
4700 case nir_instr_type_jump:
4701 visit_jump(ctx, nir_instr_as_jump(instr));
4702 break;
4703 default:
4704 fprintf(stderr, "Unknown NIR instr type: ");
4705 nir_print_instr(instr, stderr);
4706 fprintf(stderr, "\n");
4707 abort();
4708 }
4709 }
4710
4711 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
4712 }
4713
4714 static void visit_if(struct ac_nir_context *ctx, nir_if *if_stmt)
4715 {
4716 LLVMValueRef value = get_src(ctx, if_stmt->condition);
4717
4718 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
4719 LLVMBasicBlockRef merge_block =
4720 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
4721 LLVMBasicBlockRef if_block =
4722 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
4723 LLVMBasicBlockRef else_block = merge_block;
4724 if (!exec_list_is_empty(&if_stmt->else_list))
4725 else_block = LLVMAppendBasicBlockInContext(
4726 ctx->ac.context, fn, "");
4727
4728 LLVMValueRef cond = LLVMBuildICmp(ctx->ac.builder, LLVMIntNE, value,
4729 LLVMConstInt(ctx->ac.i32, 0, false), "");
4730 LLVMBuildCondBr(ctx->ac.builder, cond, if_block, else_block);
4731
4732 LLVMPositionBuilderAtEnd(ctx->ac.builder, if_block);
4733 visit_cf_list(ctx, &if_stmt->then_list);
4734 if (LLVMGetInsertBlock(ctx->ac.builder))
4735 LLVMBuildBr(ctx->ac.builder, merge_block);
4736
4737 if (!exec_list_is_empty(&if_stmt->else_list)) {
4738 LLVMPositionBuilderAtEnd(ctx->ac.builder, else_block);
4739 visit_cf_list(ctx, &if_stmt->else_list);
4740 if (LLVMGetInsertBlock(ctx->ac.builder))
4741 LLVMBuildBr(ctx->ac.builder, merge_block);
4742 }
4743
4744 LLVMPositionBuilderAtEnd(ctx->ac.builder, merge_block);
4745 }
4746
4747 static void visit_loop(struct ac_nir_context *ctx, nir_loop *loop)
4748 {
4749 LLVMValueRef fn = LLVMGetBasicBlockParent(LLVMGetInsertBlock(ctx->ac.builder));
4750 LLVMBasicBlockRef continue_parent = ctx->continue_block;
4751 LLVMBasicBlockRef break_parent = ctx->break_block;
4752
4753 ctx->continue_block =
4754 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
4755 ctx->break_block =
4756 LLVMAppendBasicBlockInContext(ctx->ac.context, fn, "");
4757
4758 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
4759 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->continue_block);
4760 visit_cf_list(ctx, &loop->body);
4761
4762 if (LLVMGetInsertBlock(ctx->ac.builder))
4763 LLVMBuildBr(ctx->ac.builder, ctx->continue_block);
4764 LLVMPositionBuilderAtEnd(ctx->ac.builder, ctx->break_block);
4765
4766 ctx->continue_block = continue_parent;
4767 ctx->break_block = break_parent;
4768 }
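/* The lowering above produces, roughly:
 *
 *       br continue_block
 *   continue_block:            ; loop header and "continue" target
 *       <body>
 *       br continue_block      ; back-edge if the body falls through
 *   break_block:               ; "break" target and loop exit
 *
 * nir_jump_break/continue (see visit_jump) branch to these blocks and clear
 * the insertion point so no extra fall-through terminator is emitted.
 */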
4769
4770 static void visit_cf_list(struct ac_nir_context *ctx,
4771 struct exec_list *list)
4772 {
4773 foreach_list_typed(nir_cf_node, node, node, list)
4774 {
4775 switch (node->type) {
4776 case nir_cf_node_block:
4777 visit_block(ctx, nir_cf_node_as_block(node));
4778 break;
4779
4780 case nir_cf_node_if:
4781 visit_if(ctx, nir_cf_node_as_if(node));
4782 break;
4783
4784 case nir_cf_node_loop:
4785 visit_loop(ctx, nir_cf_node_as_loop(node));
4786 break;
4787
4788 default:
4789 assert(0);
4790 }
4791 }
4792 }
4793
4794 static void
4795 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
4796 struct nir_variable *variable)
4797 {
4798 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
4799 LLVMValueRef t_offset;
4800 LLVMValueRef t_list;
4801 LLVMValueRef input;
4802 LLVMValueRef buffer_index;
4803 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
4804 int idx = variable->data.location;
4805 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
4806
4807 variable->data.driver_location = idx * 4;
4808
4809 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
4810 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.instance_id,
4811 ctx->abi.start_instance, "");
4812 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
4813 ctx->shader_info->vs.vgpr_comp_cnt);
4814 } else
4815 buffer_index = LLVMBuildAdd(ctx->builder, ctx->abi.vertex_id,
4816 ctx->abi.base_vertex, "");
4817
4818 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
4819 t_offset = LLVMConstInt(ctx->i32, index + i, false);
4820
4821 t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
4822
4823 input = ac_build_buffer_load_format(&ctx->ac, t_list,
4824 buffer_index,
4825 LLVMConstInt(ctx->i32, 0, false),
4826 true);
4827
4828 for (unsigned chan = 0; chan < 4; chan++) {
4829 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
4830 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
4831 to_integer(&ctx->ac, LLVMBuildExtractElement(ctx->builder,
4832 input, llvm_chan, ""));
4833 }
4834 }
4835 }
4836
4837 static void interp_fs_input(struct nir_to_llvm_context *ctx,
4838 unsigned attr,
4839 LLVMValueRef interp_param,
4840 LLVMValueRef prim_mask,
4841 LLVMValueRef result[4])
4842 {
4843 LLVMValueRef attr_number;
4844 unsigned chan;
4845 LLVMValueRef i, j;
4846 bool interp = interp_param != NULL;
4847
4848 attr_number = LLVMConstInt(ctx->i32, attr, false);
4849
4850 /* fs.constant returns the param from the middle vertex, so it's not
4851 * really useful for flat shading. It's meant to be used for custom
4852 * interpolation (but the intrinsic can't fetch from the other two
4853 * vertices).
4854 *
4855 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
4856 * to do the right thing. The only reason we use fs.constant is that
4857 * fs.interp cannot be used on integers, because they can be equal
4858 * to NaN.
4859 */
4860 if (interp) {
4861 interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
4862 LLVMVectorType(ctx->f32, 2), "");
4863
4864 i = LLVMBuildExtractElement(ctx->builder, interp_param,
4865 ctx->i32zero, "");
4866 j = LLVMBuildExtractElement(ctx->builder, interp_param,
4867 ctx->i32one, "");
4868 }
4869
4870 for (chan = 0; chan < 4; chan++) {
4871 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
4872
4873 if (interp) {
4874 result[chan] = ac_build_fs_interp(&ctx->ac,
4875 llvm_chan,
4876 attr_number,
4877 prim_mask, i, j);
4878 } else {
4879 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
4880 LLVMConstInt(ctx->i32, 2, false),
4881 llvm_chan,
4882 attr_number,
4883 prim_mask);
4884 }
4885 }
4886 }
4887
4888 static void
4889 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
4890 struct nir_variable *variable)
4891 {
4892 int idx = variable->data.location;
4893 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4894 LLVMValueRef interp;
4895
4896 variable->data.driver_location = idx * 4;
4897 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
4898
4899 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
4900 unsigned interp_type;
4901 if (variable->data.sample) {
4902 interp_type = INTERP_SAMPLE;
4903 ctx->shader_info->fs.force_persample = true;
4904 } else if (variable->data.centroid)
4905 interp_type = INTERP_CENTROID;
4906 else
4907 interp_type = INTERP_CENTER;
4908
4909 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
4910 } else
4911 interp = NULL;
4912
4913 for (unsigned i = 0; i < attrib_count; ++i)
4914 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
4915
4916 }
4917
4918 static void
4919 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
4920 struct nir_variable *variable)
4921 {
4922 switch (ctx->stage) {
4923 case MESA_SHADER_VERTEX:
4924 handle_vs_input_decl(ctx, variable);
4925 break;
4926 case MESA_SHADER_FRAGMENT:
4927 handle_fs_input_decl(ctx, variable);
4928 break;
4929 default:
4930 break;
4931 }
4932
4933 }
4934
4935 static void
4936 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
4937 struct nir_shader *nir)
4938 {
4939 unsigned index = 0;
4940 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
4941 LLVMValueRef interp_param;
4942 		LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
4943
4944 if (!(ctx->input_mask & (1ull << i)))
4945 continue;
4946
4947 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
4948 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
4949 interp_param = *inputs;
4950 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
4951 inputs);
4952
4953 if (!interp_param)
4954 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
4955 ++index;
4956 } else if (i == VARYING_SLOT_POS) {
4957 			for (int chan = 0; chan < 3; ++chan)
4958 				inputs[chan] = ctx->frag_pos[chan];
4959
4960 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
4961 }
4962 }
4963 ctx->shader_info->fs.num_interp = index;
4964 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
4965 ctx->shader_info->fs.has_pcoord = true;
4966 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID))
4967 ctx->shader_info->fs.prim_id_input = true;
4968 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
4969 ctx->shader_info->fs.layer_input = true;
4970 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
4971 }
4972
4973 static LLVMValueRef
4974 ac_build_alloca(struct ac_llvm_context *ac,
4975 LLVMTypeRef type,
4976 const char *name)
4977 {
4978 LLVMBuilderRef builder = ac->builder;
4979 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
4980 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
4981 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
4982 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
4983 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ac->context);
4984 LLVMValueRef res;
4985
4986 if (first_instr) {
4987 LLVMPositionBuilderBefore(first_builder, first_instr);
4988 } else {
4989 LLVMPositionBuilderAtEnd(first_builder, first_block);
4990 }
4991
4992 res = LLVMBuildAlloca(first_builder, type, name);
4993 LLVMBuildStore(builder, LLVMConstNull(type), res);
4994
4995 LLVMDisposeBuilder(first_builder);
4996
4997 return res;
4998 }
4999
5000 static LLVMValueRef si_build_alloca_undef(struct ac_llvm_context *ac,
5001 LLVMTypeRef type,
5002 const char *name)
5003 {
5004 LLVMValueRef ptr = ac_build_alloca(ac, type, name);
5005 LLVMBuildStore(ac->builder, LLVMGetUndef(type), ptr);
5006 return ptr;
5007 }
5008
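/* First pass over the output declarations: gather metadata only.
 * This fills output_mask and the clip/cull distance masks on
 * nir_to_llvm_context without creating any LLVM values; the matching
 * allocas are built later by handle_shader_output_decl(). */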
5009 static void
5010 scan_shader_output_decl(struct nir_to_llvm_context *ctx,
5011 struct nir_variable *variable)
5012 {
5013 int idx = variable->data.location + variable->data.index;
5014 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5015 uint64_t mask_attribs;
5016
5017 variable->data.driver_location = idx * 4;
5018
5019 /* tess ctrl has its own load/store paths for outputs */
5020 if (ctx->stage == MESA_SHADER_TESS_CTRL)
5021 return;
5022
5023 mask_attribs = ((1ull << attrib_count) - 1) << idx;
5024 if (ctx->stage == MESA_SHADER_VERTEX ||
5025 ctx->stage == MESA_SHADER_TESS_EVAL ||
5026 ctx->stage == MESA_SHADER_GEOMETRY) {
5027 if (idx == VARYING_SLOT_CLIP_DIST0) {
5028 int length = ctx->num_output_clips + ctx->num_output_culls;
5029 if (ctx->stage == MESA_SHADER_VERTEX) {
5030 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
5031 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
5032 }
5033 if (ctx->stage == MESA_SHADER_TESS_EVAL) {
5034 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
5035 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
5036 }
5037
5038 if (length > 4)
5039 attrib_count = 2;
5040 else
5041 attrib_count = 1;
5042 mask_attribs = 1ull << idx;
5043 }
5044 }
5045
5046 ctx->output_mask |= mask_attribs;
5047 }
5048
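/* Second pass over the output declarations: create four f32 allocas
 * (one per channel) for each attribute slot at the driver_location
 * assigned during scanning. Clip/cull distances are packed into at
 * most two slots, mirroring the scan above. */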
5049 static void
5050 handle_shader_output_decl(struct ac_nir_context *ctx,
5051 struct nir_shader *nir,
5052 struct nir_variable *variable)
5053 {
5054 unsigned output_loc = variable->data.driver_location / 4;
5055 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5056
5057 /* tess ctrl has its own load/store paths for outputs */
5058 if (ctx->stage == MESA_SHADER_TESS_CTRL)
5059 return;
5060
5061 if (ctx->stage == MESA_SHADER_VERTEX ||
5062 ctx->stage == MESA_SHADER_TESS_EVAL ||
5063 ctx->stage == MESA_SHADER_GEOMETRY) {
5064 int idx = variable->data.location + variable->data.index;
5065 if (idx == VARYING_SLOT_CLIP_DIST0) {
5066 int length = nir->info.clip_distance_array_size +
5067 nir->info.cull_distance_array_size;
5068
5069 if (length > 4)
5070 attrib_count = 2;
5071 else
5072 attrib_count = 1;
5073 }
5074 }
5075
5076 for (unsigned i = 0; i < attrib_count; ++i) {
5077 for (unsigned chan = 0; chan < 4; chan++) {
5078 ctx->outputs[radeon_llvm_reg_index_soa(output_loc + i, chan)] =
5079 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "");
5080 }
5081 }
5082 }
5083
5084 static LLVMTypeRef
5085 glsl_base_to_llvm_type(struct nir_to_llvm_context *ctx,
5086 enum glsl_base_type type)
5087 {
5088 switch (type) {
5089 case GLSL_TYPE_INT:
5090 case GLSL_TYPE_UINT:
5091 case GLSL_TYPE_BOOL:
5092 case GLSL_TYPE_SUBROUTINE:
5093 return ctx->i32;
5094 case GLSL_TYPE_FLOAT: /* TODO handle mediump */
5095 return ctx->f32;
5096 case GLSL_TYPE_INT64:
5097 case GLSL_TYPE_UINT64:
5098 return ctx->i64;
5099 case GLSL_TYPE_DOUBLE:
5100 return ctx->f64;
5101 default:
5102 unreachable("unknown GLSL type");
5103 }
5104 }
5105
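/* Recursively lower a GLSL type to its LLVM equivalent, e.g. vec4
 * becomes <4 x float>, mat2x3 becomes [2 x <3 x float>], float[8]
 * becomes [8 x float], and structs become literal LLVM structs. */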
5106 static LLVMTypeRef
5107 glsl_to_llvm_type(struct nir_to_llvm_context *ctx,
5108 const struct glsl_type *type)
5109 {
5110 if (glsl_type_is_scalar(type)) {
5111 return glsl_base_to_llvm_type(ctx, glsl_get_base_type(type));
5112 }
5113
5114 if (glsl_type_is_vector(type)) {
5115 return LLVMVectorType(
5116 glsl_base_to_llvm_type(ctx, glsl_get_base_type(type)),
5117 glsl_get_vector_elements(type));
5118 }
5119
5120 if (glsl_type_is_matrix(type)) {
5121 return LLVMArrayType(
5122 glsl_to_llvm_type(ctx, glsl_get_column_type(type)),
5123 glsl_get_matrix_columns(type));
5124 }
5125
5126 if (glsl_type_is_array(type)) {
5127 return LLVMArrayType(
5128 glsl_to_llvm_type(ctx, glsl_get_array_element(type)),
5129 glsl_get_length(type));
5130 }
5131
5132 assert(glsl_type_is_struct(type));
5133
5134 LLVMTypeRef member_types[glsl_get_length(type)];
5135
5136 for (unsigned i = 0; i < glsl_get_length(type); i++) {
5137 member_types[i] =
5138 glsl_to_llvm_type(ctx,
5139 glsl_get_struct_field(type, i));
5140 }
5141
5142 return LLVMStructTypeInContext(ctx->context, member_types,
5143 glsl_get_length(type), false);
5144 }
5145
5146 static void
5147 setup_locals(struct ac_nir_context *ctx,
5148 struct nir_function *func)
5149 {
5150 int i, j;
5151 ctx->num_locals = 0;
5152 nir_foreach_variable(variable, &func->impl->locals) {
5153 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
5154 variable->data.driver_location = ctx->num_locals * 4;
5155 ctx->num_locals += attrib_count;
5156 }
5157 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
5158 if (!ctx->locals)
5159 return;
5160
5161 for (i = 0; i < ctx->num_locals; i++) {
5162 for (j = 0; j < 4; j++) {
5163 ctx->locals[i * 4 + j] =
5164 si_build_alloca_undef(&ctx->ac, ctx->ac.f32, "temp");
5165 }
5166 }
5167 }
5168
5169 static void
5170 setup_shared(struct ac_nir_context *ctx,
5171 struct nir_shader *nir)
5172 {
5173 nir_foreach_variable(variable, &nir->shared) {
5174 LLVMValueRef shared =
5175 LLVMAddGlobalInAddressSpace(
5176 ctx->ac.module, glsl_to_llvm_type(ctx->nctx, variable->type),
5177 variable->name ? variable->name : "",
5178 LOCAL_ADDR_SPACE);
5179 _mesa_hash_table_insert(ctx->vars, variable, shared);
5180 }
5181 }
5182
5183 static LLVMValueRef
5184 emit_float_saturate(struct ac_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
5185 {
5186 v = to_float(ctx, v);
5187 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
5188 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
5189 }
5190
5191
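/* Pack two 16-bit integers into one i32 for the compressed (compr=1)
 * export paths below:
 *
 *     result = (src0 & 0xffff) | ((src1 & 0xffff) << 16)
 */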
5192 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
5193 LLVMValueRef src0, LLVMValueRef src1)
5194 {
5195 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
5196 LLVMValueRef comp[2];
5197
5198 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->i32, 65535, 0), "");
5199 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->i32, 65535, 0), "");
5200 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
5201 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
5202 }
5203
5204 /* Initialize arguments for the shader export intrinsic */
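/* For fragment MRT exports the packing below depends on the color
 * buffer format: the 32_* formats export plain floats with a reduced
 * channel mask, while the 16-bit formats set compr=1 and pack two
 * channels per dword, e.g. UNORM16 converts each channel with
 * (unsigned)(clamp(x, 0, 1) * 65535 + 0.5). Everything else falls
 * through to the plain 32_ABGR path. */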
5205 static void
5206 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
5207 LLVMValueRef *values,
5208 unsigned target,
5209 struct ac_export_args *args)
5210 {
5211 /* Default is 0xf. Adjusted below depending on the format. */
5212 args->enabled_channels = 0xf;
5213
5214 /* Specify whether the EXEC mask represents the valid mask */
5215 args->valid_mask = 0;
5216
5217 /* Specify whether this is the last export */
5218 args->done = 0;
5219
5220 /* Specify the target we are exporting */
5221 args->target = target;
5222
5223 args->compr = false;
5224 args->out[0] = LLVMGetUndef(ctx->f32);
5225 args->out[1] = LLVMGetUndef(ctx->f32);
5226 args->out[2] = LLVMGetUndef(ctx->f32);
5227 args->out[3] = LLVMGetUndef(ctx->f32);
5228
5229 if (!values)
5230 return;
5231
5232 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
5233 LLVMValueRef val[4];
5234 unsigned index = target - V_008DFC_SQ_EXP_MRT;
5235 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
5236 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
5237
5238 switch(col_format) {
5239 case V_028714_SPI_SHADER_ZERO:
5240 args->enabled_channels = 0; /* writemask */
5241 args->target = V_008DFC_SQ_EXP_NULL;
5242 break;
5243
5244 case V_028714_SPI_SHADER_32_R:
5245 args->enabled_channels = 1;
5246 args->out[0] = values[0];
5247 break;
5248
5249 case V_028714_SPI_SHADER_32_GR:
5250 args->enabled_channels = 0x3;
5251 args->out[0] = values[0];
5252 args->out[1] = values[1];
5253 break;
5254
5255 case V_028714_SPI_SHADER_32_AR:
5256 args->enabled_channels = 0x9;
5257 args->out[0] = values[0];
5258 args->out[3] = values[3];
5259 break;
5260
5261 case V_028714_SPI_SHADER_FP16_ABGR:
5262 args->compr = 1;
5263
5264 for (unsigned chan = 0; chan < 2; chan++) {
5265 LLVMValueRef pack_args[2] = {
5266 values[2 * chan],
5267 values[2 * chan + 1]
5268 };
5269 LLVMValueRef packed;
5270
5271 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
5272 args->out[chan] = packed;
5273 }
5274 break;
5275
5276 case V_028714_SPI_SHADER_UNORM16_ABGR:
5277 for (unsigned chan = 0; chan < 4; chan++) {
5278 val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
5279 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5280 LLVMConstReal(ctx->f32, 65535), "");
5281 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5282 LLVMConstReal(ctx->f32, 0.5), "");
5283 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
5284 ctx->i32, "");
5285 }
5286
5287 args->compr = 1;
5288 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5289 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5290 break;
5291
5292 case V_028714_SPI_SHADER_SNORM16_ABGR:
5293 for (unsigned chan = 0; chan < 4; chan++) {
5294 val[chan] = emit_float_saturate(&ctx->ac, values[chan], -1, 1);
5295 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
5296 LLVMConstReal(ctx->f32, 32767), "");
5297
5298 /* If positive, add 0.5, else add -0.5. */
5299 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
5300 LLVMBuildSelect(ctx->builder,
5301 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
5302 val[chan], ctx->f32zero, ""),
5303 LLVMConstReal(ctx->f32, 0.5),
5304 LLVMConstReal(ctx->f32, -0.5), ""), "");
5305 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
5306 }
5307
5308 args->compr = 1;
5309 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5310 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5311 break;
5312
5313 case V_028714_SPI_SHADER_UINT16_ABGR: {
5314 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
5315
5316 for (unsigned chan = 0; chan < 4; chan++) {
5317 val[chan] = to_integer(&ctx->ac, values[chan]);
5318 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntULT, val[chan], max);
5319 }
5320
5321 args->compr = 1;
5322 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5323 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5324 break;
5325 }
5326
5327 case V_028714_SPI_SHADER_SINT16_ABGR: {
5328 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
5329 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
5330
5331 /* Clamp. */
5332 for (unsigned chan = 0; chan < 4; chan++) {
5333 val[chan] = to_integer(&ctx->ac, values[chan]);
5334 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSLT, val[chan], max);
5335 val[chan] = emit_minmax_int(&ctx->ac, LLVMIntSGT, val[chan], min);
5336 }
5337
5338 args->compr = 1;
5339 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
5340 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
5341 break;
5342 }
5343
5344 default:
5345 case V_028714_SPI_SHADER_32_ABGR:
5346 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5347 break;
5348 }
5349 } else
5350 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5351
5352 for (unsigned i = 0; i < 4; ++i)
5353 args->out[i] = to_float(&ctx->ac, args->out[i]);
5354 }
5355
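/* Export the VS/TES outputs. Position-like exports (POS, PSIZ, LAYER,
 * VIEWPORT and the clip/cull distances) are first collected in
 * pos_args[] so the last one emitted can carry the "done" bit; the
 * remaining varyings become PARAM exports, with their slots recorded
 * in vs_output_param_offset for the driver. */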
5356 static void
5357 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
5358 bool export_prim_id,
5359 struct ac_vs_output_info *outinfo)
5360 {
5361 uint32_t param_count = 0;
5362 unsigned target;
5363 unsigned pos_idx, num_pos_exports = 0;
5364 struct ac_export_args args, pos_args[4] = {};
5365 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
5366 int i;
5367
5368 memset(outinfo->vs_output_param_offset, AC_EXP_PARAM_UNDEFINED,
5369 sizeof(outinfo->vs_output_param_offset));
5370
5371 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
5372 LLVMValueRef slots[8];
5373 unsigned j;
5374
5375 if (outinfo->cull_dist_mask)
5376 outinfo->cull_dist_mask <<= ctx->num_output_clips;
5377
5378 i = VARYING_SLOT_CLIP_DIST0;
5379 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
5380 slots[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5381 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5382
5383 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
5384 slots[i] = LLVMGetUndef(ctx->f32);
5385
5386 if (ctx->num_output_clips + ctx->num_output_culls > 4) {
5387 target = V_008DFC_SQ_EXP_POS + 3;
5388 si_llvm_init_export_args(ctx, &slots[4], target, &args);
5389 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5390 &args, sizeof(args));
5391 }
5392
5393 target = V_008DFC_SQ_EXP_POS + 2;
5394 si_llvm_init_export_args(ctx, &slots[0], target, &args);
5395 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5396 &args, sizeof(args));
5397
5398 }
5399
5400 LLVMValueRef pos_values[4] = {ctx->f32zero, ctx->f32zero, ctx->f32zero, ctx->f32one};
5401 if (ctx->output_mask & (1ull << VARYING_SLOT_POS)) {
5402 for (unsigned j = 0; j < 4; j++)
5403 pos_values[j] = LLVMBuildLoad(ctx->builder,
5404 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_POS, j)], "");
5405 }
5406 si_llvm_init_export_args(ctx, pos_values, V_008DFC_SQ_EXP_POS, &pos_args[0]);
5407
5408 if (ctx->output_mask & (1ull << VARYING_SLOT_PSIZ)) {
5409 outinfo->writes_pointsize = true;
5410 psize_value = LLVMBuildLoad(ctx->builder,
5411 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_PSIZ, 0)], "");
5412 }
5413
5414 if (ctx->output_mask & (1ull << VARYING_SLOT_LAYER)) {
5415 outinfo->writes_layer = true;
5416 layer_value = LLVMBuildLoad(ctx->builder,
5417 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_LAYER, 0)], "");
5418 }
5419
5420 if (ctx->output_mask & (1ull << VARYING_SLOT_VIEWPORT)) {
5421 outinfo->writes_viewport_index = true;
5422 viewport_index_value = LLVMBuildLoad(ctx->builder,
5423 ctx->nir->outputs[radeon_llvm_reg_index_soa(VARYING_SLOT_VIEWPORT, 0)], "");
5424 }
5425
5426 uint32_t mask = ((outinfo->writes_pointsize ? 1 : 0) |
5427 (outinfo->writes_layer ? 4 : 0) |
5428 (outinfo->writes_viewport_index ? 8 : 0));
5429 if (mask) {
5430 pos_args[1].enabled_channels = mask;
5431 pos_args[1].valid_mask = 0;
5432 pos_args[1].done = 0;
5433 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
5434 pos_args[1].compr = 0;
5435 pos_args[1].out[0] = ctx->f32zero; /* X */
5436 pos_args[1].out[1] = ctx->f32zero; /* Y */
5437 pos_args[1].out[2] = ctx->f32zero; /* Z */
5438 pos_args[1].out[3] = ctx->f32zero; /* W */
5439
5440 if (outinfo->writes_pointsize)
5441 pos_args[1].out[0] = psize_value;
5442 if (outinfo->writes_layer)
5443 pos_args[1].out[2] = layer_value;
5444 if (outinfo->writes_viewport_index)
5445 pos_args[1].out[3] = viewport_index_value;
5446 }
5447 for (i = 0; i < 4; i++) {
5448 if (pos_args[i].out[0])
5449 num_pos_exports++;
5450 }
5451
5452 pos_idx = 0;
5453 for (i = 0; i < 4; i++) {
5454 if (!pos_args[i].out[0])
5455 continue;
5456
5457 /* Specify the target we are exporting */
5458 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
5459 if (pos_idx == num_pos_exports)
5460 pos_args[i].done = 1;
5461 ac_build_export(&ctx->ac, &pos_args[i]);
5462 }
5463
5464 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5465 LLVMValueRef values[4];
5466 if (!(ctx->output_mask & (1ull << i)))
5467 continue;
5468
5469 for (unsigned j = 0; j < 4; j++)
5470 values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5471 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5472
5473 if (i == VARYING_SLOT_LAYER) {
5474 target = V_008DFC_SQ_EXP_PARAM + param_count;
5475 outinfo->vs_output_param_offset[VARYING_SLOT_LAYER] = param_count;
5476 param_count++;
5477 } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
5478 target = V_008DFC_SQ_EXP_PARAM + param_count;
5479 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
5480 param_count++;
5481 } else if (i >= VARYING_SLOT_VAR0) {
5482 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
5483 target = V_008DFC_SQ_EXP_PARAM + param_count;
5484 outinfo->vs_output_param_offset[i] = param_count;
5485 param_count++;
5486 } else
5487 continue;
5488
5489 si_llvm_init_export_args(ctx, values, target, &args);
5490
5491 if (target >= V_008DFC_SQ_EXP_POS &&
5492 target <= (V_008DFC_SQ_EXP_POS + 3)) {
5493 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5494 &args, sizeof(args));
5495 } else {
5496 ac_build_export(&ctx->ac, &args);
5497 }
5498 }
5499
5500 if (export_prim_id) {
5501 LLVMValueRef values[4];
5502 target = V_008DFC_SQ_EXP_PARAM + param_count;
5503 outinfo->vs_output_param_offset[VARYING_SLOT_PRIMITIVE_ID] = param_count;
5504 param_count++;
5505
5506 values[0] = ctx->vs_prim_id;
5507 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(2,
5508 ctx->shader_info->vs.vgpr_comp_cnt);
5509 for (unsigned j = 1; j < 4; j++)
5510 values[j] = ctx->f32zero;
5511 si_llvm_init_export_args(ctx, values, target, &args);
5512 ac_build_export(&ctx->ac, &args);
5513 outinfo->export_prim_id = true;
5514 }
5515
5516 outinfo->pos_exports = num_pos_exports;
5517 outinfo->param_exports = param_count;
5518 }
5519
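/* An ES (VS or TES feeding a GS) does not export; it stores each
 * output component to the ESGS ring at dword offset
 * 4 * param_index + channel and reports the per-vertex footprint in
 * esgs_itemsize (bytes). */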
5520 static void
5521 handle_es_outputs_post(struct nir_to_llvm_context *ctx,
5522 struct ac_es_output_info *outinfo)
5523 {
5524 int j;
5525 uint64_t max_output_written = 0;
5526 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5527 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
5528 int param_index;
5529 int length = 4;
5530
5531 if (!(ctx->output_mask & (1ull << i)))
5532 continue;
5533
5534 if (i == VARYING_SLOT_CLIP_DIST0)
5535 length = ctx->num_output_clips + ctx->num_output_culls;
5536
5537 param_index = shader_io_get_unique_index(i);
5538
5539 max_output_written = MAX2(param_index + (length > 4), max_output_written);
5540
5541 for (j = 0; j < length; j++) {
5542 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
5543 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
5544
5545 ac_build_buffer_store_dword(&ctx->ac,
5546 ctx->esgs_ring,
5547 out_val, 1,
5548 NULL, ctx->es2gs_offset,
5549 (4 * param_index + j) * 4,
5550 1, 1, true, true);
5551 }
5552 }
5553 outinfo->esgs_itemsize = (max_output_written + 1) * 16;
5554 }
5555
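/* An LS (VS feeding tessellation) writes its outputs to LDS for the
 * TCS to read: the per-vertex base is rel_auto_id * vertex_dw_stride,
 * and each output slot occupies four dwords starting at param * 4. */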
5556 static void
5557 handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
5558 {
5559 LLVMValueRef vertex_id = ctx->rel_auto_id;
5560 LLVMValueRef vertex_dw_stride = unpack_param(ctx, ctx->ls_out_layout, 13, 8);
5561 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
5562 vertex_dw_stride, "");
5563
5564 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5565 LLVMValueRef *out_ptr = &ctx->nir->outputs[i * 4];
5566 int length = 4;
5567
5568 if (!(ctx->output_mask & (1ull << i)))
5569 continue;
5570
5571 if (i == VARYING_SLOT_CLIP_DIST0)
5572 length = ctx->num_output_clips + ctx->num_output_culls;
5573 int param = shader_io_get_unique_index(i);
5574 mark_tess_output(ctx, false, param);
5575 if (length > 4)
5576 mark_tess_output(ctx, false, param + 1);
5577 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr,
5578 LLVMConstInt(ctx->i32, param * 4, false),
5579 "");
5580 for (unsigned j = 0; j < length; j++) {
5581 lds_store(ctx, dw_addr,
5582 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
5583 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
5584 }
5585 }
5586 }
5587
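/* Small if/endif builder: ac_nir_build_if() records the entry block
 * and positions the builder in a fresh true block; ac_nir_build_endif()
 * branches to the merge block and then patches the conditional branch
 * into the entry block. */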
5588 struct ac_build_if_state
5589 {
5590 struct nir_to_llvm_context *ctx;
5591 LLVMValueRef condition;
5592 LLVMBasicBlockRef entry_block;
5593 LLVMBasicBlockRef true_block;
5594 LLVMBasicBlockRef false_block;
5595 LLVMBasicBlockRef merge_block;
5596 };
5597
5598 static LLVMBasicBlockRef
5599 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name)
5600 {
5601 LLVMBasicBlockRef current_block;
5602 LLVMBasicBlockRef next_block;
5603 LLVMBasicBlockRef new_block;
5604
5605 /* get current basic block */
5606 current_block = LLVMGetInsertBlock(ctx->builder);
5607
5608 /* check whether there's another block after this one */
5609 next_block = LLVMGetNextBasicBlock(current_block);
5610 if (next_block) {
5611 /* insert the new block before the next block */
5612 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
5613 }
5614 else {
5615 /* append new block after current block */
5616 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
5617 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
5618 }
5619 return new_block;
5620 }
5621
5622 static void
5623 ac_nir_build_if(struct ac_build_if_state *ifthen,
5624 struct nir_to_llvm_context *ctx,
5625 LLVMValueRef condition)
5626 {
5627 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder);
5628
5629 memset(ifthen, 0, sizeof *ifthen);
5630 ifthen->ctx = ctx;
5631 ifthen->condition = condition;
5632 ifthen->entry_block = block;
5633
5634 /* create endif/merge basic block for the phi functions */
5635 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
5636
5637 /* create/insert true_block before merge_block */
5638 ifthen->true_block =
5639 LLVMInsertBasicBlockInContext(ctx->context,
5640 ifthen->merge_block,
5641 "if-true-block");
5642
5643 /* successive code goes into the true block */
5644 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
5645 }
5646
5647 /**
5648 * End a conditional.
5649 */
5650 static void
5651 ac_nir_build_endif(struct ac_build_if_state *ifthen)
5652 {
5653 LLVMBuilderRef builder = ifthen->ctx->builder;
5654
5655 /* Insert branch to the merge block from current block */
5656 LLVMBuildBr(builder, ifthen->merge_block);
5657
5658 /*
5659 * Now patch in the various branch instructions.
5660 */
5661
5662 /* Insert the conditional branch instruction at the end of entry_block */
5663 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
5664 if (ifthen->false_block) {
5665 /* we have an else clause */
5666 LLVMBuildCondBr(builder, ifthen->condition,
5667 ifthen->true_block, ifthen->false_block);
5668 }
5669 else {
5670 /* no else clause */
5671 LLVMBuildCondBr(builder, ifthen->condition,
5672 ifthen->true_block, ifthen->merge_block);
5673 }
5674
5675 /* Resume building code at end of the ifthen->merge_block */
5676 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
5677 }
5678
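/* TCS epilogue: after the barrier, invocation 0 of each patch reads
 * the inner/outer tess levels back from LDS, writes the dynamic HS
 * control word (0x80000000) once from the first patch, stores the
 * factors to the tess-factor ring, and mirrors them to the offchip
 * buffer so the TES can read them. */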
5679 static void
5680 write_tess_factors(struct nir_to_llvm_context *ctx)
5681 {
5682 unsigned stride, outer_comps, inner_comps;
5683 struct ac_build_if_state if_ctx, inner_if_ctx;
5684 LLVMValueRef invocation_id = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
5685 LLVMValueRef rel_patch_id = unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
5686 unsigned tess_inner_index, tess_outer_index;
5687 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
5688 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
5689 int i;
5690 emit_barrier(ctx);
5691
5692 switch (ctx->options->key.tcs.primitive_mode) {
5693 case GL_ISOLINES:
5694 stride = 2;
5695 outer_comps = 2;
5696 inner_comps = 0;
5697 break;
5698 case GL_TRIANGLES:
5699 stride = 4;
5700 outer_comps = 3;
5701 inner_comps = 1;
5702 break;
5703 case GL_QUADS:
5704 stride = 6;
5705 outer_comps = 4;
5706 inner_comps = 2;
5707 break;
5708 default:
5709 return;
5710 }
5711
5712 ac_nir_build_if(&if_ctx, ctx,
5713 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
5714 invocation_id, ctx->i32zero, ""));
5715
5716 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
5717 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
5718
5719 mark_tess_output(ctx, true, tess_inner_index);
5720 mark_tess_output(ctx, true, tess_outer_index);
5721 lds_base = get_tcs_out_current_patch_data_offset(ctx);
5722 lds_inner = LLVMBuildAdd(ctx->builder, lds_base,
5723 LLVMConstInt(ctx->i32, tess_inner_index * 4, false), "");
5724 lds_outer = LLVMBuildAdd(ctx->builder, lds_base,
5725 LLVMConstInt(ctx->i32, tess_outer_index * 4, false), "");
5726
5727 for (i = 0; i < 4; i++) {
5728 inner[i] = LLVMGetUndef(ctx->i32);
5729 outer[i] = LLVMGetUndef(ctx->i32);
5730 }
5731
5732 /* LINES reversal: the two outer factors are written to the TF ring in swapped order */
5733 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
5734 outer[0] = out[1] = lds_load(ctx, lds_outer);
5735 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
5736 LLVMConstInt(ctx->i32, 1, false), "");
5737 outer[1] = out[0] = lds_load(ctx, lds_outer);
5738 } else {
5739 for (i = 0; i < outer_comps; i++) {
5740 outer[i] = out[i] =
5741 lds_load(ctx, lds_outer);
5742 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
5743 LLVMConstInt(ctx->i32, 1, false), "");
5744 }
5745 for (i = 0; i < inner_comps; i++) {
5746 inner[i] = out[outer_comps+i] =
5747 lds_load(ctx, lds_inner);
5748 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
5749 LLVMConstInt(ctx->i32, 1, false), "");
5750 }
5751 }
5752
5753 /* Convert the outputs to vectors for stores. */
5754 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
5755 vec1 = NULL;
5756
5757 if (stride > 4)
5758 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
5759
5760
5761 buffer = ctx->hs_ring_tess_factor;
5762 tf_base = ctx->tess_factor_offset;
5763 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
5764 LLVMConstInt(ctx->i32, 4 * stride, false), "");
5765
5766 ac_nir_build_if(&inner_if_ctx, ctx,
5767 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
5768 rel_patch_id, ctx->i32zero, ""));
5769
5770 /* Store the dynamic HS control word. */
5771 ac_build_buffer_store_dword(&ctx->ac, buffer,
5772 LLVMConstInt(ctx->i32, 0x80000000, false),
5773 1, ctx->i32zero, tf_base,
5774 0, 1, 0, true, false);
5775 ac_nir_build_endif(&inner_if_ctx);
5776
5777 /* Store the tessellation factors. */
5778 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
5779 MIN2(stride, 4), byteoffset, tf_base,
5780 4, 1, 0, true, false);
5781 if (vec1)
5782 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
5783 stride - 4, byteoffset, tf_base,
5784 20, 1, 0, true, false);
5785
5786 /* TODO: store to offchip for TES to read - only if TES reads them */
5787 if (1) {
5788 LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
5789 LLVMValueRef tf_inner_offset;
5790 unsigned param_outer, param_inner;
5791
5792 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
5793 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
5794 LLVMConstInt(ctx->i32, param_outer, 0));
5795
5796 outer_vec = ac_build_gather_values(&ctx->ac, outer,
5797 util_next_power_of_two(outer_comps));
5798
5799 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
5800 outer_comps, tf_outer_offset,
5801 ctx->oc_lds, 0, 1, 0, true, false);
5802 if (inner_comps) {
5803 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
5804 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
5805 LLVMConstInt(ctx->i32, param_inner, 0));
5806
5807 inner_vec = inner_comps == 1 ? inner[0] :
5808 ac_build_gather_values(&ctx->ac, inner, inner_comps);
5809 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
5810 inner_comps, tf_inner_offset,
5811 ctx->oc_lds, 0, 1, 0, true, false);
5812 }
5813 }
5814 ac_nir_build_endif(&if_ctx);
5815 }
5816
5817 static void
5818 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
5819 {
5820 write_tess_factors(ctx);
5821 }
5822
5823 static bool
5824 si_export_mrt_color(struct nir_to_llvm_context *ctx,
5825 LLVMValueRef *color, unsigned param, bool is_last,
5826 struct ac_export_args *args)
5827 {
5828 /* Export */
5829 si_llvm_init_export_args(ctx, color, param,
5830 args);
5831
5832 if (is_last) {
5833 args->valid_mask = 1; /* whether the EXEC mask is valid */
5834 args->done = 1; /* DONE bit */
5835 } else if (!args->enabled_channels)
5836 return false; /* unnecessary NULL export */
5837
5838 return true;
5839 }
5840
5841 static void
5842 si_export_mrt_z(struct nir_to_llvm_context *ctx,
5843 LLVMValueRef depth, LLVMValueRef stencil,
5844 LLVMValueRef samplemask)
5845 {
5846 struct ac_export_args args;
5847
5848 args.enabled_channels = 0;
5849 args.valid_mask = 1;
5850 args.done = 1;
5851 args.target = V_008DFC_SQ_EXP_MRTZ;
5852 args.compr = false;
5853
5854 args.out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
5855 args.out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
5856 args.out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
5857 args.out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
5858
5859 if (depth) {
5860 args.out[0] = depth;
5861 args.enabled_channels |= 0x1;
5862 }
5863
5864 if (stencil) {
5865 args.out[1] = stencil;
5866 args.enabled_channels |= 0x2;
5867 }
5868
5869 if (samplemask) {
5870 args.out[2] = samplemask;
5871 args.enabled_channels |= 0x4;
5872 }
5873
5874 /* SI (except OLAND and HAINAN) has a bug where it only looks
5875 * at the X writemask component. */
5876 if (ctx->options->chip_class == SI &&
5877 ctx->options->family != CHIP_OLAND &&
5878 ctx->options->family != CHIP_HAINAN)
5879 args.enabled_channels |= 0x1;
5880
5881 ac_build_export(&ctx->ac, &args);
5882 }
5883
5884 static void
5885 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
5886 {
5887 unsigned index = 0;
5888 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
5889 struct ac_export_args color_args[8];
5890
5891 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5892 LLVMValueRef values[4];
5893
5894 if (!(ctx->output_mask & (1ull << i)))
5895 continue;
5896
5897 if (i == FRAG_RESULT_DEPTH) {
5898 ctx->shader_info->fs.writes_z = true;
5899 depth = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5900 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5901 } else if (i == FRAG_RESULT_STENCIL) {
5902 ctx->shader_info->fs.writes_stencil = true;
5903 stencil = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5904 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5905 } else if (i == FRAG_RESULT_SAMPLE_MASK) {
5906 ctx->shader_info->fs.writes_sample_mask = true;
5907 samplemask = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5908 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5909 } else {
5910 bool last = false;
5911 for (unsigned j = 0; j < 4; j++)
5912 values[j] = to_float(&ctx->ac, LLVMBuildLoad(ctx->builder,
5913 ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5914
5915 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
5916 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
5917
5918 bool ret = si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + (i - FRAG_RESULT_DATA0), last, &color_args[index]);
5919 if (ret)
5920 index++;
5921 }
5922 }
5923
5924 for (unsigned i = 0; i < index; i++)
5925 ac_build_export(&ctx->ac, &color_args[i]);
5926 if (depth || stencil || samplemask)
5927 si_export_mrt_z(ctx, depth, stencil, samplemask);
5928 else if (!index) {
5929 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true, &color_args[0]);
5930 ac_build_export(&ctx->ac, &color_args[0]);
5931 }
5932
5933 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
5934 }
5935
5936 static void
5937 emit_gs_epilogue(struct nir_to_llvm_context *ctx)
5938 {
5939 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
5940 }
5941
5942 static void
5943 handle_shader_outputs_post(struct ac_shader_abi *abi, unsigned max_outputs,
5944 LLVMValueRef *addrs)
5945 {
5946 struct nir_to_llvm_context *ctx = nir_to_llvm_context_from_abi(abi);
5947
5948 switch (ctx->stage) {
5949 case MESA_SHADER_VERTEX:
5950 if (ctx->options->key.vs.as_ls)
5951 handle_ls_outputs_post(ctx);
5952 else if (ctx->options->key.vs.as_es)
5953 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
5954 else
5955 handle_vs_outputs_post(ctx, ctx->options->key.vs.export_prim_id,
5956 &ctx->shader_info->vs.outinfo);
5957 break;
5958 case MESA_SHADER_FRAGMENT:
5959 handle_fs_outputs_post(ctx);
5960 break;
5961 case MESA_SHADER_GEOMETRY:
5962 emit_gs_epilogue(ctx);
5963 break;
5964 case MESA_SHADER_TESS_CTRL:
5965 handle_tcs_outputs_post(ctx);
5966 break;
5967 case MESA_SHADER_TESS_EVAL:
5968 if (ctx->options->key.tes.as_es)
5969 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
5970 else
5971 handle_vs_outputs_post(ctx, ctx->options->key.tes.export_prim_id,
5972 &ctx->shader_info->tes.outinfo);
5973 break;
5974 default:
5975 break;
5976 }
5977 }
5978
5979 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
5980 {
5981 LLVMPassManagerRef passmgr;
5982 /* Create the pass manager */
5983 passmgr = LLVMCreateFunctionPassManagerForModule(
5984 ctx->module);
5985
5986 /* This pass should eliminate all the load and store instructions */
5987 LLVMAddPromoteMemoryToRegisterPass(passmgr);
5988
5989 /* Add some optimization passes */
5990 LLVMAddScalarReplAggregatesPass(passmgr);
5991 LLVMAddLICMPass(passmgr);
5992 LLVMAddAggressiveDCEPass(passmgr);
5993 LLVMAddCFGSimplificationPass(passmgr);
5994 LLVMAddInstructionCombiningPass(passmgr);
5995
5996 /* Run the passes */
5997 LLVMInitializeFunctionPassManager(passmgr);
5998 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
5999 LLVMFinalizeFunctionPassManager(passmgr);
6000
6001 LLVMDisposeBuilder(ctx->builder);
6002 LLVMDisposePassManager(passmgr);
6003 }
6004
6005 static void
6006 ac_nir_eliminate_const_vs_outputs(struct nir_to_llvm_context *ctx)
6007 {
6008 struct ac_vs_output_info *outinfo;
6009
6010 switch (ctx->stage) {
6011 case MESA_SHADER_FRAGMENT:
6012 case MESA_SHADER_COMPUTE:
6013 case MESA_SHADER_TESS_CTRL:
6014 case MESA_SHADER_GEOMETRY:
6015 return;
6016 case MESA_SHADER_VERTEX:
6017 if (ctx->options->key.vs.as_ls ||
6018 ctx->options->key.vs.as_es)
6019 return;
6020 outinfo = &ctx->shader_info->vs.outinfo;
6021 break;
6022 case MESA_SHADER_TESS_EVAL:
6023 if (ctx->options->key.tes.as_es)
6024 return;
6025 outinfo = &ctx->shader_info->tes.outinfo;
6026 break;
6027 default:
6028 unreachable("Unhandled shader type");
6029 }
6030
6031 ac_optimize_vs_outputs(&ctx->ac,
6032 ctx->main_function,
6033 outinfo->vs_output_param_offset,
6034 VARYING_SLOT_MAX,
6035 &outinfo->param_exports);
6036 }
6037
6038 static void
6039 ac_setup_rings(struct nir_to_llvm_context *ctx)
6040 {
6041 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
6042 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
6043 ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
6044 }
6045
6046 if (ctx->is_gs_copy_shader) {
6047 ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
6048 }
6049 if (ctx->stage == MESA_SHADER_GEOMETRY) {
6050 LLVMValueRef tmp;
6051 ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
6052 ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
6053
6054 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
6055
6056 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->i32, 2, false), "");
6057 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, "");
6058 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
6059 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, "");
6060 }
6061
6062 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
6063 ctx->stage == MESA_SHADER_TESS_EVAL) {
6064 ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
6065 ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
6066 }
6067 }
6068
6069 static unsigned
6070 ac_nir_get_max_workgroup_size(enum chip_class chip_class,
6071 const struct nir_shader *nir)
6072 {
6073 switch (nir->stage) {
6074 case MESA_SHADER_TESS_CTRL:
6075 return chip_class >= CIK ? 128 : 64;
6076 case MESA_SHADER_GEOMETRY:
6077 return 64;
6078 case MESA_SHADER_COMPUTE:
6079 break;
6080 default:
6081 return 0;
6082 }
6083
6084 unsigned max_workgroup_size = nir->info.cs.local_size[0] *
6085 nir->info.cs.local_size[1] *
6086 nir->info.cs.local_size[2];
6087 return max_workgroup_size;
6088 }
6089
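/* Drive the NIR -> LLVM translation of one shader: set up the output
 * allocas and the def/phi/var hash tables, allocate locals (plus LDS
 * variables for compute), visit the CFG, resolve the phis, and hand
 * the outputs to the ABI's emit_outputs callback. */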
6090 void ac_nir_translate(struct ac_llvm_context *ac, struct ac_shader_abi *abi,
6091 struct nir_shader *nir, struct nir_to_llvm_context *nctx)
6092 {
6093 struct ac_nir_context ctx = {};
6094 struct nir_function *func;
6095
6096 ctx.ac = *ac;
6097 ctx.abi = abi;
6098
6099 ctx.nctx = nctx;
6100 if (nctx)
6101 nctx->nir = &ctx;
6102
6103 ctx.stage = nir->stage;
6104
6105 nir_foreach_variable(variable, &nir->outputs)
6106 handle_shader_output_decl(&ctx, nir, variable);
6107
6108 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6109 _mesa_key_pointer_equal);
6110 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6111 _mesa_key_pointer_equal);
6112 ctx.vars = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
6113 _mesa_key_pointer_equal);
6114
6115 func = (struct nir_function *)exec_list_get_head(&nir->functions);
6116
6117 setup_locals(&ctx, func);
6118
6119 if (nir->stage == MESA_SHADER_COMPUTE)
6120 setup_shared(&ctx, nir);
6121
6122 visit_cf_list(&ctx, &func->impl->body);
6123 phi_post_pass(&ctx);
6124
6125 ctx.abi->emit_outputs(ctx.abi, RADEON_LLVM_MAX_OUTPUTS,
6126 ctx.outputs);
6127
6128 free(ctx.locals);
6129 ralloc_free(ctx.defs);
6130 ralloc_free(ctx.phis);
6131 ralloc_free(ctx.vars);
6132
6133 if (nctx)
6134 nctx->nir = NULL;
6135 }
6136
6137 static
6138 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
6139 struct nir_shader *nir,
6140 struct ac_shader_variant_info *shader_info,
6141 const struct ac_nir_compiler_options *options)
6142 {
6143 struct nir_to_llvm_context ctx = {0};
6144 unsigned i;
6145 ctx.options = options;
6146 ctx.shader_info = shader_info;
6147 ctx.context = LLVMContextCreate();
6148 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
6149
6150 ac_llvm_context_init(&ctx.ac, ctx.context);
6151 ctx.ac.module = ctx.module;
6152
6153 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
6154
6155 memset(shader_info, 0, sizeof(*shader_info));
6156
6157 ac_nir_shader_info_pass(nir, options, &shader_info->info);
6158
6159 LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
6160
6161 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
6162 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
6163 LLVMSetDataLayout(ctx.module, data_layout_str);
6164 LLVMDisposeTargetData(data_layout);
6165 LLVMDisposeMessage(data_layout_str);
6166
6167 setup_types(&ctx);
6168
6169 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
6170 ctx.ac.builder = ctx.builder;
6171 ctx.stage = nir->stage;
6172 ctx.max_workgroup_size = ac_nir_get_max_workgroup_size(ctx.options->chip_class, nir);
6173
6174 for (i = 0; i < AC_UD_MAX_SETS; i++)
6175 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
6176 for (i = 0; i < AC_UD_MAX_UD; i++)
6177 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
6178
6179 create_function(&ctx);
6180
6181 if (nir->stage == MESA_SHADER_GEOMETRY) {
6182 ctx.gs_next_vertex = ac_build_alloca(&ctx.ac, ctx.i32, "gs_next_vertex");
6183
6184 ctx.gs_max_out_vertices = nir->info.gs.vertices_out;
6185 } else if (nir->stage == MESA_SHADER_TESS_EVAL) {
6186 ctx.tes_primitive_mode = nir->info.tess.primitive_mode;
6187 }
6188
6189 ac_setup_rings(&ctx);
6190
6191 ctx.num_output_clips = nir->info.clip_distance_array_size;
6192 ctx.num_output_culls = nir->info.cull_distance_array_size;
6193
6194 nir_foreach_variable(variable, &nir->inputs)
6195 handle_shader_input_decl(&ctx, variable);
6196
6197 if (nir->stage == MESA_SHADER_FRAGMENT)
6198 handle_fs_inputs_pre(&ctx, nir);
6199
6200 ctx.abi.inputs = &ctx.inputs[0];
6201 ctx.abi.emit_outputs = handle_shader_outputs_post;
6202
6203 nir_foreach_variable(variable, &nir->outputs)
6204 scan_shader_output_decl(&ctx, variable);
6205
6206 ac_nir_translate(&ctx.ac, &ctx.abi, nir, &ctx);
6207
6208 LLVMBuildRetVoid(ctx.builder);
6209
6210 ac_llvm_finalize_module(&ctx);
6211
6212 ac_nir_eliminate_const_vs_outputs(&ctx);
6213
6214 if (nir->stage == MESA_SHADER_GEOMETRY) {
6215 unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
6216 shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
6217 shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
6218 nir->info.gs.vertices_out;
6219 } else if (nir->stage == MESA_SHADER_TESS_CTRL) {
6220 shader_info->tcs.outputs_written = ctx.tess_outputs_written;
6221 shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
6222 } else if (nir->stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
6223 shader_info->vs.outputs_written = ctx.tess_outputs_written;
6224 }
6225
6226 return ctx.module;
6227 }
6228
6229 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
6230 {
6231 unsigned *retval = (unsigned *)context;
6232 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
6233 char *description = LLVMGetDiagInfoDescription(di);
6234
6235 if (severity == LLVMDSError) {
6236 *retval = 1;
6237 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
6238 description);
6239 }
6240
6241 LLVMDisposeMessage(description);
6242 }
6243
6244 static unsigned ac_llvm_compile(LLVMModuleRef M,
6245 struct ac_shader_binary *binary,
6246 LLVMTargetMachineRef tm)
6247 {
6248 unsigned retval = 0;
6249 char *err;
6250 LLVMContextRef llvm_ctx;
6251 LLVMMemoryBufferRef out_buffer;
6252 unsigned buffer_size;
6253 const char *buffer_data;
6254 LLVMBool mem_err;
6255
6256 /* Set up the diagnostic handler */
6257 llvm_ctx = LLVMGetModuleContext(M);
6258
6259 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
6260 &retval);
6261
6262 /* Compile the IR */
6263 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
6264 &err, &out_buffer);
6265
6266 /* Process Errors/Warnings */
6267 if (mem_err) {
6268 fprintf(stderr, "%s: %s\n", __FUNCTION__, err);
6269 free(err);
6270 retval = 1;
6271 goto out;
6272 }
6273
6274 /* Extract the shader code */
6275 buffer_size = LLVMGetBufferSize(out_buffer);
6276 buffer_data = LLVMGetBufferStart(out_buffer);
6277
6278 ac_elf_read(buffer_data, buffer_size, binary);
6279
6280 /* Clean up */
6281 LLVMDisposeMemoryBuffer(out_buffer);
6282
6283 out:
6284 return retval;
6285 }
6286
6287 static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
6288 LLVMModuleRef llvm_module,
6289 struct ac_shader_binary *binary,
6290 struct ac_shader_config *config,
6291 struct ac_shader_variant_info *shader_info,
6292 gl_shader_stage stage,
6293 bool dump_shader, bool supports_spill)
6294 {
6295 if (dump_shader)
6296 ac_dump_module(llvm_module);
6297
6298 memset(binary, 0, sizeof(*binary));
6299 int v = ac_llvm_compile(llvm_module, binary, tm);
6300 if (v) {
6301 fprintf(stderr, "compile failed\n");
6302 }
6303
6304 if (dump_shader)
6305 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
6306
6307 ac_shader_binary_read_config(binary, config, 0, supports_spill);
6308
6309 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
6310 LLVMDisposeModule(llvm_module);
6311 LLVMContextDispose(ctx);
6312
6313 if (stage == MESA_SHADER_FRAGMENT) {
6314 shader_info->num_input_vgprs = 0;
6315 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
6316 shader_info->num_input_vgprs += 2;
6317 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
6318 shader_info->num_input_vgprs += 2;
6319 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
6320 shader_info->num_input_vgprs += 2;
6321 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
6322 shader_info->num_input_vgprs += 3;
6323 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
6324 shader_info->num_input_vgprs += 2;
6325 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
6326 shader_info->num_input_vgprs += 2;
6327 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
6328 shader_info->num_input_vgprs += 2;
6329 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
6330 shader_info->num_input_vgprs += 1;
6331 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
6332 shader_info->num_input_vgprs += 1;
6333 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
6334 shader_info->num_input_vgprs += 1;
6335 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
6336 shader_info->num_input_vgprs += 1;
6337 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
6338 shader_info->num_input_vgprs += 1;
6339 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
6340 shader_info->num_input_vgprs += 1;
6341 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
6342 shader_info->num_input_vgprs += 1;
6343 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
6344 shader_info->num_input_vgprs += 1;
6345 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
6346 shader_info->num_input_vgprs += 1;
6347 }
6348 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
6349
6350 /* +3 for scratch wave offset and VCC */
6351 config->num_sgprs = MAX2(config->num_sgprs,
6352 shader_info->num_input_sgprs + 3);
6353 }
6354
6355 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
6356 struct ac_shader_binary *binary,
6357 struct ac_shader_config *config,
6358 struct ac_shader_variant_info *shader_info,
6359 struct nir_shader *nir,
6360 const struct ac_nir_compiler_options *options,
6361 bool dump_shader)
6362 {
6363
6364 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
6365 options);
6366
6367 ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
6368 switch (nir->stage) {
6369 case MESA_SHADER_COMPUTE:
6370 for (int i = 0; i < 3; ++i)
6371 shader_info->cs.block_size[i] = nir->info.cs.local_size[i];
6372 break;
6373 case MESA_SHADER_FRAGMENT:
6374 shader_info->fs.early_fragment_test = nir->info.fs.early_fragment_tests;
6375 break;
6376 case MESA_SHADER_GEOMETRY:
6377 shader_info->gs.vertices_in = nir->info.gs.vertices_in;
6378 shader_info->gs.vertices_out = nir->info.gs.vertices_out;
6379 shader_info->gs.output_prim = nir->info.gs.output_primitive;
6380 shader_info->gs.invocations = nir->info.gs.invocations;
6381 break;
6382 case MESA_SHADER_TESS_EVAL:
6383 shader_info->tes.primitive_mode = nir->info.tess.primitive_mode;
6384 shader_info->tes.spacing = nir->info.tess.spacing;
6385 shader_info->tes.ccw = nir->info.tess.ccw;
6386 shader_info->tes.point_mode = nir->info.tess.point_mode;
6387 shader_info->tes.as_es = options->key.tes.as_es;
6388 break;
6389 case MESA_SHADER_TESS_CTRL:
6390 shader_info->tcs.tcs_vertices_out = nir->info.tess.tcs_vertices_out;
6391 break;
6392 case MESA_SHADER_VERTEX:
6393 shader_info->vs.as_es = options->key.vs.as_es;
6394 shader_info->vs.as_ls = options->key.vs.as_ls;
6395 /* In LS mode we need at least 1 VGPR component; the invocation id needs 3 (handled elsewhere). */
6396 if (options->key.vs.as_ls)
6397 shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
6398 break;
6399 default:
6400 break;
6401 }
6402 }
6403
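/* The GS copy shader runs as a VS: for every slot in output_mask it
 * loads the value back from the GSVS ring (args[2], the per-component
 * offset of (slot * 4 + j) * gs_max_out_vertices * 16 * 4 bytes, is
 * filled in inside the loop) and re-exports everything through the
 * regular VS path. */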
6404 static void
6405 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
6406 {
6407 LLVMValueRef args[9];
6408 args[0] = ctx->gsvs_ring;
6409 args[1] = LLVMBuildMul(ctx->builder, ctx->abi.vertex_id, LLVMConstInt(ctx->i32, 4, false), "");
6410 args[3] = ctx->i32zero;
6411 args[4] = ctx->i32one; /* OFFEN */
6412 args[5] = ctx->i32zero; /* IDXEN */
6413 args[6] = ctx->i32one; /* GLC */
6414 args[7] = ctx->i32one; /* SLC */
6415 args[8] = ctx->i32zero; /* TFE */
6416
6417 int idx = 0;
6418
6419 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
6420 int length = 4;
6421 int slot = idx;
6422 int slot_inc = 1;
6423 if (!(ctx->output_mask & (1ull << i)))
6424 continue;
6425
6426 if (i == VARYING_SLOT_CLIP_DIST0) {
6427 /* unpack clip and cull from a single set of slots */
6428 length = ctx->num_output_clips + ctx->num_output_culls;
6429 if (length > 4)
6430 slot_inc = 2;
6431 }
6432
6433 for (unsigned j = 0; j < length; j++) {
6434 LLVMValueRef value;
6435 args[2] = LLVMConstInt(ctx->i32,
6436 (slot * 4 + j) *
6437 ctx->gs_max_out_vertices * 16 * 4, false);
6438
6439 value = ac_build_intrinsic(&ctx->ac,
6440 "llvm.SI.buffer.load.dword.i32.i32",
6441 ctx->i32, args, 9,
6442 AC_FUNC_ATTR_READONLY |
6443 AC_FUNC_ATTR_LEGACY);
6444
6445 LLVMBuildStore(ctx->builder,
6446 to_float(&ctx->ac, value), ctx->nir->outputs[radeon_llvm_reg_index_soa(i, j)]);
6447 }
6448 idx += slot_inc;
6449 }
6450 handle_vs_outputs_post(ctx, false, &ctx->shader_info->vs.outinfo);
6451 }
6452
6453 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
6454 struct nir_shader *geom_shader,
6455 struct ac_shader_binary *binary,
6456 struct ac_shader_config *config,
6457 struct ac_shader_variant_info *shader_info,
6458 const struct ac_nir_compiler_options *options,
6459 bool dump_shader)
6460 {
6461 struct nir_to_llvm_context ctx = {0};
6462 ctx.context = LLVMContextCreate();
6463 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
6464 ctx.options = options;
6465 ctx.shader_info = shader_info;
6466
6467 ac_llvm_context_init(&ctx.ac, ctx.context);
6468 ctx.ac.module = ctx.module;
6469
6470 ctx.is_gs_copy_shader = true;
6471 LLVMSetTarget(ctx.module, "amdgcn--");
6472 setup_types(&ctx);
6473
6474 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
6475 ctx.ac.builder = ctx.builder;
6476 ctx.stage = MESA_SHADER_VERTEX;
6477
6478 create_function(&ctx);
6479
6480 ctx.gs_max_out_vertices = geom_shader->info.gs.vertices_out;
6481 ac_setup_rings(&ctx);
6482
6483 ctx.num_output_clips = geom_shader->info.clip_distance_array_size;
6484 ctx.num_output_culls = geom_shader->info.cull_distance_array_size;
6485
6486 struct ac_nir_context nir_ctx = {};
6487 nir_ctx.ac = ctx.ac;
6488 nir_ctx.abi = &ctx.abi;
6489
6490 nir_ctx.nctx = &ctx;
6491 ctx.nir = &nir_ctx;
6492
6493 nir_foreach_variable(variable, &geom_shader->outputs) {
6494 scan_shader_output_decl(&ctx, variable);
6495 handle_shader_output_decl(&nir_ctx, geom_shader, variable);
6496 }
6497
6498 ac_gs_copy_shader_emit(&ctx);
6499
6500 ctx.nir = NULL;
6501
6502 LLVMBuildRetVoid(ctx.builder);
6503
6504 ac_llvm_finalize_module(&ctx);
6505
6506 ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info,
6507 MESA_SHADER_VERTEX,
6508 dump_shader, options->supports_spill);
6509 }