radv/ac: handle writing out tess factors.
mesa.git: src/amd/common/ac_nir_to_llvm.c
/*
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_nir_to_llvm.h"
#include "ac_llvm_build.h"
#include "ac_llvm_util.h"
#include "ac_binary.h"
#include "sid.h"
#include "nir/nir.h"
#include "../vulkan/radv_descriptor_set.h"
#include "util/bitscan.h"
#include <llvm-c/Transforms/Scalar.h>

enum radeon_llvm_calling_convention {
        RADEON_LLVM_AMDGPU_VS = 87,
        RADEON_LLVM_AMDGPU_GS = 88,
        RADEON_LLVM_AMDGPU_PS = 89,
        RADEON_LLVM_AMDGPU_CS = 90,
};

#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)

enum desc_type {
        DESC_IMAGE,
        DESC_FMASK,
        DESC_SAMPLER,
        DESC_BUFFER,
};

struct nir_to_llvm_context {
        struct ac_llvm_context ac;
        const struct ac_nir_compiler_options *options;
        struct ac_shader_variant_info *shader_info;

        LLVMContextRef context;
        LLVMModuleRef module;
        LLVMBuilderRef builder;
        LLVMValueRef main_function;

        struct hash_table *defs;
        struct hash_table *phis;

        LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
        LLVMValueRef ring_offsets;
        LLVMValueRef push_constants;
        LLVMValueRef num_work_groups;
        LLVMValueRef workgroup_ids;
        LLVMValueRef local_invocation_ids;
        LLVMValueRef tg_size;

        LLVMValueRef vertex_buffers;
        LLVMValueRef base_vertex;
        LLVMValueRef start_instance;
        LLVMValueRef draw_index;
        LLVMValueRef vertex_id;
        LLVMValueRef rel_auto_id;
        LLVMValueRef vs_prim_id;
        LLVMValueRef instance_id;
        LLVMValueRef ls_out_layout;
        LLVMValueRef es2gs_offset;

        LLVMValueRef tcs_offchip_layout;
        LLVMValueRef tcs_out_offsets;
        LLVMValueRef tcs_out_layout;
        LLVMValueRef tcs_in_layout;
        LLVMValueRef oc_lds;
        LLVMValueRef tess_factor_offset;
        LLVMValueRef tcs_patch_id;
        LLVMValueRef tcs_rel_ids;
        LLVMValueRef tes_rel_patch_id;
        LLVMValueRef tes_patch_id;
        LLVMValueRef tes_u;
        LLVMValueRef tes_v;

        LLVMValueRef gsvs_ring_stride;
        LLVMValueRef gsvs_num_entries;
        LLVMValueRef gs2vs_offset;
        LLVMValueRef gs_wave_id;
        LLVMValueRef gs_vtx_offset[6];
        LLVMValueRef gs_prim_id, gs_invocation_id;

        LLVMValueRef esgs_ring;
        LLVMValueRef gsvs_ring;
        LLVMValueRef hs_ring_tess_offchip;
        LLVMValueRef hs_ring_tess_factor;

        LLVMValueRef prim_mask;
        LLVMValueRef sample_positions;
        LLVMValueRef persp_sample, persp_center, persp_centroid;
        LLVMValueRef linear_sample, linear_center, linear_centroid;
        LLVMValueRef front_face;
        LLVMValueRef ancillary;
        LLVMValueRef sample_coverage;
        LLVMValueRef frag_pos[4];

        LLVMBasicBlockRef continue_block;
        LLVMBasicBlockRef break_block;

        LLVMTypeRef i1;
        LLVMTypeRef i8;
        LLVMTypeRef i16;
        LLVMTypeRef i32;
        LLVMTypeRef i64;
        LLVMTypeRef v2i32;
        LLVMTypeRef v3i32;
        LLVMTypeRef v4i32;
        LLVMTypeRef v8i32;
        LLVMTypeRef f64;
        LLVMTypeRef f32;
        LLVMTypeRef f16;
        LLVMTypeRef v2f32;
        LLVMTypeRef v4f32;
        LLVMTypeRef v16i8;
        LLVMTypeRef voidt;

        LLVMValueRef i1true;
        LLVMValueRef i1false;
        LLVMValueRef i32zero;
        LLVMValueRef i32one;
        LLVMValueRef f32zero;
        LLVMValueRef f32one;
        LLVMValueRef v4f32empty;

        unsigned uniform_md_kind;
        LLVMValueRef empty_md;
        gl_shader_stage stage;

        LLVMValueRef lds;
        LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
        LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];

        LLVMValueRef shared_memory;
        uint64_t input_mask;
        uint64_t output_mask;
        int num_locals;
        LLVMValueRef *locals;
        bool has_ddxy;
        uint8_t num_output_clips;
        uint8_t num_output_culls;

        bool has_ds_bpermute;

        bool is_gs_copy_shader;
        LLVMValueRef gs_next_vertex;
        unsigned gs_max_out_vertices;

        unsigned tes_primitive_mode;
        uint64_t tess_outputs_written;
        uint64_t tess_patch_outputs_written;
};

static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
                                     nir_deref_var *deref,
                                     enum desc_type desc_type);
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
        return (index * 4) + chan;
}

static unsigned shader_io_get_unique_index(gl_varying_slot slot)
{
        /* handle patch indices separately */
        if (slot == VARYING_SLOT_TESS_LEVEL_OUTER)
                return 0;
        if (slot == VARYING_SLOT_TESS_LEVEL_INNER)
                return 1;
        if (slot >= VARYING_SLOT_PATCH0 && slot <= VARYING_SLOT_TESS_MAX)
                return 2 + (slot - VARYING_SLOT_PATCH0);

        if (slot == VARYING_SLOT_POS)
                return 0;
        if (slot == VARYING_SLOT_PSIZ)
                return 1;
        if (slot == VARYING_SLOT_CLIP_DIST0)
                return 2;
        /* 3 is reserved for clip dist as well */
        if (slot >= VARYING_SLOT_VAR0 && slot <= VARYING_SLOT_VAR31)
                return 4 + (slot - VARYING_SLOT_VAR0);
        unreachable("illegal slot in get unique index\n");
}
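
/* Worked example of the mapping above: TESS_LEVEL_OUTER -> 0,
 * TESS_LEVEL_INNER -> 1 and PATCH0..n -> 2..2+n on the per-patch side;
 * POS -> 0, PSIZ -> 1, CLIP_DIST0/1 -> 2/3 and VAR0..31 -> 4..35 on the
 * per-vertex side. The two numberings may overlap because per-patch and
 * per-vertex outputs live in separate regions (see the LDS layout comment
 * further down).
 */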

static unsigned llvm_get_type_size(LLVMTypeRef type)
{
        LLVMTypeKind kind = LLVMGetTypeKind(type);

        switch (kind) {
        case LLVMIntegerTypeKind:
                return LLVMGetIntTypeWidth(type) / 8;
        case LLVMFloatTypeKind:
                return 4;
        case LLVMPointerTypeKind:
                return 8;
        case LLVMVectorTypeKind:
                return LLVMGetVectorSize(type) *
                       llvm_get_type_size(LLVMGetElementType(type));
        default:
                assert(0);
                return 0;
        }
}

static void set_llvm_calling_convention(LLVMValueRef func,
                                        gl_shader_stage stage)
{
        enum radeon_llvm_calling_convention calling_conv;

        switch (stage) {
        case MESA_SHADER_VERTEX:
        case MESA_SHADER_TESS_CTRL:
        case MESA_SHADER_TESS_EVAL:
                calling_conv = RADEON_LLVM_AMDGPU_VS;
                break;
        case MESA_SHADER_GEOMETRY:
                calling_conv = RADEON_LLVM_AMDGPU_GS;
                break;
        case MESA_SHADER_FRAGMENT:
                calling_conv = RADEON_LLVM_AMDGPU_PS;
                break;
        case MESA_SHADER_COMPUTE:
                calling_conv = RADEON_LLVM_AMDGPU_CS;
                break;
        default:
                unreachable("Unhandled shader type");
        }

        LLVMSetFunctionCallConv(func, calling_conv);
}

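/* A sketch of the argument convention set up below (based on how this code
 * uses the attributes, not on AMDGPU backend documentation): the first
 * sgpr_params arguments are marked inreg so they arrive in SGPRs,
 * descriptor-table pointers are additionally marked byval/dereferenceable,
 * and everything after sgpr_params arrives in VGPRs.
 */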
static LLVMValueRef
create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
                     LLVMBuilderRef builder, LLVMTypeRef *return_types,
                     unsigned num_return_elems, LLVMTypeRef *param_types,
                     unsigned param_count, unsigned array_params_mask,
                     unsigned sgpr_params, bool unsafe_math)
{
        LLVMTypeRef main_function_type, ret_type;
        LLVMBasicBlockRef main_function_body;

        if (num_return_elems)
                ret_type = LLVMStructTypeInContext(ctx, return_types,
                                                   num_return_elems, true);
        else
                ret_type = LLVMVoidTypeInContext(ctx);

        /* Setup the function */
        main_function_type =
            LLVMFunctionType(ret_type, param_types, param_count, 0);
        LLVMValueRef main_function =
            LLVMAddFunction(module, "main", main_function_type);
        main_function_body =
            LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
        LLVMPositionBuilderAtEnd(builder, main_function_body);

        LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
        for (unsigned i = 0; i < sgpr_params; ++i) {
                if (array_params_mask & (1 << i)) {
                        LLVMValueRef P = LLVMGetParam(main_function, i);
                        ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_BYVAL);
                        ac_add_attr_dereferenceable(P, UINT64_MAX);
                }
                else {
                        ac_add_function_attr(ctx, main_function, i + 1, AC_FUNC_ATTR_INREG);
                }
        }

        if (unsafe_math) {
                /* These were copied from some LLVM test. */
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "less-precise-fpmad",
                                                   "true");
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "no-infs-fp-math",
                                                   "true");
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "no-nans-fp-math",
                                                   "true");
                LLVMAddTargetDependentFunctionAttr(main_function,
                                                   "unsafe-fp-math",
                                                   "true");
        }
        return main_function;
}

static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
{
        return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
                               CONST_ADDR_SPACE);
}

static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
                                          int idx,
                                          LLVMTypeRef type)
{
        LLVMValueRef offset;
        LLVMValueRef ptr;
        int addr_space;

        offset = LLVMConstInt(ctx->i32, idx * 16, false);

        ptr = ctx->shared_memory;
        ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
        addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
        ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
        return ptr;
}

static LLVMTypeRef to_integer_type_scalar(struct nir_to_llvm_context *ctx, LLVMTypeRef t)
{
        if (t == ctx->f16 || t == ctx->i16)
                return ctx->i16;
        else if (t == ctx->f32 || t == ctx->i32)
                return ctx->i32;
        else if (t == ctx->f64 || t == ctx->i64)
                return ctx->i64;
        else
                unreachable("Unhandled integer size");
}

static LLVMTypeRef to_integer_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t)
{
        if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
                LLVMTypeRef elem_type = LLVMGetElementType(t);
                return LLVMVectorType(to_integer_type_scalar(ctx, elem_type),
                                      LLVMGetVectorSize(t));
        }
        return to_integer_type_scalar(ctx, t);
}

static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
{
        LLVMTypeRef type = LLVMTypeOf(v);
        return LLVMBuildBitCast(ctx->builder, v, to_integer_type(ctx, type), "");
}

static LLVMTypeRef to_float_type_scalar(struct nir_to_llvm_context *ctx, LLVMTypeRef t)
{
        if (t == ctx->i16 || t == ctx->f16)
                return ctx->f16;
        else if (t == ctx->i32 || t == ctx->f32)
                return ctx->f32;
        else if (t == ctx->i64 || t == ctx->f64)
                return ctx->f64;
        else
                unreachable("Unhandled float size");
}

static LLVMTypeRef to_float_type(struct nir_to_llvm_context *ctx, LLVMTypeRef t)
{
        if (LLVMGetTypeKind(t) == LLVMVectorTypeKind) {
                LLVMTypeRef elem_type = LLVMGetElementType(t);
                return LLVMVectorType(to_float_type_scalar(ctx, elem_type),
                                      LLVMGetVectorSize(t));
        }
        return to_float_type_scalar(ctx, t);
}

static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
{
        LLVMTypeRef type = LLVMTypeOf(v);
        return LLVMBuildBitCast(ctx->builder, v, to_float_type(ctx, type), "");
}

static int get_elem_bits(struct nir_to_llvm_context *ctx, LLVMTypeRef type)
{
        if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
                type = LLVMGetElementType(type);

        if (LLVMGetTypeKind(type) == LLVMIntegerTypeKind)
                return LLVMGetIntTypeWidth(type);

        if (type == ctx->f16)
                return 16;
        if (type == ctx->f32)
                return 32;
        if (type == ctx->f64)
                return 64;

        unreachable("Unhandled type kind in get_elem_bits");
}

static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
                                 LLVMValueRef param, unsigned rshift,
                                 unsigned bitwidth)
{
        LLVMValueRef value = param;
        if (rshift)
                value = LLVMBuildLShr(ctx->builder, value,
                                      LLVMConstInt(ctx->i32, rshift, false), "");

        if (rshift + bitwidth < 32) {
                unsigned mask = (1 << bitwidth) - 1;
                value = LLVMBuildAnd(ctx->builder, value,
                                     LLVMConstInt(ctx->i32, mask, false), "");
        }
        return value;
}
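
/* Worked example: get_rel_patch_id() below calls
 * unpack_param(ctx, ctx->tcs_rel_ids, 0, 8) to read bits [7:0] of the
 * rel_ids SGPR -- no shift, then an AND with (1 << 8) - 1 = 0xff.
 */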

static LLVMValueRef get_rel_patch_id(struct nir_to_llvm_context *ctx)
{
        switch (ctx->stage) {
        case MESA_SHADER_TESS_CTRL:
                return unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
        case MESA_SHADER_TESS_EVAL:
                return ctx->tes_rel_patch_id;
        default:
                unreachable("Illegal stage");
        }
}

/* Tessellation shaders pass outputs to the next shader using LDS.
 *
 * LS outputs = TCS inputs
 * TCS outputs = TES inputs
 *
 * The LDS layout is:
 * - TCS inputs for patch 0
 * - TCS inputs for patch 1
 * - TCS inputs for patch 2 = get_tcs_in_current_patch_offset (if RelPatchID==2)
 * - ...
 * - TCS outputs for patch 0 = get_tcs_out_patch0_offset
 * - Per-patch TCS outputs for patch 0 = get_tcs_out_patch0_patch_data_offset
 * - TCS outputs for patch 1
 * - Per-patch TCS outputs for patch 1
 * - TCS outputs for patch 2 = get_tcs_out_current_patch_offset (if RelPatchID==2)
 * - Per-patch TCS outputs for patch 2 = get_tcs_out_current_patch_data_offset (if RelPatchID==2)
 * - ...
 *
 * All three shaders VS(LS), TCS, TES share the same LDS space.
 */
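
/* A worked example of the helpers below (units left abstract, since they
 * depend on how the layout SGPRs are packed): with an input patch stride
 * S_in and an output patch stride S_out, for RelPatchID == p they compute
 *   tcs_in_current_patch_offset       = p * S_in
 *   tcs_out_current_patch_offset      = patch0_offset + p * S_out
 *   tcs_out_current_patch_data_offset = patch0_data_offset + p * S_out
 */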
static LLVMValueRef
get_tcs_in_patch_stride(struct nir_to_llvm_context *ctx)
{
        if (ctx->stage == MESA_SHADER_VERTEX)
                return unpack_param(ctx, ctx->ls_out_layout, 0, 13);
        else if (ctx->stage == MESA_SHADER_TESS_CTRL)
                return unpack_param(ctx, ctx->tcs_in_layout, 0, 13);
        else {
                assert(0);
                return NULL;
        }
}

static LLVMValueRef
get_tcs_out_patch_stride(struct nir_to_llvm_context *ctx)
{
        return unpack_param(ctx, ctx->tcs_out_layout, 0, 13);
}

static LLVMValueRef
get_tcs_out_patch0_offset(struct nir_to_llvm_context *ctx)
{
        return LLVMBuildMul(ctx->builder,
                            unpack_param(ctx, ctx->tcs_out_offsets, 0, 16),
                            LLVMConstInt(ctx->i32, 4, false), "");
}

static LLVMValueRef
get_tcs_out_patch0_patch_data_offset(struct nir_to_llvm_context *ctx)
{
        return LLVMBuildMul(ctx->builder,
                            unpack_param(ctx, ctx->tcs_out_offsets, 16, 16),
                            LLVMConstInt(ctx->i32, 4, false), "");
}

static LLVMValueRef
get_tcs_in_current_patch_offset(struct nir_to_llvm_context *ctx)
{
        LLVMValueRef patch_stride = get_tcs_in_patch_stride(ctx);
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

        return LLVMBuildMul(ctx->builder, patch_stride, rel_patch_id, "");
}

static LLVMValueRef
get_tcs_out_current_patch_offset(struct nir_to_llvm_context *ctx)
{
        LLVMValueRef patch0_offset = get_tcs_out_patch0_offset(ctx);
        LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

        return LLVMBuildAdd(ctx->builder, patch0_offset,
                            LLVMBuildMul(ctx->builder, patch_stride,
                                         rel_patch_id, ""),
                            "");
}

static LLVMValueRef
get_tcs_out_current_patch_data_offset(struct nir_to_llvm_context *ctx)
{
        LLVMValueRef patch0_patch_data_offset =
                get_tcs_out_patch0_patch_data_offset(ctx);
        LLVMValueRef patch_stride = get_tcs_out_patch_stride(ctx);
        LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);

        return LLVMBuildAdd(ctx->builder, patch0_patch_data_offset,
                            LLVMBuildMul(ctx->builder, patch_stride,
                                         rel_patch_id, ""),
                            "");
}

static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
{
        ud_info->sgpr_idx = sgpr_idx;
        ud_info->num_sgprs = num_sgprs;
        ud_info->indirect = false;
        ud_info->indirect_offset = 0;
}

static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
                                         int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
{
        set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
}

#if 0
static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
                                           uint32_t indirect_offset)
{
        ud_info->sgpr_idx = sgpr_idx;
        ud_info->num_sgprs = num_sgprs;
        ud_info->indirect = true;
        ud_info->indirect_offset = indirect_offset;
}
#endif

static void create_function(struct nir_to_llvm_context *ctx)
{
        LLVMTypeRef arg_types[23];
        unsigned arg_idx = 0;
        unsigned array_params_mask = 0;
        unsigned sgpr_count = 0, user_sgpr_count;
        unsigned i;
        unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
        unsigned user_sgpr_idx;
        bool need_push_constants;
        bool need_ring_offsets = false;

        /* until we sort out scratch/global buffers always assign ring offsets for gs/vs/es */
        if (ctx->stage == MESA_SHADER_GEOMETRY ||
            ctx->stage == MESA_SHADER_VERTEX ||
            ctx->stage == MESA_SHADER_TESS_CTRL ||
            ctx->stage == MESA_SHADER_TESS_EVAL ||
            ctx->is_gs_copy_shader)
                need_ring_offsets = true;

        need_push_constants = true;
        if (!ctx->options->layout)
                need_push_constants = false;
        else if (!ctx->options->layout->push_constant_size &&
                 !ctx->options->layout->dynamic_offset_count)
                need_push_constants = false;

        if (need_ring_offsets && !ctx->options->supports_spill) {
                arg_types[arg_idx++] = const_array(ctx->v16i8, 8); /* address of rings */
        }

        /* 1 for each descriptor set */
        for (unsigned i = 0; i < num_sets; ++i) {
                if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
                        array_params_mask |= (1 << arg_idx);
                        arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
                }
        }

        if (need_push_constants) {
                /* 1 for push constants and dynamic descriptors */
                array_params_mask |= (1 << arg_idx);
                arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
        }

        switch (ctx->stage) {
        case MESA_SHADER_COMPUTE:
                arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
                user_sgpr_count = arg_idx;
                arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
                arg_types[arg_idx++] = ctx->i32;
                sgpr_count = arg_idx;

                arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
                break;
        case MESA_SHADER_VERTEX:
                if (!ctx->is_gs_copy_shader) {
                        arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
                        arg_types[arg_idx++] = ctx->i32; // base vertex
                        arg_types[arg_idx++] = ctx->i32; // start instance
                        arg_types[arg_idx++] = ctx->i32; // draw index
                }
                user_sgpr_count = arg_idx;
                if (ctx->options->key.vs.as_es)
                        arg_types[arg_idx++] = ctx->i32; // es2gs offset
                else if (ctx->options->key.vs.as_ls) {
                        arg_types[arg_idx++] = ctx->i32; // ls out layout
                        user_sgpr_count++;
                }
                sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->i32; // vertex id
                if (!ctx->is_gs_copy_shader) {
                        arg_types[arg_idx++] = ctx->i32; // rel auto id
                        arg_types[arg_idx++] = ctx->i32; // vs prim id
                        arg_types[arg_idx++] = ctx->i32; // instance id
                }
                break;
        case MESA_SHADER_TESS_CTRL:
                arg_types[arg_idx++] = ctx->i32; // tcs offchip layout
                arg_types[arg_idx++] = ctx->i32; // tcs out offsets
                arg_types[arg_idx++] = ctx->i32; // tcs out layout
                arg_types[arg_idx++] = ctx->i32; // tcs in layout
                user_sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->i32; // param oc lds
                arg_types[arg_idx++] = ctx->i32; // tess factor offset
                sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->i32; // patch id
                arg_types[arg_idx++] = ctx->i32; // rel ids
                break;
        case MESA_SHADER_TESS_EVAL:
                arg_types[arg_idx++] = ctx->i32; // tcs offchip layout
                user_sgpr_count = arg_idx;
                if (ctx->options->key.tes.as_es) {
                        arg_types[arg_idx++] = ctx->i32; // OC LDS
                        arg_types[arg_idx++] = ctx->i32; //
                        arg_types[arg_idx++] = ctx->i32; // es2gs offset
                } else {
                        arg_types[arg_idx++] = ctx->i32; //
                        arg_types[arg_idx++] = ctx->i32; // OC LDS
                }
                sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->f32; // tes_u
                arg_types[arg_idx++] = ctx->f32; // tes_v
                arg_types[arg_idx++] = ctx->i32; // tes rel patch id
                arg_types[arg_idx++] = ctx->i32; // tes patch id
                break;
        case MESA_SHADER_GEOMETRY:
                arg_types[arg_idx++] = ctx->i32; // gsvs stride
                arg_types[arg_idx++] = ctx->i32; // gsvs num entries
                user_sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->i32; // gs2vs offset
                arg_types[arg_idx++] = ctx->i32; // wave id
                sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->i32; // vtx0
                arg_types[arg_idx++] = ctx->i32; // vtx1
                arg_types[arg_idx++] = ctx->i32; // prim id
                arg_types[arg_idx++] = ctx->i32; // vtx2
                arg_types[arg_idx++] = ctx->i32; // vtx3
                arg_types[arg_idx++] = ctx->i32; // vtx4
                arg_types[arg_idx++] = ctx->i32; // vtx5
                arg_types[arg_idx++] = ctx->i32; // GS instance id
                break;
        case MESA_SHADER_FRAGMENT:
                arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
                user_sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->i32; /* prim mask */
                sgpr_count = arg_idx;
                arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
                arg_types[arg_idx++] = ctx->v2i32; /* persp center */
                arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
                arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
                arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
                arg_types[arg_idx++] = ctx->v2i32; /* linear center */
                arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
                arg_types[arg_idx++] = ctx->f32; /* line stipple tex */
                arg_types[arg_idx++] = ctx->f32; /* pos x float */
                arg_types[arg_idx++] = ctx->f32; /* pos y float */
                arg_types[arg_idx++] = ctx->f32; /* pos z float */
                arg_types[arg_idx++] = ctx->f32; /* pos w float */
                arg_types[arg_idx++] = ctx->i32; /* front face */
                arg_types[arg_idx++] = ctx->i32; /* ancillary */
                arg_types[arg_idx++] = ctx->i32; /* sample coverage */
                arg_types[arg_idx++] = ctx->i32; /* fixed pt */
                break;
        default:
                unreachable("Shader stage not implemented");
        }

        ctx->main_function = create_llvm_function(
            ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
            arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
        set_llvm_calling_convention(ctx->main_function, ctx->stage);

        ctx->shader_info->num_input_sgprs = 0;
        ctx->shader_info->num_input_vgprs = 0;

        ctx->shader_info->num_user_sgprs = ctx->options->supports_spill ? 2 : 0;
        for (i = 0; i < user_sgpr_count; i++)
                ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;

        ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
        for (; i < sgpr_count; i++)
                ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;

        if (ctx->stage != MESA_SHADER_FRAGMENT)
                for (; i < arg_idx; ++i)
                        ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;

        arg_idx = 0;
        user_sgpr_idx = 0;

        if (ctx->options->supports_spill || need_ring_offsets) {
                set_userdata_location_shader(ctx, AC_UD_SCRATCH_RING_OFFSETS, user_sgpr_idx, 2);
                user_sgpr_idx += 2;
                if (ctx->options->supports_spill) {
                        ctx->ring_offsets = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.implicit.buffer.ptr",
                                                               LLVMPointerType(ctx->i8, CONST_ADDR_SPACE),
                                                               NULL, 0, AC_FUNC_ATTR_READNONE);
                        ctx->ring_offsets = LLVMBuildBitCast(ctx->builder, ctx->ring_offsets,
                                                             const_array(ctx->v16i8, 8), "");
                } else
                        ctx->ring_offsets = LLVMGetParam(ctx->main_function, arg_idx++);
        }

        for (unsigned i = 0; i < num_sets; ++i) {
                if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
                        set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
                        user_sgpr_idx += 2;
                        ctx->descriptor_sets[i] =
                                LLVMGetParam(ctx->main_function, arg_idx++);
                } else
                        ctx->descriptor_sets[i] = NULL;
        }

        if (need_push_constants) {
                ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
                set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
                user_sgpr_idx += 2;
        }

        switch (ctx->stage) {
        case MESA_SHADER_COMPUTE:
                set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
                user_sgpr_idx += 3;
                ctx->num_work_groups =
                        LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->workgroup_ids =
                        LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tg_size =
                        LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->local_invocation_ids =
                        LLVMGetParam(ctx->main_function, arg_idx++);
                break;
        case MESA_SHADER_VERTEX:
                if (!ctx->is_gs_copy_shader) {
                        set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
                        user_sgpr_idx += 2;
                        ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
                        set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 3);
                        user_sgpr_idx += 3;
                        ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
                        ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
                        ctx->draw_index = LLVMGetParam(ctx->main_function, arg_idx++);
                }
                if (ctx->options->key.vs.as_es)
                        ctx->es2gs_offset = LLVMGetParam(ctx->main_function, arg_idx++);
                else if (ctx->options->key.vs.as_ls) {
                        set_userdata_location_shader(ctx, AC_UD_VS_LS_TCS_IN_LAYOUT, user_sgpr_idx, 1);
                        user_sgpr_idx += 1;
                        ctx->ls_out_layout = LLVMGetParam(ctx->main_function, arg_idx++);
                }
                ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
                if (!ctx->is_gs_copy_shader) {
                        ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
                        ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
                        ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
                }
                break;
        case MESA_SHADER_TESS_CTRL:
                set_userdata_location_shader(ctx, AC_UD_TCS_OFFCHIP_LAYOUT, user_sgpr_idx, 4);
                user_sgpr_idx += 4;
                ctx->tcs_offchip_layout = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tcs_out_offsets = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tcs_out_layout = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tcs_in_layout = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->oc_lds = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tess_factor_offset = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tcs_patch_id = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tcs_rel_ids = LLVMGetParam(ctx->main_function, arg_idx++);
                break;
        case MESA_SHADER_TESS_EVAL:
                set_userdata_location_shader(ctx, AC_UD_TES_OFFCHIP_LAYOUT, user_sgpr_idx, 1);
                user_sgpr_idx += 1;
                ctx->tcs_offchip_layout = LLVMGetParam(ctx->main_function, arg_idx++);
                if (ctx->options->key.tes.as_es) {
                        ctx->oc_lds = LLVMGetParam(ctx->main_function, arg_idx++);
                        arg_idx++;
                        ctx->es2gs_offset = LLVMGetParam(ctx->main_function, arg_idx++);
                } else {
                        arg_idx++;
                        ctx->oc_lds = LLVMGetParam(ctx->main_function, arg_idx++);
                }
                ctx->tes_u = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tes_v = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tes_rel_patch_id = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->tes_patch_id = LLVMGetParam(ctx->main_function, arg_idx++);
                break;
        case MESA_SHADER_GEOMETRY:
                set_userdata_location_shader(ctx, AC_UD_GS_VS_RING_STRIDE_ENTRIES, user_sgpr_idx, 2);
                user_sgpr_idx += 2;
                ctx->gsvs_ring_stride = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gsvs_num_entries = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs2vs_offset = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_wave_id = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_vtx_offset[0] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_vtx_offset[1] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_vtx_offset[2] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_vtx_offset[3] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_vtx_offset[4] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_vtx_offset[5] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->gs_invocation_id = LLVMGetParam(ctx->main_function, arg_idx++);
                break;
        case MESA_SHADER_FRAGMENT:
                set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
                user_sgpr_idx += 2;
                ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
                arg_idx++; /* persp pull model */
                ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
                arg_idx++; /* line stipple */
                ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
                ctx->sample_coverage = LLVMGetParam(ctx->main_function, arg_idx++);
                break;
        default:
                unreachable("Shader stage not implemented");
        }
}
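
/* Sizing note (worked example): user SGPRs are counted in dwords via
 * llvm_get_type_size() / 4, so a descriptor-set pointer (8 bytes) costs
 * 2 SGPRs -- matching the num_sgprs = 2 passed to set_userdata_location()
 * above -- while a single i32 layout word costs 1.
 */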

static void setup_types(struct nir_to_llvm_context *ctx)
{
        LLVMValueRef args[4];

        ctx->voidt = LLVMVoidTypeInContext(ctx->context);
        ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
        ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
        ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
        ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
        ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
        ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
        ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
        ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
        ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
        ctx->f32 = LLVMFloatTypeInContext(ctx->context);
        ctx->f16 = LLVMHalfTypeInContext(ctx->context);
        ctx->f64 = LLVMDoubleTypeInContext(ctx->context);
        ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
        ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
        ctx->v16i8 = LLVMVectorType(ctx->i8, 16);

        ctx->i1false = LLVMConstInt(ctx->i1, 0, false);
        ctx->i1true = LLVMConstInt(ctx->i1, 1, false);
        ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
        ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
        ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
        ctx->f32one = LLVMConstReal(ctx->f32, 1.0);

        args[0] = ctx->f32zero;
        args[1] = ctx->f32zero;
        args[2] = ctx->f32zero;
        args[3] = ctx->f32one;
        ctx->v4f32empty = LLVMConstVector(args, 4);

        ctx->uniform_md_kind =
            LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
        ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);

        args[0] = LLVMConstReal(ctx->f32, 2.5);
}

static int get_llvm_num_components(LLVMValueRef value)
{
        LLVMTypeRef type = LLVMTypeOf(value);
        unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
                                      ? LLVMGetVectorSize(type)
                                      : 1;
        return num_components;
}

static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
                                      LLVMValueRef value,
                                      int index)
{
        int count = get_llvm_num_components(value);

        assert(index < count);
        if (count == 1)
                return value;

        return LLVMBuildExtractElement(ctx->builder, value,
                                       LLVMConstInt(ctx->i32, index, false), "");
}

static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
                                LLVMValueRef value, unsigned count)
{
        unsigned num_components = get_llvm_num_components(value);
        if (count == num_components)
                return value;

        LLVMValueRef masks[] = {
            LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
            LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};

        if (count == 1)
                return LLVMBuildExtractElement(ctx->builder, value, masks[0],
                                               "");

        LLVMValueRef swizzle = LLVMConstVector(masks, count);
        return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
}

static void
build_store_values_extended(struct nir_to_llvm_context *ctx,
                            LLVMValueRef *values,
                            unsigned value_count,
                            unsigned value_stride,
                            LLVMValueRef vec)
{
        LLVMBuilderRef builder = ctx->builder;
        unsigned i;

        if (value_count == 1) {
                LLVMBuildStore(builder, vec, values[0]);
                return;
        }

        for (i = 0; i < value_count; i++) {
                LLVMValueRef ptr = values[i * value_stride];
                LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
                LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
                LLVMBuildStore(builder, value, ptr);
        }
}

static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
                                nir_ssa_def *def)
{
        LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
        if (def->num_components > 1) {
                type = LLVMVectorType(type, def->num_components);
        }
        return type;
}

static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
{
        assert(src.is_ssa);
        struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
        return (LLVMValueRef)entry->data;
}


static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
                                   struct nir_block *b)
{
        struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
        return (LLVMBasicBlockRef)entry->data;
}

static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
                                nir_alu_src src,
                                unsigned num_components)
{
        LLVMValueRef value = get_src(ctx, src.src);
        bool need_swizzle = false;

        assert(value);
        LLVMTypeRef type = LLVMTypeOf(value);
        unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
                                      ? LLVMGetVectorSize(type)
                                      : 1;

        for (unsigned i = 0; i < num_components; ++i) {
                assert(src.swizzle[i] < src_components);
                if (src.swizzle[i] != i)
                        need_swizzle = true;
        }

        if (need_swizzle || num_components != src_components) {
                LLVMValueRef masks[] = {
                    LLVMConstInt(ctx->i32, src.swizzle[0], false),
                    LLVMConstInt(ctx->i32, src.swizzle[1], false),
                    LLVMConstInt(ctx->i32, src.swizzle[2], false),
                    LLVMConstInt(ctx->i32, src.swizzle[3], false)};

                if (src_components > 1 && num_components == 1) {
                        value = LLVMBuildExtractElement(ctx->builder, value,
                                                        masks[0], "");
                } else if (src_components == 1 && num_components > 1) {
                        LLVMValueRef values[] = {value, value, value, value};
                        value = ac_build_gather_values(&ctx->ac, values, num_components);
                } else {
                        LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
                        value = LLVMBuildShuffleVector(ctx->builder, value, value,
                                                       swizzle, "");
                }
        }
        assert(!src.negate);
        assert(!src.abs);
        return value;
}

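/* NIR booleans here are 32-bit, with true == 0xffffffff and false == 0 (an
 * assumption about the NIR version this file targets), so the comparison
 * helpers below lower to an icmp/fcmp followed by a select between ~0 and 0.
 */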
static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
                                 LLVMIntPredicate pred, LLVMValueRef src0,
                                 LLVMValueRef src1)
{
        LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
        return LLVMBuildSelect(ctx->builder, result,
                               LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
                               LLVMConstInt(ctx->i32, 0, false), "");
}

static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
                                   LLVMRealPredicate pred, LLVMValueRef src0,
                                   LLVMValueRef src1)
{
        LLVMValueRef result;
        src0 = to_float(ctx, src0);
        src1 = to_float(ctx, src1);
        result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
        return LLVMBuildSelect(ctx->builder, result,
                               LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
                               LLVMConstInt(ctx->i32, 0, false), "");
}

static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
                                         const char *intrin,
                                         LLVMTypeRef result_type,
                                         LLVMValueRef src0)
{
        char name[64];
        LLVMValueRef params[] = {
                to_float(ctx, src0),
        };

        sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type));
        return ac_build_intrinsic(&ctx->ac, name, result_type, params, 1, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
                                         const char *intrin,
                                         LLVMTypeRef result_type,
                                         LLVMValueRef src0, LLVMValueRef src1)
{
        char name[64];
        LLVMValueRef params[] = {
                to_float(ctx, src0),
                to_float(ctx, src1),
        };

        sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type));
        return ac_build_intrinsic(&ctx->ac, name, result_type, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
                                         const char *intrin,
                                         LLVMTypeRef result_type,
                                         LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
        char name[64];
        LLVMValueRef params[] = {
                to_float(ctx, src0),
                to_float(ctx, src1),
                to_float(ctx, src2),
        };

        sprintf(name, "%s.f%d", intrin, get_elem_bits(ctx, result_type));
        return ac_build_intrinsic(&ctx->ac, name, result_type, params, 3, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
                               LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
{
        LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
                                       ctx->i32zero, "");
        return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
}

static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
                                  LLVMValueRef src0)
{
        LLVMValueRef params[2] = {
                src0,

                /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
                 * add special code to check for x=0. The reason is that
                 * the LLVM behavior for x=0 is different from what we
                 * need here.
                 *
                 * The hardware already implements the correct behavior.
                 */
                LLVMConstInt(ctx->i32, 1, false),
        };
        return ac_build_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
}

static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
                                   LLVMValueRef src0)
{
        return ac_build_imsb(&ctx->ac, src0, ctx->i32);
}

static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
                                   LLVMValueRef src0)
{
        return ac_build_umsb(&ctx->ac, src0, ctx->i32);
}

static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
                                    LLVMIntPredicate pred,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
        return LLVMBuildSelect(ctx->builder,
                               LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
                               src0,
                               src1, "");
}

static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
                              LLVMValueRef src0)
{
        return emit_minmax_int(ctx, LLVMIntSGT, src0,
                               LLVMBuildNeg(ctx->builder, src0, ""));
}

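/* sign() via two selects: x > 0 becomes 1.0; whatever remains goes through a
 * second compare where anything not >= 0.0 (negative values, NaN) becomes
 * -1.0, and +/-0.0 passes through unchanged.
 */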
static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
                               LLVMValueRef src0)
{
        LLVMValueRef cmp, val;

        cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
        val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
        cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
        val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
        return val;
}

static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
                               LLVMValueRef src0)
{
        LLVMValueRef cmp, val;

        cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
        val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
        cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
        val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
        return val;
}

static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
                                LLVMValueRef src0)
{
        const char *intr = "llvm.floor.f32";
        LLVMValueRef fsrc0 = to_float(ctx, src0);
        LLVMValueRef params[] = {
                fsrc0,
        };
        LLVMValueRef floor = ac_build_intrinsic(&ctx->ac, intr,
                                                ctx->f32, params, 1,
                                                AC_FUNC_ATTR_READNONE);
        return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
}

static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
                                    const char *intrin,
                                    LLVMValueRef src0, LLVMValueRef src1)
{
        LLVMTypeRef ret_type;
        LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
        LLVMValueRef res;
        LLVMValueRef params[] = { src0, src1 };
        ret_type = LLVMStructTypeInContext(ctx->context, types,
                                           2, true);

        res = ac_build_intrinsic(&ctx->ac, intrin, ret_type,
                                 params, 2, AC_FUNC_ATTR_READNONE);

        res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
        res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
        return res;
}

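/* b2f trick: the IEEE-754 bit pattern of 1.0f is 0x3f800000, so ANDing it
 * with a 0/~0 boolean yields the bits of 0.0f or 1.0f directly, without a
 * select.
 */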
static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
                             LLVMValueRef src0)
{
        return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
}

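/* Worked example for the high-multiply helpers below:
 * umul_high(0xffffffff, 0xffffffff) = (2^32 - 1)^2 >> 32. Since
 * (2^32 - 1)^2 = 0xfffffffe00000001, the result is 0xfffffffe. Both helpers
 * widen to i64, multiply, shift right by 32 and truncate.
 */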
static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
                                   LLVMValueRef src0, LLVMValueRef src1)
{
        LLVMValueRef dst64, result;
        src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
        src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");

        dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
        dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
        result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
        return result;
}

static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
                                   LLVMValueRef src0, LLVMValueRef src1)
{
        LLVMValueRef dst64, result;
        src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
        src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");

        dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
        dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
        result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
        return result;
}

static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
                                          bool is_signed,
                                          LLVMValueRef srcs[3])
{
        LLVMValueRef result;
        LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");

        result = ac_build_bfe(&ctx->ac, srcs[0], srcs[1], srcs[2], is_signed);
        result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
        return result;
}

static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
                                         LLVMValueRef src0, LLVMValueRef src1,
                                         LLVMValueRef src2, LLVMValueRef src3)
{
        LLVMValueRef bfi_args[3], result;

        bfi_args[0] = LLVMBuildShl(ctx->builder,
                                   LLVMBuildSub(ctx->builder,
                                                LLVMBuildShl(ctx->builder,
                                                             ctx->i32one,
                                                             src3, ""),
                                                ctx->i32one, ""),
                                   src2, "");
        bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
        bfi_args[2] = src0;

        LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");

        /* Calculate:
         * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2))
         * Use the right-hand side, which the LLVM backend can convert to V_BFI.
         */
        result = LLVMBuildXor(ctx->builder, bfi_args[2],
                              LLVMBuildAnd(ctx->builder, bfi_args[0],
                                           LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");

        result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
        return result;
}

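/* Packing example: pack_half_2x16(vec2(1.0, 2.0)) puts f16(1.0) = 0x3c00 in
 * the low half and f16(2.0) = 0x4000 in the high half, giving 0x40003c00.
 */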
static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
                                        LLVMValueRef src0)
{
        LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
        int i;
        LLVMValueRef comp[2];

        src0 = to_float(ctx, src0);
        comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
        comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
        for (i = 0; i < 2; i++) {
                comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
                comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
                comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
        }

        comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
        comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");

        return comp[0];
}

static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
                                          LLVMValueRef src0)
{
        LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
        LLVMValueRef temps[2], result, val;
        int i;

        for (i = 0; i < 2; i++) {
                val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
                val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
                val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
                temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
        }

        result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
                                        ctx->i32zero, "");
        result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
                                        ctx->i32one, "");
        return result;
}

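/* Derivatives are computed by exchanging values between the pixels of a
 * quad: with the ds_bpermute instruction where the hardware provides it,
 * and otherwise by bouncing values through a 64-dword LDS scratch buffer
 * (one dword per lane).
 */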
static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
                              nir_op op,
                              LLVMValueRef src0)
{
        unsigned mask;
        int idx;
        LLVMValueRef result;
        ctx->has_ddxy = true;

        if (!ctx->lds && !ctx->has_ds_bpermute)
                ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
                                                       LLVMArrayType(ctx->i32, 64),
                                                       "ddxy_lds", LOCAL_ADDR_SPACE);

        if (op == nir_op_fddx_fine || op == nir_op_fddx)
                mask = AC_TID_MASK_LEFT;
        else if (op == nir_op_fddy_fine || op == nir_op_fddy)
                mask = AC_TID_MASK_TOP;
        else
                mask = AC_TID_MASK_TOP_LEFT;

        /* for DDX we want the next X pixel, for DDY the next Y pixel. */
        if (op == nir_op_fddx_fine ||
            op == nir_op_fddx_coarse ||
            op == nir_op_fddx)
                idx = 1;
        else
                idx = 2;

        result = ac_build_ddxy(&ctx->ac, ctx->has_ds_bpermute,
                               mask, idx, ctx->lds,
                               src0);
        return result;
}

/*
 * This takes an I,J coordinate pair and works out the X and Y derivatives.
 * It returns DDX(I), DDX(J), DDY(I), DDY(J).
 */
static LLVMValueRef emit_ddxy_interp(
        struct nir_to_llvm_context *ctx,
        LLVMValueRef interp_ij)
{
        LLVMValueRef result[4], a;
        unsigned i;

        for (i = 0; i < 2; i++) {
                a = LLVMBuildExtractElement(ctx->builder, interp_ij,
                                            LLVMConstInt(ctx->i32, i, false), "");
                result[i] = emit_ddxy(ctx, nir_op_fddx, a);
                result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
        }
        return ac_build_gather_values(&ctx->ac, result, 4);
}

static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
{
        LLVMValueRef src[4], result = NULL;
        unsigned num_components = instr->dest.dest.ssa.num_components;
        unsigned src_components;
        LLVMTypeRef def_type = get_def_type(ctx, &instr->dest.dest.ssa);

        assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
        switch (instr->op) {
        case nir_op_vec2:
        case nir_op_vec3:
        case nir_op_vec4:
                src_components = 1;
                break;
        case nir_op_pack_half_2x16:
                src_components = 2;
                break;
        case nir_op_unpack_half_2x16:
                src_components = 1;
                break;
        default:
                src_components = num_components;
                break;
        }
        for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
                src[i] = get_alu_src(ctx, instr->src[i], src_components);

        switch (instr->op) {
        case nir_op_fmov:
        case nir_op_imov:
                result = src[0];
                break;
        case nir_op_fneg:
                src[0] = to_float(ctx, src[0]);
                result = LLVMBuildFNeg(ctx->builder, src[0], "");
                break;
        case nir_op_ineg:
                result = LLVMBuildNeg(ctx->builder, src[0], "");
                break;
        case nir_op_inot:
                result = LLVMBuildNot(ctx->builder, src[0], "");
                break;
        case nir_op_iadd:
                result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_fadd:
                src[0] = to_float(ctx, src[0]);
                src[1] = to_float(ctx, src[1]);
                result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_fsub:
                src[0] = to_float(ctx, src[0]);
                src[1] = to_float(ctx, src[1]);
                result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_isub:
                result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_imul:
                result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_imod:
                result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_umod:
                result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_fmod:
                src[0] = to_float(ctx, src[0]);
                src[1] = to_float(ctx, src[1]);
                result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
                result = emit_intrin_1f_param(ctx, "llvm.floor",
                                              to_float_type(ctx, def_type), result);
                result = LLVMBuildFMul(ctx->builder, src[1], result, "");
                result = LLVMBuildFSub(ctx->builder, src[0], result, "");
                break;
        case nir_op_frem:
                src[0] = to_float(ctx, src[0]);
                src[1] = to_float(ctx, src[1]);
                result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_irem:
                result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_idiv:
                result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_udiv:
                result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_fmul:
                src[0] = to_float(ctx, src[0]);
                src[1] = to_float(ctx, src[1]);
                result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_fdiv:
                src[0] = to_float(ctx, src[0]);
                src[1] = to_float(ctx, src[1]);
                result = ac_build_fdiv(&ctx->ac, src[0], src[1]);
                break;
        case nir_op_frcp:
                src[0] = to_float(ctx, src[0]);
                result = ac_build_fdiv(&ctx->ac, ctx->f32one, src[0]);
                break;
        case nir_op_iand:
                result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_ior:
                result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_ixor:
                result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_ishl:
                result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_ishr:
                result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_ushr:
                result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
                break;
        case nir_op_ilt:
                result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
                break;
        case nir_op_ine:
                result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
                break;
        case nir_op_ieq:
                result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
                break;
        case nir_op_ige:
                result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
                break;
        case nir_op_ult:
                result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
                break;
        case nir_op_uge:
                result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
                break;
        case nir_op_feq:
                result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
                break;
        case nir_op_fne:
                result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
                break;
        case nir_op_flt:
                result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
                break;
        case nir_op_fge:
                result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
                break;
        case nir_op_fabs:
                result = emit_intrin_1f_param(ctx, "llvm.fabs",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_iabs:
                result = emit_iabs(ctx, src[0]);
                break;
        case nir_op_imax:
                result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
                break;
        case nir_op_imin:
                result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
                break;
        case nir_op_umax:
                result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
                break;
        case nir_op_umin:
                result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
                break;
        case nir_op_isign:
                result = emit_isign(ctx, src[0]);
                break;
        case nir_op_fsign:
                src[0] = to_float(ctx, src[0]);
                result = emit_fsign(ctx, src[0]);
                break;
        case nir_op_ffloor:
                result = emit_intrin_1f_param(ctx, "llvm.floor",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_ftrunc:
                result = emit_intrin_1f_param(ctx, "llvm.trunc",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_fceil:
                result = emit_intrin_1f_param(ctx, "llvm.ceil",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_fround_even:
                result = emit_intrin_1f_param(ctx, "llvm.rint",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_ffract:
                result = emit_ffract(ctx, src[0]);
                break;
        case nir_op_fsin:
                result = emit_intrin_1f_param(ctx, "llvm.sin",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_fcos:
                result = emit_intrin_1f_param(ctx, "llvm.cos",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_fsqrt:
                result = emit_intrin_1f_param(ctx, "llvm.sqrt",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_fexp2:
                result = emit_intrin_1f_param(ctx, "llvm.exp2",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_flog2:
                result = emit_intrin_1f_param(ctx, "llvm.log2",
                                              to_float_type(ctx, def_type), src[0]);
                break;
        case nir_op_frsq:
                result = emit_intrin_1f_param(ctx, "llvm.sqrt",
                                              to_float_type(ctx, def_type), src[0]);
                result = ac_build_fdiv(&ctx->ac, ctx->f32one, result);
                break;
        case nir_op_fpow:
                result = emit_intrin_2f_param(ctx, "llvm.pow",
                                              to_float_type(ctx, def_type), src[0], src[1]);
                break;
        case nir_op_fmax:
                result = emit_intrin_2f_param(ctx, "llvm.maxnum",
                                              to_float_type(ctx, def_type), src[0], src[1]);
                break;
        case nir_op_fmin:
                result = emit_intrin_2f_param(ctx, "llvm.minnum",
                                              to_float_type(ctx, def_type), src[0], src[1]);
                break;
        case nir_op_ffma:
                result = emit_intrin_3f_param(ctx, "llvm.fma",
                                              to_float_type(ctx, def_type), src[0], src[1], src[2]);
                break;
        case nir_op_ibitfield_extract:
                result = emit_bitfield_extract(ctx, true, src);
                break;
        case nir_op_ubitfield_extract:
                result = emit_bitfield_extract(ctx, false, src);
                break;
        case nir_op_bitfield_insert:
                result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
                break;
        case nir_op_bitfield_reverse:
                result = ac_build_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
                break;
        case nir_op_bit_count:
                result = ac_build_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
                break;
        case nir_op_vec2:
        case nir_op_vec3:
        case nir_op_vec4:
                for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
                        src[i] = to_integer(ctx, src[i]);
                result = ac_build_gather_values(&ctx->ac, src, num_components);
                break;
        case nir_op_f2i32:
        case nir_op_f2i64:
                src[0] = to_float(ctx, src[0]);
                result = LLVMBuildFPToSI(ctx->builder, src[0], def_type, "");
                break;
        case nir_op_f2u32:
        case nir_op_f2u64:
                src[0] = to_float(ctx, src[0]);
                result = LLVMBuildFPToUI(ctx->builder, src[0], def_type, "");
                break;
        case nir_op_i2f32:
        case nir_op_i2f64:
                result = LLVMBuildSIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), "");
                break;
        case nir_op_u2f32:
        case nir_op_u2f64:
                result = LLVMBuildUIToFP(ctx->builder, src[0], to_float_type(ctx, def_type), "");
                break;
        case nir_op_f2f64:
                result = LLVMBuildFPExt(ctx->builder, src[0], to_float_type(ctx, def_type), "");
                break;
        case nir_op_f2f32:
                result = LLVMBuildFPTrunc(ctx->builder, src[0], to_float_type(ctx, def_type), "");
                break;
        case nir_op_u2u32:
        case nir_op_u2u64:
                if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type))
                        result = LLVMBuildZExt(ctx->builder, src[0], def_type, "");
                else
                        result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
                break;
        case nir_op_i2i32:
        case nir_op_i2i64:
                if (get_elem_bits(ctx, LLVMTypeOf(src[0])) < get_elem_bits(ctx, def_type))
                        result = LLVMBuildSExt(ctx->builder, src[0], def_type, "");
                else
                        result = LLVMBuildTrunc(ctx->builder, src[0], def_type, "");
                break;
        case nir_op_bcsel:
                result = emit_bcsel(ctx, src[0], src[1], src[2]);
                break;
        case nir_op_find_lsb:
                result = emit_find_lsb(ctx, src[0]);
                break;
        case nir_op_ufind_msb:
                result = emit_ufind_msb(ctx, src[0]);
                break;
        case nir_op_ifind_msb:
                result = emit_ifind_msb(ctx, src[0]);
                break;
        case nir_op_uadd_carry:
                result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
                break;
        case nir_op_usub_borrow:
                result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
                break;
        case nir_op_b2f:
                result = emit_b2f(ctx, src[0]);
                break;
        case nir_op_fquantize2f16:
                src[0] = to_float(ctx, src[0]);
                result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
                /* need to convert back up to f32 */
                result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
                break;
        case nir_op_umul_high:
                result = emit_umul_high(ctx, src[0], src[1]);
                break;
        case nir_op_imul_high:
                result = emit_imul_high(ctx, src[0], src[1]);
                break;
        case nir_op_pack_half_2x16:
                result = emit_pack_half_2x16(ctx, src[0]);
                break;
        case nir_op_unpack_half_2x16:
                result = emit_unpack_half_2x16(ctx, src[0]);
                break;
        case nir_op_fddx:
        case nir_op_fddy:
        case nir_op_fddx_fine:
        case nir_op_fddy_fine:
        case nir_op_fddx_coarse:
        case nir_op_fddy_coarse:
                result = emit_ddxy(ctx, instr->op, src[0]);
                break;
        default:
                fprintf(stderr, "Unknown NIR alu instr: ");
                nir_print_instr(&instr->instr, stderr);
                fprintf(stderr, "\n");
                abort();
        }

        if (result) {
                assert(instr->dest.dest.is_ssa);
                result = to_integer(ctx, result);
                _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
                                        result);
        }
}
1746
1747 static void visit_load_const(struct nir_to_llvm_context *ctx,
1748 nir_load_const_instr *instr)
1749 {
1750 LLVMValueRef values[4], value = NULL;
1751 LLVMTypeRef element_type =
1752 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1753
1754 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1755 switch (instr->def.bit_size) {
1756 case 32:
1757 values[i] = LLVMConstInt(element_type,
1758 instr->value.u32[i], false);
1759 break;
1760 case 64:
1761 values[i] = LLVMConstInt(element_type,
1762 instr->value.u64[i], false);
1763 break;
1764 default:
1765 fprintf(stderr,
1766 "unsupported nir load_const bit_size: %d\n",
1767 instr->def.bit_size);
1768 abort();
1769 }
1770 }
1771 if (instr->def.num_components > 1) {
1772 value = LLVMConstVector(values, instr->def.num_components);
1773 } else
1774 value = values[0];
1775
1776 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1777 }
1778
1779 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1780 LLVMTypeRef type)
1781 {
1782 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1783 return LLVMBuildBitCast(ctx->builder, ptr,
1784 LLVMPointerType(type, addr_space), "");
1785 }
1786
1787 static LLVMValueRef
1788 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1789 {
1790 LLVMValueRef size =
1791 LLVMBuildExtractElement(ctx->builder, descriptor,
1792 LLVMConstInt(ctx->i32, 2, false), "");
1793
1794 /* VI and newer */
1795 if (ctx->options->chip_class >= VI && in_elements) {
1796 /* On VI, the descriptor contains the size in bytes,
1797 * but TXQ must return the size in elements.
1798 * The stride is always non-zero for resources using TXQ.
1799 */
1800 LLVMValueRef stride =
1801 LLVMBuildExtractElement(ctx->builder, descriptor,
1802 LLVMConstInt(ctx->i32, 1, false), "");
1803 stride = LLVMBuildLShr(ctx->builder, stride,
1804 LLVMConstInt(ctx->i32, 16, false), "");
1805 stride = LLVMBuildAnd(ctx->builder, stride,
1806 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1807
1808 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1809 }
1810 return size;
1811 }
1812
1813 /**
1814 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1815 * intrinsic names).
1816 */
1817 static void build_int_type_name(
1818 LLVMTypeRef type,
1819 char *buf, unsigned bufsize)
1820 {
1821 assert(bufsize >= 6);
1822
1823 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1824 snprintf(buf, bufsize, "v%ui32",
1825 LLVMGetVectorSize(type));
1826 else
1827 strcpy(buf, "i32");
1828 }
1829
1830 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1831 struct ac_image_args *args,
1832 nir_tex_instr *instr)
1833 {
1834 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1835 LLVMValueRef coord = args->addr;
1836 LLVMValueRef half_texel[2];
1837 LLVMValueRef compare_cube_wa;
1838 LLVMValueRef result;
1839 int c;
1840 unsigned coord_vgpr_index = (unsigned)args->offset + (unsigned)args->compare;
1841
1842 //TODO Rect
1843 {
1844 struct ac_image_args txq_args = { 0 };
1845
1846 txq_args.da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1847 txq_args.opcode = ac_image_get_resinfo;
1848 txq_args.dmask = 0xf;
1849 txq_args.addr = ctx->i32zero;
1850 txq_args.resource = args->resource;
1851 LLVMValueRef size = ac_build_image_opcode(&ctx->ac, &txq_args);
1852
1853 for (c = 0; c < 2; c++) {
1854 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1855 LLVMConstInt(ctx->i32, c, false), "");
1856 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1857 half_texel[c] = ac_build_fdiv(&ctx->ac, ctx->f32one, half_texel[c]);
1858 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1859 LLVMConstReal(ctx->f32, -0.5), "");
1860 }
1861 }
1862
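/* half_texel[c] now holds -0.5 / size[c], i.e. minus half a texel in
 * normalized coordinates; the loop below shifts the x/y coordinates by
 * that amount before gathering. */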
1863 LLVMValueRef orig_coords = args->addr;
1864
1865 for (c = 0; c < 2; c++) {
1866 LLVMValueRef tmp;
1867 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1868 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1869 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1870 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1871 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1872 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1873 }
1874
1875
1876 /*
1877 * Apparently cube has an issue with integer types that the workaround
1878 * doesn't solve, so this checks whether the format is 8_8_8_8 and the
1879 * type is integer, and if so applies an alternate workaround: sample
1880 * with a scaled type and convert back. This is taken from amdgpu-pro shaders.
1881 */
1882 /* NOTE: this produces some ugly code compared to amdgpu-pro.
1883 * LLVM ends up dumping SGPRs into VGPRs to deal with the compare/select,
1884 * and then reads them back. amdgpu-pro generates two selects:
1885 * one s_cmp for the descriptor rewrite and
1886 * one v_cmp for the coordinate and result changes.
1887 */
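/* For reference (matching the bfe and the constants below): in the resource
 * descriptor's word1, DATA_FORMAT occupies bits [25:20] (hence the extract
 * at offset 20, width 6) and NUM_FORMAT bits [29:26], so e.g.
 * 0x8000000 == USCALED (0x2) << 26 and 0x10000000 == UINT (0x4) << 26. */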
1888 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1889 LLVMValueRef tmp, tmp2;
1890
1891 /* workaround 8/8/8/8 uint/sint cube gather bug */
1892 /* first detect it then change to a scaled read and f2i */
1893 tmp = LLVMBuildExtractElement(ctx->builder, args->resource, ctx->i32one, "");
1894 tmp2 = tmp;
1895
1896 /* extract the DATA_FORMAT */
1897 tmp = ac_build_bfe(&ctx->ac, tmp, LLVMConstInt(ctx->i32, 20, false),
1898 LLVMConstInt(ctx->i32, 6, false), false);
1899
1900 /* is the DATA_FORMAT == 8_8_8_8 */
1901 compare_cube_wa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, tmp, LLVMConstInt(ctx->i32, V_008F14_IMG_DATA_FORMAT_8_8_8_8, false), "");
1902
1903 if (stype == GLSL_TYPE_UINT)
1904 /* Create a NUM FORMAT - 0x2 or 0x4 - USCALED or UINT */
1905 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0x8000000, false),
1906 LLVMConstInt(ctx->i32, 0x10000000, false), "");
1907 else
1908 /* Create a NUM FORMAT - 0x3 or 0x5 - SSCALED or SINT */
1909 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, LLVMConstInt(ctx->i32, 0xc000000, false),
1910 LLVMConstInt(ctx->i32, 0x14000000, false), "");
1911
1912 /* replace the NUM FORMAT in the descriptor */
1913 tmp2 = LLVMBuildAnd(ctx->builder, tmp2, LLVMConstInt(ctx->i32, C_008F14_NUM_FORMAT_GFX6, false), "");
1914 tmp2 = LLVMBuildOr(ctx->builder, tmp2, tmp, "");
1915
1916 args->resource = LLVMBuildInsertElement(ctx->builder, args->resource, tmp2, ctx->i32one, "");
1917
1918 /* don't modify the coordinates for this case */
1919 coord = LLVMBuildSelect(ctx->builder, compare_cube_wa, orig_coords, coord, "");
1920 }
1921 args->addr = coord;
1922 result = ac_build_image_opcode(&ctx->ac, args);
1923
1924 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
1925 LLVMValueRef tmp, tmp2;
1926
1927 /* if the cube workaround is in place, f2i the result. */
1928 for (c = 0; c < 4; c++) {
1929 tmp = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, c, false), "");
1930 if (stype == GLSL_TYPE_UINT)
1931 tmp2 = LLVMBuildFPToUI(ctx->builder, tmp, ctx->i32, "");
1932 else
1933 tmp2 = LLVMBuildFPToSI(ctx->builder, tmp, ctx->i32, "");
1934 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1935 tmp2 = LLVMBuildBitCast(ctx->builder, tmp2, ctx->i32, "");
1936 tmp = LLVMBuildSelect(ctx->builder, compare_cube_wa, tmp2, tmp, "");
1937 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1938 result = LLVMBuildInsertElement(ctx->builder, result, tmp, LLVMConstInt(ctx->i32, c, false), "");
1939 }
1940 }
1941 return result;
1942 }
1943
1944 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1945 nir_tex_instr *instr,
1946 struct ac_image_args *args)
1947 {
1948 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
1949 return ac_build_buffer_load_format(&ctx->ac,
1950 args->resource,
1951 args->addr,
1952 LLVMConstInt(ctx->i32, 0, false),
1953 true);
1954 }
1955
1956 args->opcode = ac_image_sample;
1957 args->compare = instr->is_shadow;
1958
1959 switch (instr->op) {
1960 case nir_texop_txf:
1961 case nir_texop_txf_ms:
1962 case nir_texop_samples_identical:
1963 args->opcode = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? ac_image_load : ac_image_load_mip;
1964 args->compare = false;
1965 args->offset = false;
1966 break;
1967 case nir_texop_txb:
1968 args->bias = true;
1969 break;
1970 case nir_texop_txl:
1971 args->lod = true;
1972 break;
1973 case nir_texop_txs:
1974 case nir_texop_query_levels:
1975 args->opcode = ac_image_get_resinfo;
1976 break;
1977 case nir_texop_tex:
1978 if (ctx->stage != MESA_SHADER_FRAGMENT)
1979 args->level_zero = true;
1980 break;
1981 case nir_texop_txd:
1982 args->deriv = true;
1983 break;
1984 case nir_texop_tg4:
1985 args->opcode = ac_image_gather4;
1986 args->level_zero = true;
1987 break;
1988 case nir_texop_lod:
1989 args->opcode = ac_image_get_lod;
1990 args->compare = false;
1991 args->offset = false;
1992 break;
1993 default:
1994 break;
1995 }
1996
1997 if (instr->op == nir_texop_tg4) {
1998 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1999 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
2000 return radv_lower_gather4_integer(ctx, args, instr);
2001 }
2002 }
2003 return ac_build_image_opcode(&ctx->ac, args);
2004 }
2005
2006 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
2007 nir_intrinsic_instr *instr)
2008 {
2009 LLVMValueRef index = get_src(ctx, instr->src[0]);
2010 unsigned desc_set = nir_intrinsic_desc_set(instr);
2011 unsigned binding = nir_intrinsic_binding(instr);
2012 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
2013 struct radv_pipeline_layout *pipeline_layout = ctx->options->layout;
2014 struct radv_descriptor_set_layout *layout = pipeline_layout->set[desc_set].layout;
2015 unsigned base_offset = layout->binding[binding].offset;
2016 LLVMValueRef offset, stride;
2017
2018 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
2019 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
2020 unsigned idx = pipeline_layout->set[desc_set].dynamic_offset_start +
2021 layout->binding[binding].dynamic_offset_offset;
2022 desc_ptr = ctx->push_constants;
2023 base_offset = pipeline_layout->push_constant_size + 16 * idx;
2024 stride = LLVMConstInt(ctx->i32, 16, false);
2025 } else
2026 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
2027
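/* The descriptor sits at base_offset + index * stride within the set (or,
 * for dynamic buffers, right after the push constants) and is loaded as a
 * v4i32 buffer descriptor below. */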
2028 offset = LLVMConstInt(ctx->i32, base_offset, false);
2029 index = LLVMBuildMul(ctx->builder, index, stride, "");
2030 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
2031
2032 desc_ptr = ac_build_gep0(&ctx->ac, desc_ptr, offset);
2033 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
2034 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
2035
2036 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
2037 }
2038
2039 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
2040 nir_intrinsic_instr *instr)
2041 {
2042 LLVMValueRef ptr, addr;
2043
2044 addr = LLVMConstInt(ctx->i32, nir_intrinsic_base(instr), 0);
2045 addr = LLVMBuildAdd(ctx->builder, addr, get_src(ctx, instr->src[0]), "");
2046
2047 ptr = ac_build_gep0(&ctx->ac, ctx->push_constants, addr);
2048 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
2049
2050 return LLVMBuildLoad(ctx->builder, ptr, "");
2051 }
2052
2053 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
2054 nir_intrinsic_instr *instr)
2055 {
2056 LLVMValueRef desc = get_src(ctx, instr->src[0]);
2057
2058 return get_buffer_size(ctx, desc, false);
2059 }
2060 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
2061 nir_intrinsic_instr *instr)
2062 {
2063 const char *store_name;
2064 LLVMValueRef src_data = get_src(ctx, instr->src[0]);
2065 LLVMTypeRef data_type = ctx->f32;
2066 int elem_size_mult = get_elem_bits(ctx, LLVMTypeOf(src_data)) / 32;
2067 int components_32bit = elem_size_mult * instr->num_components;
2068 unsigned writemask = nir_intrinsic_write_mask(instr);
2069 LLVMValueRef base_data, base_offset;
2070 LLVMValueRef params[6];
2071
2072 if (ctx->stage == MESA_SHADER_FRAGMENT)
2073 ctx->shader_info->fs.writes_memory = true;
2074
2075 params[1] = get_src(ctx, instr->src[1]);
2076 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2077 params[4] = ctx->i1false; /* glc */
2078 params[5] = ctx->i1false; /* slc */
2079
2080 if (components_32bit > 1)
2081 data_type = LLVMVectorType(ctx->f32, components_32bit);
2082
2083 base_data = to_float(ctx, src_data);
2084 base_data = trim_vector(ctx, base_data, instr->num_components);
2085 base_data = LLVMBuildBitCast(ctx->builder, base_data,
2086 data_type, "");
2087 base_offset = get_src(ctx, instr->src[2]); /* voffset */
2088 while (writemask) {
2089 int start, count;
2090 LLVMValueRef data;
2091 LLVMValueRef offset;
2092 LLVMValueRef tmp;
2093 u_bit_scan_consecutive_range(&writemask, &start, &count);
2094
2095 /* Due to an LLVM limitation, split 3-element writes
2096 * into a 2-element and a 1-element write. */
2097 if (count == 3) {
2098 writemask |= 1 << (start + 2);
2099 count = 2;
2100 }
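/* e.g. a writemask of 0b0111 is handled as a 2-component store now, with
 * bit (start + 2) re-queued for a later 1-component store. */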
2101
2102 start *= elem_size_mult;
2103 count *= elem_size_mult;
2104
2105 if (count > 4) {
2106 writemask |= ((1u << (count - 4)) - 1u) << (start + 4);
2107 count = 4;
2108 }
2109
2110 if (count == 4) {
2111 store_name = "llvm.amdgcn.buffer.store.v4f32";
2112 data = base_data;
2113 } else if (count == 2) {
2114 tmp = LLVMBuildExtractElement(ctx->builder,
2115 base_data, LLVMConstInt(ctx->i32, start, false), "");
2116 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
2117 ctx->i32zero, "");
2118
2119 tmp = LLVMBuildExtractElement(ctx->builder,
2120 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
2121 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
2122 ctx->i32one, "");
2123 store_name = "llvm.amdgcn.buffer.store.v2f32";
2124
2125 } else {
2126 assert(count == 1);
2127 if (get_llvm_num_components(base_data) > 1)
2128 data = LLVMBuildExtractElement(ctx->builder, base_data,
2129 LLVMConstInt(ctx->i32, start, false), "");
2130 else
2131 data = base_data;
2132 store_name = "llvm.amdgcn.buffer.store.f32";
2133 }
2134
2135 offset = base_offset;
2136 if (start != 0) {
2137 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
2138 }
2139 params[0] = data;
2140 params[3] = offset;
2141 ac_build_intrinsic(&ctx->ac, store_name,
2142 ctx->voidt, params, 6, 0);
2143 }
2144 }
2145
2146 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
2147 nir_intrinsic_instr *instr)
2148 {
2149 const char *name;
2150 LLVMValueRef params[6];
2151 int arg_count = 0;
2152 if (ctx->stage == MESA_SHADER_FRAGMENT)
2153 ctx->shader_info->fs.writes_memory = true;
2154
2155 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2156 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
2157 }
2158 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
2159 params[arg_count++] = get_src(ctx, instr->src[0]);
2160 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2161 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
2162 params[arg_count++] = ctx->i1false; /* slc */
2163
2164 switch (instr->intrinsic) {
2165 case nir_intrinsic_ssbo_atomic_add:
2166 name = "llvm.amdgcn.buffer.atomic.add";
2167 break;
2168 case nir_intrinsic_ssbo_atomic_imin:
2169 name = "llvm.amdgcn.buffer.atomic.smin";
2170 break;
2171 case nir_intrinsic_ssbo_atomic_umin:
2172 name = "llvm.amdgcn.buffer.atomic.umin";
2173 break;
2174 case nir_intrinsic_ssbo_atomic_imax:
2175 name = "llvm.amdgcn.buffer.atomic.smax";
2176 break;
2177 case nir_intrinsic_ssbo_atomic_umax:
2178 name = "llvm.amdgcn.buffer.atomic.umax";
2179 break;
2180 case nir_intrinsic_ssbo_atomic_and:
2181 name = "llvm.amdgcn.buffer.atomic.and";
2182 break;
2183 case nir_intrinsic_ssbo_atomic_or:
2184 name = "llvm.amdgcn.buffer.atomic.or";
2185 break;
2186 case nir_intrinsic_ssbo_atomic_xor:
2187 name = "llvm.amdgcn.buffer.atomic.xor";
2188 break;
2189 case nir_intrinsic_ssbo_atomic_exchange:
2190 name = "llvm.amdgcn.buffer.atomic.swap";
2191 break;
2192 case nir_intrinsic_ssbo_atomic_comp_swap:
2193 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2194 break;
2195 default:
2196 abort();
2197 }
2198
2199 return ac_build_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
2200 }
2201
2202 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
2203 nir_intrinsic_instr *instr)
2204 {
2205 LLVMValueRef results[2];
2206 int load_components;
2207 int num_components = instr->num_components;
2208 if (instr->dest.ssa.bit_size == 64)
2209 num_components *= 2;
2210
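/* Split the load into chunks of at most 4 dwords each; 64-bit results are
 * fetched as pairs of dwords and bitcast to the destination type at the
 * end. */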
2211 for (int i = 0; i < num_components; i += load_components) {
2212 load_components = MIN2(num_components - i, 4);
2213 const char *load_name;
2214 LLVMTypeRef data_type = ctx->f32;
2215 LLVMValueRef offset = LLVMConstInt(ctx->i32, i * 4, false);
2216 offset = LLVMBuildAdd(ctx->builder, get_src(ctx, instr->src[1]), offset, "");
2217
2218 if (load_components == 3)
2219 data_type = LLVMVectorType(ctx->f32, 4);
2220 else if (load_components > 1)
2221 data_type = LLVMVectorType(ctx->f32, load_components);
2222
2223 if (load_components >= 3)
2224 load_name = "llvm.amdgcn.buffer.load.v4f32";
2225 else if (load_components == 2)
2226 load_name = "llvm.amdgcn.buffer.load.v2f32";
2227 else if (load_components == 1)
2228 load_name = "llvm.amdgcn.buffer.load.f32";
2229 else
2230 unreachable("unhandled number of components");
2231
2232 LLVMValueRef params[] = {
2233 get_src(ctx, instr->src[0]),
2234 LLVMConstInt(ctx->i32, 0, false),
2235 offset,
2236 ctx->i1false,
2237 ctx->i1false,
2238 };
2239
2240 results[i / 4] = ac_build_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
2241
2242 }
2243
2244 LLVMValueRef ret = results[0];
2245 if (num_components > 4 || num_components == 3) {
2246 LLVMValueRef masks[] = {
2247 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2248 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2249 LLVMConstInt(ctx->i32, 4, false), LLVMConstInt(ctx->i32, 5, false),
2250 LLVMConstInt(ctx->i32, 6, false), LLVMConstInt(ctx->i32, 7, false)
2251 };
2252
2253 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
2254 ret = LLVMBuildShuffleVector(ctx->builder, results[0],
2255 results[num_components > 4 ? 1 : 0], swizzle, "");
2256 }
2257
2258 return LLVMBuildBitCast(ctx->builder, ret,
2259 get_def_type(ctx, &instr->dest.ssa), "");
2260 }
2261
2262 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
2263 nir_intrinsic_instr *instr)
2264 {
2265 LLVMValueRef results[8], ret;
2266 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
2267 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2268 int num_components = instr->num_components;
2269
2270 rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
2271
2272 if (instr->dest.ssa.bit_size == 64)
2273 num_components *= 2;
2274
2275 for (unsigned i = 0; i < num_components; ++i) {
2276 LLVMValueRef params[] = {
2277 rsrc,
2278 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2279 offset, "")
2280 };
2281 results[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
2282 params, 2,
2283 AC_FUNC_ATTR_READNONE |
2284 AC_FUNC_ATTR_LEGACY);
2285 }
2286
2287
2288 ret = ac_build_gather_values(&ctx->ac, results, num_components);
2289 return LLVMBuildBitCast(ctx->builder, ret,
2290 get_def_type(ctx, &instr->dest.ssa), "");
2291 }
2292
2293 static void
2294 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref_var *deref,
2295 bool vs_in, unsigned *vertex_index_out,
2296 LLVMValueRef *vertex_index_ref,
2297 unsigned *const_out, LLVMValueRef *indir_out)
2298 {
2299 unsigned const_offset = 0;
2300 nir_deref *tail = &deref->deref;
2301 LLVMValueRef offset = NULL;
2302
2303 if (vertex_index_out != NULL || vertex_index_ref != NULL) {
2304 tail = tail->child;
2305 nir_deref_array *deref_array = nir_deref_as_array(tail);
2306 if (vertex_index_out)
2307 *vertex_index_out = deref_array->base_offset;
2308
2309 if (vertex_index_ref) {
2310 LLVMValueRef vtx = LLVMConstInt(ctx->i32, deref_array->base_offset, false);
2311 if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
2312 vtx = LLVMBuildAdd(ctx->builder, vtx, get_src(ctx, deref_array->indirect), "");
2313 }
2314 *vertex_index_ref = vtx;
2315 }
2316 }
2317
2318 if (deref->var->data.compact) {
2319 assert(tail->child->deref_type == nir_deref_type_array);
2320 assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
2321 nir_deref_array *deref_array = nir_deref_as_array(tail->child);
2322 /* We always lower indirect dereferences for "compact" array vars. */
2323 assert(deref_array->deref_array_type == nir_deref_array_type_direct);
2324
2325 const_offset = deref_array->base_offset;
2326 goto out;
2327 }
2328
2329 while (tail->child != NULL) {
2330 const struct glsl_type *parent_type = tail->type;
2331 tail = tail->child;
2332
2333 if (tail->deref_type == nir_deref_type_array) {
2334 nir_deref_array *deref_array = nir_deref_as_array(tail);
2335 LLVMValueRef index, stride, local_offset;
2336 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2337
2338 const_offset += size * deref_array->base_offset;
2339 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2340 continue;
2341
2342 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2343 index = get_src(ctx, deref_array->indirect);
2344 stride = LLVMConstInt(ctx->i32, size, 0);
2345 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2346
2347 if (offset)
2348 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2349 else
2350 offset = local_offset;
2351 } else if (tail->deref_type == nir_deref_type_struct) {
2352 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2353
2354 for (unsigned i = 0; i < deref_struct->index; i++) {
2355 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2356 const_offset += glsl_count_attribute_slots(ft, vs_in);
2357 }
2358 } else
2359 unreachable("unsupported deref type");
2360
2361 }
2362 out:
2363 if (const_offset && offset)
2364 offset = LLVMBuildAdd(ctx->builder, offset,
2365 LLVMConstInt(ctx->i32, const_offset, 0),
2366 "");
2367
2368 *const_out = const_offset;
2369 *indir_out = offset;
2370 }
2371
2372 static LLVMValueRef
2373 lds_load(struct nir_to_llvm_context *ctx,
2374 LLVMValueRef dw_addr)
2375 {
2376 LLVMValueRef value;
2377 value = ac_build_indexed_load(&ctx->ac, ctx->lds, dw_addr, false);
2378 return value;
2379 }
2380
2381 static void
2382 lds_store(struct nir_to_llvm_context *ctx,
2383 LLVMValueRef dw_addr, LLVMValueRef value)
2384 {
2385 value = LLVMBuildBitCast(ctx->builder, value, ctx->i32, "");
2386 ac_build_indexed_store(&ctx->ac, ctx->lds,
2387 dw_addr, value);
2388 }
2389
2390 /* The offchip buffer layout for TCS->TES is
2391 *
2392 * - attribute 0 of patch 0 vertex 0
2393 * - attribute 0 of patch 0 vertex 1
2394 * - attribute 0 of patch 0 vertex 2
2395 * ...
2396 * - attribute 0 of patch 1 vertex 0
2397 * - attribute 0 of patch 1 vertex 1
2398 * ...
2399 * - attribute 1 of patch 0 vertex 0
2400 * - attribute 1 of patch 0 vertex 1
2401 * ...
2402 * - per patch attribute 0 of patch 0
2403 * - per patch attribute 0 of patch 1
2404 * ...
2405 *
2406 * Note that every attribute has 4 components.
2407 */
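/* Putting the layout above into a formula (matching the code below), the
 * byte address of a per-vertex attribute is
 *   16 * (rel_patch_id * vertices_per_patch + vertex_index
 *         + param_index * vertices_per_patch * num_patches)
 * and of a per-patch attribute
 *   patch_data_offset + 16 * (rel_patch_id + param_index * num_patches).
 */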
2408 static LLVMValueRef get_tcs_tes_buffer_address(struct nir_to_llvm_context *ctx,
2409 LLVMValueRef vertex_index,
2410 LLVMValueRef param_index)
2411 {
2412 LLVMValueRef base_addr, vertices_per_patch, num_patches, total_vertices;
2413 LLVMValueRef param_stride, constant16;
2414 LLVMValueRef rel_patch_id = get_rel_patch_id(ctx);
2415
2416 vertices_per_patch = unpack_param(ctx, ctx->tcs_offchip_layout, 9, 6);
2417 num_patches = unpack_param(ctx, ctx->tcs_offchip_layout, 0, 9);
2418 total_vertices = LLVMBuildMul(ctx->builder, vertices_per_patch,
2419 num_patches, "");
2420
2421 constant16 = LLVMConstInt(ctx->i32, 16, false);
2422 if (vertex_index) {
2423 base_addr = LLVMBuildMul(ctx->builder, rel_patch_id,
2424 vertices_per_patch, "");
2425
2426 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2427 vertex_index, "");
2428
2429 param_stride = total_vertices;
2430 } else {
2431 base_addr = rel_patch_id;
2432 param_stride = num_patches;
2433 }
2434
2435 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2436 LLVMBuildMul(ctx->builder, param_index,
2437 param_stride, ""), "");
2438
2439 base_addr = LLVMBuildMul(ctx->builder, base_addr, constant16, "");
2440
2441 if (!vertex_index) {
2442 LLVMValueRef patch_data_offset =
2443 unpack_param(ctx, ctx->tcs_offchip_layout, 16, 16);
2444
2445 base_addr = LLVMBuildAdd(ctx->builder, base_addr,
2446 patch_data_offset, "");
2447 }
2448 return base_addr;
2449 }
2450
2451 static LLVMValueRef get_tcs_tes_buffer_address_params(struct nir_to_llvm_context *ctx,
2452 unsigned param,
2453 unsigned const_index,
2454 bool is_compact,
2455 LLVMValueRef vertex_index,
2456 LLVMValueRef indir_index)
2457 {
2458 LLVMValueRef param_index;
2459
2460 if (indir_index)
2461 param_index = LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, param, false),
2462 indir_index, "");
2463 else {
2464 if (const_index && !is_compact)
2465 param += const_index;
2466 param_index = LLVMConstInt(ctx->i32, param, false);
2467 }
2468 return get_tcs_tes_buffer_address(ctx, vertex_index, param_index);
2469 }
2470
2471 static void
2472 mark_tess_output(struct nir_to_llvm_context *ctx,
2473 bool is_patch, uint32_t param)
2474
2475 {
2476 if (is_patch) {
2477 ctx->tess_patch_outputs_written |= (1ull << param);
2478 } else
2479 ctx->tess_outputs_written |= (1ull << param);
2480 }
2481
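/* Compute an LDS dword address for an I/O slot: each slot occupies four
 * dwords, so indirect and (non-compact) constant slot indices are scaled
 * by 4, while a compact constant index selects a single dword within the
 * slot. */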
2482 static LLVMValueRef
2483 get_dw_address(struct nir_to_llvm_context *ctx,
2484 LLVMValueRef dw_addr,
2485 unsigned param,
2486 unsigned const_index,
2487 bool compact_const_index,
2488 LLVMValueRef vertex_index,
2489 LLVMValueRef stride,
2490 LLVMValueRef indir_index)
2491
2492 {
2493
2494 if (vertex_index) {
2495 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2496 LLVMBuildMul(ctx->builder,
2497 vertex_index,
2498 stride, ""), "");
2499 }
2500
2501 if (indir_index)
2502 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2503 LLVMBuildMul(ctx->builder, indir_index,
2504 LLVMConstInt(ctx->i32, 4, false), ""), "");
2505 else if (const_index && !compact_const_index)
2506 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2507 LLVMConstInt(ctx->i32, const_index * 4, false), "");
2508
2509 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2510 LLVMConstInt(ctx->i32, param * 4, false), "");
2511
2512 if (const_index && compact_const_index)
2513 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2514 LLVMConstInt(ctx->i32, const_index, false), "");
2515 return dw_addr;
2516 }
2517
2518 static LLVMValueRef
2519 load_tcs_input(struct nir_to_llvm_context *ctx,
2520 nir_intrinsic_instr *instr)
2521 {
2522 LLVMValueRef dw_addr, stride;
2523 unsigned const_index;
2524 LLVMValueRef vertex_index = NULL;
2525 LLVMValueRef indir_index = NULL;
2526 unsigned param;
2527 LLVMValueRef value[4], result;
2528 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2529 const bool is_compact = instr->variables[0]->var->data.compact;
2530 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2531 radv_get_deref_offset(ctx, instr->variables[0],
2532 false, NULL, per_vertex ? &vertex_index : NULL,
2533 &const_index, &indir_index);
2534
2535 stride = unpack_param(ctx, ctx->tcs_in_layout, 13, 8);
2536 dw_addr = get_tcs_in_current_patch_offset(ctx);
2537 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2538 indir_index);
2539
2540 for (unsigned i = 0; i < instr->num_components; i++) {
2541 value[i] = lds_load(ctx, dw_addr);
2542 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2543 ctx->i32one, "");
2544 }
2545 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2546 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
2547 return result;
2548 }
2549
2550 static LLVMValueRef
2551 load_tcs_output(struct nir_to_llvm_context *ctx,
2552 nir_intrinsic_instr *instr)
2553 {
2554 LLVMValueRef dw_addr, stride;
2555 LLVMValueRef value[4], result;
2556 LLVMValueRef vertex_index = NULL;
2557 LLVMValueRef indir_index = NULL;
2558 unsigned const_index = 0;
2559 unsigned param;
2560 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2561 const bool is_compact = instr->variables[0]->var->data.compact;
2562 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2563 radv_get_deref_offset(ctx, instr->variables[0],
2564 false, NULL, per_vertex ? &vertex_index : NULL,
2565 &const_index, &indir_index);
2566
2567 if (!instr->variables[0]->var->data.patch) {
2568 stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
2569 dw_addr = get_tcs_out_current_patch_offset(ctx);
2570 } else {
2571 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2572 }
2573
2574 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2575 indir_index);
2576
2577 for (unsigned i = 0; i < instr->num_components; i++) {
2578 value[i] = lds_load(ctx, dw_addr);
2579 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2580 ctx->i32one, "");
2581 }
2582 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2583 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
2584 return result;
2585 }
2586
2587 static void
2588 store_tcs_output(struct nir_to_llvm_context *ctx,
2589 nir_intrinsic_instr *instr,
2590 LLVMValueRef src,
2591 unsigned writemask)
2592 {
2593 LLVMValueRef stride, dw_addr;
2594 LLVMValueRef buf_addr = NULL;
2595 LLVMValueRef vertex_index = NULL;
2596 LLVMValueRef indir_index = NULL;
2597 unsigned const_index = 0;
2598 unsigned param;
2599 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2600 const bool is_compact = instr->variables[0]->var->data.compact;
2601
2602 radv_get_deref_offset(ctx, instr->variables[0],
2603 false, NULL, per_vertex ? &vertex_index : NULL,
2604 &const_index, &indir_index);
2605
2606 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2607 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
2608 is_compact && const_index > 3) {
2609 const_index -= 3;
2610 param++;
2611 }
2612
2613 if (!instr->variables[0]->var->data.patch) {
2614 stride = unpack_param(ctx, ctx->tcs_out_layout, 13, 8);
2615 dw_addr = get_tcs_out_current_patch_offset(ctx);
2616 } else {
2617 dw_addr = get_tcs_out_current_patch_data_offset(ctx);
2618 }
2619
2620 mark_tess_output(ctx, instr->variables[0]->var->data.patch, param);
2621
2622 dw_addr = get_dw_address(ctx, dw_addr, param, const_index, is_compact, vertex_index, stride,
2623 indir_index);
2624 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index, is_compact,
2625 vertex_index, indir_index);
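/* Outputs are written twice: to LDS, where other invocations in the patch
 * can read them back, and to the offchip buffer that the TES reads from.
 * Tess factors are only kept in LDS here; writing them out to the tess
 * factor ring is handled separately. */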
2626
2627 unsigned base = is_compact ? const_index : 0;
2628 for (unsigned chan = 0; chan < 8; chan++) {
2629 bool is_tess_factor = false;
2630 if (!(writemask & (1 << chan)))
2631 continue;
2632 LLVMValueRef value = llvm_extract_elem(ctx, src, chan);
2633
2634 lds_store(ctx, dw_addr, value);
2635
2636 if (instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
2637 instr->variables[0]->var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER)
2638 is_tess_factor = true;
2639
2640 if (!is_tess_factor && writemask != 0xF)
2641 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, value, 1,
2642 buf_addr, ctx->oc_lds,
2643 4 * (base + chan), 1, 0, true, false);
2644
2645 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr,
2646 ctx->i32one, "");
2647 }
2648
2649 if (writemask == 0xF) {
2650 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, src, 4,
2651 buf_addr, ctx->oc_lds,
2652 (base * 4), 1, 0, true, false);
2653 }
2654 }
2655
2656 static LLVMValueRef
2657 load_tes_input(struct nir_to_llvm_context *ctx,
2658 nir_intrinsic_instr *instr)
2659 {
2660 LLVMValueRef buf_addr;
2661 LLVMValueRef result;
2662 LLVMValueRef vertex_index = NULL;
2663 LLVMValueRef indir_index = NULL;
2664 unsigned const_index = 0;
2665 unsigned param;
2666 const bool per_vertex = nir_is_per_vertex_io(instr->variables[0]->var, ctx->stage);
2667 const bool is_compact = instr->variables[0]->var->data.compact;
2668
2669 radv_get_deref_offset(ctx, instr->variables[0],
2670 false, NULL, per_vertex ? &vertex_index : NULL,
2671 &const_index, &indir_index);
2672 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2673 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 &&
2674 is_compact && const_index > 3) {
2675 const_index -= 3;
2676 param++;
2677 }
2678 buf_addr = get_tcs_tes_buffer_address_params(ctx, param, const_index,
2679 is_compact, vertex_index, indir_index);
2680
2681 result = ac_build_buffer_load(&ctx->ac, ctx->hs_ring_tess_offchip, instr->num_components, NULL,
2682 buf_addr, ctx->oc_lds, is_compact ? (4 * const_index) : 0, 1, 0, true);
2683 result = trim_vector(ctx, result, instr->num_components);
2684 result = LLVMBuildBitCast(ctx->builder, result, get_def_type(ctx, &instr->dest.ssa), "");
2685 return result;
2686 }
2687
2688 static LLVMValueRef
2689 load_gs_input(struct nir_to_llvm_context *ctx,
2690 nir_intrinsic_instr *instr)
2691 {
2692 LLVMValueRef indir_index, vtx_offset;
2693 unsigned const_index;
2694 LLVMValueRef args[9];
2695 unsigned param, vtx_offset_param;
2696 LLVMValueRef value[4], result;
2697 unsigned vertex_index;
2698 radv_get_deref_offset(ctx, instr->variables[0],
2699 false, &vertex_index, NULL,
2700 &const_index, &indir_index);
2701 vtx_offset_param = vertex_index;
2702 assert(vtx_offset_param < 6);
2703 vtx_offset = LLVMBuildMul(ctx->builder, ctx->gs_vtx_offset[vtx_offset_param],
2704 LLVMConstInt(ctx->i32, 4, false), "");
2705
2706 param = shader_io_get_unique_index(instr->variables[0]->var->data.location);
2707 for (unsigned i = 0; i < instr->num_components; i++) {
2708
2709 args[0] = ctx->esgs_ring;
2710 args[1] = vtx_offset;
2711 args[2] = LLVMConstInt(ctx->i32, (param * 4 + i + const_index) * 256, false);
2712 args[3] = ctx->i32zero;
2713 args[4] = ctx->i32one; /* OFFEN */
2714 args[5] = ctx->i32zero; /* IDXEN */
2715 args[6] = ctx->i32one; /* GLC */
2716 args[7] = ctx->i32zero; /* SLC */
2717 args[8] = ctx->i32zero; /* TFE */
2718
2719 value[i] = ac_build_intrinsic(&ctx->ac, "llvm.SI.buffer.load.dword.i32.i32",
2720 ctx->i32, args, 9,
2721 AC_FUNC_ATTR_READONLY |
2722 AC_FUNC_ATTR_LEGACY);
2723 }
2724 result = ac_build_gather_values(&ctx->ac, value, instr->num_components);
2725
2726 return result;
2727 }
2728
2729 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2730 nir_intrinsic_instr *instr)
2731 {
2732 LLVMValueRef values[8];
2733 int idx = instr->variables[0]->var->data.driver_location;
2734 int ve = instr->dest.ssa.num_components;
2735 LLVMValueRef indir_index;
2736 LLVMValueRef ret;
2737 unsigned const_index;
2738 bool vs_in = ctx->stage == MESA_SHADER_VERTEX &&
2739 instr->variables[0]->var->data.mode == nir_var_shader_in;
2740 radv_get_deref_offset(ctx, instr->variables[0], vs_in, NULL, NULL,
2741 &const_index, &indir_index);
2742
2743 if (instr->dest.ssa.bit_size == 64)
2744 ve *= 2;
2745
2746 switch (instr->variables[0]->var->data.mode) {
2747 case nir_var_shader_in:
2748 if (ctx->stage == MESA_SHADER_TESS_CTRL)
2749 return load_tcs_input(ctx, instr);
2750 if (ctx->stage == MESA_SHADER_TESS_EVAL)
2751 return load_tes_input(ctx, instr);
2752 if (ctx->stage == MESA_SHADER_GEOMETRY) {
2753 return load_gs_input(ctx, instr);
2754 }
2755 for (unsigned chan = 0; chan < ve; chan++) {
2756 if (indir_index) {
2757 unsigned count = glsl_count_attribute_slots(
2758 instr->variables[0]->var->type,
2759 ctx->stage == MESA_SHADER_VERTEX);
2760 count -= chan / 4;
2761 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2762 &ctx->ac, ctx->inputs + idx + chan, count,
2763 4, false);
2764
2765 values[chan] = LLVMBuildExtractElement(ctx->builder,
2766 tmp_vec,
2767 indir_index, "");
2768 } else
2769 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2770 }
2771 break;
2772 case nir_var_local:
2773 for (unsigned chan = 0; chan < ve; chan++) {
2774 if (indir_index) {
2775 unsigned count = glsl_count_attribute_slots(
2776 instr->variables[0]->var->type, false);
2777 count -= chan / 4;
2778 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2779 &ctx->ac, ctx->locals + idx + chan, count,
2780 4, true);
2781
2782 values[chan] = LLVMBuildExtractElement(ctx->builder,
2783 tmp_vec,
2784 indir_index, "");
2785 } else {
2786 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2787 }
2788 }
2789 break;
2790 case nir_var_shader_out:
2791 if (ctx->stage == MESA_SHADER_TESS_CTRL)
2792 return load_tcs_output(ctx, instr);
2793 for (unsigned chan = 0; chan < ve; chan++) {
2794 if (indir_index) {
2795 unsigned count = glsl_count_attribute_slots(
2796 instr->variables[0]->var->type, false);
2797 count -= chan / 4;
2798 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2799 &ctx->ac, ctx->outputs + idx + chan, count,
2800 4, true);
2801
2802 values[chan] = LLVMBuildExtractElement(ctx->builder,
2803 tmp_vec,
2804 indir_index, "");
2805 } else {
2806 values[chan] = LLVMBuildLoad(ctx->builder,
2807 ctx->outputs[idx + chan + const_index * 4],
2808 "");
2809 }
2810 }
2811 break;
2812 case nir_var_shared: {
2813 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2814 LLVMValueRef derived_ptr;
2815
2816 if (indir_index)
2817 indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
2818
2819 for (unsigned chan = 0; chan < ve; chan++) {
2820 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2821 if (indir_index)
2822 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2823 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2824
2825 values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
2826 }
2827 break;
2828 }
2829 default:
2830 unreachable("unhandled variable mode");
2831 }
2832 ret = ac_build_gather_values(&ctx->ac, values, ve);
2833 return LLVMBuildBitCast(ctx->builder, ret, get_def_type(ctx, &instr->dest.ssa), "");
2834 }
2835
2836 static void
2837 visit_store_var(struct nir_to_llvm_context *ctx,
2838 nir_intrinsic_instr *instr)
2839 {
2840 LLVMValueRef temp_ptr, value;
2841 int idx = instr->variables[0]->var->data.driver_location;
2842 LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2843 int writemask = instr->const_index[0];
2844 LLVMValueRef indir_index;
2845 unsigned const_index;
2846 radv_get_deref_offset(ctx, instr->variables[0], false,
2847 NULL, NULL, &const_index, &indir_index);
2848
2849 if (get_elem_bits(ctx, LLVMTypeOf(src)) == 64) {
2850 int old_writemask = writemask;
2851
2852 src = LLVMBuildBitCast(ctx->builder, src,
2853 LLVMVectorType(ctx->f32, get_llvm_num_components(src) * 2),
2854 "");
2855
2856 writemask = 0;
2857 for (unsigned chan = 0; chan < 4; chan++) {
2858 if (old_writemask & (1 << chan))
2859 writemask |= 3u << (2 * chan);
2860 }
2861 }
2862
2863 switch (instr->variables[0]->var->data.mode) {
2864 case nir_var_shader_out:
2865
2866 if (ctx->stage == MESA_SHADER_TESS_CTRL) {
2867 store_tcs_output(ctx, instr, src, writemask);
2868 return;
2869 }
2870
2871 for (unsigned chan = 0; chan < 8; chan++) {
2872 int stride = 4;
2873 if (!(writemask & (1 << chan)))
2874 continue;
2875
2876 value = llvm_extract_elem(ctx, src, chan);
2877
2878 if (instr->variables[0]->var->data.compact)
2879 stride = 1;
2880 if (indir_index) {
2881 unsigned count = glsl_count_attribute_slots(
2882 instr->variables[0]->var->type, false);
2883 count -= chan / 4;
2884 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2885 &ctx->ac, ctx->outputs + idx + chan, count,
2886 stride, true);
2887
2888 if (get_llvm_num_components(tmp_vec) > 1) {
2889 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2890 value, indir_index, "");
2891 } else
2892 tmp_vec = value;
2893 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2894 count, stride, tmp_vec);
2895
2896 } else {
2897 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2898
2899 LLVMBuildStore(ctx->builder, value, temp_ptr);
2900 }
2901 }
2902 break;
2903 case nir_var_local:
2904 for (unsigned chan = 0; chan < 8; chan++) {
2905 if (!(writemask & (1 << chan)))
2906 continue;
2907
2908 value = llvm_extract_elem(ctx, src, chan);
2909 if (indir_index) {
2910 unsigned count = glsl_count_attribute_slots(
2911 instr->variables[0]->var->type, false);
2912 count -= chan / 4;
2913 LLVMValueRef tmp_vec = ac_build_gather_values_extended(
2914 &ctx->ac, ctx->locals + idx + chan, count,
2915 4, true);
2916
2917 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2918 value, indir_index, "");
2919 build_store_values_extended(ctx, ctx->locals + idx + chan,
2920 count, 4, tmp_vec);
2921 } else {
2922 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2923
2924 LLVMBuildStore(ctx->builder, value, temp_ptr);
2925 }
2926 }
2927 break;
2928 case nir_var_shared: {
2929 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2930
2931 if (indir_index)
2932 indir_index = LLVMBuildMul(ctx->builder, indir_index, LLVMConstInt(ctx->i32, 4, false), "");
2933
2934 for (unsigned chan = 0; chan < 8; chan++) {
2935 if (!(writemask & (1 << chan)))
2936 continue;
2937 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2938 LLVMValueRef derived_ptr;
2939
2940 if (indir_index)
2941 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2942
2943 value = llvm_extract_elem(ctx, src, chan);
2944 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2945 LLVMBuildStore(ctx->builder,
2946 to_integer(ctx, value), derived_ptr);
2947 }
2948 break;
2949 }
2950 default:
2951 break;
2952 }
2953 }
2954
2955 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2956 {
2957 switch (dim) {
2958 case GLSL_SAMPLER_DIM_BUF:
2959 return 1;
2960 case GLSL_SAMPLER_DIM_1D:
2961 return array ? 2 : 1;
2962 case GLSL_SAMPLER_DIM_2D:
2963 return array ? 3 : 2;
2964 case GLSL_SAMPLER_DIM_MS:
2965 return array ? 4 : 3;
2966 case GLSL_SAMPLER_DIM_3D:
2967 case GLSL_SAMPLER_DIM_CUBE:
2968 return 3;
2969 case GLSL_SAMPLER_DIM_RECT:
2970 case GLSL_SAMPLER_DIM_SUBPASS:
2971 return 2;
2972 case GLSL_SAMPLER_DIM_SUBPASS_MS:
2973 return 3;
2974 default:
2975 break;
2976 }
2977 return 0;
2978 }
2979
2980
2981
2982 /* Adjust the sample index according to FMASK.
2983 *
2984 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
2985 * which is the identity mapping. Each nibble says which physical sample
2986 * should be fetched to get that sample.
2987 *
2988 * For example, 0x11111100 means there are only 2 samples stored and
2989 * the second sample covers 3/4 of the pixel. When reading samples 0
2990 * and 1, return physical sample 0 (determined by the first two 0s
2991 * in FMASK), otherwise return physical sample 1.
2992 *
2993 * The sample index should be adjusted as follows:
2994 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
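 *
 * For example, with the FMASK value 0x11111100 above and sample_index 3,
 * (0x11111100 >> 12) & 0xF == 1, so physical sample 1 is fetched.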
2995 */
2996 static LLVMValueRef adjust_sample_index_using_fmask(struct nir_to_llvm_context *ctx,
2997 LLVMValueRef coord_x, LLVMValueRef coord_y,
2998 LLVMValueRef coord_z,
2999 LLVMValueRef sample_index,
3000 LLVMValueRef fmask_desc_ptr)
3001 {
3002 LLVMValueRef fmask_load_address[4];
3003 LLVMValueRef res;
3004
3005 fmask_load_address[0] = coord_x;
3006 fmask_load_address[1] = coord_y;
3007 if (coord_z) {
3008 fmask_load_address[2] = coord_z;
3009 fmask_load_address[3] = LLVMGetUndef(ctx->i32);
3010 }
3011
3012 struct ac_image_args args = {0};
3013
3014 args.opcode = ac_image_load;
3015 args.da = coord_z ? true : false;
3016 args.resource = fmask_desc_ptr;
3017 args.dmask = 0xf;
3018 args.addr = ac_build_gather_values(&ctx->ac, fmask_load_address, coord_z ? 4 : 2);
3019
3020 res = ac_build_image_opcode(&ctx->ac, &args);
3021
3022 res = to_integer(ctx, res);
3023 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3024 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3025
3026 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3027 res,
3028 ctx->i32zero, "");
3029
3030 LLVMValueRef sample_index4 =
3031 LLVMBuildMul(ctx->builder, sample_index, four, "");
3032 LLVMValueRef shifted_fmask =
3033 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3034 LLVMValueRef final_sample =
3035 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3036
3037 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3038 * resource descriptor is 0 (invalid).
3039 */
3040 LLVMValueRef fmask_desc =
3041 LLVMBuildBitCast(ctx->builder, fmask_desc_ptr,
3042 ctx->v8i32, "");
3043
3044 LLVMValueRef fmask_word1 =
3045 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3046 ctx->i32one, "");
3047
3048 LLVMValueRef word1_is_nonzero =
3049 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3050 fmask_word1, ctx->i32zero, "");
3051
3052 /* Replace the MSAA sample index. */
3053 sample_index =
3054 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3055 final_sample, sample_index, "");
3056 return sample_index;
3057 }
3058
3059 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
3060 nir_intrinsic_instr *instr)
3061 {
3062 const struct glsl_type *type = instr->variables[0]->var->type;
3063 if(instr->variables[0]->deref.child)
3064 type = instr->variables[0]->deref.child->type;
3065
3066 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
3067 LLVMValueRef coords[4];
3068 LLVMValueRef masks[] = {
3069 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
3070 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
3071 };
3072 LLVMValueRef res;
3073 LLVMValueRef sample_index = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
3074
3075 int count;
3076 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
3077 bool add_frag_pos = (dim == GLSL_SAMPLER_DIM_SUBPASS ||
3078 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3079 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
3080 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
3081
3082 count = image_type_to_components_count(dim,
3083 glsl_sampler_type_is_array(type));
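/* Subpass inputs are addressed relative to the current fragment, so
 * frag_pos is added to the x/y coordinates below. */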
3084
3085 if (is_ms) {
3086 LLVMValueRef fmask_load_address[3];
3087 int chan;
3088
3089 fmask_load_address[0] = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
3090 fmask_load_address[1] = LLVMBuildExtractElement(ctx->builder, src0, masks[1], "");
3091 if (glsl_sampler_type_is_array(type))
3092 fmask_load_address[2] = LLVMBuildExtractElement(ctx->builder, src0, masks[2], "");
3093 else
3094 fmask_load_address[2] = NULL;
3095 if (add_frag_pos) {
3096 for (chan = 0; chan < 2; ++chan)
3097 fmask_load_address[chan] = LLVMBuildAdd(ctx->builder, fmask_load_address[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
3098 }
3099 sample_index = adjust_sample_index_using_fmask(ctx,
3100 fmask_load_address[0],
3101 fmask_load_address[1],
3102 fmask_load_address[2],
3103 sample_index,
3104 get_sampler_desc(ctx, instr->variables[0], DESC_FMASK));
3105 }
3106 if (count == 1) {
3107 if (instr->src[0].ssa->num_components > 1)
3108 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
3109 else
3110 res = src0;
3111 } else {
3112 int chan;
3113 if (is_ms)
3114 count--;
3115 for (chan = 0; chan < count; ++chan) {
3116 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
3117 }
3118
3119 if (add_frag_pos) {
3120 for (chan = 0; chan < count; ++chan)
3121 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
3122 }
3123 if (is_ms) {
3124 coords[count] = sample_index;
3125 count++;
3126 }
3127
3128 if (count == 3) {
3129 coords[3] = LLVMGetUndef(ctx->i32);
3130 count = 4;
3131 }
3132 res = ac_build_gather_values(&ctx->ac, coords, count);
3133 }
3134 return res;
3135 }
3136
3137 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
3138 nir_intrinsic_instr *instr)
3139 {
3140 LLVMValueRef params[7];
3141 LLVMValueRef res;
3142 char intrinsic_name[64];
3143 const nir_variable *var = instr->variables[0]->var;
3144 const struct glsl_type *type = var->type;
3145 if(instr->variables[0]->deref.child)
3146 type = instr->variables[0]->deref.child->type;
3147
3148 type = glsl_without_array(type);
3149 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3150 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3151 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
3152 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3153 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
3154 params[3] = ctx->i1false; /* glc */
3155 params[4] = ctx->i1false; /* slc */
3156 res = ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
3157 params, 5, 0);
3158
3159 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
3160 res = to_integer(ctx, res);
3161 } else {
3162 bool is_da = glsl_sampler_type_is_array(type) ||
3163 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3164 LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
3165 LLVMValueRef glc = ctx->i1false;
3166 LLVMValueRef slc = ctx->i1false;
3167
3168 params[0] = get_image_coords(ctx, instr);
3169 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3170 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
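/* The trailing flag order differs between LLVM releases: <= 3.9 takes
 * (r128, da, glc, slc), newer LLVM takes (glc, slc, lwe, da). */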
3171 if (HAVE_LLVM <= 0x0309) {
3172 params[3] = ctx->i1false; /* r128 */
3173 params[4] = da;
3174 params[5] = glc;
3175 params[6] = slc;
3176 } else {
3177 LLVMValueRef lwe = ctx->i1false;
3178 params[3] = glc;
3179 params[4] = slc;
3180 params[5] = lwe;
3181 params[6] = da;
3182 }
3183
3184 ac_get_image_intr_name("llvm.amdgcn.image.load",
3185 ctx->v4f32, /* vdata */
3186 LLVMTypeOf(params[0]), /* coords */
3187 LLVMTypeOf(params[1]), /* rsrc */
3188 intrinsic_name, sizeof(intrinsic_name));
3189
3190 res = ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
3191 params, 7, AC_FUNC_ATTR_READONLY);
3192 }
3193 return to_integer(ctx, res);
3194 }
3195
3196 static void visit_image_store(struct nir_to_llvm_context *ctx,
3197 nir_intrinsic_instr *instr)
3198 {
3199 LLVMValueRef params[8];
3200 char intrinsic_name[64];
3201 const nir_variable *var = instr->variables[0]->var;
3202 const struct glsl_type *type = glsl_without_array(var->type);
3203
3204 if (ctx->stage == MESA_SHADER_FRAGMENT)
3205 ctx->shader_info->fs.writes_memory = true;
3206
3207 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3208 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
3209 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3210 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
3211 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3212 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
3213 params[4] = ctx->i1false; /* glc */
3214 params[5] = ctx->i1false; /* slc */
3215 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
3216 params, 6, 0);
3217 } else {
3218 bool is_da = glsl_sampler_type_is_array(type) ||
3219 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3220 LLVMValueRef da = is_da ? ctx->i1true : ctx->i1false;
3221 LLVMValueRef glc = ctx->i1false;
3222 LLVMValueRef slc = ctx->i1false;
3223
3224 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
3225 params[1] = get_image_coords(ctx, instr); /* coords */
3226 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3227 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
3228 if (HAVE_LLVM <= 0x0309) {
3229 params[4] = ctx->i1false; /* r128 */
3230 params[5] = da;
3231 params[6] = glc;
3232 params[7] = slc;
3233 } else {
3234 LLVMValueRef lwe = ctx->i1false;
3235 params[4] = glc;
3236 params[5] = slc;
3237 params[6] = lwe;
3238 params[7] = da;
3239 }
3240
3241 ac_get_image_intr_name("llvm.amdgcn.image.store",
3242 LLVMTypeOf(params[0]), /* vdata */
3243 LLVMTypeOf(params[1]), /* coords */
3244 LLVMTypeOf(params[2]), /* rsrc */
3245 intrinsic_name, sizeof(intrinsic_name));
3246
3247 ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
3248 params, 8, 0);
3249 }
3250
3251 }
3252
3253 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
3254 nir_intrinsic_instr *instr)
3255 {
3256 LLVMValueRef params[6];
3257 int param_count = 0;
3258 const nir_variable *var = instr->variables[0]->var;
3259
3260 const char *base_name = "llvm.amdgcn.image.atomic";
3261 const char *atomic_name;
3262 LLVMValueRef coords;
3263 char intrinsic_name[32], coords_type[8];
3264 const struct glsl_type *type = glsl_without_array(var->type);
3265
3266 if (ctx->stage == MESA_SHADER_FRAGMENT)
3267 ctx->shader_info->fs.writes_memory = true;
3268
3269 params[param_count++] = get_src(ctx, instr->src[2]);
3270 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
3271 params[param_count++] = get_src(ctx, instr->src[3]);
3272
3273 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
3274 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
3275 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
3276 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
3277 params[param_count++] = ctx->i32zero; /* voffset */
3278 params[param_count++] = ctx->i1false; /* glc */
3279 params[param_count++] = ctx->i1false; /* slc */
3280 } else {
3281 bool da = glsl_sampler_type_is_array(type) ||
3282 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
3283
3284 coords = params[param_count++] = get_image_coords(ctx, instr);
3285 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3286 params[param_count++] = ctx->i1false; /* r128 */
3287 params[param_count++] = da ? ctx->i1true : ctx->i1false; /* da */
3288 params[param_count++] = ctx->i1false; /* slc */
3289 }
3290
3291 switch (instr->intrinsic) {
3292 case nir_intrinsic_image_atomic_add:
3293 atomic_name = "add";
3294 break;
3295 case nir_intrinsic_image_atomic_min:
3296 atomic_name = "smin";
3297 break;
3298 case nir_intrinsic_image_atomic_max:
3299 atomic_name = "smax";
3300 break;
3301 case nir_intrinsic_image_atomic_and:
3302 atomic_name = "and";
3303 break;
3304 case nir_intrinsic_image_atomic_or:
3305 atomic_name = "or";
3306 break;
3307 case nir_intrinsic_image_atomic_xor:
3308 atomic_name = "xor";
3309 break;
3310 case nir_intrinsic_image_atomic_exchange:
3311 atomic_name = "swap";
3312 break;
3313 case nir_intrinsic_image_atomic_comp_swap:
3314 atomic_name = "cmpswap";
3315 break;
3316 default:
3317 abort();
3318 }
3319 build_int_type_name(LLVMTypeOf(coords),
3320 coords_type, sizeof(coords_type));
3321
3322 snprintf(intrinsic_name, sizeof(intrinsic_name),
3323 "%s.%s.%s", base_name, atomic_name, coords_type);
3324 return ac_build_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
3325 }
3326
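/* Query image dimensions via a resinfo opcode. Cube map arrays report
 * their depth in faces, so the Z component is divided by 6 below to
 * turn it back into a layer count.
 */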
3327 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
3328 nir_intrinsic_instr *instr)
3329 {
3330 LLVMValueRef res;
3331 const nir_variable *var = instr->variables[0]->var;
3332 const struct glsl_type *type = var->type;
3333 bool da = glsl_sampler_type_is_array(var->type) ||
3334 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
3335 if (instr->variables[0]->deref.child)
3336 type = instr->variables[0]->deref.child->type;
3337
3338 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
3339 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
3340
3341 struct ac_image_args args = { 0 };
3342
3343 args.da = da;
3344 args.dmask = 0xf;
3345 args.resource = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
3346 args.opcode = ac_image_get_resinfo;
3347 args.addr = ctx->i32zero;
3348
3349 res = ac_build_image_opcode(&ctx->ac, &args);
3350
3351 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
3352 glsl_sampler_type_is_array(type)) {
3353 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3354 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3355 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
3356 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3357 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
3358 }
3359 return res;
3360 }
3361
3362 #define NOOP_WAITCNT 0xf7f
3363 #define LGKM_CNT 0x07f
3364 #define VM_CNT 0xf70
3365
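/* The s_waitcnt simm16 operand packs three counters: vmcnt in bits
 * [3:0], expcnt in bits [6:4] and lgkmcnt in bits [12:8]. An all-ones
 * field leaves that counter alone and a zero field waits for it to
 * drain, so the masks above combine with bitwise AND:
 * LGKM_CNT & VM_CNT = 0x070, i.e. wait for both LDS/SMEM and VMEM.
 */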
3366 static void emit_waitcnt(struct nir_to_llvm_context *ctx,
3367 unsigned simm16)
3368 {
3369 LLVMValueRef args[1] = {
3370 LLVMConstInt(ctx->i32, simm16, false),
3371 };
3372 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
3373 ctx->voidt, args, 1, 0);
3374 }
3375
3376 static void emit_barrier(struct nir_to_llvm_context *ctx)
3377 {
3378 /* SI only (thanks to a hw bug workaround):
3379 * The real barrier instruction isn't needed, because an entire patch
3380 * always fits into a single wave.
3381 */
3382 if (ctx->options->chip_class == SI &&
3383 ctx->stage == MESA_SHADER_TESS_CTRL) {
3384 emit_waitcnt(ctx, LGKM_CNT & VM_CNT);
3385 return;
3386 }
3387 ac_build_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
3388 ctx->voidt, NULL, 0, AC_FUNC_ATTR_CONVERGENT);
3389 }
3390
3391 static void emit_discard_if(struct nir_to_llvm_context *ctx,
3392 nir_intrinsic_instr *instr)
3393 {
3394 LLVMValueRef cond;
3395 ctx->shader_info->fs.can_discard = true;
3396
3397 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
3398 get_src(ctx, instr->src[0]),
3399 ctx->i32zero, "");
3400
3401 cond = LLVMBuildSelect(ctx->builder, cond,
3402 LLVMConstReal(ctx->f32, -1.0f),
3403 ctx->f32zero, "");
3404 ac_build_kill(&ctx->ac, cond);
3405 }
3406
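/* gl_LocalInvocationIndex = wave_id_in_threadgroup * 64 + thread_id.
 * Bits [11:6] of the tg_size input SGPR hold the wave's id within the
 * threadgroup, so masking with 0xfc0 yields wave_id * 64 directly.
 */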
3407 static LLVMValueRef
3408 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
3409 {
3410 LLVMValueRef result;
3411 LLVMValueRef thread_id = ac_get_thread_id(&ctx->ac);
3412 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
3413 LLVMConstInt(ctx->i32, 0xfc0, false), "");
3414
3415 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
3416 }
3417
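/* Shared-variable (LDS) atomics map directly onto LLVM atomicrmw and
 * cmpxchg instructions on the shared-memory pointer; sequentially
 * consistent ordering is used as the conservative choice.
 */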
3418 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
3419 nir_intrinsic_instr *instr)
3420 {
3421 LLVMValueRef ptr, result;
3422 int idx = instr->variables[0]->var->data.driver_location;
3423 LLVMValueRef src = get_src(ctx, instr->src[0]);
3424 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
3425
3426 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
3427 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
3428 result = LLVMBuildAtomicCmpXchg(ctx->builder,
3429 ptr, src, src1,
3430 LLVMAtomicOrderingSequentiallyConsistent,
3431 LLVMAtomicOrderingSequentiallyConsistent,
3432 false);
3433 } else {
3434 LLVMAtomicRMWBinOp op;
3435 switch (instr->intrinsic) {
3436 case nir_intrinsic_var_atomic_add:
3437 op = LLVMAtomicRMWBinOpAdd;
3438 break;
3439 case nir_intrinsic_var_atomic_umin:
3440 op = LLVMAtomicRMWBinOpUMin;
3441 break;
3442 case nir_intrinsic_var_atomic_umax:
3443 op = LLVMAtomicRMWBinOpUMax;
3444 break;
3445 case nir_intrinsic_var_atomic_imin:
3446 op = LLVMAtomicRMWBinOpMin;
3447 break;
3448 case nir_intrinsic_var_atomic_imax:
3449 op = LLVMAtomicRMWBinOpMax;
3450 break;
3451 case nir_intrinsic_var_atomic_and:
3452 op = LLVMAtomicRMWBinOpAnd;
3453 break;
3454 case nir_intrinsic_var_atomic_or:
3455 op = LLVMAtomicRMWBinOpOr;
3456 break;
3457 case nir_intrinsic_var_atomic_xor:
3458 op = LLVMAtomicRMWBinOpXor;
3459 break;
3460 case nir_intrinsic_var_atomic_exchange:
3461 op = LLVMAtomicRMWBinOpXchg;
3462 break;
3463 default:
3464 return NULL;
3465 }
3466
3467 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
3468 LLVMAtomicOrderingSequentiallyConsistent,
3469 false);
3470 }
3471 return result;
3472 }
3473
3474 #define INTERP_CENTER 0
3475 #define INTERP_CENTROID 1
3476 #define INTERP_SAMPLE 2
3477
3478 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
3479 enum glsl_interp_mode interp, unsigned location)
3480 {
3481 switch (interp) {
3482 case INTERP_MODE_FLAT:
3483 default:
3484 return NULL;
3485 case INTERP_MODE_SMOOTH:
3486 case INTERP_MODE_NONE:
3487 if (location == INTERP_CENTER)
3488 return ctx->persp_center;
3489 else if (location == INTERP_CENTROID)
3490 return ctx->persp_centroid;
3491 else if (location == INTERP_SAMPLE)
3492 return ctx->persp_sample;
3493 break;
3494 case INTERP_MODE_NOPERSPECTIVE:
3495 if (location == INTERP_CENTER)
3496 return ctx->linear_center;
3497 else if (location == INTERP_CENTROID)
3498 return ctx->linear_centroid;
3499 else if (location == INTERP_SAMPLE)
3500 return ctx->linear_sample;
3501 break;
3502 }
3503 return NULL;
3504 }
3505
3506 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
3507 LLVMValueRef sample_id)
3508 {
3509 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
3510 LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
3511 LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
3512 LLVMValueRef result[2];
3513
3514 result[0] = ac_build_indexed_load_const(&ctx->ac, ctx->sample_positions, offset0);
3515 result[1] = ac_build_indexed_load_const(&ctx->ac, ctx->sample_positions, offset1);
3516
3517 return ac_build_gather_values(&ctx->ac, result, 2);
3518 }
3519
3520 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
3521 {
3522 LLVMValueRef values[2];
3523
3524 values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
3525 values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
3526 return ac_build_gather_values(&ctx->ac, values, 2);
3527 }
3528
3529 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
3530 nir_intrinsic_instr *instr)
3531 {
3532 LLVMValueRef result[2];
3533 LLVMValueRef interp_param, attr_number;
3534 unsigned location;
3535 unsigned chan;
3536 LLVMValueRef src_c0, src_c1;
3537 LLVMValueRef src0;
3538 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
3539 switch (instr->intrinsic) {
3540 case nir_intrinsic_interp_var_at_centroid:
3541 location = INTERP_CENTROID;
3542 break;
3543 case nir_intrinsic_interp_var_at_sample:
3544 location = INTERP_SAMPLE;
3545 src0 = get_src(ctx, instr->src[0]);
3546 break;
3547 case nir_intrinsic_interp_var_at_offset:
3548 location = INTERP_CENTER;
3549 src0 = get_src(ctx, instr->src[0]);
break;
3550 default:
3551 break;
3552 }
3553
3554 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
3555 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
3556 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
3557 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
3558 LLVMValueRef sample_position;
3559 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
3560
3561 /* fetch the sample position for the given sample ID */
3562 sample_position = load_sample_position(ctx, src0);
3563
3564 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
3565 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
3566 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
3567 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
3568 }
3569 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
3570 attr_number = LLVMConstInt(ctx->i32, input_index, false);
3571
3572 if (location == INTERP_SAMPLE || location == INTERP_CENTER) {
3573 LLVMValueRef ij_out[2];
3574 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
3575
3576 /*
3577 * take the I then J parameters, and the DDX/Y for it, and
3578 * calculate the IJ inputs for the interpolator.
3579 * temp1 = ddx * offset/sample.x + I;
3580 * interp_param.I = ddy * offset/sample.y + temp1;
3581 * temp1 = ddx * offset/sample.x + J;
3582 * interp_param.J = ddy * offset/sample.y + temp1;
3583 */
3584 for (unsigned i = 0; i < 2; i++) {
3585 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
3586 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
3587 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
3588 ddxy_out, ix_ll, "");
3589 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
3590 ddxy_out, iy_ll, "");
3591 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
3592 interp_param, ix_ll, "");
3593 LLVMValueRef temp1, temp2;
3594
3595 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
3596 ctx->f32, "");
3597
3598 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
3599 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
3600
3601 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
3602 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
3603
3604 ij_out[i] = LLVMBuildBitCast(ctx->builder,
3605 temp2, ctx->i32, "");
3606 }
3607 interp_param = ac_build_gather_values(&ctx->ac, ij_out, 2);
3608
3609 }
3610
3611 for (chan = 0; chan < 2; chan++) {
3612 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3613
3614 if (interp_param) {
3615 interp_param = LLVMBuildBitCast(ctx->builder,
3616 interp_param, LLVMVectorType(ctx->f32, 2), "");
3617 LLVMValueRef i = LLVMBuildExtractElement(
3618 ctx->builder, interp_param, ctx->i32zero, "");
3619 LLVMValueRef j = LLVMBuildExtractElement(
3620 ctx->builder, interp_param, ctx->i32one, "");
3621
3622 result[chan] = ac_build_fs_interp(&ctx->ac,
3623 llvm_chan, attr_number,
3624 ctx->prim_mask, i, j);
3625 } else {
3626 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
3627 LLVMConstInt(ctx->i32, 2, false),
3628 llvm_chan, attr_number,
3629 ctx->prim_mask);
3630 }
3631 }
3632 return ac_build_gather_values(&ctx->ac, result, 2);
3633 }
3634
3635 static void
3636 visit_emit_vertex(struct nir_to_llvm_context *ctx,
3637 nir_intrinsic_instr *instr)
3638 {
3639 LLVMValueRef gs_next_vertex;
3640 LLVMValueRef can_emit, kill;
3641 int idx;
3642
3643 assert(instr->const_index[0] == 0);
3644 /* Write vertex attribute values to GSVS ring */
3645 gs_next_vertex = LLVMBuildLoad(ctx->builder,
3646 ctx->gs_next_vertex,
3647 "");
3648
3649 /* If this thread has already emitted the declared maximum number of
3650 * vertices, kill it: excessive vertex emissions are not supposed to
3651 * have any effect, and GS threads have no externally observable
3652 * effects other than emitting vertices.
3653 */
3654 can_emit = LLVMBuildICmp(ctx->builder, LLVMIntULT, gs_next_vertex,
3655 LLVMConstInt(ctx->i32, ctx->gs_max_out_vertices, false), "");
3656
3657 kill = LLVMBuildSelect(ctx->builder, can_emit,
3658 LLVMConstReal(ctx->f32, 1.0f),
3659 LLVMConstReal(ctx->f32, -1.0f), "");
3660 ac_build_kill(&ctx->ac, kill);
3661
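/* The GSVS ring is laid out component-major: dword j of output slot
 * "slot" for this vertex lives at dword index
 * (slot * 4 + j) * gs_max_out_vertices + gs_next_vertex,
 * which is scaled to a byte offset below.
 */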
3662 /* loop num outputs */
3663 idx = 0;
3664 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
3665 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
3666 int length = 4;
3667 int slot = idx;
3668 int slot_inc = 1;
3669
3670 if (!(ctx->output_mask & (1ull << i)))
3671 continue;
3672
3673 if (i == VARYING_SLOT_CLIP_DIST0) {
3674 /* pack clip and cull into a single set of slots */
3675 length = ctx->num_output_clips + ctx->num_output_culls;
3676 if (length > 4)
3677 slot_inc = 2;
3678 }
3679 for (unsigned j = 0; j < length; j++) {
3680 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder,
3681 out_ptr[j], "");
3682 LLVMValueRef voffset = LLVMConstInt(ctx->i32, (slot * 4 + j) * ctx->gs_max_out_vertices, false);
3683 voffset = LLVMBuildAdd(ctx->builder, voffset, gs_next_vertex, "");
3684 voffset = LLVMBuildMul(ctx->builder, voffset, LLVMConstInt(ctx->i32, 4, false), "");
3685
3686 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
3687
3688 ac_build_buffer_store_dword(&ctx->ac, ctx->gsvs_ring,
3689 out_val, 1,
3690 voffset, ctx->gs2vs_offset, 0,
3691 1, 1, true, true);
3692 }
3693 idx += slot_inc;
3694 }
3695
3696 gs_next_vertex = LLVMBuildAdd(ctx->builder, gs_next_vertex,
3697 ctx->i32one, "");
3698 LLVMBuildStore(ctx->builder, gs_next_vertex, ctx->gs_next_vertex);
3699
3700 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_EMIT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
3701 }
3702
3703 static void
3704 visit_end_primitive(struct nir_to_llvm_context *ctx,
3705 nir_intrinsic_instr *instr)
3706 {
3707 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_CUT | AC_SENDMSG_GS | (0 << 8), ctx->gs_wave_id);
3708 }
3709
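/* For triangle domains the third barycentric coordinate is implicit:
 * w = 1 - u - v. Quad and isoline domains only use (u, v), so the
 * remaining components stay zero.
 */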
3710 static LLVMValueRef
3711 visit_load_tess_coord(struct nir_to_llvm_context *ctx,
3712 nir_intrinsic_instr *instr)
3713 {
3714 LLVMValueRef coord[4] = {
3715 ctx->tes_u,
3716 ctx->tes_v,
3717 ctx->f32zero,
3718 ctx->f32zero,
3719 };
3720
3721 if (ctx->tes_primitive_mode == GL_TRIANGLES)
3722 coord[2] = LLVMBuildFSub(ctx->builder, ctx->f32one,
3723 LLVMBuildFAdd(ctx->builder, coord[0], coord[1], ""), "");
3724
3725 LLVMValueRef result = ac_build_gather_values(&ctx->ac, coord, instr->num_components);
3726 return LLVMBuildBitCast(ctx->builder, result,
3727 get_def_type(ctx, &instr->dest.ssa), "");
3728 }
3729
3730 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
3731 nir_intrinsic_instr *instr)
3732 {
3733 LLVMValueRef result = NULL;
3734
3735 switch (instr->intrinsic) {
3736 case nir_intrinsic_load_work_group_id: {
3737 result = ctx->workgroup_ids;
3738 break;
3739 }
3740 case nir_intrinsic_load_base_vertex: {
3741 result = ctx->base_vertex;
3742 break;
3743 }
3744 case nir_intrinsic_load_vertex_id_zero_base: {
3745 result = ctx->vertex_id;
3746 break;
3747 }
3748 case nir_intrinsic_load_local_invocation_id: {
3749 result = ctx->local_invocation_ids;
3750 break;
3751 }
3752 case nir_intrinsic_load_base_instance:
3753 result = ctx->start_instance;
3754 break;
3755 case nir_intrinsic_load_draw_id:
3756 result = ctx->draw_index;
3757 break;
3758 case nir_intrinsic_load_invocation_id:
3759 if (ctx->stage == MESA_SHADER_TESS_CTRL)
3760 result = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
3761 else
3762 result = ctx->gs_invocation_id;
3763 break;
3764 case nir_intrinsic_load_primitive_id:
3765 if (ctx->stage == MESA_SHADER_GEOMETRY)
3766 result = ctx->gs_prim_id;
3767 else if (ctx->stage == MESA_SHADER_TESS_CTRL)
3768 result = ctx->tcs_patch_id;
3769 else if (ctx->stage == MESA_SHADER_TESS_EVAL)
3770 result = ctx->tes_patch_id;
3771 else
3772 fprintf(stderr, "Unknown primitive id intrinsic: %d\n", ctx->stage);
3773 break;
3774 case nir_intrinsic_load_sample_id:
3775 ctx->shader_info->fs.force_persample = true;
3776 result = unpack_param(ctx, ctx->ancillary, 8, 4);
3777 break;
3778 case nir_intrinsic_load_sample_pos:
3779 ctx->shader_info->fs.force_persample = true;
3780 result = load_sample_pos(ctx);
3781 break;
3782 case nir_intrinsic_load_sample_mask_in:
3783 result = ctx->sample_coverage;
3784 break;
3785 case nir_intrinsic_load_front_face:
3786 result = ctx->front_face;
3787 break;
3788 case nir_intrinsic_load_instance_id:
3789 result = ctx->instance_id;
3790 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3791 ctx->shader_info->vs.vgpr_comp_cnt);
3792 break;
3793 case nir_intrinsic_load_num_work_groups:
3794 result = ctx->num_work_groups;
3795 break;
3796 case nir_intrinsic_load_local_invocation_index:
3797 result = visit_load_local_invocation_index(ctx);
3798 break;
3799 case nir_intrinsic_load_push_constant:
3800 result = visit_load_push_constant(ctx, instr);
3801 break;
3802 case nir_intrinsic_vulkan_resource_index:
3803 result = visit_vulkan_resource_index(ctx, instr);
3804 break;
3805 case nir_intrinsic_store_ssbo:
3806 visit_store_ssbo(ctx, instr);
3807 break;
3808 case nir_intrinsic_load_ssbo:
3809 result = visit_load_buffer(ctx, instr);
3810 break;
3811 case nir_intrinsic_ssbo_atomic_add:
3812 case nir_intrinsic_ssbo_atomic_imin:
3813 case nir_intrinsic_ssbo_atomic_umin:
3814 case nir_intrinsic_ssbo_atomic_imax:
3815 case nir_intrinsic_ssbo_atomic_umax:
3816 case nir_intrinsic_ssbo_atomic_and:
3817 case nir_intrinsic_ssbo_atomic_or:
3818 case nir_intrinsic_ssbo_atomic_xor:
3819 case nir_intrinsic_ssbo_atomic_exchange:
3820 case nir_intrinsic_ssbo_atomic_comp_swap:
3821 result = visit_atomic_ssbo(ctx, instr);
3822 break;
3823 case nir_intrinsic_load_ubo:
3824 result = visit_load_ubo_buffer(ctx, instr);
3825 break;
3826 case nir_intrinsic_get_buffer_size:
3827 result = visit_get_buffer_size(ctx, instr);
3828 break;
3829 case nir_intrinsic_load_var:
3830 result = visit_load_var(ctx, instr);
3831 break;
3832 case nir_intrinsic_store_var:
3833 visit_store_var(ctx, instr);
3834 break;
3835 case nir_intrinsic_image_load:
3836 result = visit_image_load(ctx, instr);
3837 break;
3838 case nir_intrinsic_image_store:
3839 visit_image_store(ctx, instr);
3840 break;
3841 case nir_intrinsic_image_atomic_add:
3842 case nir_intrinsic_image_atomic_min:
3843 case nir_intrinsic_image_atomic_max:
3844 case nir_intrinsic_image_atomic_and:
3845 case nir_intrinsic_image_atomic_or:
3846 case nir_intrinsic_image_atomic_xor:
3847 case nir_intrinsic_image_atomic_exchange:
3848 case nir_intrinsic_image_atomic_comp_swap:
3849 result = visit_image_atomic(ctx, instr);
3850 break;
3851 case nir_intrinsic_image_size:
3852 result = visit_image_size(ctx, instr);
3853 break;
3854 case nir_intrinsic_discard:
3855 ctx->shader_info->fs.can_discard = true;
3856 ac_build_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
3857 ctx->voidt,
3858 NULL, 0, AC_FUNC_ATTR_LEGACY);
3859 break;
3860 case nir_intrinsic_discard_if:
3861 emit_discard_if(ctx, instr);
3862 break;
3863 case nir_intrinsic_memory_barrier:
3864 emit_waitcnt(ctx, VM_CNT);
3865 break;
3866 case nir_intrinsic_barrier:
3867 emit_barrier(ctx);
3868 break;
3869 case nir_intrinsic_var_atomic_add:
3870 case nir_intrinsic_var_atomic_imin:
3871 case nir_intrinsic_var_atomic_umin:
3872 case nir_intrinsic_var_atomic_imax:
3873 case nir_intrinsic_var_atomic_umax:
3874 case nir_intrinsic_var_atomic_and:
3875 case nir_intrinsic_var_atomic_or:
3876 case nir_intrinsic_var_atomic_xor:
3877 case nir_intrinsic_var_atomic_exchange:
3878 case nir_intrinsic_var_atomic_comp_swap:
3879 result = visit_var_atomic(ctx, instr);
3880 break;
3881 case nir_intrinsic_interp_var_at_centroid:
3882 case nir_intrinsic_interp_var_at_sample:
3883 case nir_intrinsic_interp_var_at_offset:
3884 result = visit_interp(ctx, instr);
3885 break;
3886 case nir_intrinsic_emit_vertex:
3887 visit_emit_vertex(ctx, instr);
3888 break;
3889 case nir_intrinsic_end_primitive:
3890 visit_end_primitive(ctx, instr);
3891 break;
3892 case nir_intrinsic_load_tess_coord:
3893 result = visit_load_tess_coord(ctx, instr);
3894 break;
3895 case nir_intrinsic_load_patch_vertices_in:
3896 result = LLVMConstInt(ctx->i32, ctx->options->key.tcs.input_vertices, false);
3897 break;
3898 default:
3899 fprintf(stderr, "Unknown intrinsic: ");
3900 nir_print_instr(&instr->instr, stderr);
3901 fprintf(stderr, "\n");
3902 break;
3903 }
3904 if (result) {
3905 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3906 }
3907 }
3908
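/* Resolve a NIR texture/image deref to a descriptor loaded from the
 * bound descriptor set: compute the dword offset and per-element
 * stride from the binding layout, fold in any array index, and emit an
 * indexed load. Immutable samplers with known contents are
 * materialized as constants instead of being loaded.
 */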
3909 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3910 nir_deref_var *deref,
3911 enum desc_type desc_type)
3912 {
3913 unsigned desc_set = deref->var->data.descriptor_set;
3914 LLVMValueRef list = ctx->descriptor_sets[desc_set];
3915 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3916 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
3917 unsigned offset = binding->offset;
3918 unsigned stride = binding->size;
3919 unsigned type_size;
3920 LLVMBuilderRef builder = ctx->builder;
3921 LLVMTypeRef type;
3922 LLVMValueRef index = NULL;
3923 unsigned constant_index = 0;
3924
3925 assert(deref->var->data.binding < layout->binding_count);
3926
3927 switch (desc_type) {
3928 case DESC_IMAGE:
3929 type = ctx->v8i32;
3930 type_size = 32;
3931 break;
3932 case DESC_FMASK:
3933 type = ctx->v8i32;
3934 offset += 32;
3935 type_size = 32;
3936 break;
3937 case DESC_SAMPLER:
3938 type = ctx->v4i32;
3939 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
3940 offset += 64;
3941
3942 type_size = 16;
3943 break;
3944 case DESC_BUFFER:
3945 type = ctx->v4i32;
3946 type_size = 16;
3947 break;
3948 default:
3949 unreachable("invalid desc_type\n");
3950 }
3951
3952 if (deref->deref.child) {
3953 nir_deref_array *child = (nir_deref_array*)deref->deref.child;
3954
3955 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3956 offset += child->base_offset * stride;
3957 if (child->deref_array_type == nir_deref_array_type_indirect) {
3958 index = get_src(ctx, child->indirect);
3959 }
3960
3961 constant_index = child->base_offset;
3962 }
3963 if (desc_type == DESC_SAMPLER && binding->immutable_samplers &&
3964 (!index || binding->immutable_samplers_equal)) {
3965 if (binding->immutable_samplers_equal)
3966 constant_index = 0;
3967
3968 LLVMValueRef constants[] = {
3969 LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 0], 0),
3970 LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 1], 0),
3971 LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 2], 0),
3972 LLVMConstInt(ctx->i32, binding->immutable_samplers[constant_index * 4 + 3], 0),
3973 };
3974 return ac_build_gather_values(&ctx->ac, constants, 4);
3975 }
3976
3977 assert(stride % type_size == 0);
3978
3979 if (!index)
3980 index = ctx->i32zero;
3981
3982 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
3983
3984 list = ac_build_gep0(&ctx->ac, list, LLVMConstInt(ctx->i32, offset, 0));
3985 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
3986
3987 return ac_build_indexed_load_const(&ctx->ac, list, index);
3988 }
3989
3990 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3991 struct ac_image_args *args,
3992 nir_tex_instr *instr,
3993 nir_texop op,
3994 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3995 LLVMValueRef *param, unsigned count,
3996 unsigned dmask)
3997 {
3998 unsigned is_rect = 0;
3999 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
4000
4001 if (op == nir_texop_lod)
4002 da = false;
4003 /* Pad to power of two vector */
4004 while (count < util_next_power_of_two(count))
4005 param[count++] = LLVMGetUndef(ctx->i32);
4006
4007 if (count > 1)
4008 args->addr = ac_build_gather_values(&ctx->ac, param, count);
4009 else
4010 args->addr = param[0];
4011
4012 args->resource = res_ptr;
4013 args->sampler = samp_ptr;
4014
4015 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
4016 args->addr = param[0];
4017 return;
4018 }
4019
4020 args->dmask = dmask;
4021 args->unorm = is_rect;
4022 args->da = da;
4023 }
4024
4025 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
4026 *
4027 * SI-CI:
4028 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
4029 * filtering manually. The driver sets img7 to a mask clearing
4030 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
4031 * s_and_b32 samp0, samp0, img7
4032 *
4033 * VI:
4034 * The ANISO_OVERRIDE sampler field enables this fix in TA.
4035 */
4036 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
4037 LLVMValueRef res, LLVMValueRef samp)
4038 {
4039 LLVMBuilderRef builder = ctx->builder;
4040 LLVMValueRef img7, samp0;
4041
4042 if (ctx->options->chip_class >= VI)
4043 return samp;
4044
4045 img7 = LLVMBuildExtractElement(builder, res,
4046 LLVMConstInt(ctx->i32, 7, 0), "");
4047 samp0 = LLVMBuildExtractElement(builder, samp,
4048 LLVMConstInt(ctx->i32, 0, 0), "");
4049 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
4050 return LLVMBuildInsertElement(builder, samp, samp0,
4051 LLVMConstInt(ctx->i32, 0, 0), "");
4052 }
4053
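/* Fetch the resource, sampler and (for MSAA fetches) FMASK
 * descriptors for a texture instruction. Buffer textures take the
 * buffer-descriptor path, and SI/CI parts get the anisotropic
 * filtering workaround applied to the sampler word.
 */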
4054 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
4055 nir_tex_instr *instr,
4056 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
4057 LLVMValueRef *fmask_ptr)
4058 {
4059 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
4060 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
4061 else
4062 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
4063 if (samp_ptr) {
4064 if (instr->sampler)
4065 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
4066 else
4067 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
4068 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
4069 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
4070 }
4071 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
4072 instr->op == nir_texop_samples_identical))
4073 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
4074 }
4075
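/* Array layer selection rounds to nearest (llvm.rint follows the
 * round-to-nearest-even rule required by GL/Vulkan) before the layer
 * is packed as an integer coordinate.
 */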
4076 static LLVMValueRef apply_round_slice(struct nir_to_llvm_context *ctx,
4077 LLVMValueRef coord)
4078 {
4079 coord = to_float(ctx, coord);
4080 coord = ac_build_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coord, 1, 0);
4081 coord = to_integer(ctx, coord);
4082 return coord;
4083 }
4084
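/* Translate a NIR texture instruction into an AMD image intrinsic.
 * The address vector is packed in the order the hardware expects:
 * packed offsets, LOD bias, depth-compare reference, derivatives,
 * coordinates, then an explicit LOD or sample index, before being
 * handed to build_tex_intrinsic via set_tex_fetch_args.
 */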
4085 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
4086 {
4087 LLVMValueRef result = NULL;
4088 struct ac_image_args args = { 0 };
4089 unsigned dmask = 0xf;
4090 LLVMValueRef address[16];
4091 LLVMValueRef coords[5];
4092 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
4093 LLVMValueRef bias = NULL, offsets = NULL;
4094 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
4095 LLVMValueRef ddx = NULL, ddy = NULL;
4096 LLVMValueRef derivs[6];
4097 unsigned chan, count = 0;
4098 unsigned const_src = 0, num_deriv_comp = 0;
4099
4100 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
4101
4102 for (unsigned i = 0; i < instr->num_srcs; i++) {
4103 switch (instr->src[i].src_type) {
4104 case nir_tex_src_coord:
4105 coord = get_src(ctx, instr->src[i].src);
4106 break;
4107 case nir_tex_src_projector:
4108 break;
4109 case nir_tex_src_comparator:
4110 comparator = get_src(ctx, instr->src[i].src);
4111 break;
4112 case nir_tex_src_offset:
4113 offsets = get_src(ctx, instr->src[i].src);
4114 const_src = i;
4115 break;
4116 case nir_tex_src_bias:
4117 bias = get_src(ctx, instr->src[i].src);
4118 break;
4119 case nir_tex_src_lod:
4120 lod = get_src(ctx, instr->src[i].src);
4121 break;
4122 case nir_tex_src_ms_index:
4123 sample_index = get_src(ctx, instr->src[i].src);
4124 break;
4125 case nir_tex_src_ms_mcs:
4126 break;
4127 case nir_tex_src_ddx:
4128 ddx = get_src(ctx, instr->src[i].src);
4129 num_deriv_comp = instr->src[i].src.ssa->num_components;
4130 break;
4131 case nir_tex_src_ddy:
4132 ddy = get_src(ctx, instr->src[i].src);
4133 break;
4134 case nir_tex_src_texture_offset:
4135 case nir_tex_src_sampler_offset:
4136 case nir_tex_src_plane:
4137 default:
4138 break;
4139 }
4140 }
4141
4142 if (instr->op == nir_texop_txs && instr->sampler_dim == GLSL_SAMPLER_DIM_BUF) {
4143 result = get_buffer_size(ctx, res_ptr, true);
4144 goto write_result;
4145 }
4146
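/* nir_texop_texture_samples is answered from the resource descriptor
 * itself: dword 3 holds the image type in bits [31:28] (0xE/0xF are
 * the 2D MSAA types) and log2(sample count) in bits [19:16]; non-MSAA
 * images report a single sample.
 */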
4147 if (instr->op == nir_texop_texture_samples) {
4148 LLVMValueRef res, samples, is_msaa;
4149 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
4150 samples = LLVMBuildExtractElement(ctx->builder, res,
4151 LLVMConstInt(ctx->i32, 3, false), "");
4152 is_msaa = LLVMBuildLShr(ctx->builder, samples,
4153 LLVMConstInt(ctx->i32, 28, false), "");
4154 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
4155 LLVMConstInt(ctx->i32, 0xe, false), "");
4156 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
4157 LLVMConstInt(ctx->i32, 0xe, false), "");
4158
4159 samples = LLVMBuildLShr(ctx->builder, samples,
4160 LLVMConstInt(ctx->i32, 16, false), "");
4161 samples = LLVMBuildAnd(ctx->builder, samples,
4162 LLVMConstInt(ctx->i32, 0xf, false), "");
4163 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
4164 samples, "");
4165 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
4166 ctx->i32one, "");
4167 result = samples;
4168 goto write_result;
4169 }
4170
4171 if (coord)
4172 for (chan = 0; chan < instr->coord_components; chan++)
4173 coords[chan] = llvm_extract_elem(ctx, coord, chan);
4174
4175 if (offsets && instr->op != nir_texop_txf) {
4176 LLVMValueRef offset[3], pack;
4177 for (chan = 0; chan < 3; ++chan)
4178 offset[chan] = ctx->i32zero;
4179
4180 args.offset = true;
4181 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
4182 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
4183 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
4184 LLVMConstInt(ctx->i32, 0x3f, false), "");
4185 if (chan)
4186 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
4187 LLVMConstInt(ctx->i32, chan * 8, false), "");
4188 }
4189 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
4190 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
4191 address[count++] = pack;
4192
4193 }
4194 /* pack LOD bias value */
4195 if (instr->op == nir_texop_txb && bias) {
4196 address[count++] = bias;
4197 }
4198
4199 /* Pack depth comparison value */
4200 if (instr->is_shadow && comparator) {
4201 address[count++] = llvm_extract_elem(ctx, comparator, 0);
4202 }
4203
4204 /* pack derivatives */
4205 if (ddx || ddy) {
4206 switch (instr->sampler_dim) {
4207 case GLSL_SAMPLER_DIM_3D:
4208 case GLSL_SAMPLER_DIM_CUBE:
4209 num_deriv_comp = 3;
4210 break;
4211 case GLSL_SAMPLER_DIM_2D:
4212 default:
4213 num_deriv_comp = 2;
4214 break;
4215 case GLSL_SAMPLER_DIM_1D:
4216 num_deriv_comp = 1;
4217 break;
4218 }
4219
4220 for (unsigned i = 0; i < num_deriv_comp; i++) {
4221 derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
4222 derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
4223 }
4224 }
4225
4226 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
4227 for (chan = 0; chan < instr->coord_components; chan++)
4228 coords[chan] = to_float(ctx, coords[chan]);
4229 if (instr->coord_components == 3)
4230 coords[3] = LLVMGetUndef(ctx->f32);
4231 ac_prepare_cube_coords(&ctx->ac,
4232 instr->op == nir_texop_txd, instr->is_array,
4233 coords, derivs);
4234 if (num_deriv_comp)
4235 num_deriv_comp--;
4236 }
4237
4238 if (ddx || ddy) {
4239 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
4240 address[count++] = derivs[i];
4241 }
4242
4243 /* Pack texture coordinates */
4244 if (coord) {
4245 address[count++] = coords[0];
4246 if (instr->coord_components > 1) {
4247 if (instr->sampler_dim == GLSL_SAMPLER_DIM_1D && instr->is_array && instr->op != nir_texop_txf) {
4248 coords[1] = apply_round_slice(ctx, coords[1]);
4249 }
4250 address[count++] = coords[1];
4251 }
4252 if (instr->coord_components > 2) {
4253 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
4254 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
4255 coords[2] = apply_round_slice(ctx, coords[2]);
4256 }
4257 address[count++] = coords[2];
4258 }
4259 }
4260
4261 /* Pack LOD */
4262 if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
4263 address[count++] = lod;
4264 } else if (instr->op == nir_texop_txf_ms && sample_index) {
4265 address[count++] = sample_index;
4266 } else if (instr->op == nir_texop_txs) {
4267 count = 0;
4268 if (lod)
4269 address[count++] = lod;
4270 else
4271 address[count++] = ctx->i32zero;
4272 }
4273
4274 for (chan = 0; chan < count; chan++) {
4275 address[chan] = LLVMBuildBitCast(ctx->builder,
4276 address[chan], ctx->i32, "");
4277 }
4278
4279 if (instr->op == nir_texop_samples_identical) {
4280 LLVMValueRef txf_address[4];
4281 struct ac_image_args txf_args = { 0 };
4282 unsigned txf_count = count;
4283 memcpy(txf_address, address, sizeof(txf_address));
4284
4285 if (!instr->is_array)
4286 txf_address[2] = ctx->i32zero;
4287 txf_address[3] = ctx->i32zero;
4288
4289 set_tex_fetch_args(ctx, &txf_args, instr, nir_texop_txf,
4290 fmask_ptr, NULL,
4291 txf_address, txf_count, 0xf);
4292
4293 result = build_tex_intrinsic(ctx, instr, &txf_args);
4294
4295 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
4296 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
4297 goto write_result;
4298 }
4299
4300 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS &&
4301 instr->op != nir_texop_txs) {
4302 unsigned sample_chan = instr->is_array ? 3 : 2;
4303 address[sample_chan] = adjust_sample_index_using_fmask(ctx,
4304 address[0],
4305 address[1],
4306 instr->is_array ? address[2] : NULL,
4307 address[sample_chan],
4308 fmask_ptr);
4309 }
4310
4311 if (offsets && instr->op == nir_texop_txf) {
4312 nir_const_value *const_offset =
4313 nir_src_as_const_value(instr->src[const_src].src);
4314 int num_offsets = instr->src[const_src].src.ssa->num_components;
4315 assert(const_offset);
4316 num_offsets = MIN2(num_offsets, instr->coord_components);
4317 if (num_offsets > 2)
4318 address[2] = LLVMBuildAdd(ctx->builder,
4319 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
4320 if (num_offsets > 1)
4321 address[1] = LLVMBuildAdd(ctx->builder,
4322 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
4323 address[0] = LLVMBuildAdd(ctx->builder,
4324 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
4325
4326 }
4327
4328 /* TODO TG4 support */
4329 if (instr->op == nir_texop_tg4) {
4330 if (instr->is_shadow)
4331 dmask = 1;
4332 else
4333 dmask = 1 << instr->component;
4334 }
4335 set_tex_fetch_args(ctx, &args, instr, instr->op,
4336 res_ptr, samp_ptr, address, count, dmask);
4337
4338 result = build_tex_intrinsic(ctx, instr, &args);
4339
4340 if (instr->op == nir_texop_query_levels)
4341 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
4342 else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
4343 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
4344 else if (instr->op == nir_texop_txs &&
4345 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
4346 instr->is_array) {
4347 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
4348 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
4349 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
4350 z = LLVMBuildSDiv(ctx->builder, z, six, "");
4351 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
4352 } else if (instr->dest.ssa.num_components != 4)
4353 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
4354
4355 write_result:
4356 if (result) {
4357 assert(instr->dest.is_ssa);
4358 result = to_integer(ctx, result);
4359 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4360 }
4361 }
4362
4363
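/* Phis are handled in two passes: visit_phi creates an empty LLVM phi
 * while blocks are being emitted, and phi_post_pass adds the incoming
 * values once all predecessor blocks exist.
 */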
4364 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
4365 {
4366 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
4367 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
4368
4369 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
4370 _mesa_hash_table_insert(ctx->phis, instr, result);
4371 }
4372
4373 static void visit_post_phi(struct nir_to_llvm_context *ctx,
4374 nir_phi_instr *instr,
4375 LLVMValueRef llvm_phi)
4376 {
4377 nir_foreach_phi_src(src, instr) {
4378 LLVMBasicBlockRef block = get_block(ctx, src->pred);
4379 LLVMValueRef llvm_src = get_src(ctx, src->src);
4380
4381 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
4382 }
4383 }
4384
4385 static void phi_post_pass(struct nir_to_llvm_context *ctx)
4386 {
4387 struct hash_entry *entry;
4388 hash_table_foreach(ctx->phis, entry) {
4389 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
4390 (LLVMValueRef)entry->data);
4391 }
4392 }
4393
4394
4395 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
4396 nir_ssa_undef_instr *instr)
4397 {
4398 unsigned num_components = instr->def.num_components;
4399 LLVMValueRef undef;
4400
4401 if (num_components == 1)
4402 undef = LLVMGetUndef(ctx->i32);
4403 else {
4404 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
4405 }
4406 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
4407 }
4408
4409 static void visit_jump(struct nir_to_llvm_context *ctx,
4410 nir_jump_instr *instr)
4411 {
4412 switch (instr->type) {
4413 case nir_jump_break:
4414 LLVMBuildBr(ctx->builder, ctx->break_block);
4415 LLVMClearInsertionPosition(ctx->builder);
4416 break;
4417 case nir_jump_continue:
4418 LLVMBuildBr(ctx->builder, ctx->continue_block);
4419 LLVMClearInsertionPosition(ctx->builder);
4420 break;
4421 default:
4422 fprintf(stderr, "Unknown NIR jump instr: ");
4423 nir_print_instr(&instr->instr, stderr);
4424 fprintf(stderr, "\n");
4425 abort();
4426 }
4427 }
4428
4429 static void visit_cf_list(struct nir_to_llvm_context *ctx,
4430 struct exec_list *list);
4431
4432 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
4433 {
4434 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
4435 nir_foreach_instr(instr, block)
4436 {
4437 switch (instr->type) {
4438 case nir_instr_type_alu:
4439 visit_alu(ctx, nir_instr_as_alu(instr));
4440 break;
4441 case nir_instr_type_load_const:
4442 visit_load_const(ctx, nir_instr_as_load_const(instr));
4443 break;
4444 case nir_instr_type_intrinsic:
4445 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
4446 break;
4447 case nir_instr_type_tex:
4448 visit_tex(ctx, nir_instr_as_tex(instr));
4449 break;
4450 case nir_instr_type_phi:
4451 visit_phi(ctx, nir_instr_as_phi(instr));
4452 break;
4453 case nir_instr_type_ssa_undef:
4454 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
4455 break;
4456 case nir_instr_type_jump:
4457 visit_jump(ctx, nir_instr_as_jump(instr));
4458 break;
4459 default:
4460 fprintf(stderr, "Unknown NIR instr type: ");
4461 nir_print_instr(instr, stderr);
4462 fprintf(stderr, "\n");
4463 abort();
4464 }
4465 }
4466
4467 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
4468 }
4469
4470 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
4471 {
4472 LLVMValueRef value = get_src(ctx, if_stmt->condition);
4473
4474 LLVMBasicBlockRef merge_block =
4475 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4476 LLVMBasicBlockRef if_block =
4477 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4478 LLVMBasicBlockRef else_block = merge_block;
4479 if (!exec_list_is_empty(&if_stmt->else_list))
4480 else_block = LLVMAppendBasicBlockInContext(
4481 ctx->context, ctx->main_function, "");
4482
4483 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
4484 LLVMConstInt(ctx->i32, 0, false), "");
4485 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
4486
4487 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
4488 visit_cf_list(ctx, &if_stmt->then_list);
4489 if (LLVMGetInsertBlock(ctx->builder))
4490 LLVMBuildBr(ctx->builder, merge_block);
4491
4492 if (!exec_list_is_empty(&if_stmt->else_list)) {
4493 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
4494 visit_cf_list(ctx, &if_stmt->else_list);
4495 if (LLVMGetInsertBlock(ctx->builder))
4496 LLVMBuildBr(ctx->builder, merge_block);
4497 }
4498
4499 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
4500 }
4501
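/* NIR loops become a header block that doubles as the continue target
 * plus a separate break block; the back edge is only emitted when the
 * loop body did not already end in a jump.
 */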
4502 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
4503 {
4504 LLVMBasicBlockRef continue_parent = ctx->continue_block;
4505 LLVMBasicBlockRef break_parent = ctx->break_block;
4506
4507 ctx->continue_block =
4508 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4509 ctx->break_block =
4510 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
4511
4512 LLVMBuildBr(ctx->builder, ctx->continue_block);
4513 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
4514 visit_cf_list(ctx, &loop->body);
4515
4516 if (LLVMGetInsertBlock(ctx->builder))
4517 LLVMBuildBr(ctx->builder, ctx->continue_block);
4518 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
4519
4520 ctx->continue_block = continue_parent;
4521 ctx->break_block = break_parent;
4522 }
4523
4524 static void visit_cf_list(struct nir_to_llvm_context *ctx,
4525 struct exec_list *list)
4526 {
4527 foreach_list_typed(nir_cf_node, node, node, list)
4528 {
4529 switch (node->type) {
4530 case nir_cf_node_block:
4531 visit_block(ctx, nir_cf_node_as_block(node));
4532 break;
4533
4534 case nir_cf_node_if:
4535 visit_if(ctx, nir_cf_node_as_if(node));
4536 break;
4537
4538 case nir_cf_node_loop:
4539 visit_loop(ctx, nir_cf_node_as_loop(node));
4540 break;
4541
4542 default:
4543 assert(0);
4544 }
4545 }
4546 }
4547
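/* Fetch vertex shader inputs: each attribute slot gets its descriptor
 * from the vertex-buffer table and is loaded with llvm.SI.vs.load.input,
 * indexed by vertex id + base vertex (or instance id + start instance
 * for instanced attributes), then split into per-channel registers.
 */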
4548 static void
4549 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
4550 struct nir_variable *variable)
4551 {
4552 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
4553 LLVMValueRef t_offset;
4554 LLVMValueRef t_list;
4555 LLVMValueRef args[3];
4556 LLVMValueRef input;
4557 LLVMValueRef buffer_index;
4558 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
4559 int idx = variable->data.location;
4560 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
4561
4562 variable->data.driver_location = idx * 4;
4563
4564 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
4565 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
4566 ctx->start_instance, "");
4567 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
4568 ctx->shader_info->vs.vgpr_comp_cnt);
4569 } else
4570 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
4571 ctx->base_vertex, "");
4572
4573 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
4574 t_offset = LLVMConstInt(ctx->i32, index + i, false);
4575
4576 t_list = ac_build_indexed_load_const(&ctx->ac, t_list_ptr, t_offset);
4577 args[0] = t_list;
4578 args[1] = LLVMConstInt(ctx->i32, 0, false);
4579 args[2] = buffer_index;
4580 input = ac_build_intrinsic(&ctx->ac,
4581 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
4582 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND |
4583 AC_FUNC_ATTR_LEGACY);
4584
4585 for (unsigned chan = 0; chan < 4; chan++) {
4586 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
4587 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
4588 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
4589 input, llvm_chan, ""));
4590 }
4591 }
4592 }
4593
4594 static void interp_fs_input(struct nir_to_llvm_context *ctx,
4595 unsigned attr,
4596 LLVMValueRef interp_param,
4597 LLVMValueRef prim_mask,
4598 LLVMValueRef result[4])
4599 {
4600 LLVMValueRef attr_number;
4601 unsigned chan;
4602 LLVMValueRef i, j;
4603 bool interp = interp_param != NULL;
4604
4605 attr_number = LLVMConstInt(ctx->i32, attr, false);
4606
4607 /* fs.constant returns the param from the middle vertex, so it's not
4608 * really useful for flat shading. It's meant to be used for custom
4609 * interpolation (but the intrinsic can't fetch from the other two
4610 * vertices).
4611 *
4612 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
4613 * to do the right thing. The only reason we use fs.constant is that
4614 * fs.interp cannot be used on integers, because they can be equal
4615 * to NaN.
4616 */
4617 if (interp) {
4618 interp_param = LLVMBuildBitCast(ctx->builder, interp_param,
4619 LLVMVectorType(ctx->f32, 2), "");
4620
4621 i = LLVMBuildExtractElement(ctx->builder, interp_param,
4622 ctx->i32zero, "");
4623 j = LLVMBuildExtractElement(ctx->builder, interp_param,
4624 ctx->i32one, "");
4625 }
4626
4627 for (chan = 0; chan < 4; chan++) {
4628 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
4629
4630 if (interp) {
4631 result[chan] = ac_build_fs_interp(&ctx->ac,
4632 llvm_chan,
4633 attr_number,
4634 prim_mask, i, j);
4635 } else {
4636 result[chan] = ac_build_fs_interp_mov(&ctx->ac,
4637 LLVMConstInt(ctx->i32, 2, false),
4638 llvm_chan,
4639 attr_number,
4640 prim_mask);
4641 }
4642 }
4643 }
4644
4645 static void
4646 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
4647 struct nir_variable *variable)
4648 {
4649 int idx = variable->data.location;
4650 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4651 LLVMValueRef interp;
4652
4653 variable->data.driver_location = idx * 4;
4654 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
4655
4656 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
4657 unsigned interp_type;
4658 if (variable->data.sample) {
4659 interp_type = INTERP_SAMPLE;
4660 ctx->shader_info->fs.force_persample = true;
4661 } else if (variable->data.centroid)
4662 interp_type = INTERP_CENTROID;
4663 else
4664 interp_type = INTERP_CENTER;
4665
4666 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
4667 } else
4668 interp = NULL;
4669
4670 for (unsigned i = 0; i < attrib_count; ++i)
4671 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
4672
4673 }
4674
4675 static void
4676 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
4677 struct nir_variable *variable)
4678 {
4679 switch (ctx->stage) {
4680 case MESA_SHADER_VERTEX:
4681 handle_vs_input_decl(ctx, variable);
4682 break;
4683 case MESA_SHADER_FRAGMENT:
4684 handle_fs_input_decl(ctx, variable);
4685 break;
4686 default:
4687 break;
4688 }
4689
4690 }
4691
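/* Interpolate all declared FS inputs up front. Generic varyings (plus
 * PNTC, primitive id and layer) go through the interpolator and flat
 * inputs are recorded in flat_shaded_mask; gl_FragCoord comes from the
 * frag_pos system values, with 1/w in the fourth channel.
 */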
4692 static void
4693 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
4694 struct nir_shader *nir)
4695 {
4696 unsigned index = 0;
4697 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
4698 LLVMValueRef interp_param;
4699 LLVMValueRef *inputs = ctx->inputs + radeon_llvm_reg_index_soa(i, 0);
4700
4701 if (!(ctx->input_mask & (1ull << i)))
4702 continue;
4703
4704 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC ||
4705 i == VARYING_SLOT_PRIMITIVE_ID || i == VARYING_SLOT_LAYER) {
4706 interp_param = *inputs;
4707 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
4708 inputs);
4709
4710 if (!interp_param)
4711 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
4712 ++index;
4713 } else if (i == VARYING_SLOT_POS) {
4714 for (int j = 0; j < 3; ++j)
4715 inputs[j] = ctx->frag_pos[j];
4716
4717 inputs[3] = ac_build_fdiv(&ctx->ac, ctx->f32one, ctx->frag_pos[3]);
4718 }
4719 }
4720 ctx->shader_info->fs.num_interp = index;
4721 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
4722 ctx->shader_info->fs.has_pcoord = true;
4723 if (ctx->input_mask & (1 << VARYING_SLOT_PRIMITIVE_ID))
4724 ctx->shader_info->fs.prim_id_input = true;
4725 if (ctx->input_mask & (1 << VARYING_SLOT_LAYER))
4726 ctx->shader_info->fs.layer_input = true;
4727 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
4728 }
4729
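/* LLVM only promotes allocas that live in the function's entry block,
 * so temporaries are allocated there with a secondary builder and then
 * zero-initialized at the current insertion point.
 */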
4730 static LLVMValueRef
4731 ac_build_alloca(struct nir_to_llvm_context *ctx,
4732 LLVMTypeRef type,
4733 const char *name)
4734 {
4735 LLVMBuilderRef builder = ctx->builder;
4736 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
4737 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
4738 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
4739 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
4740 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
4741 LLVMValueRef res;
4742
4743 if (first_instr) {
4744 LLVMPositionBuilderBefore(first_builder, first_instr);
4745 } else {
4746 LLVMPositionBuilderAtEnd(first_builder, first_block);
4747 }
4748
4749 res = LLVMBuildAlloca(first_builder, type, name);
4750 LLVMBuildStore(builder, LLVMConstNull(type), res);
4751
4752 LLVMDisposeBuilder(first_builder);
4753
4754 return res;
4755 }
4756
4757 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
4758 LLVMTypeRef type,
4759 const char *name)
4760 {
4761 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
4762 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
4763 return ptr;
4764 }
4765
4766 static void
4767 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
4768 struct nir_variable *variable)
4769 {
4770 int idx = variable->data.location + variable->data.index;
4771 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4772 uint64_t mask_attribs;
4773 variable->data.driver_location = idx * 4;
4774
4775 /* tess ctrl has its own load/store paths for outputs */
4776 if (ctx->stage == MESA_SHADER_TESS_CTRL)
4777 return;
4778
4779 mask_attribs = ((1ull << attrib_count) - 1) << idx;
4780 if (ctx->stage == MESA_SHADER_VERTEX ||
4781 ctx->stage == MESA_SHADER_TESS_EVAL ||
4782 ctx->stage == MESA_SHADER_GEOMETRY) {
4783 if (idx == VARYING_SLOT_CLIP_DIST0) {
4784 int length = ctx->num_output_clips + ctx->num_output_culls;
4785 if (ctx->stage == MESA_SHADER_VERTEX) {
4786 ctx->shader_info->vs.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
4787 ctx->shader_info->vs.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
4788 }
4789 if (ctx->stage == MESA_SHADER_TESS_EVAL) {
4790 ctx->shader_info->tes.outinfo.clip_dist_mask = (1 << ctx->num_output_clips) - 1;
4791 ctx->shader_info->tes.outinfo.cull_dist_mask = (1 << ctx->num_output_culls) - 1;
4792 }
4793
4794 if (length > 4)
4795 attrib_count = 2;
4796 else
4797 attrib_count = 1;
4798 mask_attribs = 1ull << idx;
4799 }
4800 }
4801
4802 for (unsigned i = 0; i < attrib_count; ++i) {
4803 for (unsigned chan = 0; chan < 4; chan++) {
4804 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
4805 si_build_alloca_undef(ctx, ctx->f32, "");
4806 }
4807 }
4808 ctx->output_mask |= mask_attribs;
4809 }
4810
4811 static void
4812 setup_locals(struct nir_to_llvm_context *ctx,
4813 struct nir_function *func)
4814 {
4815 int i, j;
4816 ctx->num_locals = 0;
4817 nir_foreach_variable(variable, &func->impl->locals) {
4818 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4819 variable->data.driver_location = ctx->num_locals * 4;
4820 ctx->num_locals += attrib_count;
4821 }
4822 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
4823 if (!ctx->locals)
4824 return;
4825
4826 for (i = 0; i < ctx->num_locals; i++) {
4827 for (j = 0; j < 4; j++) {
4828 ctx->locals[i * 4 + j] =
4829 si_build_alloca_undef(ctx, ctx->f32, "temp");
4830 }
4831 }
4832 }
4833
4834 static LLVMValueRef
4835 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
4836 {
4837 v = to_float(ctx, v);
4838 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, lo));
4839 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", ctx->f32, v, LLVMConstReal(ctx->f32, hi));
4840 }
4841
4842
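/* Pack two 16-bit integers into one dword (low half from src0, high
 * half from src1); used by the compressed export paths below.
 */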
4843 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
4844 LLVMValueRef src0, LLVMValueRef src1)
4845 {
4846 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
4847 LLVMValueRef comp[2];
4848
4849 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx->i32, 65535, 0), "");
4850 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx->i32, 65535, 0), "");
4851 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
4852 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
4853 }
4854
4855 /* Initialize arguments for the shader export intrinsic */
4856 static void
4857 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
4858 LLVMValueRef *values,
4859 unsigned target,
4860 struct ac_export_args *args)
4861 {
4862 /* Default is 0xf. Adjusted below depending on the format. */
4863 args->enabled_channels = 0xf;
4864
4865 /* Specify whether the EXEC mask represents the valid mask */
4866 args->valid_mask = 0;
4867
4868 /* Specify whether this is the last export */
4869 args->done = 0;
4870
4871 /* Specify the target we are exporting */
4872 args->target = target;
4873
4874 args->compr = false;
4875 args->out[0] = LLVMGetUndef(ctx->f32);
4876 args->out[1] = LLVMGetUndef(ctx->f32);
4877 args->out[2] = LLVMGetUndef(ctx->f32);
4878 args->out[3] = LLVMGetUndef(ctx->f32);
4879
4880 if (!values)
4881 return;
4882
4883 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4884 LLVMValueRef val[4];
4885 unsigned index = target - V_008DFC_SQ_EXP_MRT;
4886 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4887 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4888
4889 switch(col_format) {
4890 case V_028714_SPI_SHADER_ZERO:
4891 args->enabled_channels = 0; /* writemask */
4892 args->target = V_008DFC_SQ_EXP_NULL;
4893 break;
4894
4895 case V_028714_SPI_SHADER_32_R:
4896 args->enabled_channels = 1;
4897 args->out[0] = values[0];
4898 break;
4899
4900 case V_028714_SPI_SHADER_32_GR:
4901 args->enabled_channels = 0x3;
4902 args->out[0] = values[0];
4903 args->out[1] = values[1];
4904 break;
4905
4906 case V_028714_SPI_SHADER_32_AR:
4907 args->enabled_channels = 0x9;
4908 args->out[0] = values[0];
4909 args->out[3] = values[3];
4910 break;
4911
4912 case V_028714_SPI_SHADER_FP16_ABGR:
4913 args->compr = 1;
4914
4915 for (unsigned chan = 0; chan < 2; chan++) {
4916 LLVMValueRef pack_args[2] = {
4917 values[2 * chan],
4918 values[2 * chan + 1]
4919 };
4920 LLVMValueRef packed;
4921
4922 packed = ac_build_cvt_pkrtz_f16(&ctx->ac, pack_args);
4923 args->out[chan] = packed;
4924 }
4925 break;
4926
4927 case V_028714_SPI_SHADER_UNORM16_ABGR:
4928 for (unsigned chan = 0; chan < 4; chan++) {
4929 val[chan] = ac_build_clamp(&ctx->ac, values[chan]);
4930 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4931 LLVMConstReal(ctx->f32, 65535), "");
4932 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4933 LLVMConstReal(ctx->f32, 0.5), "");
4934 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4935 ctx->i32, "");
4936 }
4937
4938 args->compr = 1;
4939 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
4940 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
4941 break;
4942
4943 case V_028714_SPI_SHADER_SNORM16_ABGR:
4944 for (unsigned chan = 0; chan < 4; chan++) {
4945 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4946 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4947 LLVMConstReal(ctx->f32, 32767), "");
4948
4949 /* If positive, add 0.5, else add -0.5. */
4950 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4951 LLVMBuildSelect(ctx->builder,
4952 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4953 val[chan], ctx->f32zero, ""),
4954 LLVMConstReal(ctx->f32, 0.5),
4955 LLVMConstReal(ctx->f32, -0.5), ""), "");
4956 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4957 }
4958
4959 args->compr = 1;
4960 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
4961 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
4962 break;
4963
4964 case V_028714_SPI_SHADER_UINT16_ABGR: {
4965 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4966
4967 for (unsigned chan = 0; chan < 4; chan++) {
4968 val[chan] = to_integer(ctx, values[chan]);
4969 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4970 }
4971
4972 args->compr = 1;
4973 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
4974 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
4975 break;
4976 }
4977
4978 case V_028714_SPI_SHADER_SINT16_ABGR: {
4979 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4980 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4981
4982 /* Clamp. */
4983 for (unsigned chan = 0; chan < 4; chan++) {
4984 val[chan] = to_integer(ctx, values[chan]);
4985 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4986 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4987 }
4988
4989 args->compr = 1;
4990 args->out[0] = emit_pack_int16(ctx, val[0], val[1]);
4991 args->out[1] = emit_pack_int16(ctx, val[2], val[3]);
4992 break;
4993 }
4994
4995 default:
4996 case V_028714_SPI_SHADER_32_ABGR:
4997 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
4998 break;
4999 }
5000 } else
5001 memcpy(&args->out[0], values, sizeof(values[0]) * 4);
5002
5003 for (unsigned i = 0; i < 4; ++i)
5004 args->out[i] = to_float(ctx, args->out[i]);
5005 }
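
/*
 * Reference sketch (not part of this change; the helper name is
 * hypothetical): the UNORM16_ABGR case above is the IR equivalent of this
 * scalar conversion, i.e. clamp to [0,1], scale by 65535 and round to
 * nearest by adding 0.5 before the float-to-uint truncation.
 */
static unsigned example_float_to_unorm16(float f)
{
	if (f < 0.0f)
		f = 0.0f;
	if (f > 1.0f)
		f = 1.0f;
	/* e.g. 0.5f -> 0.5 * 65535 + 0.5 = 32768 after truncation */
	return (unsigned)(f * 65535.0f + 0.5f);
}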
5006
5007 static void
5008 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
5009 struct ac_vs_output_info *outinfo)
5010 {
5011 uint32_t param_count = 0;
5012 unsigned target;
5013 unsigned pos_idx, num_pos_exports = 0;
5014 struct ac_export_args args, pos_args[4] = {};
5015 LLVMValueRef psize_value = NULL, layer_value = NULL, viewport_index_value = NULL;
5016 int i;
5017
5018 outinfo->prim_id_output = 0xffffffff;
5019 outinfo->layer_output = 0xffffffff;
5020 if (ctx->output_mask & (1ull << VARYING_SLOT_CLIP_DIST0)) {
5021 LLVMValueRef slots[8];
5022 unsigned j;
5023
5024 if (outinfo->cull_dist_mask)
5025 outinfo->cull_dist_mask <<= ctx->num_output_clips;
5026
5027 i = VARYING_SLOT_CLIP_DIST0;
5028 for (j = 0; j < ctx->num_output_clips + ctx->num_output_culls; j++)
5029 slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
5030 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5031
5032 for (i = ctx->num_output_clips + ctx->num_output_culls; i < 8; i++)
5033 slots[i] = LLVMGetUndef(ctx->f32);
5034
5035 if (ctx->num_output_clips + ctx->num_output_culls > 4) {
5036 target = V_008DFC_SQ_EXP_POS + 3;
5037 si_llvm_init_export_args(ctx, &slots[4], target, &args);
5038 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5039 &args, sizeof(args));
5040 }
5041
5042 target = V_008DFC_SQ_EXP_POS + 2;
5043 si_llvm_init_export_args(ctx, &slots[0], target, &args);
5044 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5045 &args, sizeof(args));
5046
5047 }
5048
5049 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5050 LLVMValueRef values[4];
5051 if (!(ctx->output_mask & (1ull << i)))
5052 continue;
5053
5054 for (unsigned j = 0; j < 4; j++)
5055 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
5056 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5057
5058 if (i == VARYING_SLOT_POS) {
5059 target = V_008DFC_SQ_EXP_POS;
5060 } else if (i == VARYING_SLOT_CLIP_DIST0) {
5061 continue;
5062 } else if (i == VARYING_SLOT_PSIZ) {
5063 outinfo->writes_pointsize = true;
5064 psize_value = values[0];
5065 continue;
5066 } else if (i == VARYING_SLOT_LAYER) {
5067 outinfo->writes_layer = true;
5068 layer_value = values[0];
5069 outinfo->layer_output = param_count;
5070 target = V_008DFC_SQ_EXP_PARAM + param_count;
5071 param_count++;
5072 } else if (i == VARYING_SLOT_VIEWPORT) {
5073 outinfo->writes_viewport_index = true;
5074 viewport_index_value = values[0];
5075 continue;
5076 } else if (i == VARYING_SLOT_PRIMITIVE_ID) {
5077 outinfo->prim_id_output = param_count;
5078 target = V_008DFC_SQ_EXP_PARAM + param_count;
5079 param_count++;
5080 } else if (i >= VARYING_SLOT_VAR0) {
5081 outinfo->export_mask |= 1u << (i - VARYING_SLOT_VAR0);
5082 target = V_008DFC_SQ_EXP_PARAM + param_count;
5083 param_count++;
5084 }
5085
5086 si_llvm_init_export_args(ctx, values, target, &args);
5087
5088 if (target >= V_008DFC_SQ_EXP_POS &&
5089 target <= (V_008DFC_SQ_EXP_POS + 3)) {
5090 memcpy(&pos_args[target - V_008DFC_SQ_EXP_POS],
5091 &args, sizeof(args));
5092 } else {
5093 ac_build_export(&ctx->ac, &args);
5094 }
5095 }
5096
5097 /* We need to add the position output manually if it's missing. */
5098 if (!pos_args[0].out[0]) {
5099 pos_args[0].enabled_channels = 0xf;
5100 pos_args[0].valid_mask = 0;
5101 pos_args[0].done = 0;
5102 pos_args[0].target = V_008DFC_SQ_EXP_POS;
5103 pos_args[0].compr = 0;
5104 pos_args[0].out[0] = ctx->f32zero; /* X */
5105 pos_args[0].out[1] = ctx->f32zero; /* Y */
5106 pos_args[0].out[2] = ctx->f32zero; /* Z */
5107 pos_args[0].out[3] = ctx->f32one; /* W */
5108 }
5109
5110 	uint32_t mask = ((outinfo->writes_pointsize ? 1 : 0) |
5111 			 (outinfo->writes_layer ? 4 : 0) |
5112 			 (outinfo->writes_viewport_index ? 8 : 0));
5113 if (mask) {
5114 pos_args[1].enabled_channels = mask;
5115 pos_args[1].valid_mask = 0;
5116 pos_args[1].done = 0;
5117 pos_args[1].target = V_008DFC_SQ_EXP_POS + 1;
5118 pos_args[1].compr = 0;
5119 pos_args[1].out[0] = ctx->f32zero; /* X */
5120 pos_args[1].out[1] = ctx->f32zero; /* Y */
5121 pos_args[1].out[2] = ctx->f32zero; /* Z */
5122 pos_args[1].out[3] = ctx->f32zero; /* W */
5123
5124 		if (outinfo->writes_pointsize)
5125 			pos_args[1].out[0] = psize_value;
5126 		if (outinfo->writes_layer)
5127 			pos_args[1].out[2] = layer_value;
5128 		if (outinfo->writes_viewport_index)
5129 			pos_args[1].out[3] = viewport_index_value;
5130 }
5131 for (i = 0; i < 4; i++) {
5132 if (pos_args[i].out[0])
5133 num_pos_exports++;
5134 }
5135
5136 pos_idx = 0;
5137 for (i = 0; i < 4; i++) {
5138 if (!pos_args[i].out[0])
5139 continue;
5140
5141 /* Specify the target we are exporting */
5142 pos_args[i].target = V_008DFC_SQ_EXP_POS + pos_idx++;
5143 if (pos_idx == num_pos_exports)
5144 pos_args[i].done = 1;
5145 ac_build_export(&ctx->ac, &pos_args[i]);
5146 }
5147
5148 outinfo->pos_exports = num_pos_exports;
5149 outinfo->param_exports = param_count;
5150 }
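
/*
 * Sketch of the second position export's writemask (hypothetical helper,
 * mirroring the `mask` computation above): the point size goes in X, the
 * layer in Z and the viewport index in W, so the enabled channels are
 * bits 0, 2 and 3.
 */
static unsigned example_misc_vec_channels(bool writes_pointsize,
					  bool writes_layer,
					  bool writes_viewport_index)
{
	return (writes_pointsize ? 0x1 : 0) |     /* X: point size */
	       (writes_layer ? 0x4 : 0) |         /* Z: layer */
	       (writes_viewport_index ? 0x8 : 0); /* W: viewport index */
}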
5151
5152 static void
5153 handle_es_outputs_post(struct nir_to_llvm_context *ctx,
5154 struct ac_es_output_info *outinfo)
5155 {
5156 int j;
5157 uint64_t max_output_written = 0;
5158 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5159 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
5160 int param_index;
5161 int length = 4;
5162
5163 if (!(ctx->output_mask & (1ull << i)))
5164 continue;
5165
5166 if (i == VARYING_SLOT_CLIP_DIST0)
5167 length = ctx->num_output_clips + ctx->num_output_culls;
5168
5169 param_index = shader_io_get_unique_index(i);
5170
5171 max_output_written = MAX2(param_index + (length > 4), max_output_written);
5172
5173 for (j = 0; j < length; j++) {
5174 LLVMValueRef out_val = LLVMBuildLoad(ctx->builder, out_ptr[j], "");
5175 out_val = LLVMBuildBitCast(ctx->builder, out_val, ctx->i32, "");
5176
5177 ac_build_buffer_store_dword(&ctx->ac,
5178 ctx->esgs_ring,
5179 out_val, 1,
5180 NULL, ctx->es2gs_offset,
5181 (4 * param_index + j) * 4,
5182 1, 1, true, true);
5183 }
5184 }
5185 outinfo->esgs_itemsize = (max_output_written + 1) * 16;
5186 }
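
/*
 * Worked example of the ESGS ring addressing above (hypothetical helper):
 * every output slot occupies one vec4 (16 bytes), so component j of the
 * slot with unique index param_index is stored at this byte offset, with
 * es2gs_offset added by the buffer store as the wave's base.
 */
static unsigned example_esgs_byte_offset(unsigned param_index, unsigned j)
{
	/* e.g. param_index 2, component 1 -> (4 * 2 + 1) * 4 = 36 bytes */
	return (4 * param_index + j) * 4;
}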
5187
5188 static void
5189 handle_ls_outputs_post(struct nir_to_llvm_context *ctx)
5190 {
5191 LLVMValueRef vertex_id = ctx->rel_auto_id;
5192 LLVMValueRef vertex_dw_stride = unpack_param(ctx, ctx->ls_out_layout, 13, 8);
5193 LLVMValueRef base_dw_addr = LLVMBuildMul(ctx->builder, vertex_id,
5194 vertex_dw_stride, "");
5195
5196 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5197 LLVMValueRef *out_ptr = &ctx->outputs[i * 4];
5198 int length = 4;
5199
5200 if (!(ctx->output_mask & (1ull << i)))
5201 continue;
5202
5203 if (i == VARYING_SLOT_CLIP_DIST0)
5204 length = ctx->num_output_clips + ctx->num_output_culls;
5205 int param = shader_io_get_unique_index(i);
5206 mark_tess_output(ctx, false, param);
5207 if (length > 4)
5208 mark_tess_output(ctx, false, param + 1);
5209 LLVMValueRef dw_addr = LLVMBuildAdd(ctx->builder, base_dw_addr,
5210 LLVMConstInt(ctx->i32, param * 4, false),
5211 "");
5212 for (unsigned j = 0; j < length; j++) {
5213 lds_store(ctx, dw_addr,
5214 LLVMBuildLoad(ctx->builder, out_ptr[j], ""));
5215 dw_addr = LLVMBuildAdd(ctx->builder, dw_addr, ctx->i32one, "");
5216 }
5217 }
5218 }
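
/*
 * Worked example of the LS output LDS layout above (hypothetical helper):
 * outputs are stored as dwords, one vec4 (4 dwords) per unique slot, with
 * a per-vertex stride unpacked from ls_out_layout.
 */
static unsigned example_ls_lds_dw_addr(unsigned rel_vertex_id,
				       unsigned vertex_dw_stride,
				       unsigned param, unsigned comp)
{
	return rel_vertex_id * vertex_dw_stride + param * 4 + comp;
}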
5219
5220 struct ac_build_if_state
5221 {
5222 struct nir_to_llvm_context *ctx;
5223 LLVMValueRef condition;
5224 LLVMBasicBlockRef entry_block;
5225 LLVMBasicBlockRef true_block;
5226 LLVMBasicBlockRef false_block;
5227 LLVMBasicBlockRef merge_block;
5228 };
5229
5230 static LLVMBasicBlockRef
5231 ac_build_insert_new_block(struct nir_to_llvm_context *ctx, const char *name)
5232 {
5233 LLVMBasicBlockRef current_block;
5234 LLVMBasicBlockRef next_block;
5235 LLVMBasicBlockRef new_block;
5236
5237 /* get current basic block */
5238 current_block = LLVMGetInsertBlock(ctx->builder);
5239
5240 	/* check if there's another block after this one */
5241 next_block = LLVMGetNextBasicBlock(current_block);
5242 if (next_block) {
5243 /* insert the new block before the next block */
5244 new_block = LLVMInsertBasicBlockInContext(ctx->context, next_block, name);
5245 }
5246 else {
5247 /* append new block after current block */
5248 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
5249 new_block = LLVMAppendBasicBlockInContext(ctx->context, function, name);
5250 }
5251 return new_block;
5252 }
5253
5254 static void
5255 ac_nir_build_if(struct ac_build_if_state *ifthen,
5256 struct nir_to_llvm_context *ctx,
5257 LLVMValueRef condition)
5258 {
5259 LLVMBasicBlockRef block = LLVMGetInsertBlock(ctx->builder);
5260
5261 memset(ifthen, 0, sizeof *ifthen);
5262 ifthen->ctx = ctx;
5263 ifthen->condition = condition;
5264 ifthen->entry_block = block;
5265
5266 /* create endif/merge basic block for the phi functions */
5267 ifthen->merge_block = ac_build_insert_new_block(ctx, "endif-block");
5268
5269 /* create/insert true_block before merge_block */
5270 ifthen->true_block =
5271 LLVMInsertBasicBlockInContext(ctx->context,
5272 ifthen->merge_block,
5273 "if-true-block");
5274
5275 /* successive code goes into the true block */
5276 LLVMPositionBuilderAtEnd(ctx->builder, ifthen->true_block);
5277 }
5278
5279 /**
5280 * End a conditional.
5281 */
5282 static void
5283 ac_nir_build_endif(struct ac_build_if_state *ifthen)
5284 {
5285 LLVMBuilderRef builder = ifthen->ctx->builder;
5286
5287 /* Insert branch to the merge block from current block */
5288 LLVMBuildBr(builder, ifthen->merge_block);
5289
5290 /*
5291 * Now patch in the various branch instructions.
5292 */
5293
5294 /* Insert the conditional branch instruction at the end of entry_block */
5295 LLVMPositionBuilderAtEnd(builder, ifthen->entry_block);
5296 if (ifthen->false_block) {
5297 /* we have an else clause */
5298 LLVMBuildCondBr(builder, ifthen->condition,
5299 ifthen->true_block, ifthen->false_block);
5300 }
5301 else {
5302 /* no else clause */
5303 LLVMBuildCondBr(builder, ifthen->condition,
5304 ifthen->true_block, ifthen->merge_block);
5305 }
5306
5307 /* Resume building code at end of the ifthen->merge_block */
5308 LLVMPositionBuilderAtEnd(builder, ifthen->merge_block);
5309 }
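
/*
 * Usage sketch for the if/endif helpers above (the condition here is
 * hypothetical); write_tess_factors() below follows the same pattern.
 */
static void example_if_usage(struct nir_to_llvm_context *ctx,
			     LLVMValueRef condition)
{
	struct ac_build_if_state state;

	ac_nir_build_if(&state, ctx, condition);
	/* IR emitted here lands in the true block */
	ac_nir_build_endif(&state);
}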
5310
5311 static void
5312 write_tess_factors(struct nir_to_llvm_context *ctx)
5313 {
5314 unsigned stride, outer_comps, inner_comps;
5315 struct ac_build_if_state if_ctx, inner_if_ctx;
5316 LLVMValueRef invocation_id = unpack_param(ctx, ctx->tcs_rel_ids, 8, 5);
5317 LLVMValueRef rel_patch_id = unpack_param(ctx, ctx->tcs_rel_ids, 0, 8);
5318 unsigned tess_inner_index, tess_outer_index;
5319 LLVMValueRef lds_base, lds_inner, lds_outer, byteoffset, buffer;
5320 LLVMValueRef out[6], vec0, vec1, tf_base, inner[4], outer[4];
5321 int i;
5322 emit_barrier(ctx);
5323
5324 switch (ctx->options->key.tcs.primitive_mode) {
5325 case GL_ISOLINES:
5326 stride = 2;
5327 outer_comps = 2;
5328 inner_comps = 0;
5329 break;
5330 case GL_TRIANGLES:
5331 stride = 4;
5332 outer_comps = 3;
5333 inner_comps = 1;
5334 break;
5335 case GL_QUADS:
5336 stride = 6;
5337 outer_comps = 4;
5338 inner_comps = 2;
5339 break;
5340 default:
5341 return;
5342 }
5343
5344 ac_nir_build_if(&if_ctx, ctx,
5345 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
5346 invocation_id, ctx->i32zero, ""));
5347
5348 tess_inner_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
5349 tess_outer_index = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
5350
5351 mark_tess_output(ctx, true, tess_inner_index);
5352 mark_tess_output(ctx, true, tess_outer_index);
5353 lds_base = get_tcs_out_current_patch_data_offset(ctx);
5354 lds_inner = LLVMBuildAdd(ctx->builder, lds_base,
5355 LLVMConstInt(ctx->i32, tess_inner_index * 4, false), "");
5356 lds_outer = LLVMBuildAdd(ctx->builder, lds_base,
5357 LLVMConstInt(ctx->i32, tess_outer_index * 4, false), "");
5358
5359 for (i = 0; i < 4; i++) {
5360 inner[i] = LLVMGetUndef(ctx->i32);
5361 outer[i] = LLVMGetUndef(ctx->i32);
5362 }
5363
5364 	/* LINES reversal: the tess factor buffer wants the two outer factors in reversed order. */
5365 if (ctx->options->key.tcs.primitive_mode == GL_ISOLINES) {
5366 outer[0] = out[1] = lds_load(ctx, lds_outer);
5367 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
5368 LLVMConstInt(ctx->i32, 1, false), "");
5369 outer[1] = out[0] = lds_load(ctx, lds_outer);
5370 } else {
5371 for (i = 0; i < outer_comps; i++) {
5372 outer[i] = out[i] =
5373 lds_load(ctx, lds_outer);
5374 lds_outer = LLVMBuildAdd(ctx->builder, lds_outer,
5375 LLVMConstInt(ctx->i32, 1, false), "");
5376 }
5377 for (i = 0; i < inner_comps; i++) {
5378 inner[i] = out[outer_comps+i] =
5379 lds_load(ctx, lds_inner);
5380 lds_inner = LLVMBuildAdd(ctx->builder, lds_inner,
5381 LLVMConstInt(ctx->i32, 1, false), "");
5382 }
5383 }
5384
5385 /* Convert the outputs to vectors for stores. */
5386 vec0 = ac_build_gather_values(&ctx->ac, out, MIN2(stride, 4));
5387 vec1 = NULL;
5388
5389 if (stride > 4)
5390 vec1 = ac_build_gather_values(&ctx->ac, out + 4, stride - 4);
5391
5392
5393 buffer = ctx->hs_ring_tess_factor;
5394 tf_base = ctx->tess_factor_offset;
5395 byteoffset = LLVMBuildMul(ctx->builder, rel_patch_id,
5396 LLVMConstInt(ctx->i32, 4 * stride, false), "");
5397
5398 ac_nir_build_if(&inner_if_ctx, ctx,
5399 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
5400 rel_patch_id, ctx->i32zero, ""));
5401
5402 /* Store the dynamic HS control word. */
5403 ac_build_buffer_store_dword(&ctx->ac, buffer,
5404 LLVMConstInt(ctx->i32, 0x80000000, false),
5405 1, ctx->i32zero, tf_base,
5406 0, 1, 0, true, false);
5407 ac_nir_build_endif(&inner_if_ctx);
5408
5409 /* Store the tessellation factors. */
5410 ac_build_buffer_store_dword(&ctx->ac, buffer, vec0,
5411 MIN2(stride, 4), byteoffset, tf_base,
5412 4, 1, 0, true, false);
5413 if (vec1)
5414 ac_build_buffer_store_dword(&ctx->ac, buffer, vec1,
5415 stride - 4, byteoffset, tf_base,
5416 20, 1, 0, true, false);
5417
5418 	/* TODO: store to off-chip memory for TES to read - but only if TES actually reads them. */
5419 if (1) {
5420 LLVMValueRef inner_vec, outer_vec, tf_outer_offset;
5421 LLVMValueRef tf_inner_offset;
5422 unsigned param_outer, param_inner;
5423
5424 param_outer = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_OUTER);
5425 tf_outer_offset = get_tcs_tes_buffer_address(ctx, NULL,
5426 LLVMConstInt(ctx->i32, param_outer, 0));
5427
5428 outer_vec = ac_build_gather_values(&ctx->ac, outer,
5429 util_next_power_of_two(outer_comps));
5430
5431 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, outer_vec,
5432 outer_comps, tf_outer_offset,
5433 ctx->oc_lds, 0, 1, 0, true, false);
5434 if (inner_comps) {
5435 param_inner = shader_io_get_unique_index(VARYING_SLOT_TESS_LEVEL_INNER);
5436 tf_inner_offset = get_tcs_tes_buffer_address(ctx, NULL,
5437 LLVMConstInt(ctx->i32, param_inner, 0));
5438
5439 inner_vec = inner_comps == 1 ? inner[0] :
5440 ac_build_gather_values(&ctx->ac, inner, inner_comps);
5441 ac_build_buffer_store_dword(&ctx->ac, ctx->hs_ring_tess_offchip, inner_vec,
5442 inner_comps, tf_inner_offset,
5443 ctx->oc_lds, 0, 1, 0, true, false);
5444 }
5445 }
5446 ac_nir_build_endif(&if_ctx);
5447 }
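
/*
 * Worked example of the tess factor ring addressing above (hypothetical
 * helper): each patch writes `stride` dwords of factors (2 for isolines,
 * 4 for triangles, 6 for quads), so a patch's factors start at
 * rel_patch_id * 4 * stride bytes past tf_base.
 */
static unsigned example_tf_byte_offset(unsigned rel_patch_id, unsigned stride)
{
	/* e.g. quads: patch 3 -> 3 * 4 * 6 = 72 bytes */
	return rel_patch_id * 4 * stride;
}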
5448
5449 static void
5450 handle_tcs_outputs_post(struct nir_to_llvm_context *ctx)
5451 {
5452 write_tess_factors(ctx);
5453 }
5454
5455 static void
5456 si_export_mrt_color(struct nir_to_llvm_context *ctx,
5457 LLVMValueRef *color, unsigned param, bool is_last)
5458 {
5459
5460 struct ac_export_args args;
5461
5462 /* Export */
5463 si_llvm_init_export_args(ctx, color, param,
5464 &args);
5465
5466 if (is_last) {
5467 args.valid_mask = 1; /* whether the EXEC mask is valid */
5468 args.done = 1; /* DONE bit */
5469 } else if (!args.enabled_channels)
5470 return; /* unnecessary NULL export */
5471
5472 ac_build_export(&ctx->ac, &args);
5473 }
5474
5475 static void
5476 si_export_mrt_z(struct nir_to_llvm_context *ctx,
5477 LLVMValueRef depth, LLVMValueRef stencil,
5478 LLVMValueRef samplemask)
5479 {
5480 struct ac_export_args args;
5481
5482 args.enabled_channels = 0;
5483 args.valid_mask = 1;
5484 args.done = 1;
5485 args.target = V_008DFC_SQ_EXP_MRTZ;
5486 args.compr = false;
5487
5488 args.out[0] = LLVMGetUndef(ctx->f32); /* R, depth */
5489 args.out[1] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
5490 args.out[2] = LLVMGetUndef(ctx->f32); /* B, sample mask */
5491 args.out[3] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
5492
5493 if (depth) {
5494 args.out[0] = depth;
5495 args.enabled_channels |= 0x1;
5496 }
5497
5498 if (stencil) {
5499 args.out[1] = stencil;
5500 args.enabled_channels |= 0x2;
5501 }
5502
5503 if (samplemask) {
5504 args.out[2] = samplemask;
5505 args.enabled_channels |= 0x4;
5506 }
5507
5508 	/* SI (except OLAND) has a bug where it only looks
5509 	 * at the X writemask component. */
5510 if (ctx->options->chip_class == SI &&
5511 ctx->options->family != CHIP_OLAND)
5512 args.enabled_channels |= 0x1;
5513
5514 ac_build_export(&ctx->ac, &args);
5515 }
5516
5517 static void
5518 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
5519 {
5520 unsigned index = 0;
5521 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
5522
5523 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5524 LLVMValueRef values[4];
5525
5526 if (!(ctx->output_mask & (1ull << i)))
5527 continue;
5528
5529 if (i == FRAG_RESULT_DEPTH) {
5530 ctx->shader_info->fs.writes_z = true;
5531 depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
5532 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5533 } else if (i == FRAG_RESULT_STENCIL) {
5534 ctx->shader_info->fs.writes_stencil = true;
5535 stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
5536 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5537 } else if (i == FRAG_RESULT_SAMPLE_MASK) {
5538 ctx->shader_info->fs.writes_sample_mask = true;
5539 samplemask = to_float(ctx, LLVMBuildLoad(ctx->builder,
5540 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
5541 } else {
5542 bool last = false;
5543 for (unsigned j = 0; j < 4; j++)
5544 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
5545 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
5546
5547 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil && !ctx->shader_info->fs.writes_sample_mask)
5548 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
5549
5550 si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
5551 index++;
5552 }
5553 }
5554
5555 if (depth || stencil || samplemask)
5556 si_export_mrt_z(ctx, depth, stencil, samplemask);
5557 else if (!index)
5558 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
5559
5560 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
5561 }
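
/*
 * Sketch of the "last color export" test used above (hypothetical helper):
 * export i is the last one when no output slot above i is written, i.e.
 * when the output mask fits entirely below bit i + 1.
 */
static bool example_is_last_color_export(uint64_t output_mask, unsigned i)
{
	/* e.g. mask 0b0011, i == 1 -> 3 <= 3, so this export sets DONE */
	return output_mask <= ((1ull << (i + 1)) - 1);
}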
5562
5563 static void
5564 emit_gs_epilogue(struct nir_to_llvm_context *ctx)
5565 {
5566 ac_build_sendmsg(&ctx->ac, AC_SENDMSG_GS_OP_NOP | AC_SENDMSG_GS_DONE, ctx->gs_wave_id);
5567 }
5568
5569 static void
5570 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
5571 {
5572 switch (ctx->stage) {
5573 case MESA_SHADER_VERTEX:
5574 if (ctx->options->key.vs.as_ls)
5575 handle_ls_outputs_post(ctx);
5576 else if (ctx->options->key.vs.as_es)
5577 handle_es_outputs_post(ctx, &ctx->shader_info->vs.es_info);
5578 else
5579 handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
5580 break;
5581 case MESA_SHADER_FRAGMENT:
5582 handle_fs_outputs_post(ctx);
5583 break;
5584 case MESA_SHADER_GEOMETRY:
5585 emit_gs_epilogue(ctx);
5586 break;
5587 case MESA_SHADER_TESS_CTRL:
5588 handle_tcs_outputs_post(ctx);
5589 break;
5590 case MESA_SHADER_TESS_EVAL:
5591 if (ctx->options->key.tes.as_es)
5592 handle_es_outputs_post(ctx, &ctx->shader_info->tes.es_info);
5593 else
5594 handle_vs_outputs_post(ctx, &ctx->shader_info->tes.outinfo);
5595 break;
5596 default:
5597 break;
5598 }
5599 }
5600
5601 static void
5602 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
5603 struct nir_variable *variable, uint32_t *offset, int idx)
5604 {
5605 unsigned size = glsl_count_attribute_slots(variable->type, false);
5606 variable->data.driver_location = *offset;
5607 *offset += size;
5608 }
5609
5610 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
5611 {
5612 LLVMPassManagerRef passmgr;
5613 /* Create the pass manager */
5614 passmgr = LLVMCreateFunctionPassManagerForModule(
5615 ctx->module);
5616
5617 /* This pass should eliminate all the load and store instructions */
5618 LLVMAddPromoteMemoryToRegisterPass(passmgr);
5619
5620 /* Add some optimization passes */
5621 LLVMAddScalarReplAggregatesPass(passmgr);
5622 LLVMAddLICMPass(passmgr);
5623 LLVMAddAggressiveDCEPass(passmgr);
5624 LLVMAddCFGSimplificationPass(passmgr);
5625 LLVMAddInstructionCombiningPass(passmgr);
5626
5627 /* Run the pass */
5628 LLVMInitializeFunctionPassManager(passmgr);
5629 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
5630 LLVMFinalizeFunctionPassManager(passmgr);
5631
5632 LLVMDisposeBuilder(ctx->builder);
5633 LLVMDisposePassManager(passmgr);
5634 }
5635
5636 static void
5637 ac_setup_rings(struct nir_to_llvm_context *ctx)
5638 {
5639 if ((ctx->stage == MESA_SHADER_VERTEX && ctx->options->key.vs.as_es) ||
5640 (ctx->stage == MESA_SHADER_TESS_EVAL && ctx->options->key.tes.as_es)) {
5641 ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_VS, false));
5642 }
5643
5644 if (ctx->is_gs_copy_shader) {
5645 ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_VS, false));
5646 }
5647 if (ctx->stage == MESA_SHADER_GEOMETRY) {
5648 LLVMValueRef tmp;
5649 ctx->esgs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_ESGS_GS, false));
5650 ctx->gsvs_ring = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_GSVS_GS, false));
5651
5652 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v4i32, "");
5653
5654 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, ctx->gsvs_num_entries, LLVMConstInt(ctx->i32, 2, false), "");
5655 tmp = LLVMBuildExtractElement(ctx->builder, ctx->gsvs_ring, ctx->i32one, "");
5656 tmp = LLVMBuildOr(ctx->builder, tmp, ctx->gsvs_ring_stride, "");
5657 ctx->gsvs_ring = LLVMBuildInsertElement(ctx->builder, ctx->gsvs_ring, tmp, ctx->i32one, "");
5658
5659 ctx->gsvs_ring = LLVMBuildBitCast(ctx->builder, ctx->gsvs_ring, ctx->v16i8, "");
5660 }
5661
5662 if (ctx->stage == MESA_SHADER_TESS_CTRL ||
5663 ctx->stage == MESA_SHADER_TESS_EVAL) {
5664 ctx->hs_ring_tess_offchip = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_OFFCHIP, false));
5665 ctx->hs_ring_tess_factor = ac_build_indexed_load_const(&ctx->ac, ctx->ring_offsets, LLVMConstInt(ctx->i32, RING_HS_TESS_FACTOR, false));
5666 }
5667 }
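
/*
 * CPU-side sketch of the GSVS descriptor patching above (hypothetical
 * helper, assuming the usual v4i32 buffer resource layout): the stride
 * field is OR'd into dword 1 and the number of entries is written to
 * dword 2, which is what the extract/insert element sequence does in IR.
 */
static void example_patch_gsvs_desc(uint32_t desc[4],
				    uint32_t stride_field,
				    uint32_t num_entries)
{
	desc[1] |= stride_field; /* stride bits of the resource word */
	desc[2] = num_entries;   /* NUM_RECORDS */
}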
5668
5669 static
5670 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
5671 struct nir_shader *nir,
5672 struct ac_shader_variant_info *shader_info,
5673 const struct ac_nir_compiler_options *options)
5674 {
5675 struct nir_to_llvm_context ctx = {0};
5676 struct nir_function *func;
5677 unsigned i;
5678 ctx.options = options;
5679 ctx.shader_info = shader_info;
5680 ctx.context = LLVMContextCreate();
5681 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
5682
5683 ac_llvm_context_init(&ctx.ac, ctx.context);
5684 ctx.ac.module = ctx.module;
5685
5686 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
5687
5688 memset(shader_info, 0, sizeof(*shader_info));
5689
5690 LLVMSetTarget(ctx.module, options->supports_spill ? "amdgcn-mesa-mesa3d" : "amdgcn--");
5691
5692 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
5693 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
5694 LLVMSetDataLayout(ctx.module, data_layout_str);
5695 LLVMDisposeTargetData(data_layout);
5696 LLVMDisposeMessage(data_layout_str);
5697
5698 setup_types(&ctx);
5699
5700 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
5701 ctx.ac.builder = ctx.builder;
5702 ctx.stage = nir->stage;
5703
5704 for (i = 0; i < AC_UD_MAX_SETS; i++)
5705 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
5706 for (i = 0; i < AC_UD_MAX_UD; i++)
5707 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
5708
5709 create_function(&ctx);
5710
5711 if (nir->stage == MESA_SHADER_COMPUTE) {
5712 int num_shared = 0;
5713 nir_foreach_variable(variable, &nir->shared)
5714 num_shared++;
5715 if (num_shared) {
5716 int idx = 0;
5717 uint32_t shared_size = 0;
5718 LLVMValueRef var;
5719 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
5720 nir_foreach_variable(variable, &nir->shared) {
5721 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
5722 idx++;
5723 }
5724
5725 shared_size *= 16;
5726 var = LLVMAddGlobalInAddressSpace(ctx.module,
5727 LLVMArrayType(ctx.i8, shared_size),
5728 "compute_lds",
5729 LOCAL_ADDR_SPACE);
5730 LLVMSetAlignment(var, 4);
5731 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
5732 }
5733 } else if (nir->stage == MESA_SHADER_GEOMETRY) {
5734 ctx.gs_next_vertex = ac_build_alloca(&ctx, ctx.i32, "gs_next_vertex");
5735
5736 ctx.gs_max_out_vertices = nir->info->gs.vertices_out;
5737 } else if (nir->stage == MESA_SHADER_TESS_EVAL) {
5738 ctx.tes_primitive_mode = nir->info->tess.primitive_mode;
5739 }
5740
5741 ac_setup_rings(&ctx);
5742
5743 nir_foreach_variable(variable, &nir->inputs)
5744 handle_shader_input_decl(&ctx, variable);
5745
5746 if (nir->stage == MESA_SHADER_FRAGMENT)
5747 handle_fs_inputs_pre(&ctx, nir);
5748
5749 ctx.num_output_clips = nir->info->clip_distance_array_size;
5750 ctx.num_output_culls = nir->info->cull_distance_array_size;
5751
5752 nir_foreach_variable(variable, &nir->outputs)
5753 handle_shader_output_decl(&ctx, variable);
5754
5755 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5756 _mesa_key_pointer_equal);
5757 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
5758 _mesa_key_pointer_equal);
5759
5760 func = (struct nir_function *)exec_list_get_head(&nir->functions);
5761
5762 setup_locals(&ctx, func);
5763
5764 visit_cf_list(&ctx, &func->impl->body);
5765 phi_post_pass(&ctx);
5766
5767 handle_shader_outputs_post(&ctx);
5768 LLVMBuildRetVoid(ctx.builder);
5769
5770 ac_llvm_finalize_module(&ctx);
5771 free(ctx.locals);
5772 ralloc_free(ctx.defs);
5773 ralloc_free(ctx.phis);
5774
5775 if (nir->stage == MESA_SHADER_GEOMETRY) {
5776 unsigned addclip = ctx.num_output_clips + ctx.num_output_culls > 4;
5777 shader_info->gs.gsvs_vertex_size = (util_bitcount64(ctx.output_mask) + addclip) * 16;
5778 shader_info->gs.max_gsvs_emit_size = shader_info->gs.gsvs_vertex_size *
5779 nir->info->gs.vertices_out;
5780 } else if (nir->stage == MESA_SHADER_TESS_CTRL) {
5781 shader_info->tcs.outputs_written = ctx.tess_outputs_written;
5782 shader_info->tcs.patch_outputs_written = ctx.tess_patch_outputs_written;
5783 } else if (nir->stage == MESA_SHADER_VERTEX && ctx.options->key.vs.as_ls) {
5784 shader_info->vs.outputs_written = ctx.tess_outputs_written;
5785 }
5786
5787 return ctx.module;
5788 }
5789
5790 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
5791 {
5792 unsigned *retval = (unsigned *)context;
5793 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
5794 char *description = LLVMGetDiagInfoDescription(di);
5795
5796 if (severity == LLVMDSError) {
5797 *retval = 1;
5798 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
5799 description);
5800 }
5801
5802 LLVMDisposeMessage(description);
5803 }
5804
5805 static unsigned ac_llvm_compile(LLVMModuleRef M,
5806 struct ac_shader_binary *binary,
5807 LLVMTargetMachineRef tm)
5808 {
5809 unsigned retval = 0;
5810 char *err;
5811 LLVMContextRef llvm_ctx;
5812 LLVMMemoryBufferRef out_buffer;
5813 unsigned buffer_size;
5814 const char *buffer_data;
5815 LLVMBool mem_err;
5816
5817 	/* Set up the diagnostic handler. */
5818 llvm_ctx = LLVMGetModuleContext(M);
5819
5820 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
5821 &retval);
5822
5823 	/* Compile the IR. */
5824 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
5825 &err, &out_buffer);
5826
5827 /* Process Errors/Warnings */
5828 if (mem_err) {
5829 fprintf(stderr, "%s: %s", __FUNCTION__, err);
5830 free(err);
5831 retval = 1;
5832 goto out;
5833 }
5834
5835 	/* Extract the shader code. */
5836 buffer_size = LLVMGetBufferSize(out_buffer);
5837 buffer_data = LLVMGetBufferStart(out_buffer);
5838
5839 ac_elf_read(buffer_data, buffer_size, binary);
5840
5841 /* Clean up */
5842 LLVMDisposeMemoryBuffer(out_buffer);
5843
5844 out:
5845 return retval;
5846 }
5847
5848 static void ac_compile_llvm_module(LLVMTargetMachineRef tm,
5849 LLVMModuleRef llvm_module,
5850 struct ac_shader_binary *binary,
5851 struct ac_shader_config *config,
5852 struct ac_shader_variant_info *shader_info,
5853 gl_shader_stage stage,
5854 bool dump_shader, bool supports_spill)
5855 {
5856 if (dump_shader)
5857 ac_dump_module(llvm_module);
5858
5859 memset(binary, 0, sizeof(*binary));
5860 int v = ac_llvm_compile(llvm_module, binary, tm);
5861 if (v) {
5862 fprintf(stderr, "compile failed\n");
5863 }
5864
5865 if (dump_shader)
5866 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
5867
5868 ac_shader_binary_read_config(binary, config, 0, supports_spill);
5869
5870 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
5871 LLVMDisposeModule(llvm_module);
5872 LLVMContextDispose(ctx);
5873
5874 if (stage == MESA_SHADER_FRAGMENT) {
5875 shader_info->num_input_vgprs = 0;
5876 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
5877 shader_info->num_input_vgprs += 2;
5878 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
5879 shader_info->num_input_vgprs += 2;
5880 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
5881 shader_info->num_input_vgprs += 2;
5882 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
5883 shader_info->num_input_vgprs += 3;
5884 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
5885 shader_info->num_input_vgprs += 2;
5886 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
5887 shader_info->num_input_vgprs += 2;
5888 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
5889 shader_info->num_input_vgprs += 2;
5890 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
5891 shader_info->num_input_vgprs += 1;
5892 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
5893 shader_info->num_input_vgprs += 1;
5894 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
5895 shader_info->num_input_vgprs += 1;
5896 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
5897 shader_info->num_input_vgprs += 1;
5898 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
5899 shader_info->num_input_vgprs += 1;
5900 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
5901 shader_info->num_input_vgprs += 1;
5902 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
5903 shader_info->num_input_vgprs += 1;
5904 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
5905 shader_info->num_input_vgprs += 1;
5906 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
5907 shader_info->num_input_vgprs += 1;
5908 }
5909 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
5910
5911 /* +3 for scratch wave offset and VCC */
5912 config->num_sgprs = MAX2(config->num_sgprs,
5913 shader_info->num_input_sgprs + 3);
5914 }
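
/*
 * Worked example of the PS input VGPR accounting above (hypothetical
 * values): PERSP_CENTER contributes the I/J barycentrics (2 VGPRs) and
 * each POS_*_FLOAT input contributes one, so a shader with perspective
 * center interpolation plus X/Y position starts with 4 input VGPRs.
 */
static unsigned example_count_ps_input_vgprs(bool persp_center,
					     bool pos_x_float,
					     bool pos_y_float)
{
	unsigned num = 0;
	if (persp_center)
		num += 2; /* I, J */
	if (pos_x_float)
		num += 1;
	if (pos_y_float)
		num += 1;
	return num;
}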
5915
5916 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
5917 struct ac_shader_binary *binary,
5918 struct ac_shader_config *config,
5919 struct ac_shader_variant_info *shader_info,
5920 struct nir_shader *nir,
5921 const struct ac_nir_compiler_options *options,
5922 bool dump_shader)
5923 {
5924
5925 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
5926 options);
5927
5928 ac_compile_llvm_module(tm, llvm_module, binary, config, shader_info, nir->stage, dump_shader, options->supports_spill);
5929 switch (nir->stage) {
5930 case MESA_SHADER_COMPUTE:
5931 for (int i = 0; i < 3; ++i)
5932 shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
5933 break;
5934 case MESA_SHADER_FRAGMENT:
5935 shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
5936 break;
5937 case MESA_SHADER_GEOMETRY:
5938 shader_info->gs.vertices_in = nir->info->gs.vertices_in;
5939 shader_info->gs.vertices_out = nir->info->gs.vertices_out;
5940 shader_info->gs.output_prim = nir->info->gs.output_primitive;
5941 shader_info->gs.invocations = nir->info->gs.invocations;
5942 break;
5943 case MESA_SHADER_TESS_EVAL:
5944 shader_info->tes.primitive_mode = nir->info->tess.primitive_mode;
5945 shader_info->tes.spacing = nir->info->tess.spacing;
5946 shader_info->tes.ccw = nir->info->tess.ccw;
5947 shader_info->tes.point_mode = nir->info->tess.point_mode;
5948 shader_info->tes.as_es = options->key.tes.as_es;
5949 break;
5950 case MESA_SHADER_TESS_CTRL:
5951 shader_info->tcs.tcs_vertices_out = nir->info->tess.tcs_vertices_out;
5952 break;
5953 case MESA_SHADER_VERTEX:
5954 shader_info->vs.as_es = options->key.vs.as_es;
5955 shader_info->vs.as_ls = options->key.vs.as_ls;
5956 		/* In LS mode we need at least 1 VGPR component; the invocation ID needs 3, which is handled elsewhere. */
5957 if (options->key.vs.as_ls)
5958 shader_info->vs.vgpr_comp_cnt = MAX2(1, shader_info->vs.vgpr_comp_cnt);
5959 break;
5960 default:
5961 break;
5962 }
5963 }
5964
5965 static void
5966 ac_gs_copy_shader_emit(struct nir_to_llvm_context *ctx)
5967 {
5968 LLVMValueRef args[9];
5969 args[0] = ctx->gsvs_ring;
5970 args[1] = LLVMBuildMul(ctx->builder, ctx->vertex_id, LLVMConstInt(ctx->i32, 4, false), "");
5971 args[3] = ctx->i32zero;
5972 args[4] = ctx->i32one; /* OFFEN */
5973 args[5] = ctx->i32zero; /* IDXEN */
5974 args[6] = ctx->i32one; /* GLC */
5975 args[7] = ctx->i32one; /* SLC */
5976 args[8] = ctx->i32zero; /* TFE */
5977
5978 int idx = 0;
5979
5980 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
5981 int length = 4;
5982 int slot = idx;
5983 int slot_inc = 1;
5984 if (!(ctx->output_mask & (1ull << i)))
5985 continue;
5986
5987 if (i == VARYING_SLOT_CLIP_DIST0) {
5988 /* unpack clip and cull from a single set of slots */
5989 length = ctx->num_output_clips + ctx->num_output_culls;
5990 if (length > 4)
5991 slot_inc = 2;
5992 }
5993
5994 for (unsigned j = 0; j < length; j++) {
5995 LLVMValueRef value;
5996 args[2] = LLVMConstInt(ctx->i32,
5997 (slot * 4 + j) *
5998 ctx->gs_max_out_vertices * 16 * 4, false);
5999
6000 value = ac_build_intrinsic(&ctx->ac,
6001 "llvm.SI.buffer.load.dword.i32.i32",
6002 ctx->i32, args, 9,
6003 AC_FUNC_ATTR_READONLY |
6004 AC_FUNC_ATTR_LEGACY);
6005
6006 LLVMBuildStore(ctx->builder,
6007 to_float(ctx, value), ctx->outputs[radeon_llvm_reg_index_soa(i, j)]);
6008 }
6009 idx += slot_inc;
6010 }
6011 handle_vs_outputs_post(ctx, &ctx->shader_info->vs.outinfo);
6012 }
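
/*
 * Worked example of the GSVS ring read addressing above (hypothetical
 * helper): the copy shader fetches component j of output slot `slot` at
 * this constant offset, while vertex_id * 4 (args[1]) supplies the
 * per-thread offset.
 */
static unsigned example_gsvs_const_offset(unsigned slot, unsigned j,
					  unsigned gs_max_out_vertices)
{
	return (slot * 4 + j) * gs_max_out_vertices * 16 * 4;
}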
6013
6014 void ac_create_gs_copy_shader(LLVMTargetMachineRef tm,
6015 struct nir_shader *geom_shader,
6016 struct ac_shader_binary *binary,
6017 struct ac_shader_config *config,
6018 struct ac_shader_variant_info *shader_info,
6019 const struct ac_nir_compiler_options *options,
6020 bool dump_shader)
6021 {
6022 struct nir_to_llvm_context ctx = {0};
6023 ctx.context = LLVMContextCreate();
6024 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
6025 ctx.options = options;
6026 ctx.shader_info = shader_info;
6027
6028 ac_llvm_context_init(&ctx.ac, ctx.context);
6029 ctx.ac.module = ctx.module;
6030
6031 ctx.is_gs_copy_shader = true;
6032 LLVMSetTarget(ctx.module, "amdgcn--");
6033 setup_types(&ctx);
6034
6035 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
6036 ctx.ac.builder = ctx.builder;
6037 ctx.stage = MESA_SHADER_VERTEX;
6038
6039 create_function(&ctx);
6040
6041 ctx.gs_max_out_vertices = geom_shader->info->gs.vertices_out;
6042 ac_setup_rings(&ctx);
6043
6044 ctx.num_output_clips = geom_shader->info->clip_distance_array_size;
6045 ctx.num_output_culls = geom_shader->info->cull_distance_array_size;
6046
6047 nir_foreach_variable(variable, &geom_shader->outputs)
6048 handle_shader_output_decl(&ctx, variable);
6049
6050 ac_gs_copy_shader_emit(&ctx);
6051
6052 LLVMBuildRetVoid(ctx.builder);
6053
6054 ac_llvm_finalize_module(&ctx);
6055
6056 ac_compile_llvm_module(tm, ctx.module, binary, config, shader_info,
6057 MESA_SHADER_VERTEX,
6058 dump_shader, options->supports_spill);
6059 }