radv: only emit descriptor sgprs when needed
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_util.h"
26 #include "ac_binary.h"
27 #include "sid.h"
28 #include "nir/nir.h"
29 #include "../vulkan/radv_descriptor_set.h"
30 #include "util/bitscan.h"
31 #include <llvm-c/Transforms/Scalar.h>
32
/*
 * Numeric calling-convention identifiers passed to
 * LLVMSetFunctionCallConv(), one per kind of hardware shader.
 */
enum radeon_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS,	/* 88 */
	RADEON_LLVM_AMDGPU_PS,	/* 89 */
	RADEON_LLVM_AMDGPU_CS,	/* 90 */
};
39
/* Address-space numbers used when building LLVM pointer types. */
#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

/* One slot for every varying up to and including VARYING_SLOT_VAR31. */
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
45
/* Kind of resource descriptor requested from get_sampler_desc(). */
enum desc_type {
	DESC_IMAGE = 0,
	DESC_FMASK = 1,
	DESC_SAMPLER = 2,
	DESC_BUFFER = 3,
};
52
53 struct nir_to_llvm_context {
54 const struct ac_nir_compiler_options *options;
55 struct ac_shader_variant_info *shader_info;
56
57 LLVMContextRef context;
58 LLVMModuleRef module;
59 LLVMBuilderRef builder;
60 LLVMValueRef main_function;
61
62 struct hash_table *defs;
63 struct hash_table *phis;
64
65 LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
66 LLVMValueRef push_constants;
67 LLVMValueRef num_work_groups;
68 LLVMValueRef workgroup_ids;
69 LLVMValueRef local_invocation_ids;
70 LLVMValueRef tg_size;
71
72 LLVMValueRef vertex_buffers;
73 LLVMValueRef base_vertex;
74 LLVMValueRef start_instance;
75 LLVMValueRef vertex_id;
76 LLVMValueRef rel_auto_id;
77 LLVMValueRef vs_prim_id;
78 LLVMValueRef instance_id;
79
80 LLVMValueRef prim_mask;
81 LLVMValueRef sample_positions;
82 LLVMValueRef persp_sample, persp_center, persp_centroid;
83 LLVMValueRef linear_sample, linear_center, linear_centroid;
84 LLVMValueRef front_face;
85 LLVMValueRef ancillary;
86 LLVMValueRef frag_pos[4];
87
88 LLVMBasicBlockRef continue_block;
89 LLVMBasicBlockRef break_block;
90
91 LLVMTypeRef i1;
92 LLVMTypeRef i8;
93 LLVMTypeRef i16;
94 LLVMTypeRef i32;
95 LLVMTypeRef i64;
96 LLVMTypeRef v2i32;
97 LLVMTypeRef v3i32;
98 LLVMTypeRef v4i32;
99 LLVMTypeRef v8i32;
100 LLVMTypeRef f32;
101 LLVMTypeRef f16;
102 LLVMTypeRef v2f32;
103 LLVMTypeRef v4f32;
104 LLVMTypeRef v16i8;
105 LLVMTypeRef voidt;
106
107 LLVMValueRef i32zero;
108 LLVMValueRef i32one;
109 LLVMValueRef f32zero;
110 LLVMValueRef f32one;
111 LLVMValueRef v4f32empty;
112
113 unsigned range_md_kind;
114 unsigned uniform_md_kind;
115 unsigned fpmath_md_kind;
116 unsigned invariant_load_md_kind;
117 LLVMValueRef empty_md;
118 LLVMValueRef fpmath_md_2p5_ulp;
119 gl_shader_stage stage;
120
121 LLVMValueRef lds;
122 LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
123 LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];
124
125 LLVMValueRef shared_memory;
126 uint64_t input_mask;
127 uint64_t output_mask;
128 int num_locals;
129 LLVMValueRef *locals;
130 bool has_ddxy;
131 unsigned num_clips;
132 unsigned num_culls;
133
134 bool has_ds_bpermute;
135 };
136
137 struct ac_tex_info {
138 LLVMValueRef args[12];
139 int arg_count;
140 LLVMTypeRef dst_type;
141 bool has_offset;
142 };
143
/*
 * Function/parameter attributes as single-bit flags, so several can be
 * combined in one mask.
 */
enum ac_func_attr {
	AC_FUNC_ATTR_ALWAYSINLINE = 0x01,
	AC_FUNC_ATTR_BYVAL        = 0x02,
	AC_FUNC_ATTR_INREG        = 0x04,
	AC_FUNC_ATTR_NOALIAS      = 0x08,
	AC_FUNC_ATTR_NOUNWIND     = 0x10,
	AC_FUNC_ATTR_READNONE     = 0x20,
	AC_FUNC_ATTR_READONLY     = 0x40,
	AC_FUNC_ATTR_LAST         = 0x80
};
154
155 #if HAVE_LLVM < 0x0400
156 static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
157 {
158 switch (attr) {
159 case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
160 case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
161 case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
162 case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
163 case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
164 case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
165 case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
166 default:
167 fprintf(stderr, "Unhandled function attribute: %x\n", attr);
168 return 0;
169 }
170 }
171
172 #else
173
174 static const char *attr_to_str(enum ac_func_attr attr)
175 {
176 switch (attr) {
177 case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
178 case AC_FUNC_ATTR_BYVAL: return "byval";
179 case AC_FUNC_ATTR_INREG: return "inreg";
180 case AC_FUNC_ATTR_NOALIAS: return "noalias";
181 case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
182 case AC_FUNC_ATTR_READNONE: return "readnone";
183 case AC_FUNC_ATTR_READONLY: return "readonly";
184 default:
185 fprintf(stderr, "Unhandled function attribute: %x\n", attr);
186 return 0;
187 }
188 }
189
190 #endif
191
192 static void
193 ac_add_function_attr(LLVMValueRef function,
194 int attr_idx,
195 enum ac_func_attr attr)
196 {
197
198 #if HAVE_LLVM < 0x0400
199 LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
200 if (attr_idx == -1) {
201 LLVMAddFunctionAttr(function, llvm_attr);
202 } else {
203 LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
204 }
205 #else
206 LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
207 const char *attr_name = attr_to_str(attr);
208 unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
209 strlen(attr_name));
210 LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
211 LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
212 #endif
213 }
214
215 static LLVMValueRef
216 emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name,
217 LLVMTypeRef return_type, LLVMValueRef *params,
218 unsigned param_count, unsigned attr_mask);
219 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
220 nir_deref_var *deref,
221 enum desc_type desc_type);
/*
 * Flatten a (slot index, channel) pair into a single array index, with
 * four consecutive entries per slot.
 */
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	const unsigned entries_per_slot = 4;

	return chan + index * entries_per_slot;
}
226
227 static unsigned llvm_get_type_size(LLVMTypeRef type)
228 {
229 LLVMTypeKind kind = LLVMGetTypeKind(type);
230
231 switch (kind) {
232 case LLVMIntegerTypeKind:
233 return LLVMGetIntTypeWidth(type) / 8;
234 case LLVMFloatTypeKind:
235 return 4;
236 case LLVMPointerTypeKind:
237 return 8;
238 case LLVMVectorTypeKind:
239 return LLVMGetVectorSize(type) *
240 llvm_get_type_size(LLVMGetElementType(type));
241 default:
242 assert(0);
243 return 0;
244 }
245 }
246
247 static void set_llvm_calling_convention(LLVMValueRef func,
248 gl_shader_stage stage)
249 {
250 enum radeon_llvm_calling_convention calling_conv;
251
252 switch (stage) {
253 case MESA_SHADER_VERTEX:
254 case MESA_SHADER_TESS_CTRL:
255 case MESA_SHADER_TESS_EVAL:
256 calling_conv = RADEON_LLVM_AMDGPU_VS;
257 break;
258 case MESA_SHADER_GEOMETRY:
259 calling_conv = RADEON_LLVM_AMDGPU_GS;
260 break;
261 case MESA_SHADER_FRAGMENT:
262 calling_conv = RADEON_LLVM_AMDGPU_PS;
263 break;
264 case MESA_SHADER_COMPUTE:
265 calling_conv = RADEON_LLVM_AMDGPU_CS;
266 break;
267 default:
268 unreachable("Unhandle shader type");
269 }
270
271 LLVMSetFunctionCallConv(func, calling_conv);
272 }
273
274 static LLVMValueRef
275 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
276 LLVMBuilderRef builder, LLVMTypeRef *return_types,
277 unsigned num_return_elems, LLVMTypeRef *param_types,
278 unsigned param_count, unsigned array_params,
279 unsigned sgpr_params, bool unsafe_math)
280 {
281 LLVMTypeRef main_function_type, ret_type;
282 LLVMBasicBlockRef main_function_body;
283
284 if (num_return_elems)
285 ret_type = LLVMStructTypeInContext(ctx, return_types,
286 num_return_elems, true);
287 else
288 ret_type = LLVMVoidTypeInContext(ctx);
289
290 /* Setup the function */
291 main_function_type =
292 LLVMFunctionType(ret_type, param_types, param_count, 0);
293 LLVMValueRef main_function =
294 LLVMAddFunction(module, "main", main_function_type);
295 main_function_body =
296 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
297 LLVMPositionBuilderAtEnd(builder, main_function_body);
298
299 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
300 for (unsigned i = 0; i < sgpr_params; ++i) {
301 if (i < array_params) {
302 LLVMValueRef P = LLVMGetParam(main_function, i);
303 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
304 ac_add_attr_dereferenceable(P, UINT64_MAX);
305 }
306 else {
307 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
308 }
309 }
310
311 if (unsafe_math) {
312 /* These were copied from some LLVM test. */
313 LLVMAddTargetDependentFunctionAttr(main_function,
314 "less-precise-fpmad",
315 "true");
316 LLVMAddTargetDependentFunctionAttr(main_function,
317 "no-infs-fp-math",
318 "true");
319 LLVMAddTargetDependentFunctionAttr(main_function,
320 "no-nans-fp-math",
321 "true");
322 LLVMAddTargetDependentFunctionAttr(main_function,
323 "unsafe-fp-math",
324 "true");
325 }
326 return main_function;
327 }
328
329 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
330 {
331 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
332 CONST_ADDR_SPACE);
333 }
334
335 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
336 int idx,
337 LLVMTypeRef type)
338 {
339 LLVMValueRef offset;
340 LLVMValueRef ptr;
341 int addr_space;
342
343 offset = LLVMConstInt(ctx->i32, idx, false);
344
345 ptr = ctx->shared_memory;
346 ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
347 addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
348 ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
349 return ptr;
350 }
351
352 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
353 {
354 LLVMTypeRef type = LLVMTypeOf(v);
355 if (type == ctx->f32) {
356 return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
357 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
358 LLVMTypeRef elem_type = LLVMGetElementType(type);
359 if (elem_type == ctx->f32) {
360 LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
361 return LLVMBuildBitCast(ctx->builder, v, nt, "");
362 }
363 }
364 return v;
365 }
366
367 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
368 {
369 LLVMTypeRef type = LLVMTypeOf(v);
370 if (type == ctx->i32) {
371 return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
372 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
373 LLVMTypeRef elem_type = LLVMGetElementType(type);
374 if (elem_type == ctx->i32) {
375 LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
376 return LLVMBuildBitCast(ctx->builder, v, nt, "");
377 }
378 }
379 return v;
380 }
381
382 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
383 LLVMValueRef param, unsigned rshift,
384 unsigned bitwidth)
385 {
386 LLVMValueRef value = param;
387 if (rshift)
388 value = LLVMBuildLShr(ctx->builder, value,
389 LLVMConstInt(ctx->i32, rshift, false), "");
390
391 if (rshift + bitwidth < 32) {
392 unsigned mask = (1 << bitwidth) - 1;
393 value = LLVMBuildAnd(ctx->builder, value,
394 LLVMConstInt(ctx->i32, mask, false), "");
395 }
396 return value;
397 }
398
399 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
400 LLVMValueRef base_ptr, LLVMValueRef index)
401 {
402 LLVMValueRef indices[2] = {
403 ctx->i32zero,
404 index,
405 };
406 return LLVMBuildGEP(ctx->builder, base_ptr,
407 indices, 2, "");
408 }
409
410 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
411 LLVMValueRef base_ptr, LLVMValueRef index,
412 bool uniform)
413 {
414 LLVMValueRef pointer;
415 pointer = build_gep0(ctx, base_ptr, index);
416 if (uniform)
417 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
418 return LLVMBuildLoad(ctx->builder, pointer, "");
419 }
420
421 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
422 LLVMValueRef base_ptr, LLVMValueRef index)
423 {
424 LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
425 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
426 return result;
427 }
428
429 static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
430 {
431 ud_info->sgpr_idx = sgpr_idx;
432 ud_info->num_sgprs = num_sgprs;
433 ud_info->indirect = false;
434 ud_info->indirect_offset = 0;
435 }
436
437 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
438 int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
439 {
440 set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
441 }
442
#if 0
/* Currently compiled out: records an indirect user-data location. */
static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
					   uint32_t indirect_offset)
{
	ud_info->indirect = true;
	ud_info->indirect_offset = indirect_offset;
	ud_info->sgpr_idx = sgpr_idx;
	ud_info->num_sgprs = num_sgprs;
}
#endif
453
454 static void create_function(struct nir_to_llvm_context *ctx,
455 struct nir_shader *nir)
456 {
457 LLVMTypeRef arg_types[23];
458 unsigned arg_idx = 0;
459 unsigned array_count = 0;
460 unsigned sgpr_count = 0, user_sgpr_count;
461 unsigned i;
462 unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
463 unsigned user_sgpr_idx;
464
465 /* 1 for each descriptor set */
466 for (unsigned i = 0; i < num_sets; ++i) {
467 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
468 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
469 }
470 }
471
472 /* 1 for push constants and dynamic descriptors */
473 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
474
475 array_count = arg_idx;
476 switch (nir->stage) {
477 case MESA_SHADER_COMPUTE:
478 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
479 user_sgpr_count = arg_idx;
480 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
481 arg_types[arg_idx++] = ctx->i32;
482 sgpr_count = arg_idx;
483
484 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
485 break;
486 case MESA_SHADER_VERTEX:
487 arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
488 arg_types[arg_idx++] = ctx->i32; // base vertex
489 arg_types[arg_idx++] = ctx->i32; // start instance
490 user_sgpr_count = sgpr_count = arg_idx;
491 arg_types[arg_idx++] = ctx->i32; // vertex id
492 arg_types[arg_idx++] = ctx->i32; // rel auto id
493 arg_types[arg_idx++] = ctx->i32; // vs prim id
494 arg_types[arg_idx++] = ctx->i32; // instance id
495 break;
496 case MESA_SHADER_FRAGMENT:
497 arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
498 user_sgpr_count = arg_idx;
499 arg_types[arg_idx++] = ctx->i32; /* prim mask */
500 sgpr_count = arg_idx;
501 arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
502 arg_types[arg_idx++] = ctx->v2i32; /* persp center */
503 arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
504 arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
505 arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
506 arg_types[arg_idx++] = ctx->v2i32; /* linear center */
507 arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
508 arg_types[arg_idx++] = ctx->f32; /* line stipple tex */
509 arg_types[arg_idx++] = ctx->f32; /* pos x float */
510 arg_types[arg_idx++] = ctx->f32; /* pos y float */
511 arg_types[arg_idx++] = ctx->f32; /* pos z float */
512 arg_types[arg_idx++] = ctx->f32; /* pos w float */
513 arg_types[arg_idx++] = ctx->i32; /* front face */
514 arg_types[arg_idx++] = ctx->i32; /* ancillary */
515 arg_types[arg_idx++] = ctx->f32; /* sample coverage */
516 arg_types[arg_idx++] = ctx->i32; /* fixed pt */
517 break;
518 default:
519 unreachable("Shader stage not implemented");
520 }
521
522 ctx->main_function = create_llvm_function(
523 ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
524 arg_idx, array_count, sgpr_count, ctx->options->unsafe_math);
525 set_llvm_calling_convention(ctx->main_function, nir->stage);
526
527
528 ctx->shader_info->num_input_sgprs = 0;
529 ctx->shader_info->num_input_vgprs = 0;
530
531 for (i = 0; i < user_sgpr_count; i++)
532 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
533
534 ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
535 for (; i < sgpr_count; i++)
536 ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
537
538 if (nir->stage != MESA_SHADER_FRAGMENT)
539 for (; i < arg_idx; ++i)
540 ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
541
542 arg_idx = 0;
543 user_sgpr_idx = 0;
544 for (unsigned i = 0; i < num_sets; ++i) {
545 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
546 set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
547 user_sgpr_idx += 2;
548 ctx->descriptor_sets[i] =
549 LLVMGetParam(ctx->main_function, arg_idx++);
550 } else
551 ctx->descriptor_sets[i] = NULL;
552 }
553
554 ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
555 set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
556 user_sgpr_idx += 2;
557
558 switch (nir->stage) {
559 case MESA_SHADER_COMPUTE:
560 set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
561 user_sgpr_idx += 3;
562 ctx->num_work_groups =
563 LLVMGetParam(ctx->main_function, arg_idx++);
564 ctx->workgroup_ids =
565 LLVMGetParam(ctx->main_function, arg_idx++);
566 ctx->tg_size =
567 LLVMGetParam(ctx->main_function, arg_idx++);
568 ctx->local_invocation_ids =
569 LLVMGetParam(ctx->main_function, arg_idx++);
570 break;
571 case MESA_SHADER_VERTEX:
572 set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
573 user_sgpr_idx += 2;
574 ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
575 set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
576 user_sgpr_idx += 2;
577 ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
578 ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
579 ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
580 ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
581 ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
582 ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
583 break;
584 case MESA_SHADER_FRAGMENT:
585 set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
586 user_sgpr_idx += 2;
587 ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
588 ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
589 ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
590 ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
591 ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
592 arg_idx++;
593 ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
594 ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
595 ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
596 arg_idx++; /* line stipple */
597 ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
598 ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
599 ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
600 ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
601 ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
602 ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
603 break;
604 default:
605 unreachable("Shader stage not implemented");
606 }
607 }
608
609 static void setup_types(struct nir_to_llvm_context *ctx)
610 {
611 LLVMValueRef args[4];
612
613 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
614 ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
615 ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
616 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
617 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
618 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
619 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
620 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
621 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
622 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
623 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
624 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
625 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
626 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
627 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
628
629 ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
630 ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
631 ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
632 ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
633
634 args[0] = ctx->f32zero;
635 args[1] = ctx->f32zero;
636 args[2] = ctx->f32zero;
637 args[3] = ctx->f32one;
638 ctx->v4f32empty = LLVMConstVector(args, 4);
639
640 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
641 "range", 5);
642 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
643 "invariant.load", 14);
644 ctx->uniform_md_kind =
645 LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
646 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
647
648 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
649
650 args[0] = LLVMConstReal(ctx->f32, 2.5);
651 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
652 }
653
654 static int get_llvm_num_components(LLVMValueRef value)
655 {
656 LLVMTypeRef type = LLVMTypeOf(value);
657 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
658 ? LLVMGetVectorSize(type)
659 : 1;
660 return num_components;
661 }
662
663 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
664 LLVMValueRef value,
665 int index)
666 {
667 int count = get_llvm_num_components(value);
668
669 assert(index < count);
670 if (count == 1)
671 return value;
672
673 return LLVMBuildExtractElement(ctx->builder, value,
674 LLVMConstInt(ctx->i32, index, false), "");
675 }
676
677 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
678 LLVMValueRef value, unsigned count)
679 {
680 unsigned num_components = get_llvm_num_components(value);
681 if (count == num_components)
682 return value;
683
684 LLVMValueRef masks[] = {
685 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
686 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
687
688 if (count == 1)
689 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
690 "");
691
692 LLVMValueRef swizzle = LLVMConstVector(masks, count);
693 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
694 }
695
696 static LLVMValueRef
697 build_gather_values_extended(struct nir_to_llvm_context *ctx,
698 LLVMValueRef *values,
699 unsigned value_count,
700 unsigned value_stride,
701 bool load)
702 {
703 LLVMBuilderRef builder = ctx->builder;
704 LLVMValueRef vec;
705 unsigned i;
706
707
708 if (value_count == 1) {
709 if (load)
710 return LLVMBuildLoad(builder, values[0], "");
711 return values[0];
712 }
713
714 for (i = 0; i < value_count; i++) {
715 LLVMValueRef value = values[i * value_stride];
716 if (load)
717 value = LLVMBuildLoad(builder, value, "");
718
719 if (!i)
720 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
721 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
722 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
723 }
724 return vec;
725 }
726
727
728 static void
729 build_store_values_extended(struct nir_to_llvm_context *ctx,
730 LLVMValueRef *values,
731 unsigned value_count,
732 unsigned value_stride,
733 LLVMValueRef vec)
734 {
735 LLVMBuilderRef builder = ctx->builder;
736 unsigned i;
737
738 if (value_count == 1) {
739 LLVMBuildStore(builder, vec, values[0]);
740 return;
741 }
742
743 for (i = 0; i < value_count; i++) {
744 LLVMValueRef ptr = values[i * value_stride];
745 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
746 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
747 LLVMBuildStore(builder, value, ptr);
748 }
749 }
750
751 static LLVMValueRef
752 build_gather_values(struct nir_to_llvm_context *ctx,
753 LLVMValueRef *values,
754 unsigned value_count)
755 {
756 return build_gather_values_extended(ctx, values, value_count, 1, false);
757 }
758
759 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
760 nir_ssa_def *def)
761 {
762 LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
763 if (def->num_components > 1) {
764 type = LLVMVectorType(type, def->num_components);
765 }
766 return type;
767 }
768
769 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
770 {
771 assert(src.is_ssa);
772 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
773 return (LLVMValueRef)entry->data;
774 }
775
776
777 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
778 struct nir_block *b)
779 {
780 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
781 return (LLVMBasicBlockRef)entry->data;
782 }
783
784 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
785 nir_alu_src src,
786 unsigned num_components)
787 {
788 LLVMValueRef value = get_src(ctx, src.src);
789 bool need_swizzle = false;
790
791 assert(value);
792 LLVMTypeRef type = LLVMTypeOf(value);
793 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
794 ? LLVMGetVectorSize(type)
795 : 1;
796
797 for (unsigned i = 0; i < num_components; ++i) {
798 assert(src.swizzle[i] < src_components);
799 if (src.swizzle[i] != i)
800 need_swizzle = true;
801 }
802
803 if (need_swizzle || num_components != src_components) {
804 LLVMValueRef masks[] = {
805 LLVMConstInt(ctx->i32, src.swizzle[0], false),
806 LLVMConstInt(ctx->i32, src.swizzle[1], false),
807 LLVMConstInt(ctx->i32, src.swizzle[2], false),
808 LLVMConstInt(ctx->i32, src.swizzle[3], false)};
809
810 if (src_components > 1 && num_components == 1) {
811 value = LLVMBuildExtractElement(ctx->builder, value,
812 masks[0], "");
813 } else if (src_components == 1 && num_components > 1) {
814 LLVMValueRef values[] = {value, value, value, value};
815 value = build_gather_values(ctx, values, num_components);
816 } else {
817 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
818 value = LLVMBuildShuffleVector(ctx->builder, value, value,
819 swizzle, "");
820 }
821 }
822 assert(!src.negate);
823 assert(!src.abs);
824 return value;
825 }
826
827 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
828 LLVMIntPredicate pred, LLVMValueRef src0,
829 LLVMValueRef src1)
830 {
831 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
832 return LLVMBuildSelect(ctx->builder, result,
833 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
834 LLVMConstInt(ctx->i32, 0, false), "");
835 }
836
837 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
838 LLVMRealPredicate pred, LLVMValueRef src0,
839 LLVMValueRef src1)
840 {
841 LLVMValueRef result;
842 src0 = to_float(ctx, src0);
843 src1 = to_float(ctx, src1);
844 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
845 return LLVMBuildSelect(ctx->builder, result,
846 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
847 LLVMConstInt(ctx->i32, 0, false), "");
848 }
849
850 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
851 const char *intrin,
852 LLVMValueRef src0)
853 {
854 LLVMValueRef params[] = {
855 to_float(ctx, src0),
856 };
857 return emit_llvm_intrinsic(ctx, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
858 }
859
860 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
861 const char *intrin,
862 LLVMValueRef src0, LLVMValueRef src1)
863 {
864 LLVMValueRef params[] = {
865 to_float(ctx, src0),
866 to_float(ctx, src1),
867 };
868 return emit_llvm_intrinsic(ctx, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
869 }
870
871 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
872 const char *intrin,
873 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
874 {
875 LLVMValueRef params[] = {
876 to_float(ctx, src0),
877 to_float(ctx, src1),
878 to_float(ctx, src2),
879 };
880 return emit_llvm_intrinsic(ctx, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
881 }
882
883 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
884 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
885 {
886 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
887 ctx->i32zero, "");
888 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
889 }
890
891 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
892 LLVMValueRef src0)
893 {
894 LLVMValueRef params[2] = {
895 src0,
896
897 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
898 * add special code to check for x=0. The reason is that
899 * the LLVM behavior for x=0 is different from what we
900 * need here.
901 *
902 * The hardware already implements the correct behavior.
903 */
904 LLVMConstInt(ctx->i32, 1, false),
905 };
906 return emit_llvm_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
907 }
908
909 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
910 LLVMValueRef src0)
911 {
912 LLVMValueRef msb = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.flbit.i32",
913 ctx->i32, &src0, 1,
914 AC_FUNC_ATTR_READNONE);
915
916 /* The HW returns the last bit index from MSB, but NIR wants
917 * the index from LSB. Invert it by doing "31 - msb". */
918 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
919 msb, "");
920
921 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
922 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
923 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
924 src0, ctx->i32zero, ""),
925 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
926 src0, all_ones, ""), "");
927
928 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
929 }
930
931 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
932 LLVMValueRef src0)
933 {
934 LLVMValueRef args[2] = {
935 src0,
936 ctx->i32one,
937 };
938 LLVMValueRef msb = emit_llvm_intrinsic(ctx, "llvm.ctlz.i32",
939 ctx->i32, args, ARRAY_SIZE(args),
940 AC_FUNC_ATTR_READNONE);
941
942 /* The HW returns the last bit index from MSB, but NIR wants
943 * the index from LSB. Invert it by doing "31 - msb". */
944 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
945 msb, "");
946
947 return LLVMBuildSelect(ctx->builder,
948 LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
949 ctx->i32zero, ""),
950 LLVMConstInt(ctx->i32, -1, true), msb, "");
951 }
952
953 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
954 LLVMIntPredicate pred,
955 LLVMValueRef src0, LLVMValueRef src1)
956 {
957 return LLVMBuildSelect(ctx->builder,
958 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
959 src0,
960 src1, "");
961
962 }
963 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
964 LLVMValueRef src0)
965 {
966 return emit_minmax_int(ctx, LLVMIntSGT, src0,
967 LLVMBuildNeg(ctx->builder, src0, ""));
968 }
969
970 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
971 LLVMValueRef src0)
972 {
973 LLVMValueRef cmp, val;
974
975 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
976 val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
977 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
978 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
979 return val;
980 }
981
982 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
983 LLVMValueRef src0)
984 {
985 LLVMValueRef cmp, val;
986
987 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
988 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
989 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
990 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
991 return val;
992 }
993
994 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
995 LLVMValueRef src0)
996 {
997 const char *intr = "llvm.floor.f32";
998 LLVMValueRef fsrc0 = to_float(ctx, src0);
999 LLVMValueRef params[] = {
1000 fsrc0,
1001 };
1002 LLVMValueRef floor = emit_llvm_intrinsic(ctx, intr,
1003 ctx->f32, params, 1,
1004 AC_FUNC_ATTR_READNONE);
1005 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
1006 }
1007
1008 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
1009 const char *intrin,
1010 LLVMValueRef src0, LLVMValueRef src1)
1011 {
1012 LLVMTypeRef ret_type;
1013 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
1014 LLVMValueRef res;
1015 LLVMValueRef params[] = { src0, src1 };
1016 ret_type = LLVMStructTypeInContext(ctx->context, types,
1017 2, true);
1018
1019 res = emit_llvm_intrinsic(ctx, intrin, ret_type,
1020 params, 2, AC_FUNC_ATTR_READNONE);
1021
1022 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
1023 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
1024 return res;
1025 }
1026
1027 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
1028 LLVMValueRef src0)
1029 {
1030 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
1031 }
1032
1033 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
1034 LLVMValueRef src0, LLVMValueRef src1)
1035 {
1036 LLVMValueRef dst64, result;
1037 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
1038 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
1039
1040 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1041 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1042 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1043 return result;
1044 }
1045
1046 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
1047 LLVMValueRef src0, LLVMValueRef src1)
1048 {
1049 LLVMValueRef dst64, result;
1050 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
1051 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
1052
1053 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
1054 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
1055 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
1056 return result;
1057 }
1058
1059 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
1060 const char *intrin,
1061 LLVMValueRef srcs[3])
1062 {
1063 LLVMValueRef result;
1064 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1065 result = emit_llvm_intrinsic(ctx, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
1066
1067 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1068 return result;
1069 }
1070
1071 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
1072 LLVMValueRef src0, LLVMValueRef src1,
1073 LLVMValueRef src2, LLVMValueRef src3)
1074 {
1075 LLVMValueRef bfi_args[3], result;
1076
1077 bfi_args[0] = LLVMBuildShl(ctx->builder,
1078 LLVMBuildSub(ctx->builder,
1079 LLVMBuildShl(ctx->builder,
1080 ctx->i32one,
1081 src3, ""),
1082 ctx->i32one, ""),
1083 src2, "");
1084 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1085 bfi_args[2] = src0;
1086
1087 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1088
1089 /* Calculate:
1090 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1091 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1092 */
1093 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1094 LLVMBuildAnd(ctx->builder, bfi_args[0],
1095 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1096
1097 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1098 return result;
1099 }
1100
1101 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
1102 LLVMValueRef src0)
1103 {
1104 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1105 int i;
1106 LLVMValueRef comp[2];
1107
1108 src0 = to_float(ctx, src0);
1109 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1110 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1111 for (i = 0; i < 2; i++) {
1112 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1113 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1114 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1115 }
1116
1117 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1118 comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1119
1120 return comp[0];
1121 }
1122
1123 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1124 LLVMValueRef src0)
1125 {
1126 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1127 LLVMValueRef temps[2], result, val;
1128 int i;
1129
1130 for (i = 0; i < 2; i++) {
1131 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1132 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1133 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1134 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1135 }
1136
1137 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1138 ctx->i32zero, "");
1139 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1140 ctx->i32one, "");
1141 return result;
1142 }
1143
1144 /**
1145 * Set range metadata on an instruction. This can only be used on load and
1146 * call instructions. If you know an instruction can only produce the values
1147 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1148 * \p lo is the minimum value inclusive.
1149 * \p hi is the maximum value exclusive.
1150 */
1151 static void set_range_metadata(struct nir_to_llvm_context *ctx,
1152 LLVMValueRef value, unsigned lo, unsigned hi)
1153 {
1154 LLVMValueRef range_md, md_args[2];
1155 LLVMTypeRef type = LLVMTypeOf(value);
1156 LLVMContextRef context = LLVMGetTypeContext(type);
1157
1158 md_args[0] = LLVMConstInt(type, lo, false);
1159 md_args[1] = LLVMConstInt(type, hi, false);
1160 range_md = LLVMMDNodeInContext(context, md_args, 2);
1161 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1162 }
1163
1164 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
1165 {
1166 LLVMValueRef tid;
1167 LLVMValueRef tid_args[2];
1168 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
1169 tid_args[1] = ctx->i32zero;
1170 tid_args[1] = emit_llvm_intrinsic(ctx,
1171 "llvm.amdgcn.mbcnt.lo", ctx->i32,
1172 tid_args, 2, AC_FUNC_ATTR_READNONE);
1173
1174 tid = emit_llvm_intrinsic(ctx,
1175 "llvm.amdgcn.mbcnt.hi", ctx->i32,
1176 tid_args, 2, AC_FUNC_ATTR_READNONE);
1177 set_range_metadata(ctx, tid, 0, 64);
1178 return tid;
1179 }
1180
1181 /*
1182 * SI implements derivatives using the local data store (LDS)
1183 * All writes to the LDS happen in all executing threads at
1184 * the same time. TID is the Thread ID for the current
1185 * thread and is a value between 0 and 63, representing
1186 * the thread's position in the wavefront.
1187 *
1188 * For the pixel shader threads are grouped into quads of four pixels.
1189 * The TIDs of the pixels of a quad are:
1190 *
1191 * +------+------+
1192 * |4n + 0|4n + 1|
1193 * +------+------+
1194 * |4n + 2|4n + 3|
1195 * +------+------+
1196 *
1197 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1198 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1199 * the current pixel's column, and masking with 0xfffffffe yields the TID
1200 * of the left pixel of the current pixel's row.
1201 *
1202 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1203 * adding 2 yields the TID of the pixel below the top pixel.
1204 */
1205 /* masks for thread ID. */
1206 #define TID_MASK_TOP_LEFT 0xfffffffc
1207 #define TID_MASK_TOP 0xfffffffd
1208 #define TID_MASK_LEFT 0xfffffffe
1209 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1210 nir_op op,
1211 LLVMValueRef src0)
1212 {
1213 LLVMValueRef tl, trbl, result;
1214 LLVMValueRef tl_tid, trbl_tid;
1215 LLVMValueRef args[2];
1216 LLVMValueRef thread_id;
1217 unsigned mask;
1218 int idx;
1219 ctx->has_ddxy = true;
1220
1221 if (!ctx->lds && !ctx->has_ds_bpermute)
1222 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1223 LLVMArrayType(ctx->i32, 64),
1224 "ddxy_lds", LOCAL_ADDR_SPACE);
1225
1226 thread_id = get_thread_id(ctx);
1227 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1228 mask = TID_MASK_LEFT;
1229 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1230 mask = TID_MASK_TOP;
1231 else
1232 mask = TID_MASK_TOP_LEFT;
1233
1234 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1235 LLVMConstInt(ctx->i32, mask, false), "");
1236 /* for DDX we want to next X pixel, DDY next Y pixel. */
1237 if (op == nir_op_fddx_fine ||
1238 op == nir_op_fddx_coarse ||
1239 op == nir_op_fddx)
1240 idx = 1;
1241 else
1242 idx = 2;
1243
1244 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1245 LLVMConstInt(ctx->i32, idx, false), "");
1246
1247 if (ctx->has_ds_bpermute) {
1248 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1249 LLVMConstInt(ctx->i32, 4, false), "");
1250 args[1] = src0;
1251 tl = emit_llvm_intrinsic(ctx, "llvm.amdgcn.ds.bpermute",
1252 ctx->i32, args, 2,
1253 AC_FUNC_ATTR_READNONE);
1254
1255 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1256 LLVMConstInt(ctx->i32, 4, false), "");
1257 trbl = emit_llvm_intrinsic(ctx, "llvm.amdgcn.ds.bpermute",
1258 ctx->i32, args, 2,
1259 AC_FUNC_ATTR_READNONE);
1260 } else {
1261 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
1262
1263 store_ptr = build_gep0(ctx, ctx->lds, thread_id);
1264 load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
1265 load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
1266
1267 LLVMBuildStore(ctx->builder, src0, store_ptr);
1268 tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
1269 trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
1270 }
1271 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1272 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1273 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1274 return result;
1275 }
1276
1277 /*
1278 * this takes an I,J coordinate pair,
1279 * and works out the X and Y derivatives.
1280 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
1281 */
1282 static LLVMValueRef emit_ddxy_interp(
1283 struct nir_to_llvm_context *ctx,
1284 LLVMValueRef interp_ij)
1285 {
1286 LLVMValueRef result[4], a;
1287 unsigned i;
1288
1289 for (i = 0; i < 2; i++) {
1290 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1291 LLVMConstInt(ctx->i32, i, false), "");
1292 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1293 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1294 }
1295 return build_gather_values(ctx, result, 4);
1296 }
1297
1298 static LLVMValueRef emit_fdiv(struct nir_to_llvm_context *ctx,
1299 LLVMValueRef num,
1300 LLVMValueRef den)
1301 {
1302 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
1303
1304 if (!LLVMIsConstant(ret))
1305 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
1306 return ret;
1307 }
1308
1309 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
1310 {
1311 LLVMValueRef src[4], result = NULL;
1312 unsigned num_components = instr->dest.dest.ssa.num_components;
1313 unsigned src_components;
1314
1315 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1316 switch (instr->op) {
1317 case nir_op_vec2:
1318 case nir_op_vec3:
1319 case nir_op_vec4:
1320 src_components = 1;
1321 break;
1322 case nir_op_pack_half_2x16:
1323 src_components = 2;
1324 break;
1325 case nir_op_unpack_half_2x16:
1326 src_components = 1;
1327 break;
1328 default:
1329 src_components = num_components;
1330 break;
1331 }
1332 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1333 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1334
1335 switch (instr->op) {
1336 case nir_op_fmov:
1337 case nir_op_imov:
1338 result = src[0];
1339 break;
1340 case nir_op_fneg:
1341 src[0] = to_float(ctx, src[0]);
1342 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1343 break;
1344 case nir_op_ineg:
1345 result = LLVMBuildNeg(ctx->builder, src[0], "");
1346 break;
1347 case nir_op_inot:
1348 result = LLVMBuildNot(ctx->builder, src[0], "");
1349 break;
1350 case nir_op_iadd:
1351 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1352 break;
1353 case nir_op_fadd:
1354 src[0] = to_float(ctx, src[0]);
1355 src[1] = to_float(ctx, src[1]);
1356 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1357 break;
1358 case nir_op_fsub:
1359 src[0] = to_float(ctx, src[0]);
1360 src[1] = to_float(ctx, src[1]);
1361 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1362 break;
1363 case nir_op_isub:
1364 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1365 break;
1366 case nir_op_imul:
1367 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1368 break;
1369 case nir_op_imod:
1370 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1371 break;
1372 case nir_op_umod:
1373 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
1374 break;
1375 case nir_op_fmod:
1376 src[0] = to_float(ctx, src[0]);
1377 src[1] = to_float(ctx, src[1]);
1378 result = emit_fdiv(ctx, src[0], src[1]);
1379 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
1380 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1381 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1382 break;
1383 case nir_op_frem:
1384 src[0] = to_float(ctx, src[0]);
1385 src[1] = to_float(ctx, src[1]);
1386 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1387 break;
1388 case nir_op_idiv:
1389 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1390 break;
1391 case nir_op_udiv:
1392 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1393 break;
1394 case nir_op_fmul:
1395 src[0] = to_float(ctx, src[0]);
1396 src[1] = to_float(ctx, src[1]);
1397 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1398 break;
1399 case nir_op_fdiv:
1400 src[0] = to_float(ctx, src[0]);
1401 src[1] = to_float(ctx, src[1]);
1402 result = emit_fdiv(ctx, src[0], src[1]);
1403 break;
1404 case nir_op_frcp:
1405 src[0] = to_float(ctx, src[0]);
1406 result = emit_fdiv(ctx, ctx->f32one, src[0]);
1407 break;
1408 case nir_op_iand:
1409 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1410 break;
1411 case nir_op_ior:
1412 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1413 break;
1414 case nir_op_ixor:
1415 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1416 break;
1417 case nir_op_ishl:
1418 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1419 break;
1420 case nir_op_ishr:
1421 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1422 break;
1423 case nir_op_ushr:
1424 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1425 break;
1426 case nir_op_ilt:
1427 result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
1428 break;
1429 case nir_op_ine:
1430 result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
1431 break;
1432 case nir_op_ieq:
1433 result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
1434 break;
1435 case nir_op_ige:
1436 result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
1437 break;
1438 case nir_op_ult:
1439 result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
1440 break;
1441 case nir_op_uge:
1442 result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
1443 break;
1444 case nir_op_feq:
1445 result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
1446 break;
1447 case nir_op_fne:
1448 result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
1449 break;
1450 case nir_op_flt:
1451 result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
1452 break;
1453 case nir_op_fge:
1454 result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
1455 break;
1456 case nir_op_fabs:
1457 result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
1458 break;
1459 case nir_op_iabs:
1460 result = emit_iabs(ctx, src[0]);
1461 break;
1462 case nir_op_imax:
1463 result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
1464 break;
1465 case nir_op_imin:
1466 result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
1467 break;
1468 case nir_op_umax:
1469 result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
1470 break;
1471 case nir_op_umin:
1472 result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
1473 break;
1474 case nir_op_isign:
1475 result = emit_isign(ctx, src[0]);
1476 break;
1477 case nir_op_fsign:
1478 src[0] = to_float(ctx, src[0]);
1479 result = emit_fsign(ctx, src[0]);
1480 break;
1481 case nir_op_ffloor:
1482 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
1483 break;
1484 case nir_op_ftrunc:
1485 result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
1486 break;
1487 case nir_op_fceil:
1488 result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
1489 break;
1490 case nir_op_fround_even:
1491 result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
1492 break;
1493 case nir_op_ffract:
1494 result = emit_ffract(ctx, src[0]);
1495 break;
1496 case nir_op_fsin:
1497 result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
1498 break;
1499 case nir_op_fcos:
1500 result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
1501 break;
1502 case nir_op_fsqrt:
1503 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1504 break;
1505 case nir_op_fexp2:
1506 result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
1507 break;
1508 case nir_op_flog2:
1509 result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
1510 break;
1511 case nir_op_frsq:
1512 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1513 result = emit_fdiv(ctx, ctx->f32one, result);
1514 break;
1515 case nir_op_fpow:
1516 result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
1517 break;
1518 case nir_op_fmax:
1519 result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
1520 break;
1521 case nir_op_fmin:
1522 result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
1523 break;
1524 case nir_op_ffma:
1525 result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
1526 break;
1527 case nir_op_ibitfield_extract:
1528 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
1529 break;
1530 case nir_op_ubitfield_extract:
1531 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
1532 break;
1533 case nir_op_bitfield_insert:
1534 result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
1535 break;
1536 case nir_op_bitfield_reverse:
1537 result = emit_llvm_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1538 break;
1539 case nir_op_bit_count:
1540 result = emit_llvm_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1541 break;
1542 case nir_op_vec2:
1543 case nir_op_vec3:
1544 case nir_op_vec4:
1545 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1546 src[i] = to_integer(ctx, src[i]);
1547 result = build_gather_values(ctx, src, num_components);
1548 break;
1549 case nir_op_f2i:
1550 src[0] = to_float(ctx, src[0]);
1551 result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
1552 break;
1553 case nir_op_f2u:
1554 src[0] = to_float(ctx, src[0]);
1555 result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
1556 break;
1557 case nir_op_i2f:
1558 result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
1559 break;
1560 case nir_op_u2f:
1561 result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
1562 break;
1563 case nir_op_bcsel:
1564 result = emit_bcsel(ctx, src[0], src[1], src[2]);
1565 break;
1566 case nir_op_find_lsb:
1567 result = emit_find_lsb(ctx, src[0]);
1568 break;
1569 case nir_op_ufind_msb:
1570 result = emit_ufind_msb(ctx, src[0]);
1571 break;
1572 case nir_op_ifind_msb:
1573 result = emit_ifind_msb(ctx, src[0]);
1574 break;
1575 case nir_op_uadd_carry:
1576 result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1577 break;
1578 case nir_op_usub_borrow:
1579 result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
1580 break;
1581 case nir_op_b2f:
1582 result = emit_b2f(ctx, src[0]);
1583 break;
1584 case nir_op_fquantize2f16:
1585 src[0] = to_float(ctx, src[0]);
1586 result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
1587 /* need to convert back up to f32 */
1588 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1589 break;
1590 case nir_op_umul_high:
1591 result = emit_umul_high(ctx, src[0], src[1]);
1592 break;
1593 case nir_op_imul_high:
1594 result = emit_imul_high(ctx, src[0], src[1]);
1595 break;
1596 case nir_op_pack_half_2x16:
1597 result = emit_pack_half_2x16(ctx, src[0]);
1598 break;
1599 case nir_op_unpack_half_2x16:
1600 result = emit_unpack_half_2x16(ctx, src[0]);
1601 break;
1602 case nir_op_fddx:
1603 case nir_op_fddy:
1604 case nir_op_fddx_fine:
1605 case nir_op_fddy_fine:
1606 case nir_op_fddx_coarse:
1607 case nir_op_fddy_coarse:
1608 result = emit_ddxy(ctx, instr->op, src[0]);
1609 break;
1610 default:
1611 fprintf(stderr, "Unknown NIR alu instr: ");
1612 nir_print_instr(&instr->instr, stderr);
1613 fprintf(stderr, "\n");
1614 abort();
1615 }
1616
1617 if (result) {
1618 assert(instr->dest.dest.is_ssa);
1619 result = to_integer(ctx, result);
1620 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1621 result);
1622 }
1623 }
1624
1625 static void visit_load_const(struct nir_to_llvm_context *ctx,
1626 nir_load_const_instr *instr)
1627 {
1628 LLVMValueRef values[4], value = NULL;
1629 LLVMTypeRef element_type =
1630 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1631
1632 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1633 switch (instr->def.bit_size) {
1634 case 32:
1635 values[i] = LLVMConstInt(element_type,
1636 instr->value.u32[i], false);
1637 break;
1638 case 64:
1639 values[i] = LLVMConstInt(element_type,
1640 instr->value.u64[i], false);
1641 break;
1642 default:
1643 fprintf(stderr,
1644 "unsupported nir load_const bit_size: %d\n",
1645 instr->def.bit_size);
1646 abort();
1647 }
1648 }
1649 if (instr->def.num_components > 1) {
1650 value = LLVMConstVector(values, instr->def.num_components);
1651 } else
1652 value = values[0];
1653
1654 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1655 }
1656
1657 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1658 LLVMTypeRef type)
1659 {
1660 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1661 return LLVMBuildBitCast(ctx->builder, ptr,
1662 LLVMPointerType(type, addr_space), "");
1663 }
1664
1665 static LLVMValueRef
1666 emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name,
1667 LLVMTypeRef return_type, LLVMValueRef *params,
1668 unsigned param_count, unsigned attrib_mask)
1669 {
1670 LLVMValueRef function;
1671
1672 function = LLVMGetNamedFunction(ctx->module, name);
1673 if (!function) {
1674 LLVMTypeRef param_types[32], function_type;
1675 unsigned i;
1676
1677 assert(param_count <= 32);
1678
1679 for (i = 0; i < param_count; ++i) {
1680 assert(params[i]);
1681 param_types[i] = LLVMTypeOf(params[i]);
1682 }
1683 function_type =
1684 LLVMFunctionType(return_type, param_types, param_count, 0);
1685 function = LLVMAddFunction(ctx->module, name, function_type);
1686
1687 LLVMSetFunctionCallConv(function, LLVMCCallConv);
1688 LLVMSetLinkage(function, LLVMExternalLinkage);
1689
1690 attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
1691 while (attrib_mask) {
1692 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
1693 ac_add_function_attr(function, -1, attr);
1694 }
1695 }
1696 return LLVMBuildCall(ctx->builder, function, params, param_count, "");
1697 }
1698
1699 static LLVMValueRef
1700 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1701 {
1702 LLVMValueRef size =
1703 LLVMBuildExtractElement(ctx->builder, descriptor,
1704 LLVMConstInt(ctx->i32, 2, false), "");
1705
1706 /* VI only */
1707 if (ctx->options->chip_class >= VI && in_elements) {
1708 /* On VI, the descriptor contains the size in bytes,
1709 * but TXQ must return the size in elements.
1710 * The stride is always non-zero for resources using TXQ.
1711 */
1712 LLVMValueRef stride =
1713 LLVMBuildExtractElement(ctx->builder, descriptor,
1714 LLVMConstInt(ctx->i32, 1, false), "");
1715 stride = LLVMBuildLShr(ctx->builder, stride,
1716 LLVMConstInt(ctx->i32, 16, false), "");
1717 stride = LLVMBuildAnd(ctx->builder, stride,
1718 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1719
1720 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1721 }
1722 return size;
1723 }
1724
1725 /**
1726 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1727 * intrinsic names).
1728 */
1729 static void build_int_type_name(
1730 LLVMTypeRef type,
1731 char *buf, unsigned bufsize)
1732 {
1733 assert(bufsize >= 6);
1734
1735 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1736 snprintf(buf, bufsize, "v%ui32",
1737 LLVMGetVectorSize(type));
1738 else
1739 strcpy(buf, "i32");
1740 }
1741
1742 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1743 struct ac_tex_info *tinfo,
1744 nir_tex_instr *instr,
1745 const char *intr_name,
1746 unsigned coord_vgpr_index)
1747 {
1748 LLVMValueRef coord = tinfo->args[0];
1749 LLVMValueRef half_texel[2];
1750 int c;
1751
1752 //TODO Rect
1753 {
1754 LLVMValueRef txq_args[10];
1755 int txq_arg_count = 0;
1756 LLVMValueRef size;
1757 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1758 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
1759 txq_args[txq_arg_count++] = tinfo->args[1];
1760 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
1761 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
1762 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1763 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
1764 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1765 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1766 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1767 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1768 size = emit_llvm_intrinsic(ctx, "llvm.SI.getresinfo.i32", ctx->v4i32,
1769 txq_args, txq_arg_count,
1770 AC_FUNC_ATTR_READNONE);
1771
1772 for (c = 0; c < 2; c++) {
1773 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1774 LLVMConstInt(ctx->i32, c, false), "");
1775 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1776 half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
1777 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1778 LLVMConstReal(ctx->f32, -0.5), "");
1779 }
1780 }
1781
1782 for (c = 0; c < 2; c++) {
1783 LLVMValueRef tmp;
1784 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1785 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1786 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1787 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1788 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1789 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1790 }
1791
1792 tinfo->args[0] = coord;
1793 return emit_llvm_intrinsic(ctx, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1794 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1795
1796 }
1797
1798 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1799 nir_tex_instr *instr,
1800 struct ac_tex_info *tinfo)
1801 {
1802 const char *name = "llvm.SI.image.sample";
1803 const char *infix = "";
1804 char intr_name[127];
1805 char type[64];
1806 bool is_shadow = instr->is_shadow;
1807 bool has_offset = tinfo->has_offset;
1808 switch (instr->op) {
1809 case nir_texop_txf:
1810 case nir_texop_txf_ms:
1811 case nir_texop_samples_identical:
1812 name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
1813 instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
1814 "llvm.SI.image.load.mip";
1815 is_shadow = false;
1816 has_offset = false;
1817 break;
1818 case nir_texop_txb:
1819 infix = ".b";
1820 break;
1821 case nir_texop_txl:
1822 infix = ".l";
1823 break;
1824 case nir_texop_txs:
1825 name = "llvm.SI.getresinfo";
1826 break;
1827 case nir_texop_query_levels:
1828 name = "llvm.SI.getresinfo";
1829 break;
1830 case nir_texop_tex:
1831 if (ctx->stage != MESA_SHADER_FRAGMENT)
1832 infix = ".lz";
1833 break;
1834 case nir_texop_txd:
1835 infix = ".d";
1836 break;
1837 case nir_texop_tg4:
1838 name = "llvm.SI.gather4";
1839 infix = ".lz";
1840 break;
1841 case nir_texop_lod:
1842 name = "llvm.SI.getlod";
1843 is_shadow = false;
1844 has_offset = false;
1845 break;
1846 default:
1847 break;
1848 }
1849
1850 build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
1851 sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
1852 has_offset ? ".o" : "", type);
1853
1854 if (instr->op == nir_texop_tg4) {
1855 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1856 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1857 return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1858 (int)has_offset + (int)is_shadow);
1859 }
1860 }
1861 return emit_llvm_intrinsic(ctx, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1862 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1863
1864 }
1865
1866 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
1867 nir_intrinsic_instr *instr)
1868 {
1869 LLVMValueRef index = get_src(ctx, instr->src[0]);
1870 unsigned desc_set = nir_intrinsic_desc_set(instr);
1871 unsigned binding = nir_intrinsic_binding(instr);
1872 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
1873 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
1874 unsigned base_offset = layout->binding[binding].offset;
1875 LLVMValueRef offset, stride;
1876
1877 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1878 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
1879 desc_ptr = ctx->push_constants;
1880 base_offset = ctx->options->layout->push_constant_size;
1881 base_offset += 16 * layout->binding[binding].dynamic_offset_offset;
1882 stride = LLVMConstInt(ctx->i32, 16, false);
1883 } else
1884 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
1885
1886 offset = LLVMConstInt(ctx->i32, base_offset, false);
1887 index = LLVMBuildMul(ctx->builder, index, stride, "");
1888 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
1889
1890 LLVMValueRef indices[] = {ctx->i32zero, offset};
1891 desc_ptr = LLVMBuildGEP(ctx->builder, desc_ptr, indices, 2, "");
1892 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
1893 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
1894
1895 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
1896 }
1897
1898 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1899 nir_intrinsic_instr *instr)
1900 {
1901 LLVMValueRef ptr;
1902
1903 LLVMValueRef indices[] = {ctx->i32zero, get_src(ctx, instr->src[0])};
1904 ptr = LLVMBuildGEP(ctx->builder, ctx->push_constants, indices, 2, "");
1905 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1906
1907 return LLVMBuildLoad(ctx->builder, ptr, "");
1908 }
1909
1910 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1911 nir_intrinsic_instr *instr)
1912 {
1913 LLVMValueRef desc = get_src(ctx, instr->src[0]);
1914
1915 return get_buffer_size(ctx, desc, false);
1916 }
1917 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
1918 nir_intrinsic_instr *instr)
1919 {
1920 const char *store_name;
1921 LLVMTypeRef data_type = ctx->f32;
1922 unsigned writemask = nir_intrinsic_write_mask(instr);
1923 LLVMValueRef base_data, base_offset;
1924 LLVMValueRef params[6];
1925
1926 if (ctx->stage == MESA_SHADER_FRAGMENT)
1927 ctx->shader_info->fs.writes_memory = true;
1928
1929 params[1] = get_src(ctx, instr->src[1]);
1930 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1931 params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */
1932 params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1933
1934 if (instr->num_components > 1)
1935 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1936
1937 base_data = to_float(ctx, get_src(ctx, instr->src[0]));
1938 base_data = trim_vector(ctx, base_data, instr->num_components);
1939 base_data = LLVMBuildBitCast(ctx->builder, base_data,
1940 data_type, "");
1941 base_offset = get_src(ctx, instr->src[2]); /* voffset */
1942 while (writemask) {
1943 int start, count;
1944 LLVMValueRef data;
1945 LLVMValueRef offset;
1946 LLVMValueRef tmp;
1947 u_bit_scan_consecutive_range(&writemask, &start, &count);
1948
1949 /* Due to an LLVM limitation, split 3-element writes
1950 * into a 2-element and a 1-element write. */
1951 if (count == 3) {
1952 writemask |= 1 << (start + 2);
1953 count = 2;
1954 }
1955
1956 if (count == 4) {
1957 store_name = "llvm.amdgcn.buffer.store.v4f32";
1958 data = base_data;
1959 } else if (count == 2) {
1960 tmp = LLVMBuildExtractElement(ctx->builder,
1961 base_data, LLVMConstInt(ctx->i32, start, false), "");
1962 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
1963 ctx->i32zero, "");
1964
1965 tmp = LLVMBuildExtractElement(ctx->builder,
1966 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
1967 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
1968 ctx->i32one, "");
1969 store_name = "llvm.amdgcn.buffer.store.v2f32";
1970
1971 } else {
1972 assert(count == 1);
1973 if (get_llvm_num_components(base_data) > 1)
1974 data = LLVMBuildExtractElement(ctx->builder, base_data,
1975 LLVMConstInt(ctx->i32, start, false), "");
1976 else
1977 data = base_data;
1978 store_name = "llvm.amdgcn.buffer.store.f32";
1979 }
1980
1981 offset = base_offset;
1982 if (start != 0) {
1983 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
1984 }
1985 params[0] = data;
1986 params[3] = offset;
1987 emit_llvm_intrinsic(ctx, store_name,
1988 LLVMVoidTypeInContext(ctx->context), params, 6, 0);
1989 }
1990 }
1991
1992 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1993 nir_intrinsic_instr *instr)
1994 {
1995 const char *name;
1996 LLVMValueRef params[6];
1997 int arg_count = 0;
1998 if (ctx->stage == MESA_SHADER_FRAGMENT)
1999 ctx->shader_info->fs.writes_memory = true;
2000
2001 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
2002 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
2003 }
2004 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
2005 params[arg_count++] = get_src(ctx, instr->src[0]);
2006 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
2007 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
2008 params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */
2009
2010 switch (instr->intrinsic) {
2011 case nir_intrinsic_ssbo_atomic_add:
2012 name = "llvm.amdgcn.buffer.atomic.add";
2013 break;
2014 case nir_intrinsic_ssbo_atomic_imin:
2015 name = "llvm.amdgcn.buffer.atomic.smin";
2016 break;
2017 case nir_intrinsic_ssbo_atomic_umin:
2018 name = "llvm.amdgcn.buffer.atomic.umin";
2019 break;
2020 case nir_intrinsic_ssbo_atomic_imax:
2021 name = "llvm.amdgcn.buffer.atomic.smax";
2022 break;
2023 case nir_intrinsic_ssbo_atomic_umax:
2024 name = "llvm.amdgcn.buffer.atomic.umax";
2025 break;
2026 case nir_intrinsic_ssbo_atomic_and:
2027 name = "llvm.amdgcn.buffer.atomic.and";
2028 break;
2029 case nir_intrinsic_ssbo_atomic_or:
2030 name = "llvm.amdgcn.buffer.atomic.or";
2031 break;
2032 case nir_intrinsic_ssbo_atomic_xor:
2033 name = "llvm.amdgcn.buffer.atomic.xor";
2034 break;
2035 case nir_intrinsic_ssbo_atomic_exchange:
2036 name = "llvm.amdgcn.buffer.atomic.swap";
2037 break;
2038 case nir_intrinsic_ssbo_atomic_comp_swap:
2039 name = "llvm.amdgcn.buffer.atomic.cmpswap";
2040 break;
2041 default:
2042 abort();
2043 }
2044
2045 return emit_llvm_intrinsic(ctx, name, ctx->i32, params, arg_count, 0);
2046 }
2047
2048 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
2049 nir_intrinsic_instr *instr)
2050 {
2051 const char *load_name;
2052 LLVMTypeRef data_type = ctx->f32;
2053 if (instr->num_components == 3)
2054 data_type = LLVMVectorType(ctx->f32, 4);
2055 else if (instr->num_components > 1)
2056 data_type = LLVMVectorType(ctx->f32, instr->num_components);
2057
2058 if (instr->num_components == 4 || instr->num_components == 3)
2059 load_name = "llvm.amdgcn.buffer.load.v4f32";
2060 else if (instr->num_components == 2)
2061 load_name = "llvm.amdgcn.buffer.load.v2f32";
2062 else if (instr->num_components == 1)
2063 load_name = "llvm.amdgcn.buffer.load.f32";
2064 else
2065 abort();
2066
2067 LLVMValueRef params[] = {
2068 get_src(ctx, instr->src[0]),
2069 LLVMConstInt(ctx->i32, 0, false),
2070 get_src(ctx, instr->src[1]),
2071 LLVMConstInt(ctx->i1, 0, false),
2072 LLVMConstInt(ctx->i1, 0, false),
2073 };
2074
2075 LLVMValueRef ret =
2076 emit_llvm_intrinsic(ctx, load_name, data_type, params, 5, 0);
2077
2078 if (instr->num_components == 3)
2079 ret = trim_vector(ctx, ret, 3);
2080
2081 return LLVMBuildBitCast(ctx->builder, ret,
2082 get_def_type(ctx, &instr->dest.ssa), "");
2083 }
2084
2085 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
2086 nir_intrinsic_instr *instr)
2087 {
2088 const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
2089 const char *load_name;
2090 LLVMTypeRef data_type = ctx->f32;
2091 LLVMValueRef results[4], ret;
2092 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
2093 LLVMValueRef offset = get_src(ctx, instr->src[1]);
2094
2095 rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
2096
2097 for (unsigned i = 0; i < instr->num_components; ++i) {
2098 LLVMValueRef params[] = {
2099 rsrc,
2100 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2101 offset, "")
2102 };
2103 results[i] = emit_llvm_intrinsic(ctx, "llvm.SI.load.const", ctx->f32,
2104 params, 2, AC_FUNC_ATTR_READNONE);
2105 }
2106
2107
2108 ret = build_gather_values(ctx, results, instr->num_components);
2109 return LLVMBuildBitCast(ctx->builder, ret,
2110 get_def_type(ctx, &instr->dest.ssa), "");
2111 }
2112
2113 static void
2114 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
2115 bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
2116 {
2117 unsigned const_offset = 0;
2118 LLVMValueRef offset = NULL;
2119
2120
2121 while (tail->child != NULL) {
2122 const struct glsl_type *parent_type = tail->type;
2123 tail = tail->child;
2124
2125 if (tail->deref_type == nir_deref_type_array) {
2126 nir_deref_array *deref_array = nir_deref_as_array(tail);
2127 LLVMValueRef index, stride, local_offset;
2128 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2129
2130 const_offset += size * deref_array->base_offset;
2131 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2132 continue;
2133
2134 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2135 index = get_src(ctx, deref_array->indirect);
2136 stride = LLVMConstInt(ctx->i32, size, 0);
2137 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2138
2139 if (offset)
2140 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2141 else
2142 offset = local_offset;
2143 } else if (tail->deref_type == nir_deref_type_struct) {
2144 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2145
2146 for (unsigned i = 0; i < deref_struct->index; i++) {
2147 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2148 const_offset += glsl_count_attribute_slots(ft, vs_in);
2149 }
2150 } else
2151 unreachable("unsupported deref type");
2152
2153 }
2154
2155 if (const_offset && offset)
2156 offset = LLVMBuildAdd(ctx->builder, offset,
2157 LLVMConstInt(ctx->i32, const_offset, 0),
2158 "");
2159
2160 *const_out = const_offset;
2161 *indir_out = offset;
2162 }
2163
2164 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2165 nir_intrinsic_instr *instr)
2166 {
2167 LLVMValueRef values[4];
2168 int idx = instr->variables[0]->var->data.driver_location;
2169 int ve = instr->dest.ssa.num_components;
2170 LLVMValueRef indir_index;
2171 unsigned const_index;
2172 switch (instr->variables[0]->var->data.mode) {
2173 case nir_var_shader_in:
2174 radv_get_deref_offset(ctx, &instr->variables[0]->deref,
2175 ctx->stage == MESA_SHADER_VERTEX,
2176 &const_index, &indir_index);
2177 for (unsigned chan = 0; chan < ve; chan++) {
2178 if (indir_index) {
2179 unsigned count = glsl_count_attribute_slots(
2180 instr->variables[0]->var->type,
2181 ctx->stage == MESA_SHADER_VERTEX);
2182 LLVMValueRef tmp_vec = build_gather_values_extended(
2183 ctx, ctx->inputs + idx + chan, count,
2184 4, false);
2185
2186 values[chan] = LLVMBuildExtractElement(ctx->builder,
2187 tmp_vec,
2188 indir_index, "");
2189 } else
2190 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2191 }
2192 return to_integer(ctx, build_gather_values(ctx, values, ve));
2193 break;
2194 case nir_var_local:
2195 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2196 &const_index, &indir_index);
2197 for (unsigned chan = 0; chan < ve; chan++) {
2198 if (indir_index) {
2199 unsigned count = glsl_count_attribute_slots(
2200 instr->variables[0]->var->type, false);
2201 LLVMValueRef tmp_vec = build_gather_values_extended(
2202 ctx, ctx->locals + idx + chan, count,
2203 4, true);
2204
2205 values[chan] = LLVMBuildExtractElement(ctx->builder,
2206 tmp_vec,
2207 indir_index, "");
2208 } else {
2209 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2210 }
2211 }
2212 return to_integer(ctx, build_gather_values(ctx, values, ve));
2213 case nir_var_shader_out:
2214 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2215 &const_index, &indir_index);
2216 for (unsigned chan = 0; chan < ve; chan++) {
2217 if (indir_index) {
2218 unsigned count = glsl_count_attribute_slots(
2219 instr->variables[0]->var->type, false);
2220 LLVMValueRef tmp_vec = build_gather_values_extended(
2221 ctx, ctx->outputs + idx + chan, count,
2222 4, true);
2223
2224 values[chan] = LLVMBuildExtractElement(ctx->builder,
2225 tmp_vec,
2226 indir_index, "");
2227 } else {
2228 values[chan] = LLVMBuildLoad(ctx->builder,
2229 ctx->outputs[idx + chan + const_index * 4],
2230 "");
2231 }
2232 }
2233 return to_integer(ctx, build_gather_values(ctx, values, ve));
2234 case nir_var_shared: {
2235 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2236 &const_index, &indir_index);
2237 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2238 LLVMValueRef derived_ptr;
2239 LLVMValueRef index = ctx->i32zero;
2240 if (indir_index)
2241 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2242 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2243
2244 return to_integer(ctx, LLVMBuildLoad(ctx->builder, derived_ptr, ""));
2245 break;
2246 }
2247 default:
2248 break;
2249 }
2250 return NULL;
2251 }
2252
2253 static void
2254 visit_store_var(struct nir_to_llvm_context *ctx,
2255 nir_intrinsic_instr *instr)
2256 {
2257 LLVMValueRef temp_ptr, value;
2258 int idx = instr->variables[0]->var->data.driver_location;
2259 LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2260 int writemask = instr->const_index[0];
2261 LLVMValueRef indir_index;
2262 unsigned const_index;
2263 switch (instr->variables[0]->var->data.mode) {
2264 case nir_var_shader_out:
2265 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2266 &const_index, &indir_index);
2267 for (unsigned chan = 0; chan < 4; chan++) {
2268 int stride = 4;
2269 if (!(writemask & (1 << chan)))
2270 continue;
2271 if (get_llvm_num_components(src) == 1)
2272 value = src;
2273 else
2274 value = LLVMBuildExtractElement(ctx->builder, src,
2275 LLVMConstInt(ctx->i32,
2276 chan, false),
2277 "");
2278
2279 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2280 instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
2281 stride = 1;
2282 if (indir_index) {
2283 unsigned count = glsl_count_attribute_slots(
2284 instr->variables[0]->var->type, false);
2285 LLVMValueRef tmp_vec = build_gather_values_extended(
2286 ctx, ctx->outputs + idx + chan, count,
2287 stride, true);
2288
2289 if (get_llvm_num_components(tmp_vec) > 1) {
2290 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2291 value, indir_index, "");
2292 } else
2293 tmp_vec = value;
2294 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2295 count, stride, tmp_vec);
2296
2297 } else {
2298 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2299
2300 LLVMBuildStore(ctx->builder, value, temp_ptr);
2301 }
2302 }
2303 break;
2304 case nir_var_local:
2305 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2306 &const_index, &indir_index);
2307 for (unsigned chan = 0; chan < 4; chan++) {
2308 if (!(writemask & (1 << chan)))
2309 continue;
2310
2311 if (get_llvm_num_components(src) == 1)
2312 value = src;
2313 else
2314 value = LLVMBuildExtractElement(ctx->builder, src,
2315 LLVMConstInt(ctx->i32, chan, false), "");
2316 if (indir_index) {
2317 unsigned count = glsl_count_attribute_slots(
2318 instr->variables[0]->var->type, false);
2319 LLVMValueRef tmp_vec = build_gather_values_extended(
2320 ctx, ctx->locals + idx + chan, count,
2321 4, true);
2322
2323 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2324 value, indir_index, "");
2325 build_store_values_extended(ctx, ctx->locals + idx + chan,
2326 count, 4, tmp_vec);
2327 } else {
2328 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2329
2330 LLVMBuildStore(ctx->builder, value, temp_ptr);
2331 }
2332 }
2333 break;
2334 case nir_var_shared: {
2335 LLVMValueRef ptr;
2336 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2337 &const_index, &indir_index);
2338
2339 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2340 LLVMValueRef index = ctx->i32zero;
2341 LLVMValueRef derived_ptr;
2342
2343 if (indir_index)
2344 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2345 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2346 LLVMBuildStore(ctx->builder,
2347 to_integer(ctx, src), derived_ptr);
2348 break;
2349 }
2350 default:
2351 break;
2352 }
2353 }
2354
2355 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2356 {
2357 switch (dim) {
2358 case GLSL_SAMPLER_DIM_BUF:
2359 return 1;
2360 case GLSL_SAMPLER_DIM_1D:
2361 return array ? 2 : 1;
2362 case GLSL_SAMPLER_DIM_2D:
2363 return array ? 3 : 2;
2364 case GLSL_SAMPLER_DIM_3D:
2365 case GLSL_SAMPLER_DIM_CUBE:
2366 return 3;
2367 case GLSL_SAMPLER_DIM_RECT:
2368 case GLSL_SAMPLER_DIM_SUBPASS:
2369 return 2;
2370 default:
2371 break;
2372 }
2373 return 0;
2374 }
2375
2376 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
2377 nir_intrinsic_instr *instr, bool add_frag_pos)
2378 {
2379 const struct glsl_type *type = instr->variables[0]->var->type;
2380 if(instr->variables[0]->deref.child)
2381 type = instr->variables[0]->deref.child->type;
2382
2383 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2384 LLVMValueRef coords[4];
2385 LLVMValueRef masks[] = {
2386 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2387 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2388 };
2389 LLVMValueRef res;
2390 int count;
2391 count = image_type_to_components_count(glsl_get_sampler_dim(type),
2392 glsl_sampler_type_is_array(type));
2393
2394 if (count == 1) {
2395 if (instr->src[0].ssa->num_components)
2396 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
2397 else
2398 res = src0;
2399 } else {
2400 int chan;
2401 for (chan = 0; chan < count; ++chan) {
2402 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
2403 }
2404
2405 if (add_frag_pos) {
2406 for (chan = 0; chan < count; ++chan)
2407 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
2408 }
2409 if (count == 3) {
2410 coords[3] = LLVMGetUndef(ctx->i32);
2411 count = 4;
2412 }
2413 res = build_gather_values(ctx, coords, count);
2414 }
2415 return res;
2416 }
2417
2418 static void build_type_name_for_intr(
2419 LLVMTypeRef type,
2420 char *buf, unsigned bufsize)
2421 {
2422 LLVMTypeRef elem_type = type;
2423
2424 assert(bufsize >= 8);
2425
2426 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
2427 int ret = snprintf(buf, bufsize, "v%u",
2428 LLVMGetVectorSize(type));
2429 if (ret < 0) {
2430 char *type_name = LLVMPrintTypeToString(type);
2431 fprintf(stderr, "Error building type name for: %s\n",
2432 type_name);
2433 return;
2434 }
2435 elem_type = LLVMGetElementType(type);
2436 buf += ret;
2437 bufsize -= ret;
2438 }
2439 switch (LLVMGetTypeKind(elem_type)) {
2440 default: break;
2441 case LLVMIntegerTypeKind:
2442 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
2443 break;
2444 case LLVMFloatTypeKind:
2445 snprintf(buf, bufsize, "f32");
2446 break;
2447 case LLVMDoubleTypeKind:
2448 snprintf(buf, bufsize, "f64");
2449 break;
2450 }
2451 }
2452
2453 static void get_image_intr_name(const char *base_name,
2454 LLVMTypeRef data_type,
2455 LLVMTypeRef coords_type,
2456 LLVMTypeRef rsrc_type,
2457 char *out_name, unsigned out_len)
2458 {
2459 char coords_type_name[8];
2460
2461 build_type_name_for_intr(coords_type, coords_type_name,
2462 sizeof(coords_type_name));
2463
2464 if (HAVE_LLVM <= 0x0309) {
2465 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
2466 } else {
2467 char data_type_name[8];
2468 char rsrc_type_name[8];
2469
2470 build_type_name_for_intr(data_type, data_type_name,
2471 sizeof(data_type_name));
2472 build_type_name_for_intr(rsrc_type, rsrc_type_name,
2473 sizeof(rsrc_type_name));
2474 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
2475 data_type_name, coords_type_name, rsrc_type_name);
2476 }
2477 }
2478
2479 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
2480 nir_intrinsic_instr *instr)
2481 {
2482 LLVMValueRef params[7];
2483 LLVMValueRef res;
2484 char intrinsic_name[64];
2485 const nir_variable *var = instr->variables[0]->var;
2486 const struct glsl_type *type = var->type;
2487 if(instr->variables[0]->deref.child)
2488 type = instr->variables[0]->deref.child->type;
2489
2490 type = glsl_without_array(type);
2491 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2492 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2493 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2494 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2495 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2496 params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */
2497 params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */
2498 res = emit_llvm_intrinsic(ctx, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
2499 params, 5, 0);
2500
2501 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
2502 res = to_integer(ctx, res);
2503 } else {
2504 bool is_da = glsl_sampler_type_is_array(type) ||
2505 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2506 bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
2507 LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
2508 LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
2509 LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
2510
2511 params[0] = get_image_coords(ctx, instr, add_frag_pos);
2512 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2513 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2514 if (HAVE_LLVM <= 0x0309) {
2515 params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */
2516 params[4] = da;
2517 params[5] = glc;
2518 params[6] = slc;
2519 } else {
2520 LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
2521 params[3] = glc;
2522 params[4] = slc;
2523 params[5] = lwe;
2524 params[6] = da;
2525 }
2526
2527 get_image_intr_name("llvm.amdgcn.image.load",
2528 ctx->v4f32, /* vdata */
2529 LLVMTypeOf(params[0]), /* coords */
2530 LLVMTypeOf(params[1]), /* rsrc */
2531 intrinsic_name, sizeof(intrinsic_name));
2532
2533 res = emit_llvm_intrinsic(ctx, intrinsic_name, ctx->v4f32,
2534 params, 7, AC_FUNC_ATTR_READONLY);
2535 }
2536 return to_integer(ctx, res);
2537 }
2538
2539 static void visit_image_store(struct nir_to_llvm_context *ctx,
2540 nir_intrinsic_instr *instr)
2541 {
2542 LLVMValueRef params[8];
2543 char intrinsic_name[64];
2544 const nir_variable *var = instr->variables[0]->var;
2545 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2546 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2547 const struct glsl_type *type = glsl_without_array(var->type);
2548
2549 if (ctx->stage == MESA_SHADER_FRAGMENT)
2550 ctx->shader_info->fs.writes_memory = true;
2551
2552 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2553 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
2554 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2555 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2556 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2557 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2558 params[4] = i1false; /* glc */
2559 params[5] = i1false; /* slc */
2560 emit_llvm_intrinsic(ctx, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
2561 params, 6, 0);
2562 } else {
2563 bool is_da = glsl_sampler_type_is_array(type) ||
2564 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2565 LLVMValueRef da = is_da ? i1true : i1false;
2566 LLVMValueRef glc = i1false;
2567 LLVMValueRef slc = i1false;
2568
2569 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
2570 params[1] = get_image_coords(ctx, instr, false); /* coords */
2571 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2572 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2573 if (HAVE_LLVM <= 0x0309) {
2574 params[4] = i1false; /* r128 */
2575 params[5] = da;
2576 params[6] = glc;
2577 params[7] = slc;
2578 } else {
2579 LLVMValueRef lwe = i1false;
2580 params[4] = glc;
2581 params[5] = slc;
2582 params[6] = lwe;
2583 params[7] = da;
2584 }
2585
2586 get_image_intr_name("llvm.amdgcn.image.store",
2587 LLVMTypeOf(params[0]), /* vdata */
2588 LLVMTypeOf(params[1]), /* coords */
2589 LLVMTypeOf(params[2]), /* rsrc */
2590 intrinsic_name, sizeof(intrinsic_name));
2591
2592 emit_llvm_intrinsic(ctx, intrinsic_name, ctx->voidt,
2593 params, 8, 0);
2594 }
2595
2596 }
2597
2598 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
2599 nir_intrinsic_instr *instr)
2600 {
2601 LLVMValueRef params[6];
2602 int param_count = 0;
2603 const nir_variable *var = instr->variables[0]->var;
2604 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2605 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2606 const char *base_name = "llvm.amdgcn.image.atomic";
2607 const char *atomic_name;
2608 LLVMValueRef coords;
2609 char intrinsic_name[32], coords_type[8];
2610 const struct glsl_type *type = glsl_without_array(var->type);
2611
2612 if (ctx->stage == MESA_SHADER_FRAGMENT)
2613 ctx->shader_info->fs.writes_memory = true;
2614
2615 params[param_count++] = get_src(ctx, instr->src[2]);
2616 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
2617 params[param_count++] = get_src(ctx, instr->src[3]);
2618
2619 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2620 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2621 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2622 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2623 params[param_count++] = ctx->i32zero; /* voffset */
2624 params[param_count++] = i1false; /* glc */
2625 params[param_count++] = i1false; /* slc */
2626 } else {
2627 bool da = glsl_sampler_type_is_array(type) ||
2628 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2629
2630 coords = params[param_count++] = get_image_coords(ctx, instr, false);
2631 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2632 params[param_count++] = i1false; /* r128 */
2633 params[param_count++] = da ? i1true : i1false; /* da */
2634 params[param_count++] = i1false; /* slc */
2635 }
2636
2637 switch (instr->intrinsic) {
2638 case nir_intrinsic_image_atomic_add:
2639 atomic_name = "add";
2640 break;
2641 case nir_intrinsic_image_atomic_min:
2642 atomic_name = "smin";
2643 break;
2644 case nir_intrinsic_image_atomic_max:
2645 atomic_name = "smax";
2646 break;
2647 case nir_intrinsic_image_atomic_and:
2648 atomic_name = "and";
2649 break;
2650 case nir_intrinsic_image_atomic_or:
2651 atomic_name = "or";
2652 break;
2653 case nir_intrinsic_image_atomic_xor:
2654 atomic_name = "xor";
2655 break;
2656 case nir_intrinsic_image_atomic_exchange:
2657 atomic_name = "swap";
2658 break;
2659 case nir_intrinsic_image_atomic_comp_swap:
2660 atomic_name = "cmpswap";
2661 break;
2662 default:
2663 abort();
2664 }
2665 build_int_type_name(LLVMTypeOf(coords),
2666 coords_type, sizeof(coords_type));
2667
2668 snprintf(intrinsic_name, sizeof(intrinsic_name),
2669 "%s.%s.%s", base_name, atomic_name, coords_type);
2670 return emit_llvm_intrinsic(ctx, intrinsic_name, ctx->i32, params, param_count, 0);
2671 }
2672
2673 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
2674 nir_intrinsic_instr *instr)
2675 {
2676 LLVMValueRef res;
2677 LLVMValueRef params[10];
2678 const nir_variable *var = instr->variables[0]->var;
2679 const struct glsl_type *type = instr->variables[0]->var->type;
2680 bool da = glsl_sampler_type_is_array(var->type) ||
2681 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
2682 if(instr->variables[0]->deref.child)
2683 type = instr->variables[0]->deref.child->type;
2684
2685 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
2686 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
2687 params[0] = ctx->i32zero;
2688 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2689 params[2] = LLVMConstInt(ctx->i32, 15, false);
2690 params[3] = ctx->i32zero;
2691 params[4] = ctx->i32zero;
2692 params[5] = da ? ctx->i32one : ctx->i32zero;
2693 params[6] = ctx->i32zero;
2694 params[7] = ctx->i32zero;
2695 params[8] = ctx->i32zero;
2696 params[9] = ctx->i32zero;
2697
2698 res = emit_llvm_intrinsic(ctx, "llvm.SI.getresinfo.i32", ctx->v4i32,
2699 params, 10, AC_FUNC_ATTR_READNONE);
2700
2701 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
2702 glsl_sampler_type_is_array(type)) {
2703 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
2704 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
2705 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
2706 z = LLVMBuildSDiv(ctx->builder, z, six, "");
2707 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
2708 }
2709 return res;
2710 }
2711
2712 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2713 {
2714 LLVMValueRef args[1] = {
2715 LLVMConstInt(ctx->i32, 0xf70, false),
2716 };
2717 emit_llvm_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
2718 ctx->voidt, args, 1, 0);
2719 }
2720
2721 static void emit_barrier(struct nir_to_llvm_context *ctx)
2722 {
2723 // TODO tess
2724 emit_llvm_intrinsic(ctx, "llvm.amdgcn.s.barrier",
2725 ctx->voidt, NULL, 0, 0);
2726 }
2727
2728 static void emit_discard_if(struct nir_to_llvm_context *ctx,
2729 nir_intrinsic_instr *instr)
2730 {
2731 LLVMValueRef cond;
2732 ctx->shader_info->fs.can_discard = true;
2733
2734 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2735 get_src(ctx, instr->src[0]),
2736 ctx->i32zero, "");
2737
2738 cond = LLVMBuildSelect(ctx->builder, cond,
2739 LLVMConstReal(ctx->f32, -1.0f),
2740 ctx->f32zero, "");
2741 emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kill",
2742 LLVMVoidTypeInContext(ctx->context),
2743 &cond, 1, 0);
2744 }
2745
2746 static LLVMValueRef
2747 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2748 {
2749 LLVMValueRef result;
2750 LLVMValueRef thread_id = get_thread_id(ctx);
2751 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2752 LLVMConstInt(ctx->i32, 0xfc0, false), "");
2753
2754 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
2755 }
2756
2757 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2758 nir_intrinsic_instr *instr)
2759 {
2760 LLVMValueRef ptr, result;
2761 int idx = instr->variables[0]->var->data.driver_location;
2762 LLVMValueRef src = get_src(ctx, instr->src[0]);
2763 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2764
2765 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
2766 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2767 result = LLVMBuildAtomicCmpXchg(ctx->builder,
2768 ptr, src, src1,
2769 LLVMAtomicOrderingSequentiallyConsistent,
2770 LLVMAtomicOrderingSequentiallyConsistent,
2771 false);
2772 } else {
2773 LLVMAtomicRMWBinOp op;
2774 switch (instr->intrinsic) {
2775 case nir_intrinsic_var_atomic_add:
2776 op = LLVMAtomicRMWBinOpAdd;
2777 break;
2778 case nir_intrinsic_var_atomic_umin:
2779 op = LLVMAtomicRMWBinOpUMin;
2780 break;
2781 case nir_intrinsic_var_atomic_umax:
2782 op = LLVMAtomicRMWBinOpUMax;
2783 break;
2784 case nir_intrinsic_var_atomic_imin:
2785 op = LLVMAtomicRMWBinOpMin;
2786 break;
2787 case nir_intrinsic_var_atomic_imax:
2788 op = LLVMAtomicRMWBinOpMax;
2789 break;
2790 case nir_intrinsic_var_atomic_and:
2791 op = LLVMAtomicRMWBinOpAnd;
2792 break;
2793 case nir_intrinsic_var_atomic_or:
2794 op = LLVMAtomicRMWBinOpOr;
2795 break;
2796 case nir_intrinsic_var_atomic_xor:
2797 op = LLVMAtomicRMWBinOpXor;
2798 break;
2799 case nir_intrinsic_var_atomic_exchange:
2800 op = LLVMAtomicRMWBinOpXchg;
2801 break;
2802 default:
2803 return NULL;
2804 }
2805
2806 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2807 LLVMAtomicOrderingSequentiallyConsistent,
2808 false);
2809 }
2810 return result;
2811 }
2812
/* Interpolation location selectors; lookup_interp_param() compares its
 * `location` argument against these values. */
#define INTERP_CENTER 0
#define INTERP_CENTROID 1
#define INTERP_SAMPLE 2
2816
2817 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2818 enum glsl_interp_mode interp, unsigned location)
2819 {
2820 switch (interp) {
2821 case INTERP_MODE_FLAT:
2822 default:
2823 return NULL;
2824 case INTERP_MODE_SMOOTH:
2825 case INTERP_MODE_NONE:
2826 if (location == INTERP_CENTER)
2827 return ctx->persp_center;
2828 else if (location == INTERP_CENTROID)
2829 return ctx->persp_centroid;
2830 else if (location == INTERP_SAMPLE)
2831 return ctx->persp_sample;
2832 break;
2833 case INTERP_MODE_NOPERSPECTIVE:
2834 if (location == INTERP_CENTER)
2835 return ctx->linear_center;
2836 else if (location == INTERP_CENTROID)
2837 return ctx->linear_centroid;
2838 else if (location == INTERP_SAMPLE)
2839 return ctx->linear_sample;
2840 break;
2841 }
2842 return NULL;
2843 }
2844
2845 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2846 LLVMValueRef sample_id)
2847 {
2848 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
2849 LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2850 LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2851 LLVMValueRef result[2];
2852
2853 result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2854 result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2855
2856 return build_gather_values(ctx, result, 2);
2857 }
2858
2859 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2860 {
2861 LLVMValueRef values[2];
2862
2863 values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2864 values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2865 return build_gather_values(ctx, values, 2);
2866 }
2867
2868 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2869 nir_intrinsic_instr *instr)
2870 {
2871 LLVMValueRef result[2];
2872 LLVMValueRef interp_param, attr_number;
2873 unsigned location;
2874 unsigned chan;
2875 LLVMValueRef src_c0, src_c1;
2876 const char *intr_name;
2877 LLVMValueRef src0;
2878 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2879 switch (instr->intrinsic) {
2880 case nir_intrinsic_interp_var_at_centroid:
2881 location = INTERP_CENTROID;
2882 break;
2883 case nir_intrinsic_interp_var_at_sample:
2884 case nir_intrinsic_interp_var_at_offset:
2885 location = INTERP_SAMPLE;
2886 src0 = get_src(ctx, instr->src[0]);
2887 break;
2888 default:
2889 break;
2890 }
2891
2892 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2893 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2894 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
2895 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2896 LLVMValueRef sample_position;
2897 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2898
2899 /* fetch sample ID */
2900 sample_position = load_sample_position(ctx, src0);
2901
2902 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2903 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2904 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2905 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2906 }
2907 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2908 attr_number = LLVMConstInt(ctx->i32, input_index, false);
2909
2910 if (location == INTERP_SAMPLE) {
2911 LLVMValueRef ij_out[2];
2912 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2913
2914 /*
2915 * take the I then J parameters, and the DDX/Y for it, and
2916 * calculate the IJ inputs for the interpolator.
2917 * temp1 = ddx * offset/sample.x + I;
2918 * interp_param.I = ddy * offset/sample.y + temp1;
2919 * temp1 = ddx * offset/sample.x + J;
2920 * interp_param.J = ddy * offset/sample.y + temp1;
2921 */
2922 for (unsigned i = 0; i < 2; i++) {
2923 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2924 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2925 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2926 ddxy_out, ix_ll, "");
2927 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2928 ddxy_out, iy_ll, "");
2929 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2930 interp_param, ix_ll, "");
2931 LLVMValueRef temp1, temp2;
2932
2933 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2934 ctx->f32, "");
2935
2936 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2937 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2938
2939 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2940 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
2941
2942 ij_out[i] = LLVMBuildBitCast(ctx->builder,
2943 temp2, ctx->i32, "");
2944 }
2945 interp_param = build_gather_values(ctx, ij_out, 2);
2946
2947 }
2948 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
2949 for (chan = 0; chan < 2; chan++) {
2950 LLVMValueRef args[4];
2951 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2952
2953 args[0] = llvm_chan;
2954 args[1] = attr_number;
2955 args[2] = ctx->prim_mask;
2956 args[3] = interp_param;
2957 result[chan] = emit_llvm_intrinsic(ctx, intr_name,
2958 ctx->f32, args, args[3] ? 4 : 3,
2959 AC_FUNC_ATTR_READNONE);
2960 }
2961 return build_gather_values(ctx, result, 2);
2962 }
2963
2964 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
2965 nir_intrinsic_instr *instr)
2966 {
2967 LLVMValueRef result = NULL;
2968
2969 switch (instr->intrinsic) {
2970 case nir_intrinsic_load_work_group_id: {
2971 result = ctx->workgroup_ids;
2972 break;
2973 }
2974 case nir_intrinsic_load_base_vertex: {
2975 result = ctx->base_vertex;
2976 break;
2977 }
2978 case nir_intrinsic_load_vertex_id_zero_base: {
2979 result = ctx->vertex_id;
2980 break;
2981 }
2982 case nir_intrinsic_load_local_invocation_id: {
2983 result = ctx->local_invocation_ids;
2984 break;
2985 }
2986 case nir_intrinsic_load_base_instance:
2987 result = ctx->start_instance;
2988 break;
2989 case nir_intrinsic_load_sample_id:
2990 ctx->shader_info->fs.force_persample = true;
2991 result = unpack_param(ctx, ctx->ancillary, 8, 4);
2992 break;
2993 case nir_intrinsic_load_sample_pos:
2994 ctx->shader_info->fs.force_persample = true;
2995 result = load_sample_pos(ctx);
2996 break;
2997 case nir_intrinsic_load_front_face:
2998 result = ctx->front_face;
2999 break;
3000 case nir_intrinsic_load_instance_id:
3001 result = ctx->instance_id;
3002 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3003 ctx->shader_info->vs.vgpr_comp_cnt);
3004 break;
3005 case nir_intrinsic_load_num_work_groups:
3006 result = ctx->num_work_groups;
3007 break;
3008 case nir_intrinsic_load_local_invocation_index:
3009 result = visit_load_local_invocation_index(ctx);
3010 break;
3011 case nir_intrinsic_load_push_constant:
3012 result = visit_load_push_constant(ctx, instr);
3013 break;
3014 case nir_intrinsic_vulkan_resource_index:
3015 result = visit_vulkan_resource_index(ctx, instr);
3016 break;
3017 case nir_intrinsic_store_ssbo:
3018 visit_store_ssbo(ctx, instr);
3019 break;
3020 case nir_intrinsic_load_ssbo:
3021 result = visit_load_buffer(ctx, instr);
3022 break;
3023 case nir_intrinsic_ssbo_atomic_add:
3024 case nir_intrinsic_ssbo_atomic_imin:
3025 case nir_intrinsic_ssbo_atomic_umin:
3026 case nir_intrinsic_ssbo_atomic_imax:
3027 case nir_intrinsic_ssbo_atomic_umax:
3028 case nir_intrinsic_ssbo_atomic_and:
3029 case nir_intrinsic_ssbo_atomic_or:
3030 case nir_intrinsic_ssbo_atomic_xor:
3031 case nir_intrinsic_ssbo_atomic_exchange:
3032 case nir_intrinsic_ssbo_atomic_comp_swap:
3033 result = visit_atomic_ssbo(ctx, instr);
3034 break;
3035 case nir_intrinsic_load_ubo:
3036 result = visit_load_ubo_buffer(ctx, instr);
3037 break;
3038 case nir_intrinsic_get_buffer_size:
3039 result = visit_get_buffer_size(ctx, instr);
3040 break;
3041 case nir_intrinsic_load_var:
3042 result = visit_load_var(ctx, instr);
3043 break;
3044 case nir_intrinsic_store_var:
3045 visit_store_var(ctx, instr);
3046 break;
3047 case nir_intrinsic_image_load:
3048 result = visit_image_load(ctx, instr);
3049 break;
3050 case nir_intrinsic_image_store:
3051 visit_image_store(ctx, instr);
3052 break;
3053 case nir_intrinsic_image_atomic_add:
3054 case nir_intrinsic_image_atomic_min:
3055 case nir_intrinsic_image_atomic_max:
3056 case nir_intrinsic_image_atomic_and:
3057 case nir_intrinsic_image_atomic_or:
3058 case nir_intrinsic_image_atomic_xor:
3059 case nir_intrinsic_image_atomic_exchange:
3060 case nir_intrinsic_image_atomic_comp_swap:
3061 result = visit_image_atomic(ctx, instr);
3062 break;
3063 case nir_intrinsic_image_size:
3064 result = visit_image_size(ctx, instr);
3065 break;
3066 case nir_intrinsic_discard:
3067 ctx->shader_info->fs.can_discard = true;
3068 emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kilp",
3069 LLVMVoidTypeInContext(ctx->context),
3070 NULL, 0, 0);
3071 break;
3072 case nir_intrinsic_discard_if:
3073 emit_discard_if(ctx, instr);
3074 break;
3075 case nir_intrinsic_memory_barrier:
3076 emit_waitcnt(ctx);
3077 break;
3078 case nir_intrinsic_barrier:
3079 emit_barrier(ctx);
3080 break;
3081 case nir_intrinsic_var_atomic_add:
3082 case nir_intrinsic_var_atomic_imin:
3083 case nir_intrinsic_var_atomic_umin:
3084 case nir_intrinsic_var_atomic_imax:
3085 case nir_intrinsic_var_atomic_umax:
3086 case nir_intrinsic_var_atomic_and:
3087 case nir_intrinsic_var_atomic_or:
3088 case nir_intrinsic_var_atomic_xor:
3089 case nir_intrinsic_var_atomic_exchange:
3090 case nir_intrinsic_var_atomic_comp_swap:
3091 result = visit_var_atomic(ctx, instr);
3092 break;
3093 case nir_intrinsic_interp_var_at_centroid:
3094 case nir_intrinsic_interp_var_at_sample:
3095 case nir_intrinsic_interp_var_at_offset:
3096 result = visit_interp(ctx, instr);
3097 break;
3098 default:
3099 fprintf(stderr, "Unknown intrinsic: ");
3100 nir_print_instr(&instr->instr, stderr);
3101 fprintf(stderr, "\n");
3102 break;
3103 }
3104 if (result) {
3105 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3106 }
3107 }
3108
3109 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3110 nir_deref_var *deref,
3111 enum desc_type desc_type)
3112 {
3113 unsigned desc_set = deref->var->data.descriptor_set;
3114 LLVMValueRef list = ctx->descriptor_sets[desc_set];
3115 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3116 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
3117 unsigned offset = binding->offset;
3118 unsigned stride = binding->size;
3119 unsigned type_size;
3120 LLVMBuilderRef builder = ctx->builder;
3121 LLVMTypeRef type;
3122 LLVMValueRef indices[2];
3123 LLVMValueRef index = NULL;
3124
3125 assert(deref->var->data.binding < layout->binding_count);
3126
3127 switch (desc_type) {
3128 case DESC_IMAGE:
3129 type = ctx->v8i32;
3130 type_size = 32;
3131 break;
3132 case DESC_FMASK:
3133 type = ctx->v8i32;
3134 offset += 32;
3135 type_size = 32;
3136 break;
3137 case DESC_SAMPLER:
3138 type = ctx->v4i32;
3139 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
3140 offset += 64;
3141
3142 type_size = 16;
3143 break;
3144 case DESC_BUFFER:
3145 type = ctx->v4i32;
3146 type_size = 16;
3147 break;
3148 }
3149
3150 if (deref->deref.child) {
3151 nir_deref_array *child = (nir_deref_array*)deref->deref.child;
3152
3153 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3154 offset += child->base_offset * stride;
3155 if (child->deref_array_type == nir_deref_array_type_indirect) {
3156 index = get_src(ctx, child->indirect);
3157 }
3158 }
3159
3160 assert(stride % type_size == 0);
3161
3162 if (!index)
3163 index = ctx->i32zero;
3164
3165 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
3166 indices[0] = ctx->i32zero;
3167 indices[1] = LLVMConstInt(ctx->i32, offset, 0);
3168 list = LLVMBuildGEP(builder, list, indices, 2, "");
3169 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
3170
3171 return build_indexed_load_const(ctx, list, index);
3172 }
3173
3174 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3175 struct ac_tex_info *tinfo,
3176 nir_tex_instr *instr,
3177 nir_texop op,
3178 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3179 LLVMValueRef *param, unsigned count,
3180 unsigned dmask)
3181 {
3182 int num_args;
3183 unsigned is_rect = 0;
3184 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
3185
3186 if (op == nir_texop_lod)
3187 da = false;
3188 /* Pad to power of two vector */
3189 while (count < util_next_power_of_two(count))
3190 param[count++] = LLVMGetUndef(ctx->i32);
3191
3192 if (count > 1)
3193 tinfo->args[0] = build_gather_values(ctx, param, count);
3194 else
3195 tinfo->args[0] = param[0];
3196
3197 tinfo->args[1] = res_ptr;
3198 num_args = 2;
3199
3200 if (op == nir_texop_txf ||
3201 op == nir_texop_txf_ms ||
3202 op == nir_texop_query_levels ||
3203 op == nir_texop_texture_samples ||
3204 op == nir_texop_txs)
3205 tinfo->dst_type = ctx->v4i32;
3206 else {
3207 tinfo->dst_type = ctx->v4f32;
3208 tinfo->args[num_args++] = samp_ptr;
3209 }
3210
3211 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
3212 tinfo->args[0] = res_ptr;
3213 tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
3214 tinfo->args[2] = param[0];
3215 tinfo->arg_count = 3;
3216 return;
3217 }
3218
3219 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
3220 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
3221 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
3222 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
3223 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
3224 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
3225 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
3226 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
3227
3228 tinfo->arg_count = num_args;
3229 }
3230
3231 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3232 *
3233 * SI-CI:
3234 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3235 * filtering manually. The driver sets img7 to a mask clearing
3236 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3237 * s_and_b32 samp0, samp0, img7
3238 *
3239 * VI:
3240 * The ANISO_OVERRIDE sampler field enables this fix in TA.
3241 */
3242 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
3243 LLVMValueRef res, LLVMValueRef samp)
3244 {
3245 LLVMBuilderRef builder = ctx->builder;
3246 LLVMValueRef img7, samp0;
3247
3248 if (ctx->options->chip_class >= VI)
3249 return samp;
3250
3251 img7 = LLVMBuildExtractElement(builder, res,
3252 LLVMConstInt(ctx->i32, 7, 0), "");
3253 samp0 = LLVMBuildExtractElement(builder, samp,
3254 LLVMConstInt(ctx->i32, 0, 0), "");
3255 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3256 return LLVMBuildInsertElement(builder, samp, samp0,
3257 LLVMConstInt(ctx->i32, 0, 0), "");
3258 }
3259
3260 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3261 nir_tex_instr *instr,
3262 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3263 LLVMValueRef *fmask_ptr)
3264 {
3265 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
3266 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3267 else
3268 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3269 if (samp_ptr) {
3270 if (instr->sampler)
3271 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3272 else
3273 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3274 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3275 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3276 }
3277 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3278 instr->op == nir_texop_samples_identical))
3279 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3280 }
3281
3282 static LLVMValueRef build_cube_intrinsic(struct nir_to_llvm_context *ctx,
3283 LLVMValueRef *in)
3284 {
3285
3286 LLVMValueRef v, cube_vec;
3287
3288 if (1) {
3289 LLVMTypeRef f32 = LLVMTypeOf(in[0]);
3290 LLVMValueRef out[4];
3291
3292 out[0] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
3293 f32, in, 3, AC_FUNC_ATTR_READNONE);
3294 out[1] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
3295 f32, in, 3, AC_FUNC_ATTR_READNONE);
3296 out[2] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
3297 f32, in, 3, AC_FUNC_ATTR_READNONE);
3298 out[3] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
3299 f32, in, 3, AC_FUNC_ATTR_READNONE);
3300
3301 return build_gather_values(ctx, out, 4);
3302 } else {
3303 LLVMValueRef c[4];
3304 c[0] = in[0];
3305 c[1] = in[1];
3306 c[2] = in[2];
3307 c[3] = LLVMGetUndef(LLVMTypeOf(in[0]));
3308 cube_vec = build_gather_values(ctx, c, 4);
3309 v = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", LLVMTypeOf(cube_vec),
3310 &cube_vec, 1, AC_FUNC_ATTR_READNONE);
3311 }
3312 return v;
3313 }
3314
3315 static void cube_to_2d_coords(struct nir_to_llvm_context *ctx,
3316 LLVMValueRef *in, LLVMValueRef *out)
3317 {
3318 LLVMValueRef coords[4];
3319 LLVMValueRef mad_args[3];
3320 LLVMValueRef v;
3321 LLVMValueRef tmp;
3322 int i;
3323
3324 v = build_cube_intrinsic(ctx, in);
3325 for (i = 0; i < 4; i++)
3326 coords[i] = LLVMBuildExtractElement(ctx->builder, v,
3327 LLVMConstInt(ctx->i32, i, false), "");
3328
3329 coords[2] = emit_llvm_intrinsic(ctx, "llvm.fabs.f32", ctx->f32,
3330 &coords[2], 1, AC_FUNC_ATTR_READNONE);
3331 coords[2] = emit_fdiv(ctx, ctx->f32one, coords[2]);
3332
3333 mad_args[1] = coords[2];
3334 mad_args[2] = LLVMConstReal(ctx->f32, 1.5);
3335 mad_args[0] = coords[0];
3336
3337 /* emit MAD */
3338 tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], "");
3339 coords[0] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], "");
3340
3341 mad_args[0] = coords[1];
3342
3343 /* emit MAD */
3344 tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], "");
3345 coords[1] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], "");
3346
3347 /* apply xyz = yxw swizzle to cooords */
3348 out[0] = coords[1];
3349 out[1] = coords[0];
3350 out[2] = coords[3];
3351 }
3352
3353 static void emit_prepare_cube_coords(struct nir_to_llvm_context *ctx,
3354 LLVMValueRef *coords_arg, int num_coords,
3355 bool is_deriv,
3356 bool is_array, LLVMValueRef *derivs_arg)
3357 {
3358 LLVMValueRef coords[4];
3359 int i;
3360 cube_to_2d_coords(ctx, coords_arg, coords);
3361
3362 if (is_deriv && derivs_arg) {
3363 LLVMValueRef derivs[4];
3364 int axis;
3365
3366 /* Convert cube derivatives to 2D derivatives. */
3367 for (axis = 0; axis < 2; axis++) {
3368 LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
3369
3370 /* Shift the cube coordinates by the derivatives to get
3371 * the cube coordinates of the "neighboring pixel".
3372 */
3373 for (i = 0; i < 3; i++)
3374 shifted_cube_coords[i] =
3375 LLVMBuildFAdd(ctx->builder, coords_arg[i],
3376 derivs_arg[axis*3+i], "");
3377 shifted_cube_coords[3] = LLVMGetUndef(ctx->f32);
3378
3379 /* Project the shifted cube coordinates onto the face. */
3380 cube_to_2d_coords(ctx, shifted_cube_coords,
3381 shifted_coords);
3382
3383 /* Subtract both sets of 2D coordinates to get 2D derivatives.
3384 * This won't work if the shifted coordinates ended up
3385 * in a different face.
3386 */
3387 for (i = 0; i < 2; i++)
3388 derivs[axis * 2 + i] =
3389 LLVMBuildFSub(ctx->builder, shifted_coords[i],
3390 coords[i], "");
3391 }
3392
3393 memcpy(derivs_arg, derivs, sizeof(derivs));
3394 }
3395
3396 if (is_array) {
3397 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
3398 /* coords_arg.w component - array_index for cube arrays */
3399 LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
3400 coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
3401 }
3402
3403 memcpy(coords_arg, coords, sizeof(coords));
3404 }
3405
3406 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
3407 {
3408 LLVMValueRef result = NULL;
3409 struct ac_tex_info tinfo = { 0 };
3410 unsigned dmask = 0xf;
3411 LLVMValueRef address[16];
3412 LLVMValueRef coords[5];
3413 LLVMValueRef coord = NULL, lod = NULL, comparitor = NULL;
3414 LLVMValueRef bias = NULL, offsets = NULL;
3415 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
3416 LLVMValueRef ddx = NULL, ddy = NULL;
3417 LLVMValueRef derivs[6];
3418 unsigned chan, count = 0;
3419 unsigned const_src = 0, num_deriv_comp = 0;
3420
3421 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3422
3423 for (unsigned i = 0; i < instr->num_srcs; i++) {
3424 switch (instr->src[i].src_type) {
3425 case nir_tex_src_coord:
3426 coord = get_src(ctx, instr->src[i].src);
3427 break;
3428 case nir_tex_src_projector:
3429 break;
3430 case nir_tex_src_comparitor:
3431 comparitor = get_src(ctx, instr->src[i].src);
3432 break;
3433 case nir_tex_src_offset:
3434 offsets = get_src(ctx, instr->src[i].src);
3435 const_src = i;
3436 break;
3437 case nir_tex_src_bias:
3438 bias = get_src(ctx, instr->src[i].src);
3439 break;
3440 case nir_tex_src_lod:
3441 lod = get_src(ctx, instr->src[i].src);
3442 break;
3443 case nir_tex_src_ms_index:
3444 sample_index = get_src(ctx, instr->src[i].src);
3445 break;
3446 case nir_tex_src_ms_mcs:
3447 break;
3448 case nir_tex_src_ddx:
3449 ddx = get_src(ctx, instr->src[i].src);
3450 num_deriv_comp = instr->src[i].src.ssa->num_components;
3451 break;
3452 case nir_tex_src_ddy:
3453 ddy = get_src(ctx, instr->src[i].src);
3454 break;
3455 case nir_tex_src_texture_offset:
3456 case nir_tex_src_sampler_offset:
3457 case nir_tex_src_plane:
3458 default:
3459 break;
3460 }
3461 }
3462
3463 if (instr->op == nir_texop_texture_samples) {
3464 LLVMValueRef res, samples, is_msaa;
3465 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
3466 samples = LLVMBuildExtractElement(ctx->builder, res,
3467 LLVMConstInt(ctx->i32, 3, false), "");
3468 is_msaa = LLVMBuildLShr(ctx->builder, samples,
3469 LLVMConstInt(ctx->i32, 28, false), "");
3470 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
3471 LLVMConstInt(ctx->i32, 0xe, false), "");
3472 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
3473 LLVMConstInt(ctx->i32, 0xe, false), "");
3474
3475 samples = LLVMBuildLShr(ctx->builder, samples,
3476 LLVMConstInt(ctx->i32, 16, false), "");
3477 samples = LLVMBuildAnd(ctx->builder, samples,
3478 LLVMConstInt(ctx->i32, 0xf, false), "");
3479 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
3480 samples, "");
3481 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
3482 ctx->i32one, "");
3483 result = samples;
3484 goto write_result;
3485 }
3486
3487 if (coord)
3488 for (chan = 0; chan < instr->coord_components; chan++)
3489 coords[chan] = llvm_extract_elem(ctx, coord, chan);
3490
3491 if (offsets && instr->op != nir_texop_txf) {
3492 LLVMValueRef offset[3], pack;
3493 for (chan = 0; chan < 3; ++chan)
3494 offset[chan] = ctx->i32zero;
3495
3496 tinfo.has_offset = true;
3497 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
3498 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
3499 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
3500 LLVMConstInt(ctx->i32, 0x3f, false), "");
3501 if (chan)
3502 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
3503 LLVMConstInt(ctx->i32, chan * 8, false), "");
3504 }
3505 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
3506 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
3507 address[count++] = pack;
3508
3509 }
3510 /* pack LOD bias value */
3511 if (instr->op == nir_texop_txb && bias) {
3512 address[count++] = bias;
3513 }
3514
3515 /* Pack depth comparison value */
3516 if (instr->is_shadow && comparitor) {
3517 address[count++] = llvm_extract_elem(ctx, comparitor, 0);
3518 }
3519
3520 /* pack derivatives */
3521 if (ddx || ddy) {
3522 switch (instr->sampler_dim) {
3523 case GLSL_SAMPLER_DIM_3D:
3524 case GLSL_SAMPLER_DIM_CUBE:
3525 num_deriv_comp = 3;
3526 break;
3527 case GLSL_SAMPLER_DIM_2D:
3528 default:
3529 num_deriv_comp = 2;
3530 break;
3531 case GLSL_SAMPLER_DIM_1D:
3532 num_deriv_comp = 1;
3533 break;
3534 }
3535
3536 for (unsigned i = 0; i < num_deriv_comp; i++) {
3537 derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
3538 derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
3539 }
3540 }
3541
3542 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
3543 for (chan = 0; chan < instr->coord_components; chan++)
3544 coords[chan] = to_float(ctx, coords[chan]);
3545 if (instr->coord_components == 3)
3546 coords[3] = LLVMGetUndef(ctx->f32);
3547 emit_prepare_cube_coords(ctx, coords, instr->coord_components, instr->op == nir_texop_txd, instr->is_array, derivs);
3548 if (num_deriv_comp)
3549 num_deriv_comp--;
3550 }
3551
3552 if (ddx || ddy) {
3553 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
3554 address[count++] = derivs[i];
3555 }
3556
3557 /* Pack texture coordinates */
3558 if (coord) {
3559 address[count++] = coords[0];
3560 if (instr->coord_components > 1)
3561 address[count++] = coords[1];
3562 if (instr->coord_components > 2) {
3563 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
3564 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
3565 coords[2] = to_float(ctx, coords[2]);
3566 coords[2] = emit_llvm_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coords[2],
3567 1, 0);
3568 coords[2] = to_integer(ctx, coords[2]);
3569 }
3570 address[count++] = coords[2];
3571 }
3572 }
3573
3574 /* Pack LOD */
3575 if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
3576 address[count++] = lod;
3577 } else if (instr->op == nir_texop_txf_ms && sample_index) {
3578 address[count++] = sample_index;
3579 } else if(instr->op == nir_texop_txs) {
3580 count = 0;
3581 if (lod)
3582 address[count++] = lod;
3583 else
3584 address[count++] = ctx->i32zero;
3585 }
3586
3587 for (chan = 0; chan < count; chan++) {
3588 address[chan] = LLVMBuildBitCast(ctx->builder,
3589 address[chan], ctx->i32, "");
3590 }
3591
3592 if (instr->op == nir_texop_samples_identical) {
3593 LLVMValueRef txf_address[4];
3594 struct ac_tex_info txf_info = { 0 };
3595 unsigned txf_count = count;
3596 memcpy(txf_address, address, sizeof(txf_address));
3597
3598 if (!instr->is_array)
3599 txf_address[2] = ctx->i32zero;
3600 txf_address[3] = ctx->i32zero;
3601
3602 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3603 fmask_ptr, NULL,
3604 txf_address, txf_count, 0xf);
3605
3606 result = build_tex_intrinsic(ctx, instr, &txf_info);
3607
3608 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3609 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
3610 goto write_result;
3611 }
3612
3613 /* Adjust the sample index according to FMASK.
3614 *
3615 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3616 * which is the identity mapping. Each nibble says which physical sample
3617 * should be fetched to get that sample.
3618 *
3619 * For example, 0x11111100 means there are only 2 samples stored and
3620 * the second sample covers 3/4 of the pixel. When reading samples 0
3621 * and 1, return physical sample 0 (determined by the first two 0s
3622 * in FMASK), otherwise return physical sample 1.
3623 *
3624 * The sample index should be adjusted as follows:
3625 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3626 */
3627 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
3628 LLVMValueRef txf_address[4];
3629 struct ac_tex_info txf_info = { 0 };
3630 unsigned txf_count = count;
3631 memcpy(txf_address, address, sizeof(txf_address));
3632
3633 if (!instr->is_array)
3634 txf_address[2] = ctx->i32zero;
3635 txf_address[3] = ctx->i32zero;
3636
3637 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3638 fmask_ptr, NULL,
3639 txf_address, txf_count, 0xf);
3640
3641 result = build_tex_intrinsic(ctx, instr, &txf_info);
3642 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3643 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3644
3645 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3646 result,
3647 ctx->i32zero, "");
3648
3649 unsigned sample_chan = instr->is_array ? 3 : 2;
3650
3651 LLVMValueRef sample_index4 =
3652 LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
3653 LLVMValueRef shifted_fmask =
3654 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3655 LLVMValueRef final_sample =
3656 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3657
3658 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3659 * resource descriptor is 0 (invalid),
3660 */
3661 LLVMValueRef fmask_desc =
3662 LLVMBuildBitCast(ctx->builder, fmask_ptr,
3663 ctx->v8i32, "");
3664
3665 LLVMValueRef fmask_word1 =
3666 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3667 ctx->i32one, "");
3668
3669 LLVMValueRef word1_is_nonzero =
3670 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3671 fmask_word1, ctx->i32zero, "");
3672
3673 /* Replace the MSAA sample index. */
3674 address[sample_chan] =
3675 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3676 final_sample, address[sample_chan], "");
3677 }
3678
3679 if (offsets && instr->op == nir_texop_txf) {
3680 nir_const_value *const_offset =
3681 nir_src_as_const_value(instr->src[const_src].src);
3682 int num_offsets = instr->src[const_src].src.ssa->num_components;
3683 assert(const_offset);
3684 num_offsets = MIN2(num_offsets, instr->coord_components);
3685 if (num_offsets > 2)
3686 address[2] = LLVMBuildAdd(ctx->builder,
3687 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
3688 if (num_offsets > 1)
3689 address[1] = LLVMBuildAdd(ctx->builder,
3690 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
3691 address[0] = LLVMBuildAdd(ctx->builder,
3692 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
3693
3694 }
3695
3696 /* TODO TG4 support */
3697 if (instr->op == nir_texop_tg4) {
3698 if (instr->is_shadow)
3699 dmask = 1;
3700 else
3701 dmask = 1 << instr->component;
3702 }
3703 set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
3704 res_ptr, samp_ptr, address, count, dmask);
3705
3706 result = build_tex_intrinsic(ctx, instr, &tinfo);
3707
3708 if (instr->op == nir_texop_query_levels)
3709 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
3710 else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
3711 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3712 else if (instr->op == nir_texop_txs &&
3713 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
3714 instr->is_array) {
3715 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3716 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3717 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
3718 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3719 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
3720 } else if (instr->dest.ssa.num_components != 4)
3721 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
3722
3723 write_result:
3724 if (result) {
3725 assert(instr->dest.is_ssa);
3726 result = to_integer(ctx, result);
3727 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3728 }
3729 }
3730
3731
3732 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3733 {
3734 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3735 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3736
3737 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3738 _mesa_hash_table_insert(ctx->phis, instr, result);
3739 }
3740
3741 static void visit_post_phi(struct nir_to_llvm_context *ctx,
3742 nir_phi_instr *instr,
3743 LLVMValueRef llvm_phi)
3744 {
3745 nir_foreach_phi_src(src, instr) {
3746 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3747 LLVMValueRef llvm_src = get_src(ctx, src->src);
3748
3749 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3750 }
3751 }
3752
3753 static void phi_post_pass(struct nir_to_llvm_context *ctx)
3754 {
3755 struct hash_entry *entry;
3756 hash_table_foreach(ctx->phis, entry) {
3757 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3758 (LLVMValueRef)entry->data);
3759 }
3760 }
3761
3762
3763 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3764 nir_ssa_undef_instr *instr)
3765 {
3766 unsigned num_components = instr->def.num_components;
3767 LLVMValueRef undef;
3768
3769 if (num_components == 1)
3770 undef = LLVMGetUndef(ctx->i32);
3771 else {
3772 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3773 }
3774 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3775 }
3776
3777 static void visit_jump(struct nir_to_llvm_context *ctx,
3778 nir_jump_instr *instr)
3779 {
3780 switch (instr->type) {
3781 case nir_jump_break:
3782 LLVMBuildBr(ctx->builder, ctx->break_block);
3783 LLVMClearInsertionPosition(ctx->builder);
3784 break;
3785 case nir_jump_continue:
3786 LLVMBuildBr(ctx->builder, ctx->continue_block);
3787 LLVMClearInsertionPosition(ctx->builder);
3788 break;
3789 default:
3790 fprintf(stderr, "Unknown NIR jump instr: ");
3791 nir_print_instr(&instr->instr, stderr);
3792 fprintf(stderr, "\n");
3793 abort();
3794 }
3795 }
3796
3797 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3798 struct exec_list *list);
3799
3800 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
3801 {
3802 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
3803 nir_foreach_instr(instr, block)
3804 {
3805 switch (instr->type) {
3806 case nir_instr_type_alu:
3807 visit_alu(ctx, nir_instr_as_alu(instr));
3808 break;
3809 case nir_instr_type_load_const:
3810 visit_load_const(ctx, nir_instr_as_load_const(instr));
3811 break;
3812 case nir_instr_type_intrinsic:
3813 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3814 break;
3815 case nir_instr_type_tex:
3816 visit_tex(ctx, nir_instr_as_tex(instr));
3817 break;
3818 case nir_instr_type_phi:
3819 visit_phi(ctx, nir_instr_as_phi(instr));
3820 break;
3821 case nir_instr_type_ssa_undef:
3822 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3823 break;
3824 case nir_instr_type_jump:
3825 visit_jump(ctx, nir_instr_as_jump(instr));
3826 break;
3827 default:
3828 fprintf(stderr, "Unknown NIR instr type: ");
3829 nir_print_instr(instr, stderr);
3830 fprintf(stderr, "\n");
3831 abort();
3832 }
3833 }
3834
3835 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3836 }
3837
3838 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
3839 {
3840 LLVMValueRef value = get_src(ctx, if_stmt->condition);
3841
3842 LLVMBasicBlockRef merge_block =
3843 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3844 LLVMBasicBlockRef if_block =
3845 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3846 LLVMBasicBlockRef else_block = merge_block;
3847 if (!exec_list_is_empty(&if_stmt->else_list))
3848 else_block = LLVMAppendBasicBlockInContext(
3849 ctx->context, ctx->main_function, "");
3850
3851 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
3852 LLVMConstInt(ctx->i32, 0, false), "");
3853 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
3854
3855 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3856 visit_cf_list(ctx, &if_stmt->then_list);
3857 if (LLVMGetInsertBlock(ctx->builder))
3858 LLVMBuildBr(ctx->builder, merge_block);
3859
3860 if (!exec_list_is_empty(&if_stmt->else_list)) {
3861 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
3862 visit_cf_list(ctx, &if_stmt->else_list);
3863 if (LLVMGetInsertBlock(ctx->builder))
3864 LLVMBuildBr(ctx->builder, merge_block);
3865 }
3866
3867 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
3868 }
3869
3870 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
3871 {
3872 LLVMBasicBlockRef continue_parent = ctx->continue_block;
3873 LLVMBasicBlockRef break_parent = ctx->break_block;
3874
3875 ctx->continue_block =
3876 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3877 ctx->break_block =
3878 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3879
3880 LLVMBuildBr(ctx->builder, ctx->continue_block);
3881 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
3882 visit_cf_list(ctx, &loop->body);
3883
3884 if (LLVMGetInsertBlock(ctx->builder))
3885 LLVMBuildBr(ctx->builder, ctx->continue_block);
3886 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
3887
3888 ctx->continue_block = continue_parent;
3889 ctx->break_block = break_parent;
3890 }
3891
3892 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3893 struct exec_list *list)
3894 {
3895 foreach_list_typed(nir_cf_node, node, node, list)
3896 {
3897 switch (node->type) {
3898 case nir_cf_node_block:
3899 visit_block(ctx, nir_cf_node_as_block(node));
3900 break;
3901
3902 case nir_cf_node_if:
3903 visit_if(ctx, nir_cf_node_as_if(node));
3904 break;
3905
3906 case nir_cf_node_loop:
3907 visit_loop(ctx, nir_cf_node_as_loop(node));
3908 break;
3909
3910 default:
3911 assert(0);
3912 }
3913 }
3914 }
3915
3916 static void
3917 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
3918 struct nir_variable *variable)
3919 {
3920 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
3921 LLVMValueRef t_offset;
3922 LLVMValueRef t_list;
3923 LLVMValueRef args[3];
3924 LLVMValueRef input;
3925 LLVMValueRef buffer_index;
3926 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
3927 int idx = variable->data.location;
3928 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
3929
3930 variable->data.driver_location = idx * 4;
3931
3932 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
3933 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
3934 ctx->start_instance, "");
3935 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3936 ctx->shader_info->vs.vgpr_comp_cnt);
3937 } else
3938 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
3939 ctx->base_vertex, "");
3940
3941 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
3942 t_offset = LLVMConstInt(ctx->i32, index + i, false);
3943
3944 t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
3945 args[0] = t_list;
3946 args[1] = LLVMConstInt(ctx->i32, 0, false);
3947 args[2] = buffer_index;
3948 input = emit_llvm_intrinsic(ctx,
3949 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
3950 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3951
3952 for (unsigned chan = 0; chan < 4; chan++) {
3953 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3954 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
3955 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
3956 input, llvm_chan, ""));
3957 }
3958 }
3959 }
3960
3961
3962 static void interp_fs_input(struct nir_to_llvm_context *ctx,
3963 unsigned attr,
3964 LLVMValueRef interp_param,
3965 LLVMValueRef prim_mask,
3966 LLVMValueRef result[4])
3967 {
3968 const char *intr_name;
3969 LLVMValueRef attr_number;
3970 unsigned chan;
3971
3972 attr_number = LLVMConstInt(ctx->i32, attr, false);
3973
3974 /* fs.constant returns the param from the middle vertex, so it's not
3975 * really useful for flat shading. It's meant to be used for custom
3976 * interpolation (but the intrinsic can't fetch from the other two
3977 * vertices).
3978 *
3979 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
3980 * to do the right thing. The only reason we use fs.constant is that
3981 * fs.interp cannot be used on integers, because they can be equal
3982 * to NaN.
3983 */
3984 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
3985
3986 for (chan = 0; chan < 4; chan++) {
3987 LLVMValueRef args[4];
3988 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3989
3990 args[0] = llvm_chan;
3991 args[1] = attr_number;
3992 args[2] = prim_mask;
3993 args[3] = interp_param;
3994 result[chan] = emit_llvm_intrinsic(ctx, intr_name,
3995 ctx->f32, args, args[3] ? 4 : 3,
3996 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3997 }
3998 }
3999
4000 static void
4001 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
4002 struct nir_variable *variable)
4003 {
4004 int idx = variable->data.location;
4005 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4006 LLVMValueRef interp;
4007
4008 variable->data.driver_location = idx * 4;
4009 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
4010
4011 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
4012 unsigned interp_type;
4013 if (variable->data.sample) {
4014 interp_type = INTERP_SAMPLE;
4015 ctx->shader_info->fs.force_persample = true;
4016 } else if (variable->data.centroid)
4017 interp_type = INTERP_CENTROID;
4018 else
4019 interp_type = INTERP_CENTER;
4020
4021 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
4022 } else
4023 interp = NULL;
4024
4025 for (unsigned i = 0; i < attrib_count; ++i)
4026 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
4027
4028 }
4029
4030 static void
4031 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
4032 struct nir_variable *variable)
4033 {
4034 switch (ctx->stage) {
4035 case MESA_SHADER_VERTEX:
4036 handle_vs_input_decl(ctx, variable);
4037 break;
4038 case MESA_SHADER_FRAGMENT:
4039 handle_fs_input_decl(ctx, variable);
4040 break;
4041 default:
4042 break;
4043 }
4044
4045 }
4046
4047 static void
4048 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
4049 struct nir_shader *nir)
4050 {
4051 unsigned index = 0;
4052 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
4053 LLVMValueRef interp_param;
4054 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
4055
4056 if (!(ctx->input_mask & (1ull << i)))
4057 continue;
4058
4059 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
4060 interp_param = *inputs;
4061 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
4062 inputs);
4063
4064 if (!interp_param)
4065 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
4066 ++index;
4067 } else if (i == VARYING_SLOT_POS) {
4068 for(int i = 0; i < 3; ++i)
4069 inputs[i] = ctx->frag_pos[i];
4070
4071 inputs[3] = emit_fdiv(ctx, ctx->f32one, ctx->frag_pos[3]);
4072 }
4073 }
4074 ctx->shader_info->fs.num_interp = index;
4075 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
4076 ctx->shader_info->fs.has_pcoord = true;
4077 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
4078 }
4079
4080 static LLVMValueRef
4081 ac_build_alloca(struct nir_to_llvm_context *ctx,
4082 LLVMTypeRef type,
4083 const char *name)
4084 {
4085 LLVMBuilderRef builder = ctx->builder;
4086 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
4087 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
4088 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
4089 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
4090 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
4091 LLVMValueRef res;
4092
4093 if (first_instr) {
4094 LLVMPositionBuilderBefore(first_builder, first_instr);
4095 } else {
4096 LLVMPositionBuilderAtEnd(first_builder, first_block);
4097 }
4098
4099 res = LLVMBuildAlloca(first_builder, type, name);
4100 LLVMBuildStore(builder, LLVMConstNull(type), res);
4101
4102 LLVMDisposeBuilder(first_builder);
4103
4104 return res;
4105 }
4106
4107 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
4108 LLVMTypeRef type,
4109 const char *name)
4110 {
4111 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
4112 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
4113 return ptr;
4114 }
4115
4116 static void
4117 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
4118 struct nir_variable *variable)
4119 {
4120 int idx = variable->data.location;
4121 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4122
4123 variable->data.driver_location = idx * 4;
4124
4125 if (ctx->stage == MESA_SHADER_VERTEX) {
4126
4127 if (idx == VARYING_SLOT_CLIP_DIST0 ||
4128 idx == VARYING_SLOT_CULL_DIST0) {
4129 int length = glsl_get_length(variable->type);
4130 if (idx == VARYING_SLOT_CLIP_DIST0) {
4131 ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
4132 ctx->num_clips = length;
4133 } else if (idx == VARYING_SLOT_CULL_DIST0) {
4134 ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
4135 ctx->num_culls = length;
4136 }
4137 if (length > 4)
4138 attrib_count = 2;
4139 else
4140 attrib_count = 1;
4141 }
4142 }
4143
4144 for (unsigned i = 0; i < attrib_count; ++i) {
4145 for (unsigned chan = 0; chan < 4; chan++) {
4146 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
4147 si_build_alloca_undef(ctx, ctx->f32, "");
4148 }
4149 }
4150 ctx->output_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
4151 }
4152
4153 static void
4154 setup_locals(struct nir_to_llvm_context *ctx,
4155 struct nir_function *func)
4156 {
4157 int i, j;
4158 ctx->num_locals = 0;
4159 nir_foreach_variable(variable, &func->impl->locals) {
4160 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4161 variable->data.driver_location = ctx->num_locals * 4;
4162 ctx->num_locals += attrib_count;
4163 }
4164 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
4165 if (!ctx->locals)
4166 return;
4167
4168 for (i = 0; i < ctx->num_locals; i++) {
4169 for (j = 0; j < 4; j++) {
4170 ctx->locals[i * 4 + j] =
4171 si_build_alloca_undef(ctx, ctx->f32, "temp");
4172 }
4173 }
4174 }
4175
4176 static LLVMValueRef
4177 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
4178 {
4179 v = to_float(ctx, v);
4180 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
4181 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
4182 }
4183
4184
4185 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
4186 LLVMValueRef src0, LLVMValueRef src1)
4187 {
4188 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
4189 LLVMValueRef comp[2];
4190
4191 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
4192 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
4193 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
4194 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
4195 }
4196
4197 /* Initialize arguments for the shader export intrinsic */
4198 static void
4199 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
4200 LLVMValueRef *values,
4201 unsigned target,
4202 LLVMValueRef *args)
4203 {
4204 /* Default is 0xf. Adjusted below depending on the format. */
4205 args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
4206 /* Specify whether the EXEC mask represents the valid mask */
4207 args[1] = LLVMConstInt(ctx->i32, 0, false);
4208
4209 /* Specify whether this is the last export */
4210 args[2] = LLVMConstInt(ctx->i32, 0, false);
4211 /* Specify the target we are exporting */
4212 args[3] = LLVMConstInt(ctx->i32, target, false);
4213
4214 args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
4215 args[5] = LLVMGetUndef(ctx->f32);
4216 args[6] = LLVMGetUndef(ctx->f32);
4217 args[7] = LLVMGetUndef(ctx->f32);
4218 args[8] = LLVMGetUndef(ctx->f32);
4219
4220 if (!values)
4221 return;
4222
4223 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4224 LLVMValueRef val[4];
4225 unsigned index = target - V_008DFC_SQ_EXP_MRT;
4226 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4227 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4228
4229 switch(col_format) {
4230 case V_028714_SPI_SHADER_ZERO:
4231 args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
4232 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
4233 break;
4234
4235 case V_028714_SPI_SHADER_32_R:
4236 args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
4237 args[5] = values[0];
4238 break;
4239
4240 case V_028714_SPI_SHADER_32_GR:
4241 args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
4242 args[5] = values[0];
4243 args[6] = values[1];
4244 break;
4245
4246 case V_028714_SPI_SHADER_32_AR:
4247 args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
4248 args[5] = values[0];
4249 args[8] = values[3];
4250 break;
4251
4252 case V_028714_SPI_SHADER_FP16_ABGR:
4253 args[4] = ctx->i32one;
4254
4255 for (unsigned chan = 0; chan < 2; chan++) {
4256 LLVMValueRef pack_args[2] = {
4257 values[2 * chan],
4258 values[2 * chan + 1]
4259 };
4260 LLVMValueRef packed;
4261
4262 packed = emit_llvm_intrinsic(ctx, "llvm.SI.packf16",
4263 ctx->i32, pack_args, 2,
4264 AC_FUNC_ATTR_READNONE);
4265 args[chan + 5] = packed;
4266 }
4267 break;
4268
4269 case V_028714_SPI_SHADER_UNORM16_ABGR:
4270 for (unsigned chan = 0; chan < 4; chan++) {
4271 val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
4272 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4273 LLVMConstReal(ctx->f32, 65535), "");
4274 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4275 LLVMConstReal(ctx->f32, 0.5), "");
4276 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4277 ctx->i32, "");
4278 }
4279
4280 args[4] = ctx->i32one;
4281 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4282 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4283 break;
4284
4285 case V_028714_SPI_SHADER_SNORM16_ABGR:
4286 for (unsigned chan = 0; chan < 4; chan++) {
4287 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4288 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4289 LLVMConstReal(ctx->f32, 32767), "");
4290
4291 /* If positive, add 0.5, else add -0.5. */
4292 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4293 LLVMBuildSelect(ctx->builder,
4294 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4295 val[chan], ctx->f32zero, ""),
4296 LLVMConstReal(ctx->f32, 0.5),
4297 LLVMConstReal(ctx->f32, -0.5), ""), "");
4298 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4299 }
4300
4301 args[4] = ctx->i32one;
4302 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4303 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4304 break;
4305
4306 case V_028714_SPI_SHADER_UINT16_ABGR: {
4307 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4308
4309 for (unsigned chan = 0; chan < 4; chan++) {
4310 val[chan] = to_integer(ctx, values[chan]);
4311 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4312 }
4313
4314 args[4] = ctx->i32one;
4315 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4316 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4317 break;
4318 }
4319
4320 case V_028714_SPI_SHADER_SINT16_ABGR: {
4321 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4322 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4323
4324 /* Clamp. */
4325 for (unsigned chan = 0; chan < 4; chan++) {
4326 val[chan] = to_integer(ctx, values[chan]);
4327 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4328 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4329 }
4330
4331 args[4] = ctx->i32one;
4332 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4333 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4334 break;
4335 }
4336
4337 default:
4338 case V_028714_SPI_SHADER_32_ABGR:
4339 memcpy(&args[5], values, sizeof(values[0]) * 4);
4340 break;
4341 }
4342 } else
4343 memcpy(&args[5], values, sizeof(values[0]) * 4);
4344
4345 for (unsigned i = 5; i < 9; ++i)
4346 args[i] = to_float(ctx, args[i]);
4347 }
4348
4349 static void
4350 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
4351 struct nir_shader *nir)
4352 {
4353 uint32_t param_count = 0;
4354 unsigned target;
4355 unsigned pos_idx, num_pos_exports = 0;
4356 LLVMValueRef args[9];
4357 LLVMValueRef pos_args[4][9] = { { 0 } };
4358 LLVMValueRef psize_value = 0;
4359 int i;
4360 const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
4361 (1ull << VARYING_SLOT_CLIP_DIST1) |
4362 (1ull << VARYING_SLOT_CULL_DIST0) |
4363 (1ull << VARYING_SLOT_CULL_DIST1));
4364
4365 if (clip_mask) {
4366 LLVMValueRef slots[8];
4367 unsigned j;
4368
4369 if (ctx->shader_info->vs.cull_dist_mask)
4370 ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
4371
4372 i = VARYING_SLOT_CLIP_DIST0;
4373 for (j = 0; j < ctx->num_clips; j++)
4374 slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4375 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4376 i = VARYING_SLOT_CULL_DIST0;
4377 for (j = 0; j < ctx->num_culls; j++)
4378 slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4379 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4380
4381 for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
4382 slots[i] = LLVMGetUndef(ctx->f32);
4383
4384 if (ctx->num_clips + ctx->num_culls > 4) {
4385 target = V_008DFC_SQ_EXP_POS + 3;
4386 si_llvm_init_export_args(ctx, &slots[4], target, args);
4387 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4388 args, sizeof(args));
4389 }
4390
4391 target = V_008DFC_SQ_EXP_POS + 2;
4392 si_llvm_init_export_args(ctx, &slots[0], target, args);
4393 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4394 args, sizeof(args));
4395
4396 }
4397
4398 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4399 LLVMValueRef values[4];
4400 if (!(ctx->output_mask & (1ull << i)))
4401 continue;
4402
4403 for (unsigned j = 0; j < 4; j++)
4404 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4405 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4406
4407 if (i == VARYING_SLOT_POS) {
4408 target = V_008DFC_SQ_EXP_POS;
4409 } else if (i == VARYING_SLOT_CLIP_DIST0 ||
4410 i == VARYING_SLOT_CLIP_DIST1 ||
4411 i == VARYING_SLOT_CULL_DIST0 ||
4412 i == VARYING_SLOT_CULL_DIST1) {
4413 continue;
4414 } else if (i == VARYING_SLOT_PSIZ) {
4415 ctx->shader_info->vs.writes_pointsize = true;
4416 psize_value = values[0];
4417 continue;
4418 } else if (i >= VARYING_SLOT_VAR0) {
4419 ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
4420 target = V_008DFC_SQ_EXP_PARAM + param_count;
4421 param_count++;
4422 }
4423
4424 si_llvm_init_export_args(ctx, values, target, args);
4425
4426 if (target >= V_008DFC_SQ_EXP_POS &&
4427 target <= (V_008DFC_SQ_EXP_POS + 3)) {
4428 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4429 args, sizeof(args));
4430 } else {
4431 emit_llvm_intrinsic(ctx,
4432 "llvm.SI.export",
4433 LLVMVoidTypeInContext(ctx->context),
4434 args, 9, 0);
4435 }
4436 }
4437
4438 /* We need to add the position output manually if it's missing. */
4439 if (!pos_args[0][0]) {
4440 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
4441 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
4442 pos_args[0][2] = ctx->i32zero; /* last export? */
4443 pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
4444 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
4445 pos_args[0][5] = ctx->f32zero; /* X */
4446 pos_args[0][6] = ctx->f32zero; /* Y */
4447 pos_args[0][7] = ctx->f32zero; /* Z */
4448 pos_args[0][8] = ctx->f32one; /* W */
4449 }
4450
4451 if (ctx->shader_info->vs.writes_pointsize == true) {
4452 pos_args[1][0] = LLVMConstInt(ctx->i32, (ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */
4453 pos_args[1][1] = ctx->i32zero; /* EXEC mask */
4454 pos_args[1][2] = ctx->i32zero; /* last export? */
4455 pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
4456 pos_args[1][4] = ctx->i32zero; /* COMPR flag */
4457 pos_args[1][5] = ctx->f32zero; /* X */
4458 pos_args[1][6] = ctx->f32zero; /* Y */
4459 pos_args[1][7] = ctx->f32zero; /* Z */
4460 pos_args[1][8] = ctx->f32zero; /* W */
4461
4462 if (ctx->shader_info->vs.writes_pointsize == true)
4463 pos_args[1][5] = psize_value;
4464 }
4465 for (i = 0; i < 4; i++) {
4466 if (pos_args[i][0])
4467 num_pos_exports++;
4468 }
4469
4470 pos_idx = 0;
4471 for (i = 0; i < 4; i++) {
4472 if (!pos_args[i][0])
4473 continue;
4474
4475 /* Specify the target we are exporting */
4476 pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
4477 if (pos_idx == num_pos_exports)
4478 pos_args[i][2] = ctx->i32one;
4479 emit_llvm_intrinsic(ctx,
4480 "llvm.SI.export",
4481 LLVMVoidTypeInContext(ctx->context),
4482 pos_args[i], 9, 0);
4483 }
4484
4485 ctx->shader_info->vs.pos_exports = num_pos_exports;
4486 ctx->shader_info->vs.param_exports = param_count;
4487 }
4488
4489 static void
4490 si_export_mrt_color(struct nir_to_llvm_context *ctx,
4491 LLVMValueRef *color, unsigned param, bool is_last)
4492 {
4493 LLVMValueRef args[9];
4494 /* Export */
4495 si_llvm_init_export_args(ctx, color, param,
4496 args);
4497
4498 if (is_last) {
4499 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4500 args[2] = ctx->i32one; /* DONE bit */
4501 } else if (args[0] == ctx->i32zero)
4502 return; /* unnecessary NULL export */
4503
4504 emit_llvm_intrinsic(ctx, "llvm.SI.export",
4505 ctx->voidt, args, 9, 0);
4506 }
4507
4508 static void
4509 si_export_mrt_z(struct nir_to_llvm_context *ctx,
4510 LLVMValueRef depth, LLVMValueRef stencil,
4511 LLVMValueRef samplemask)
4512 {
4513 LLVMValueRef args[9];
4514 unsigned mask = 0;
4515 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4516 args[2] = ctx->i32one; /* DONE bit */
4517 /* Specify the target we are exporting */
4518 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4519
4520 args[4] = ctx->i32zero; /* COMP flag */
4521 args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4522 args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4523 args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4524 args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4525
4526 if (depth) {
4527 args[5] = depth;
4528 mask |= 0x1;
4529 }
4530
4531 if (stencil) {
4532 args[6] = stencil;
4533 mask |= 0x2;
4534 }
4535
4536 if (samplemask) {
4537 args[7] = samplemask;
4538 mask |= 0x04;
4539 }
4540
4541 /* SI (except OLAND) has a bug that it only looks
4542 * at the X writemask component. */
4543 if (ctx->options->chip_class == SI &&
4544 ctx->options->family != CHIP_OLAND)
4545 mask |= 0x01;
4546
4547 args[0] = LLVMConstInt(ctx->i32, mask, false);
4548 emit_llvm_intrinsic(ctx, "llvm.SI.export",
4549 ctx->voidt, args, 9, 0);
4550 }
4551
4552 static void
4553 handle_fs_outputs_post(struct nir_to_llvm_context *ctx,
4554 struct nir_shader *nir)
4555 {
4556 unsigned index = 0;
4557 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
4558
4559 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4560 LLVMValueRef values[4];
4561
4562 if (!(ctx->output_mask & (1ull << i)))
4563 continue;
4564
4565 if (i == FRAG_RESULT_DEPTH) {
4566 ctx->shader_info->fs.writes_z = true;
4567 depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
4568 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4569 } else if (i == FRAG_RESULT_STENCIL) {
4570 ctx->shader_info->fs.writes_stencil = true;
4571 stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
4572 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4573 } else {
4574 bool last = false;
4575 for (unsigned j = 0; j < 4; j++)
4576 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4577 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4578
4579 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
4580 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
4581
4582 si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
4583 index++;
4584 }
4585 }
4586
4587 if (depth || stencil)
4588 si_export_mrt_z(ctx, depth, stencil, samplemask);
4589 else if (!index)
4590 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
4591
4592 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
4593 }
4594
4595 static void
4596 handle_shader_outputs_post(struct nir_to_llvm_context *ctx,
4597 struct nir_shader *nir)
4598 {
4599 switch (ctx->stage) {
4600 case MESA_SHADER_VERTEX:
4601 handle_vs_outputs_post(ctx, nir);
4602 break;
4603 case MESA_SHADER_FRAGMENT:
4604 handle_fs_outputs_post(ctx, nir);
4605 break;
4606 default:
4607 break;
4608 }
4609 }
4610
4611 static void
4612 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4613 struct nir_variable *variable, uint32_t *offset, int idx)
4614 {
4615 unsigned size = glsl_count_attribute_slots(variable->type, false);
4616 variable->data.driver_location = *offset;
4617 *offset += size;
4618 }
4619
4620 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
4621 {
4622 LLVMPassManagerRef passmgr;
4623 /* Create the pass manager */
4624 passmgr = LLVMCreateFunctionPassManagerForModule(
4625 ctx->module);
4626
4627 /* This pass should eliminate all the load and store instructions */
4628 LLVMAddPromoteMemoryToRegisterPass(passmgr);
4629
4630 /* Add some optimization passes */
4631 LLVMAddScalarReplAggregatesPass(passmgr);
4632 LLVMAddLICMPass(passmgr);
4633 LLVMAddAggressiveDCEPass(passmgr);
4634 LLVMAddCFGSimplificationPass(passmgr);
4635 LLVMAddInstructionCombiningPass(passmgr);
4636
4637 /* Run the pass */
4638 LLVMInitializeFunctionPassManager(passmgr);
4639 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
4640 LLVMFinalizeFunctionPassManager(passmgr);
4641
4642 LLVMDisposeBuilder(ctx->builder);
4643 LLVMDisposePassManager(passmgr);
4644 }
4645
4646 static
4647 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
4648 struct nir_shader *nir,
4649 struct ac_shader_variant_info *shader_info,
4650 const struct ac_nir_compiler_options *options)
4651 {
4652 struct nir_to_llvm_context ctx = {0};
4653 struct nir_function *func;
4654 unsigned i;
4655 ctx.options = options;
4656 ctx.shader_info = shader_info;
4657 ctx.context = LLVMContextCreate();
4658 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
4659
4660 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
4661
4662 memset(shader_info, 0, sizeof(*shader_info));
4663
4664 LLVMSetTarget(ctx.module, "amdgcn--");
4665 setup_types(&ctx);
4666
4667 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
4668 ctx.stage = nir->stage;
4669
4670 for (i = 0; i < AC_UD_MAX_SETS; i++)
4671 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
4672 for (i = 0; i < AC_UD_MAX_UD; i++)
4673 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
4674
4675 create_function(&ctx, nir);
4676
4677 if (nir->stage == MESA_SHADER_COMPUTE) {
4678 int num_shared = 0;
4679 nir_foreach_variable(variable, &nir->shared)
4680 num_shared++;
4681 if (num_shared) {
4682 int idx = 0;
4683 uint32_t shared_size = 0;
4684 LLVMValueRef var;
4685 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
4686 nir_foreach_variable(variable, &nir->shared) {
4687 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
4688 idx++;
4689 }
4690
4691 shared_size *= 4;
4692 var = LLVMAddGlobalInAddressSpace(ctx.module,
4693 LLVMArrayType(ctx.i8, shared_size),
4694 "compute_lds",
4695 LOCAL_ADDR_SPACE);
4696 LLVMSetAlignment(var, 4);
4697 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
4698 }
4699 }
4700
4701 nir_foreach_variable(variable, &nir->inputs)
4702 handle_shader_input_decl(&ctx, variable);
4703
4704 if (nir->stage == MESA_SHADER_FRAGMENT)
4705 handle_fs_inputs_pre(&ctx, nir);
4706
4707 nir_foreach_variable(variable, &nir->outputs)
4708 handle_shader_output_decl(&ctx, variable);
4709
4710 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4711 _mesa_key_pointer_equal);
4712 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4713 _mesa_key_pointer_equal);
4714
4715 func = (struct nir_function *)exec_list_get_head(&nir->functions);
4716
4717 setup_locals(&ctx, func);
4718
4719 visit_cf_list(&ctx, &func->impl->body);
4720 phi_post_pass(&ctx);
4721
4722 handle_shader_outputs_post(&ctx, nir);
4723 LLVMBuildRetVoid(ctx.builder);
4724
4725 ac_llvm_finalize_module(&ctx);
4726 free(ctx.locals);
4727 ralloc_free(ctx.defs);
4728 ralloc_free(ctx.phis);
4729
4730 return ctx.module;
4731 }
4732
4733 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
4734 {
4735 unsigned *retval = (unsigned *)context;
4736 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
4737 char *description = LLVMGetDiagInfoDescription(di);
4738
4739 if (severity == LLVMDSError) {
4740 *retval = 1;
4741 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
4742 description);
4743 }
4744
4745 LLVMDisposeMessage(description);
4746 }
4747
4748 static unsigned ac_llvm_compile(LLVMModuleRef M,
4749 struct ac_shader_binary *binary,
4750 LLVMTargetMachineRef tm)
4751 {
4752 unsigned retval = 0;
4753 char *err;
4754 LLVMContextRef llvm_ctx;
4755 LLVMMemoryBufferRef out_buffer;
4756 unsigned buffer_size;
4757 const char *buffer_data;
4758 LLVMBool mem_err;
4759
4760 /* Setup Diagnostic Handler*/
4761 llvm_ctx = LLVMGetModuleContext(M);
4762
4763 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
4764 &retval);
4765
4766 /* Compile IR*/
4767 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
4768 &err, &out_buffer);
4769
4770 /* Process Errors/Warnings */
4771 if (mem_err) {
4772 fprintf(stderr, "%s: %s", __FUNCTION__, err);
4773 free(err);
4774 retval = 1;
4775 goto out;
4776 }
4777
4778 /* Extract Shader Code*/
4779 buffer_size = LLVMGetBufferSize(out_buffer);
4780 buffer_data = LLVMGetBufferStart(out_buffer);
4781
4782 ac_elf_read(buffer_data, buffer_size, binary);
4783
4784 /* Clean up */
4785 LLVMDisposeMemoryBuffer(out_buffer);
4786
4787 out:
4788 return retval;
4789 }
4790
4791 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
4792 struct ac_shader_binary *binary,
4793 struct ac_shader_config *config,
4794 struct ac_shader_variant_info *shader_info,
4795 struct nir_shader *nir,
4796 const struct ac_nir_compiler_options *options,
4797 bool dump_shader)
4798 {
4799
4800 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
4801 options);
4802 if (dump_shader)
4803 LLVMDumpModule(llvm_module);
4804
4805 memset(binary, 0, sizeof(*binary));
4806 int v = ac_llvm_compile(llvm_module, binary, tm);
4807 if (v) {
4808 fprintf(stderr, "compile failed\n");
4809 }
4810
4811 if (dump_shader)
4812 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
4813
4814 ac_shader_binary_read_config(binary, config, 0);
4815
4816 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4817 LLVMDisposeModule(llvm_module);
4818 LLVMContextDispose(ctx);
4819
4820 if (nir->stage == MESA_SHADER_FRAGMENT) {
4821 shader_info->num_input_vgprs = 0;
4822 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
4823 shader_info->num_input_vgprs += 2;
4824 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
4825 shader_info->num_input_vgprs += 2;
4826 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
4827 shader_info->num_input_vgprs += 2;
4828 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
4829 shader_info->num_input_vgprs += 3;
4830 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
4831 shader_info->num_input_vgprs += 2;
4832 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
4833 shader_info->num_input_vgprs += 2;
4834 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
4835 shader_info->num_input_vgprs += 2;
4836 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
4837 shader_info->num_input_vgprs += 1;
4838 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
4839 shader_info->num_input_vgprs += 1;
4840 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
4841 shader_info->num_input_vgprs += 1;
4842 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
4843 shader_info->num_input_vgprs += 1;
4844 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
4845 shader_info->num_input_vgprs += 1;
4846 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
4847 shader_info->num_input_vgprs += 1;
4848 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
4849 shader_info->num_input_vgprs += 1;
4850 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
4851 shader_info->num_input_vgprs += 1;
4852 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
4853 shader_info->num_input_vgprs += 1;
4854 }
4855 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
4856
4857 /* +3 for scratch wave offset and VCC */
4858 config->num_sgprs = MAX2(config->num_sgprs,
4859 shader_info->num_input_sgprs + 3);
4860 if (nir->stage == MESA_SHADER_COMPUTE) {
4861 for (int i = 0; i < 3; ++i)
4862 shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
4863 }
4864
4865 if (nir->stage == MESA_SHADER_FRAGMENT)
4866 shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
4867 }