radv/ac: add implementation of load_sample_pos intrinsic.
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_util.h"
26 #include "ac_binary.h"
27 #include "sid.h"
28 #include "nir/nir.h"
29 #include "../vulkan/radv_descriptor_set.h"
30 #include "util/bitscan.h"
31 #include <llvm-c/Transforms/Scalar.h>
32
/* Calling-convention IDs passed to LLVMSetFunctionCallConv(), one per
 * hardware shader type (see set_llvm_calling_convention()).
 * NOTE(review): the numeric values presumably mirror LLVM's AMDGPU
 * calling-convention numbers - confirm against the LLVM headers. */
enum radeon_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};
39
/* Address-space numbers handed to LLVMPointerType(); CONST_ADDR_SPACE is
 * the one const_array() uses. */
#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

/* Number of input/output slots; the inputs[]/outputs[] arrays of
 * struct nir_to_llvm_context reserve 4 entries per slot. */
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
45
/* Kind of descriptor requested from get_sampler_desc(). */
enum desc_type {
	DESC_IMAGE,
	DESC_FMASK,
	DESC_SAMPLER,
	DESC_BUFFER,
};
52
/* Translation state shared by the NIR -> LLVM helpers in this file. */
struct nir_to_llvm_context {
	const struct ac_nir_compiler_options *options;
	struct ac_shader_variant_info *shader_info;

	/* LLVM objects the IR is emitted into. */
	LLVMContextRef context;
	LLVMModuleRef module;
	LLVMBuilderRef builder;
	LLVMValueRef main_function;

	/* Lookup tables; defs is searched by get_src() (keyed by SSA def)
	 * and by get_block() (keyed by nir_block). */
	struct hash_table *defs;
	struct hash_table *phis;

	/* Function arguments captured by create_function(): the first five
	 * are present for every stage, the rest are compute-only. */
	LLVMValueRef descriptor_sets[4];
	LLVMValueRef push_constants;
	LLVMValueRef num_work_groups;
	LLVMValueRef workgroup_ids;
	LLVMValueRef local_invocation_ids;
	LLVMValueRef tg_size;

	/* Vertex-stage function arguments (see create_function()). */
	LLVMValueRef vertex_buffers;
	LLVMValueRef base_vertex;
	LLVMValueRef start_instance;
	LLVMValueRef vertex_id;
	LLVMValueRef rel_auto_id;
	LLVMValueRef vs_prim_id;
	LLVMValueRef instance_id;

	/* Fragment-stage function arguments (see create_function()). */
	LLVMValueRef prim_mask;
	LLVMValueRef sample_positions;
	LLVMValueRef persp_sample, persp_center, persp_centroid;
	LLVMValueRef linear_sample, linear_center, linear_centroid;
	LLVMValueRef front_face;
	LLVMValueRef ancillary;
	LLVMValueRef frag_pos[4];

	/* NOTE(review): presumably the branch targets of the loop currently
	 * being emitted - confirm against the control-flow code. */
	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	/* Cached LLVM types, created once by setup_types(). */
	LLVMTypeRef i1;
	LLVMTypeRef i8;
	LLVMTypeRef i16;
	LLVMTypeRef i32;
	LLVMTypeRef i64;
	LLVMTypeRef v2i32;
	LLVMTypeRef v3i32;
	LLVMTypeRef v4i32;
	LLVMTypeRef v8i32;
	LLVMTypeRef f32;
	LLVMTypeRef f16;
	LLVMTypeRef v2f32;
	LLVMTypeRef v4f32;
	LLVMTypeRef v16i8;
	LLVMTypeRef voidt;

	/* Cached constants, created by setup_types(); v4f32empty is the
	 * vector (0, 0, 0, 1). */
	LLVMValueRef i32zero;
	LLVMValueRef i32one;
	LLVMValueRef f32zero;
	LLVMValueRef f32one;
	LLVMValueRef v4f32empty;

	/* Metadata kind IDs and metadata nodes, created by setup_types(). */
	unsigned range_md_kind;
	unsigned uniform_md_kind;
	unsigned fpmath_md_kind;
	unsigned invariant_load_md_kind;
	LLVMValueRef empty_md;
	LLVMValueRef fpmath_md_2p5_ulp;
	gl_shader_stage stage;

	LLVMValueRef lds;
	/* Four entries per slot; radeon_llvm_reg_index_soa() computes the
	 * slot * 4 + channel index. */
	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];

	/* Base pointer indexed by get_shared_memory_ptr(). */
	LLVMValueRef shared_memory;
	uint64_t input_mask;
	uint64_t output_mask;
	int num_locals;
	LLVMValueRef *locals;
	bool has_ddxy;
	unsigned num_clips;
	unsigned num_culls;

	bool has_ds_bpermute;
};
136
/* Argument bundle for a texture operation.
 * NOTE(review): field meanings below are inferred from the names only;
 * the code that fills and consumes this struct is elsewhere. */
struct ac_tex_info {
	LLVMValueRef args[12];   /* argument values (at most 12) */
	int arg_count;           /* presumably the number of valid args[] entries */
	LLVMTypeRef dst_type;    /* presumably the result type of the operation */
	bool has_offset;
};
143
/* Function/parameter attribute flags understood by ac_add_function_attr().
 * Each flag occupies its own bit so several can be combined in a mask;
 * AC_FUNC_ATTR_LAST is the first unused bit. */
enum ac_func_attr {
	AC_FUNC_ATTR_ALWAYSINLINE = 0x01,
	AC_FUNC_ATTR_BYVAL        = 0x02,
	AC_FUNC_ATTR_INREG        = 0x04,
	AC_FUNC_ATTR_NOALIAS      = 0x08,
	AC_FUNC_ATTR_NOUNWIND     = 0x10,
	AC_FUNC_ATTR_READNONE     = 0x20,
	AC_FUNC_ATTR_READONLY     = 0x40,
	AC_FUNC_ATTR_LAST         = 0x80
};
154
155 #if HAVE_LLVM < 0x0400
156 static LLVMAttribute ac_attr_to_llvm_attr(enum ac_func_attr attr)
157 {
158 switch (attr) {
159 case AC_FUNC_ATTR_ALWAYSINLINE: return LLVMAlwaysInlineAttribute;
160 case AC_FUNC_ATTR_BYVAL: return LLVMByValAttribute;
161 case AC_FUNC_ATTR_INREG: return LLVMInRegAttribute;
162 case AC_FUNC_ATTR_NOALIAS: return LLVMNoAliasAttribute;
163 case AC_FUNC_ATTR_NOUNWIND: return LLVMNoUnwindAttribute;
164 case AC_FUNC_ATTR_READNONE: return LLVMReadNoneAttribute;
165 case AC_FUNC_ATTR_READONLY: return LLVMReadOnlyAttribute;
166 default:
167 fprintf(stderr, "Unhandled function attribute: %x\n", attr);
168 return 0;
169 }
170 }
171
172 #else
173
174 static const char *attr_to_str(enum ac_func_attr attr)
175 {
176 switch (attr) {
177 case AC_FUNC_ATTR_ALWAYSINLINE: return "alwaysinline";
178 case AC_FUNC_ATTR_BYVAL: return "byval";
179 case AC_FUNC_ATTR_INREG: return "inreg";
180 case AC_FUNC_ATTR_NOALIAS: return "noalias";
181 case AC_FUNC_ATTR_NOUNWIND: return "nounwind";
182 case AC_FUNC_ATTR_READNONE: return "readnone";
183 case AC_FUNC_ATTR_READONLY: return "readonly";
184 default:
185 fprintf(stderr, "Unhandled function attribute: %x\n", attr);
186 return 0;
187 }
188 }
189
190 #endif
191
192 static void
193 ac_add_function_attr(LLVMValueRef function,
194 int attr_idx,
195 enum ac_func_attr attr)
196 {
197
198 #if HAVE_LLVM < 0x0400
199 LLVMAttribute llvm_attr = ac_attr_to_llvm_attr(attr);
200 if (attr_idx == -1) {
201 LLVMAddFunctionAttr(function, llvm_attr);
202 } else {
203 LLVMAddAttribute(LLVMGetParam(function, attr_idx - 1), llvm_attr);
204 }
205 #else
206 LLVMContextRef context = LLVMGetModuleContext(LLVMGetGlobalParent(function));
207 const char *attr_name = attr_to_str(attr);
208 unsigned kind_id = LLVMGetEnumAttributeKindForName(attr_name,
209 strlen(attr_name));
210 LLVMAttributeRef llvm_attr = LLVMCreateEnumAttribute(context, kind_id, 0);
211 LLVMAddAttributeAtIndex(function, attr_idx, llvm_attr);
212 #endif
213 }
214
/* Forward declarations; emit_llvm_intrinsic() is called by the emit_*
 * helpers below before its definition. */
static LLVMValueRef
emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name,
                    LLVMTypeRef return_type, LLVMValueRef *params,
                    unsigned param_count, unsigned attr_mask);
static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
                                     nir_deref_var *deref,
                                     enum desc_type desc_type);
/* Flatten a (slot, channel) pair into a single array index, with four
 * consecutive entries reserved per slot. */
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	const unsigned channels_per_slot = 4;
	unsigned slot_base = index * channels_per_slot;

	return slot_base + chan;
}
226
227 static unsigned llvm_get_type_size(LLVMTypeRef type)
228 {
229 LLVMTypeKind kind = LLVMGetTypeKind(type);
230
231 switch (kind) {
232 case LLVMIntegerTypeKind:
233 return LLVMGetIntTypeWidth(type) / 8;
234 case LLVMFloatTypeKind:
235 return 4;
236 case LLVMPointerTypeKind:
237 return 8;
238 case LLVMVectorTypeKind:
239 return LLVMGetVectorSize(type) *
240 llvm_get_type_size(LLVMGetElementType(type));
241 default:
242 assert(0);
243 return 0;
244 }
245 }
246
247 static void set_llvm_calling_convention(LLVMValueRef func,
248 gl_shader_stage stage)
249 {
250 enum radeon_llvm_calling_convention calling_conv;
251
252 switch (stage) {
253 case MESA_SHADER_VERTEX:
254 case MESA_SHADER_TESS_CTRL:
255 case MESA_SHADER_TESS_EVAL:
256 calling_conv = RADEON_LLVM_AMDGPU_VS;
257 break;
258 case MESA_SHADER_GEOMETRY:
259 calling_conv = RADEON_LLVM_AMDGPU_GS;
260 break;
261 case MESA_SHADER_FRAGMENT:
262 calling_conv = RADEON_LLVM_AMDGPU_PS;
263 break;
264 case MESA_SHADER_COMPUTE:
265 calling_conv = RADEON_LLVM_AMDGPU_CS;
266 break;
267 default:
268 unreachable("Unhandle shader type");
269 }
270
271 LLVMSetFunctionCallConv(func, calling_conv);
272 }
273
274 static LLVMValueRef
275 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
276 LLVMBuilderRef builder, LLVMTypeRef *return_types,
277 unsigned num_return_elems, LLVMTypeRef *param_types,
278 unsigned param_count, unsigned array_params,
279 unsigned sgpr_params, bool unsafe_math)
280 {
281 LLVMTypeRef main_function_type, ret_type;
282 LLVMBasicBlockRef main_function_body;
283
284 if (num_return_elems)
285 ret_type = LLVMStructTypeInContext(ctx, return_types,
286 num_return_elems, true);
287 else
288 ret_type = LLVMVoidTypeInContext(ctx);
289
290 /* Setup the function */
291 main_function_type =
292 LLVMFunctionType(ret_type, param_types, param_count, 0);
293 LLVMValueRef main_function =
294 LLVMAddFunction(module, "main", main_function_type);
295 main_function_body =
296 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
297 LLVMPositionBuilderAtEnd(builder, main_function_body);
298
299 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
300 for (unsigned i = 0; i < sgpr_params; ++i) {
301 if (i < array_params) {
302 LLVMValueRef P = LLVMGetParam(main_function, i);
303 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
304 ac_add_attr_dereferenceable(P, UINT64_MAX);
305 }
306 else {
307 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
308 }
309 }
310
311 if (unsafe_math) {
312 /* These were copied from some LLVM test. */
313 LLVMAddTargetDependentFunctionAttr(main_function,
314 "less-precise-fpmad",
315 "true");
316 LLVMAddTargetDependentFunctionAttr(main_function,
317 "no-infs-fp-math",
318 "true");
319 LLVMAddTargetDependentFunctionAttr(main_function,
320 "no-nans-fp-math",
321 "true");
322 LLVMAddTargetDependentFunctionAttr(main_function,
323 "unsafe-fp-math",
324 "true");
325 }
326 return main_function;
327 }
328
329 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
330 {
331 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
332 CONST_ADDR_SPACE);
333 }
334
335 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
336 int idx,
337 LLVMTypeRef type)
338 {
339 LLVMValueRef offset;
340 LLVMValueRef ptr;
341 int addr_space;
342
343 offset = LLVMConstInt(ctx->i32, idx, false);
344
345 ptr = ctx->shared_memory;
346 ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
347 addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
348 ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
349 return ptr;
350 }
351
352 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
353 {
354 LLVMTypeRef type = LLVMTypeOf(v);
355 if (type == ctx->f32) {
356 return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
357 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
358 LLVMTypeRef elem_type = LLVMGetElementType(type);
359 if (elem_type == ctx->f32) {
360 LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
361 return LLVMBuildBitCast(ctx->builder, v, nt, "");
362 }
363 }
364 return v;
365 }
366
367 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
368 {
369 LLVMTypeRef type = LLVMTypeOf(v);
370 if (type == ctx->i32) {
371 return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
372 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
373 LLVMTypeRef elem_type = LLVMGetElementType(type);
374 if (elem_type == ctx->i32) {
375 LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
376 return LLVMBuildBitCast(ctx->builder, v, nt, "");
377 }
378 }
379 return v;
380 }
381
382 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
383 LLVMValueRef base_ptr, LLVMValueRef index)
384 {
385 LLVMValueRef indices[2] = {
386 ctx->i32zero,
387 index,
388 };
389 return LLVMBuildGEP(ctx->builder, base_ptr,
390 indices, 2, "");
391 }
392
393 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
394 LLVMValueRef base_ptr, LLVMValueRef index,
395 bool uniform)
396 {
397 LLVMValueRef pointer;
398 pointer = build_gep0(ctx, base_ptr, index);
399 if (uniform)
400 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
401 return LLVMBuildLoad(ctx->builder, pointer, "");
402 }
403
404 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
405 LLVMValueRef base_ptr, LLVMValueRef index)
406 {
407 LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
408 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
409 return result;
410 }
411
412 static void create_function(struct nir_to_llvm_context *ctx,
413 struct nir_shader *nir)
414 {
415 LLVMTypeRef arg_types[23];
416 unsigned arg_idx = 0;
417 unsigned array_count = 0;
418 unsigned sgpr_count = 0, user_sgpr_count;
419 unsigned i;
420
421 /* 1 for each descriptor set */
422 for (unsigned i = 0; i < 4; ++i)
423 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
424
425 /* 1 for push constants and dynamic descriptors */
426 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
427
428 array_count = arg_idx;
429 switch (nir->stage) {
430 case MESA_SHADER_COMPUTE:
431 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
432 user_sgpr_count = arg_idx;
433 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
434 arg_types[arg_idx++] = ctx->i32;
435 sgpr_count = arg_idx;
436
437 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
438 break;
439 case MESA_SHADER_VERTEX:
440 arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
441 arg_types[arg_idx++] = ctx->i32; // base vertex
442 arg_types[arg_idx++] = ctx->i32; // start instance
443 user_sgpr_count = sgpr_count = arg_idx;
444 arg_types[arg_idx++] = ctx->i32; // vertex id
445 arg_types[arg_idx++] = ctx->i32; // rel auto id
446 arg_types[arg_idx++] = ctx->i32; // vs prim id
447 arg_types[arg_idx++] = ctx->i32; // instance id
448 break;
449 case MESA_SHADER_FRAGMENT:
450 arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
451 user_sgpr_count = arg_idx;
452 arg_types[arg_idx++] = ctx->i32; /* prim mask */
453 sgpr_count = arg_idx;
454 arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
455 arg_types[arg_idx++] = ctx->v2i32; /* persp center */
456 arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
457 arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
458 arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
459 arg_types[arg_idx++] = ctx->v2i32; /* linear center */
460 arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
461 arg_types[arg_idx++] = ctx->f32; /* line stipple tex */
462 arg_types[arg_idx++] = ctx->f32; /* pos x float */
463 arg_types[arg_idx++] = ctx->f32; /* pos y float */
464 arg_types[arg_idx++] = ctx->f32; /* pos z float */
465 arg_types[arg_idx++] = ctx->f32; /* pos w float */
466 arg_types[arg_idx++] = ctx->i32; /* front face */
467 arg_types[arg_idx++] = ctx->i32; /* ancillary */
468 arg_types[arg_idx++] = ctx->f32; /* sample coverage */
469 arg_types[arg_idx++] = ctx->i32; /* fixed pt */
470 break;
471 default:
472 unreachable("Shader stage not implemented");
473 }
474
475 ctx->main_function = create_llvm_function(
476 ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
477 arg_idx, array_count, sgpr_count, ctx->options->unsafe_math);
478 set_llvm_calling_convention(ctx->main_function, nir->stage);
479
480
481 ctx->shader_info->num_input_sgprs = 0;
482 ctx->shader_info->num_input_vgprs = 0;
483
484 for (i = 0; i < user_sgpr_count; i++)
485 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
486
487 ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
488 for (; i < sgpr_count; i++)
489 ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
490
491 if (nir->stage != MESA_SHADER_FRAGMENT)
492 for (; i < arg_idx; ++i)
493 ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
494
495 arg_idx = 0;
496 for (unsigned i = 0; i < 4; ++i)
497 ctx->descriptor_sets[i] =
498 LLVMGetParam(ctx->main_function, arg_idx++);
499
500 ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
501
502 switch (nir->stage) {
503 case MESA_SHADER_COMPUTE:
504 ctx->num_work_groups =
505 LLVMGetParam(ctx->main_function, arg_idx++);
506 ctx->workgroup_ids =
507 LLVMGetParam(ctx->main_function, arg_idx++);
508 ctx->tg_size =
509 LLVMGetParam(ctx->main_function, arg_idx++);
510 ctx->local_invocation_ids =
511 LLVMGetParam(ctx->main_function, arg_idx++);
512 break;
513 case MESA_SHADER_VERTEX:
514 ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
515 ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
516 ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
517 ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
518 ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
519 ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
520 ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
521 break;
522 case MESA_SHADER_FRAGMENT:
523 ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
524 ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
525 ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
526 ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
527 ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
528 arg_idx++;
529 ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
530 ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
531 ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
532 arg_idx++; /* line stipple */
533 ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
534 ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
535 ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
536 ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
537 ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
538 ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
539 break;
540 default:
541 unreachable("Shader stage not implemented");
542 }
543 }
544
545 static void setup_types(struct nir_to_llvm_context *ctx)
546 {
547 LLVMValueRef args[4];
548
549 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
550 ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
551 ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
552 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
553 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
554 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
555 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
556 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
557 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
558 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
559 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
560 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
561 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
562 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
563 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
564
565 ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
566 ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
567 ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
568 ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
569
570 args[0] = ctx->f32zero;
571 args[1] = ctx->f32zero;
572 args[2] = ctx->f32zero;
573 args[3] = ctx->f32one;
574 ctx->v4f32empty = LLVMConstVector(args, 4);
575
576 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
577 "range", 5);
578 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
579 "invariant.load", 14);
580 ctx->uniform_md_kind =
581 LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
582 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
583
584 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
585
586 args[0] = LLVMConstReal(ctx->f32, 2.5);
587 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
588 }
589
590 static int get_llvm_num_components(LLVMValueRef value)
591 {
592 LLVMTypeRef type = LLVMTypeOf(value);
593 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
594 ? LLVMGetVectorSize(type)
595 : 1;
596 return num_components;
597 }
598
599 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
600 LLVMValueRef value,
601 int index)
602 {
603 int count = get_llvm_num_components(value);
604
605 assert(index < count);
606 if (count == 1)
607 return value;
608
609 return LLVMBuildExtractElement(ctx->builder, value,
610 LLVMConstInt(ctx->i32, index, false), "");
611 }
612
613 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
614 LLVMValueRef value, unsigned count)
615 {
616 unsigned num_components = get_llvm_num_components(value);
617 if (count == num_components)
618 return value;
619
620 LLVMValueRef masks[] = {
621 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
622 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
623
624 if (count == 1)
625 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
626 "");
627
628 LLVMValueRef swizzle = LLVMConstVector(masks, count);
629 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
630 }
631
632 static LLVMValueRef
633 build_gather_values_extended(struct nir_to_llvm_context *ctx,
634 LLVMValueRef *values,
635 unsigned value_count,
636 unsigned value_stride,
637 bool load)
638 {
639 LLVMBuilderRef builder = ctx->builder;
640 LLVMValueRef vec;
641 unsigned i;
642
643
644 if (value_count == 1) {
645 if (load)
646 return LLVMBuildLoad(builder, values[0], "");
647 return values[0];
648 }
649
650 for (i = 0; i < value_count; i++) {
651 LLVMValueRef value = values[i * value_stride];
652 if (load)
653 value = LLVMBuildLoad(builder, value, "");
654
655 if (!i)
656 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
657 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
658 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
659 }
660 return vec;
661 }
662
663
664 static void
665 build_store_values_extended(struct nir_to_llvm_context *ctx,
666 LLVMValueRef *values,
667 unsigned value_count,
668 unsigned value_stride,
669 LLVMValueRef vec)
670 {
671 LLVMBuilderRef builder = ctx->builder;
672 unsigned i;
673
674 if (value_count == 1) {
675 LLVMBuildStore(builder, vec, values[0]);
676 return;
677 }
678
679 for (i = 0; i < value_count; i++) {
680 LLVMValueRef ptr = values[i * value_stride];
681 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
682 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
683 LLVMBuildStore(builder, value, ptr);
684 }
685 }
686
687 static LLVMValueRef
688 build_gather_values(struct nir_to_llvm_context *ctx,
689 LLVMValueRef *values,
690 unsigned value_count)
691 {
692 return build_gather_values_extended(ctx, values, value_count, 1, false);
693 }
694
695 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
696 nir_ssa_def *def)
697 {
698 LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
699 if (def->num_components > 1) {
700 type = LLVMVectorType(type, def->num_components);
701 }
702 return type;
703 }
704
705 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
706 {
707 assert(src.is_ssa);
708 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
709 return (LLVMValueRef)entry->data;
710 }
711
712
713 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
714 struct nir_block *b)
715 {
716 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
717 return (LLVMBasicBlockRef)entry->data;
718 }
719
720 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
721 nir_alu_src src,
722 unsigned num_components)
723 {
724 LLVMValueRef value = get_src(ctx, src.src);
725 bool need_swizzle = false;
726
727 assert(value);
728 LLVMTypeRef type = LLVMTypeOf(value);
729 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
730 ? LLVMGetVectorSize(type)
731 : 1;
732
733 for (unsigned i = 0; i < num_components; ++i) {
734 assert(src.swizzle[i] < src_components);
735 if (src.swizzle[i] != i)
736 need_swizzle = true;
737 }
738
739 if (need_swizzle || num_components != src_components) {
740 LLVMValueRef masks[] = {
741 LLVMConstInt(ctx->i32, src.swizzle[0], false),
742 LLVMConstInt(ctx->i32, src.swizzle[1], false),
743 LLVMConstInt(ctx->i32, src.swizzle[2], false),
744 LLVMConstInt(ctx->i32, src.swizzle[3], false)};
745
746 if (src_components > 1 && num_components == 1) {
747 value = LLVMBuildExtractElement(ctx->builder, value,
748 masks[0], "");
749 } else if (src_components == 1 && num_components > 1) {
750 LLVMValueRef values[] = {value, value, value, value};
751 value = build_gather_values(ctx, values, num_components);
752 } else {
753 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
754 value = LLVMBuildShuffleVector(ctx->builder, value, value,
755 swizzle, "");
756 }
757 }
758 assert(!src.negate);
759 assert(!src.abs);
760 return value;
761 }
762
763 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
764 LLVMIntPredicate pred, LLVMValueRef src0,
765 LLVMValueRef src1)
766 {
767 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
768 return LLVMBuildSelect(ctx->builder, result,
769 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
770 LLVMConstInt(ctx->i32, 0, false), "");
771 }
772
773 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
774 LLVMRealPredicate pred, LLVMValueRef src0,
775 LLVMValueRef src1)
776 {
777 LLVMValueRef result;
778 src0 = to_float(ctx, src0);
779 src1 = to_float(ctx, src1);
780 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
781 return LLVMBuildSelect(ctx->builder, result,
782 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
783 LLVMConstInt(ctx->i32, 0, false), "");
784 }
785
786 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
787 const char *intrin,
788 LLVMValueRef src0)
789 {
790 LLVMValueRef params[] = {
791 to_float(ctx, src0),
792 };
793 return emit_llvm_intrinsic(ctx, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
794 }
795
796 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
797 const char *intrin,
798 LLVMValueRef src0, LLVMValueRef src1)
799 {
800 LLVMValueRef params[] = {
801 to_float(ctx, src0),
802 to_float(ctx, src1),
803 };
804 return emit_llvm_intrinsic(ctx, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
805 }
806
807 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
808 const char *intrin,
809 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
810 {
811 LLVMValueRef params[] = {
812 to_float(ctx, src0),
813 to_float(ctx, src1),
814 to_float(ctx, src2),
815 };
816 return emit_llvm_intrinsic(ctx, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
817 }
818
819 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
820 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
821 {
822 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
823 ctx->i32zero, "");
824 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
825 }
826
827 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
828 LLVMValueRef src0)
829 {
830 LLVMValueRef params[2] = {
831 src0,
832
833 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
834 * add special code to check for x=0. The reason is that
835 * the LLVM behavior for x=0 is different from what we
836 * need here.
837 *
838 * The hardware already implements the correct behavior.
839 */
840 LLVMConstInt(ctx->i32, 1, false),
841 };
842 return emit_llvm_intrinsic(ctx, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
843 }
844
845 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
846 LLVMValueRef src0)
847 {
848 LLVMValueRef msb = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.flbit.i32",
849 ctx->i32, &src0, 1,
850 AC_FUNC_ATTR_READNONE);
851
852 /* The HW returns the last bit index from MSB, but NIR wants
853 * the index from LSB. Invert it by doing "31 - msb". */
854 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
855 msb, "");
856
857 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
858 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
859 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
860 src0, ctx->i32zero, ""),
861 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
862 src0, all_ones, ""), "");
863
864 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
865 }
866
867 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
868 LLVMValueRef src0)
869 {
870 LLVMValueRef args[2] = {
871 src0,
872 ctx->i32one,
873 };
874 LLVMValueRef msb = emit_llvm_intrinsic(ctx, "llvm.ctlz.i32",
875 ctx->i32, args, ARRAY_SIZE(args),
876 AC_FUNC_ATTR_READNONE);
877
878 /* The HW returns the last bit index from MSB, but NIR wants
879 * the index from LSB. Invert it by doing "31 - msb". */
880 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
881 msb, "");
882
883 return LLVMBuildSelect(ctx->builder,
884 LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
885 ctx->i32zero, ""),
886 LLVMConstInt(ctx->i32, -1, true), msb, "");
887 }
888
889 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
890 LLVMIntPredicate pred,
891 LLVMValueRef src0, LLVMValueRef src1)
892 {
893 return LLVMBuildSelect(ctx->builder,
894 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
895 src0,
896 src1, "");
897
898 }
899 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
900 LLVMValueRef src0)
901 {
902 return emit_minmax_int(ctx, LLVMIntSGT, src0,
903 LLVMBuildNeg(ctx->builder, src0, ""));
904 }
905
906 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
907 LLVMValueRef src0)
908 {
909 LLVMValueRef cmp, val;
910
911 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
912 val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
913 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
914 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
915 return val;
916 }
917
918 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
919 LLVMValueRef src0)
920 {
921 LLVMValueRef cmp, val;
922
923 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
924 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
925 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
926 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
927 return val;
928 }
929
930 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
931 LLVMValueRef src0)
932 {
933 const char *intr = "llvm.floor.f32";
934 LLVMValueRef fsrc0 = to_float(ctx, src0);
935 LLVMValueRef params[] = {
936 fsrc0,
937 };
938 LLVMValueRef floor = emit_llvm_intrinsic(ctx, intr,
939 ctx->f32, params, 1,
940 AC_FUNC_ATTR_READNONE);
941 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
942 }
943
944 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
945 const char *intrin,
946 LLVMValueRef src0, LLVMValueRef src1)
947 {
948 LLVMTypeRef ret_type;
949 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
950 LLVMValueRef res;
951 LLVMValueRef params[] = { src0, src1 };
952 ret_type = LLVMStructTypeInContext(ctx->context, types,
953 2, true);
954
955 res = emit_llvm_intrinsic(ctx, intrin, ret_type,
956 params, 2, AC_FUNC_ATTR_READNONE);
957
958 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
959 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
960 return res;
961 }
962
963 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
964 LLVMValueRef src0)
965 {
966 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
967 }
968
969 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
970 LLVMValueRef src0, LLVMValueRef src1)
971 {
972 LLVMValueRef dst64, result;
973 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
974 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
975
976 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
977 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
978 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
979 return result;
980 }
981
982 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
983 LLVMValueRef src0, LLVMValueRef src1)
984 {
985 LLVMValueRef dst64, result;
986 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
987 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
988
989 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
990 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
991 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
992 return result;
993 }
994
995 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
996 const char *intrin,
997 LLVMValueRef srcs[3])
998 {
999 LLVMValueRef result;
1000 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1001 result = emit_llvm_intrinsic(ctx, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
1002
1003 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1004 return result;
1005 }
1006
1007 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
1008 LLVMValueRef src0, LLVMValueRef src1,
1009 LLVMValueRef src2, LLVMValueRef src3)
1010 {
1011 LLVMValueRef bfi_args[3], result;
1012
1013 bfi_args[0] = LLVMBuildShl(ctx->builder,
1014 LLVMBuildSub(ctx->builder,
1015 LLVMBuildShl(ctx->builder,
1016 ctx->i32one,
1017 src3, ""),
1018 ctx->i32one, ""),
1019 src2, "");
1020 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1021 bfi_args[2] = src0;
1022
1023 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1024
1025 /* Calculate:
1026 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1027 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1028 */
1029 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1030 LLVMBuildAnd(ctx->builder, bfi_args[0],
1031 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1032
1033 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1034 return result;
1035 }
1036
1037 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
1038 LLVMValueRef src0)
1039 {
1040 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1041 int i;
1042 LLVMValueRef comp[2];
1043
1044 src0 = to_float(ctx, src0);
1045 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1046 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1047 for (i = 0; i < 2; i++) {
1048 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1049 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1050 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1051 }
1052
1053 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1054 comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1055
1056 return comp[0];
1057 }
1058
1059 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1060 LLVMValueRef src0)
1061 {
1062 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1063 LLVMValueRef temps[2], result, val;
1064 int i;
1065
1066 for (i = 0; i < 2; i++) {
1067 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1068 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1069 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1070 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1071 }
1072
1073 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1074 ctx->i32zero, "");
1075 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1076 ctx->i32one, "");
1077 return result;
1078 }
1079
1080 /**
1081 * Set range metadata on an instruction. This can only be used on load and
1082 * call instructions. If you know an instruction can only produce the values
1083 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1084 * \p lo is the minimum value inclusive.
1085 * \p hi is the maximum value exclusive.
1086 */
1087 static void set_range_metadata(struct nir_to_llvm_context *ctx,
1088 LLVMValueRef value, unsigned lo, unsigned hi)
1089 {
1090 LLVMValueRef range_md, md_args[2];
1091 LLVMTypeRef type = LLVMTypeOf(value);
1092 LLVMContextRef context = LLVMGetTypeContext(type);
1093
1094 md_args[0] = LLVMConstInt(type, lo, false);
1095 md_args[1] = LLVMConstInt(type, hi, false);
1096 range_md = LLVMMDNodeInContext(context, md_args, 2);
1097 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1098 }
1099
1100 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
1101 {
1102 LLVMValueRef tid;
1103 LLVMValueRef tid_args[2];
1104 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
1105 tid_args[1] = ctx->i32zero;
1106 tid_args[1] = emit_llvm_intrinsic(ctx,
1107 "llvm.amdgcn.mbcnt.lo", ctx->i32,
1108 tid_args, 2, AC_FUNC_ATTR_READNONE);
1109
1110 tid = emit_llvm_intrinsic(ctx,
1111 "llvm.amdgcn.mbcnt.hi", ctx->i32,
1112 tid_args, 2, AC_FUNC_ATTR_READNONE);
1113 set_range_metadata(ctx, tid, 0, 64);
1114 return tid;
1115 }
1116
1117 /*
1118 * SI implements derivatives using the local data store (LDS)
1119 * All writes to the LDS happen in all executing threads at
1120 * the same time. TID is the Thread ID for the current
1121 * thread and is a value between 0 and 63, representing
1122 * the thread's position in the wavefront.
1123 *
1124 * For the pixel shader threads are grouped into quads of four pixels.
1125 * The TIDs of the pixels of a quad are:
1126 *
1127 * +------+------+
1128 * |4n + 0|4n + 1|
1129 * +------+------+
1130 * |4n + 2|4n + 3|
1131 * +------+------+
1132 *
1133 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1134 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1135 * the current pixel's column, and masking with 0xfffffffe yields the TID
1136 * of the left pixel of the current pixel's row.
1137 *
1138 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1139 * adding 2 yields the TID of the pixel below the top pixel.
1140 */
1141 /* masks for thread ID. */
1142 #define TID_MASK_TOP_LEFT 0xfffffffc
1143 #define TID_MASK_TOP 0xfffffffd
1144 #define TID_MASK_LEFT 0xfffffffe
1145 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1146 nir_op op,
1147 LLVMValueRef src0)
1148 {
1149 LLVMValueRef tl, trbl, result;
1150 LLVMValueRef tl_tid, trbl_tid;
1151 LLVMValueRef args[2];
1152 LLVMValueRef thread_id;
1153 unsigned mask;
1154 int idx;
1155 ctx->has_ddxy = true;
1156
1157 if (!ctx->lds && !ctx->has_ds_bpermute)
1158 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1159 LLVMArrayType(ctx->i32, 64),
1160 "ddxy_lds", LOCAL_ADDR_SPACE);
1161
1162 thread_id = get_thread_id(ctx);
1163 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1164 mask = TID_MASK_LEFT;
1165 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1166 mask = TID_MASK_TOP;
1167 else
1168 mask = TID_MASK_TOP_LEFT;
1169
1170 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1171 LLVMConstInt(ctx->i32, mask, false), "");
1172 /* for DDX we want to next X pixel, DDY next Y pixel. */
1173 if (op == nir_op_fddx_fine ||
1174 op == nir_op_fddx_coarse ||
1175 op == nir_op_fddx)
1176 idx = 1;
1177 else
1178 idx = 2;
1179
1180 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1181 LLVMConstInt(ctx->i32, idx, false), "");
1182
1183 if (ctx->has_ds_bpermute) {
1184 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1185 LLVMConstInt(ctx->i32, 4, false), "");
1186 args[1] = src0;
1187 tl = emit_llvm_intrinsic(ctx, "llvm.amdgcn.ds.bpermute",
1188 ctx->i32, args, 2,
1189 AC_FUNC_ATTR_READNONE);
1190
1191 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1192 LLVMConstInt(ctx->i32, 4, false), "");
1193 trbl = emit_llvm_intrinsic(ctx, "llvm.amdgcn.ds.bpermute",
1194 ctx->i32, args, 2,
1195 AC_FUNC_ATTR_READNONE);
1196 } else {
1197 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
1198
1199 store_ptr = build_gep0(ctx, ctx->lds, thread_id);
1200 load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
1201 load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
1202
1203 LLVMBuildStore(ctx->builder, src0, store_ptr);
1204 tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
1205 trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
1206 }
1207 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1208 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1209 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1210 return result;
1211 }
1212
1213 /*
1214 * this takes an I,J coordinate pair,
1215 * and works out the X and Y derivatives.
1216 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
1217 */
1218 static LLVMValueRef emit_ddxy_interp(
1219 struct nir_to_llvm_context *ctx,
1220 LLVMValueRef interp_ij)
1221 {
1222 LLVMValueRef result[4], a;
1223 unsigned i;
1224
1225 for (i = 0; i < 2; i++) {
1226 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1227 LLVMConstInt(ctx->i32, i, false), "");
1228 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1229 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1230 }
1231 return build_gather_values(ctx, result, 4);
1232 }
1233
1234 static LLVMValueRef emit_fdiv(struct nir_to_llvm_context *ctx,
1235 LLVMValueRef num,
1236 LLVMValueRef den)
1237 {
1238 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
1239
1240 if (!LLVMIsConstant(ret))
1241 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
1242 return ret;
1243 }
1244
1245 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
1246 {
1247 LLVMValueRef src[4], result = NULL;
1248 unsigned num_components = instr->dest.dest.ssa.num_components;
1249 unsigned src_components;
1250
1251 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1252 switch (instr->op) {
1253 case nir_op_vec2:
1254 case nir_op_vec3:
1255 case nir_op_vec4:
1256 src_components = 1;
1257 break;
1258 case nir_op_pack_half_2x16:
1259 src_components = 2;
1260 break;
1261 case nir_op_unpack_half_2x16:
1262 src_components = 1;
1263 break;
1264 default:
1265 src_components = num_components;
1266 break;
1267 }
1268 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1269 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1270
1271 switch (instr->op) {
1272 case nir_op_fmov:
1273 case nir_op_imov:
1274 result = src[0];
1275 break;
1276 case nir_op_fneg:
1277 src[0] = to_float(ctx, src[0]);
1278 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1279 break;
1280 case nir_op_ineg:
1281 result = LLVMBuildNeg(ctx->builder, src[0], "");
1282 break;
1283 case nir_op_inot:
1284 result = LLVMBuildNot(ctx->builder, src[0], "");
1285 break;
1286 case nir_op_iadd:
1287 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1288 break;
1289 case nir_op_fadd:
1290 src[0] = to_float(ctx, src[0]);
1291 src[1] = to_float(ctx, src[1]);
1292 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1293 break;
1294 case nir_op_fsub:
1295 src[0] = to_float(ctx, src[0]);
1296 src[1] = to_float(ctx, src[1]);
1297 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1298 break;
1299 case nir_op_isub:
1300 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1301 break;
1302 case nir_op_imul:
1303 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1304 break;
1305 case nir_op_imod:
1306 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1307 break;
1308 case nir_op_umod:
1309 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
1310 break;
1311 case nir_op_fmod:
1312 src[0] = to_float(ctx, src[0]);
1313 src[1] = to_float(ctx, src[1]);
1314 result = emit_fdiv(ctx, src[0], src[1]);
1315 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
1316 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1317 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1318 break;
1319 case nir_op_frem:
1320 src[0] = to_float(ctx, src[0]);
1321 src[1] = to_float(ctx, src[1]);
1322 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1323 break;
1324 case nir_op_idiv:
1325 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1326 break;
1327 case nir_op_udiv:
1328 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1329 break;
1330 case nir_op_fmul:
1331 src[0] = to_float(ctx, src[0]);
1332 src[1] = to_float(ctx, src[1]);
1333 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1334 break;
1335 case nir_op_fdiv:
1336 src[0] = to_float(ctx, src[0]);
1337 src[1] = to_float(ctx, src[1]);
1338 result = emit_fdiv(ctx, src[0], src[1]);
1339 break;
1340 case nir_op_frcp:
1341 src[0] = to_float(ctx, src[0]);
1342 result = emit_fdiv(ctx, ctx->f32one, src[0]);
1343 break;
1344 case nir_op_iand:
1345 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1346 break;
1347 case nir_op_ior:
1348 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1349 break;
1350 case nir_op_ixor:
1351 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1352 break;
1353 case nir_op_ishl:
1354 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1355 break;
1356 case nir_op_ishr:
1357 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1358 break;
1359 case nir_op_ushr:
1360 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1361 break;
1362 case nir_op_ilt:
1363 result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
1364 break;
1365 case nir_op_ine:
1366 result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
1367 break;
1368 case nir_op_ieq:
1369 result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
1370 break;
1371 case nir_op_ige:
1372 result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
1373 break;
1374 case nir_op_ult:
1375 result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
1376 break;
1377 case nir_op_uge:
1378 result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
1379 break;
1380 case nir_op_feq:
1381 result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
1382 break;
1383 case nir_op_fne:
1384 result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
1385 break;
1386 case nir_op_flt:
1387 result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
1388 break;
1389 case nir_op_fge:
1390 result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
1391 break;
1392 case nir_op_fabs:
1393 result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
1394 break;
1395 case nir_op_iabs:
1396 result = emit_iabs(ctx, src[0]);
1397 break;
1398 case nir_op_imax:
1399 result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
1400 break;
1401 case nir_op_imin:
1402 result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
1403 break;
1404 case nir_op_umax:
1405 result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
1406 break;
1407 case nir_op_umin:
1408 result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
1409 break;
1410 case nir_op_isign:
1411 result = emit_isign(ctx, src[0]);
1412 break;
1413 case nir_op_fsign:
1414 src[0] = to_float(ctx, src[0]);
1415 result = emit_fsign(ctx, src[0]);
1416 break;
1417 case nir_op_ffloor:
1418 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
1419 break;
1420 case nir_op_ftrunc:
1421 result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
1422 break;
1423 case nir_op_fceil:
1424 result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
1425 break;
1426 case nir_op_fround_even:
1427 result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
1428 break;
1429 case nir_op_ffract:
1430 result = emit_ffract(ctx, src[0]);
1431 break;
1432 case nir_op_fsin:
1433 result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
1434 break;
1435 case nir_op_fcos:
1436 result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
1437 break;
1438 case nir_op_fsqrt:
1439 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1440 break;
1441 case nir_op_fexp2:
1442 result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
1443 break;
1444 case nir_op_flog2:
1445 result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
1446 break;
1447 case nir_op_frsq:
1448 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1449 result = emit_fdiv(ctx, ctx->f32one, result);
1450 break;
1451 case nir_op_fpow:
1452 result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
1453 break;
1454 case nir_op_fmax:
1455 result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
1456 break;
1457 case nir_op_fmin:
1458 result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
1459 break;
1460 case nir_op_ffma:
1461 result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
1462 break;
1463 case nir_op_ibitfield_extract:
1464 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
1465 break;
1466 case nir_op_ubitfield_extract:
1467 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
1468 break;
1469 case nir_op_bitfield_insert:
1470 result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
1471 break;
1472 case nir_op_bitfield_reverse:
1473 result = emit_llvm_intrinsic(ctx, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1474 break;
1475 case nir_op_bit_count:
1476 result = emit_llvm_intrinsic(ctx, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1477 break;
1478 case nir_op_vec2:
1479 case nir_op_vec3:
1480 case nir_op_vec4:
1481 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1482 src[i] = to_integer(ctx, src[i]);
1483 result = build_gather_values(ctx, src, num_components);
1484 break;
1485 case nir_op_f2i:
1486 src[0] = to_float(ctx, src[0]);
1487 result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
1488 break;
1489 case nir_op_f2u:
1490 src[0] = to_float(ctx, src[0]);
1491 result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
1492 break;
1493 case nir_op_i2f:
1494 result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
1495 break;
1496 case nir_op_u2f:
1497 result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
1498 break;
1499 case nir_op_bcsel:
1500 result = emit_bcsel(ctx, src[0], src[1], src[2]);
1501 break;
1502 case nir_op_find_lsb:
1503 result = emit_find_lsb(ctx, src[0]);
1504 break;
1505 case nir_op_ufind_msb:
1506 result = emit_ufind_msb(ctx, src[0]);
1507 break;
1508 case nir_op_ifind_msb:
1509 result = emit_ifind_msb(ctx, src[0]);
1510 break;
1511 case nir_op_uadd_carry:
1512 result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1513 break;
1514 case nir_op_usub_borrow:
1515 result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
1516 break;
1517 case nir_op_b2f:
1518 result = emit_b2f(ctx, src[0]);
1519 break;
1520 case nir_op_fquantize2f16:
1521 src[0] = to_float(ctx, src[0]);
1522 result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
1523 /* need to convert back up to f32 */
1524 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1525 break;
1526 case nir_op_umul_high:
1527 result = emit_umul_high(ctx, src[0], src[1]);
1528 break;
1529 case nir_op_imul_high:
1530 result = emit_imul_high(ctx, src[0], src[1]);
1531 break;
1532 case nir_op_pack_half_2x16:
1533 result = emit_pack_half_2x16(ctx, src[0]);
1534 break;
1535 case nir_op_unpack_half_2x16:
1536 result = emit_unpack_half_2x16(ctx, src[0]);
1537 break;
1538 case nir_op_fddx:
1539 case nir_op_fddy:
1540 case nir_op_fddx_fine:
1541 case nir_op_fddy_fine:
1542 case nir_op_fddx_coarse:
1543 case nir_op_fddy_coarse:
1544 result = emit_ddxy(ctx, instr->op, src[0]);
1545 break;
1546 default:
1547 fprintf(stderr, "Unknown NIR alu instr: ");
1548 nir_print_instr(&instr->instr, stderr);
1549 fprintf(stderr, "\n");
1550 abort();
1551 }
1552
1553 if (result) {
1554 assert(instr->dest.dest.is_ssa);
1555 result = to_integer(ctx, result);
1556 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1557 result);
1558 }
1559 }
1560
1561 static void visit_load_const(struct nir_to_llvm_context *ctx,
1562 nir_load_const_instr *instr)
1563 {
1564 LLVMValueRef values[4], value = NULL;
1565 LLVMTypeRef element_type =
1566 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1567
1568 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1569 switch (instr->def.bit_size) {
1570 case 32:
1571 values[i] = LLVMConstInt(element_type,
1572 instr->value.u32[i], false);
1573 break;
1574 case 64:
1575 values[i] = LLVMConstInt(element_type,
1576 instr->value.u64[i], false);
1577 break;
1578 default:
1579 fprintf(stderr,
1580 "unsupported nir load_const bit_size: %d\n",
1581 instr->def.bit_size);
1582 abort();
1583 }
1584 }
1585 if (instr->def.num_components > 1) {
1586 value = LLVMConstVector(values, instr->def.num_components);
1587 } else
1588 value = values[0];
1589
1590 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1591 }
1592
1593 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1594 LLVMTypeRef type)
1595 {
1596 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1597 return LLVMBuildBitCast(ctx->builder, ptr,
1598 LLVMPointerType(type, addr_space), "");
1599 }
1600
1601 static LLVMValueRef
1602 emit_llvm_intrinsic(struct nir_to_llvm_context *ctx, const char *name,
1603 LLVMTypeRef return_type, LLVMValueRef *params,
1604 unsigned param_count, unsigned attrib_mask)
1605 {
1606 LLVMValueRef function;
1607
1608 function = LLVMGetNamedFunction(ctx->module, name);
1609 if (!function) {
1610 LLVMTypeRef param_types[32], function_type;
1611 unsigned i;
1612
1613 assert(param_count <= 32);
1614
1615 for (i = 0; i < param_count; ++i) {
1616 assert(params[i]);
1617 param_types[i] = LLVMTypeOf(params[i]);
1618 }
1619 function_type =
1620 LLVMFunctionType(return_type, param_types, param_count, 0);
1621 function = LLVMAddFunction(ctx->module, name, function_type);
1622
1623 LLVMSetFunctionCallConv(function, LLVMCCallConv);
1624 LLVMSetLinkage(function, LLVMExternalLinkage);
1625
1626 attrib_mask |= AC_FUNC_ATTR_NOUNWIND;
1627 while (attrib_mask) {
1628 enum ac_func_attr attr = 1u << u_bit_scan(&attrib_mask);
1629 ac_add_function_attr(function, -1, attr);
1630 }
1631 }
1632 return LLVMBuildCall(ctx->builder, function, params, param_count, "");
1633 }
1634
1635 static LLVMValueRef
1636 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1637 {
1638 LLVMValueRef size =
1639 LLVMBuildExtractElement(ctx->builder, descriptor,
1640 LLVMConstInt(ctx->i32, 2, false), "");
1641
1642 /* VI only */
1643 if (ctx->options->chip_class >= VI && in_elements) {
1644 /* On VI, the descriptor contains the size in bytes,
1645 * but TXQ must return the size in elements.
1646 * The stride is always non-zero for resources using TXQ.
1647 */
1648 LLVMValueRef stride =
1649 LLVMBuildExtractElement(ctx->builder, descriptor,
1650 LLVMConstInt(ctx->i32, 1, false), "");
1651 stride = LLVMBuildLShr(ctx->builder, stride,
1652 LLVMConstInt(ctx->i32, 16, false), "");
1653 stride = LLVMBuildAnd(ctx->builder, stride,
1654 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1655
1656 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1657 }
1658 return size;
1659 }
1660
1661 /**
1662 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1663 * intrinsic names).
1664 */
1665 static void build_int_type_name(
1666 LLVMTypeRef type,
1667 char *buf, unsigned bufsize)
1668 {
1669 assert(bufsize >= 6);
1670
1671 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1672 snprintf(buf, bufsize, "v%ui32",
1673 LLVMGetVectorSize(type));
1674 else
1675 strcpy(buf, "i32");
1676 }
1677
1678 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1679 struct ac_tex_info *tinfo,
1680 nir_tex_instr *instr,
1681 const char *intr_name,
1682 unsigned coord_vgpr_index)
1683 {
1684 LLVMValueRef coord = tinfo->args[0];
1685 LLVMValueRef half_texel[2];
1686 int c;
1687
1688 //TODO Rect
1689 {
1690 LLVMValueRef txq_args[10];
1691 int txq_arg_count = 0;
1692 LLVMValueRef size;
1693 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1694 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
1695 txq_args[txq_arg_count++] = tinfo->args[1];
1696 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
1697 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
1698 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1699 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
1700 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1701 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1702 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1703 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1704 size = emit_llvm_intrinsic(ctx, "llvm.SI.getresinfo.i32", ctx->v4i32,
1705 txq_args, txq_arg_count,
1706 AC_FUNC_ATTR_READNONE);
1707
1708 for (c = 0; c < 2; c++) {
1709 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1710 LLVMConstInt(ctx->i32, c, false), "");
1711 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1712 half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
1713 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1714 LLVMConstReal(ctx->f32, -0.5), "");
1715 }
1716 }
1717
1718 for (c = 0; c < 2; c++) {
1719 LLVMValueRef tmp;
1720 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1721 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1722 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1723 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1724 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1725 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1726 }
1727
1728 tinfo->args[0] = coord;
1729 return emit_llvm_intrinsic(ctx, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1730 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1731
1732 }
1733
1734 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1735 nir_tex_instr *instr,
1736 struct ac_tex_info *tinfo)
1737 {
1738 const char *name = "llvm.SI.image.sample";
1739 const char *infix = "";
1740 char intr_name[127];
1741 char type[64];
1742 bool is_shadow = instr->is_shadow;
1743 bool has_offset = tinfo->has_offset;
1744 switch (instr->op) {
1745 case nir_texop_txf:
1746 case nir_texop_txf_ms:
1747 case nir_texop_samples_identical:
1748 name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
1749 instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
1750 "llvm.SI.image.load.mip";
1751 is_shadow = false;
1752 has_offset = false;
1753 break;
1754 case nir_texop_txb:
1755 infix = ".b";
1756 break;
1757 case nir_texop_txl:
1758 infix = ".l";
1759 break;
1760 case nir_texop_txs:
1761 name = "llvm.SI.getresinfo";
1762 break;
1763 case nir_texop_query_levels:
1764 name = "llvm.SI.getresinfo";
1765 break;
1766 case nir_texop_tex:
1767 if (ctx->stage != MESA_SHADER_FRAGMENT)
1768 infix = ".lz";
1769 break;
1770 case nir_texop_txd:
1771 infix = ".d";
1772 break;
1773 case nir_texop_tg4:
1774 name = "llvm.SI.gather4";
1775 infix = ".lz";
1776 break;
1777 case nir_texop_lod:
1778 name = "llvm.SI.getlod";
1779 is_shadow = false;
1780 has_offset = false;
1781 break;
1782 default:
1783 break;
1784 }
1785
1786 build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
1787 sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
1788 has_offset ? ".o" : "", type);
1789
1790 if (instr->op == nir_texop_tg4) {
1791 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1792 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1793 return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1794 (int)has_offset + (int)is_shadow);
1795 }
1796 }
1797 return emit_llvm_intrinsic(ctx, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1798 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1799
1800 }
1801
1802 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
1803 nir_intrinsic_instr *instr)
1804 {
1805 LLVMValueRef index = get_src(ctx, instr->src[0]);
1806 unsigned desc_set = nir_intrinsic_desc_set(instr);
1807 unsigned binding = nir_intrinsic_binding(instr);
1808 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
1809 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
1810 unsigned base_offset = layout->binding[binding].offset;
1811 LLVMValueRef offset, stride;
1812
1813 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1814 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
1815 desc_ptr = ctx->push_constants;
1816 base_offset = ctx->options->layout->push_constant_size;
1817 base_offset += 16 * layout->binding[binding].dynamic_offset_offset;
1818 stride = LLVMConstInt(ctx->i32, 16, false);
1819 } else
1820 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
1821
1822 offset = LLVMConstInt(ctx->i32, base_offset, false);
1823 index = LLVMBuildMul(ctx->builder, index, stride, "");
1824 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
1825
1826 LLVMValueRef indices[] = {ctx->i32zero, offset};
1827 desc_ptr = LLVMBuildGEP(ctx->builder, desc_ptr, indices, 2, "");
1828 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
1829 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
1830
1831 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
1832 }
1833
1834 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1835 nir_intrinsic_instr *instr)
1836 {
1837 LLVMValueRef ptr;
1838
1839 LLVMValueRef indices[] = {ctx->i32zero, get_src(ctx, instr->src[0])};
1840 ptr = LLVMBuildGEP(ctx->builder, ctx->push_constants, indices, 2, "");
1841 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1842
1843 return LLVMBuildLoad(ctx->builder, ptr, "");
1844 }
1845
1846 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1847 nir_intrinsic_instr *instr)
1848 {
1849 LLVMValueRef desc = get_src(ctx, instr->src[0]);
1850
1851 return get_buffer_size(ctx, desc, false);
1852 }
1853 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
1854 nir_intrinsic_instr *instr)
1855 {
1856 const char *store_name;
1857 LLVMTypeRef data_type = ctx->f32;
1858 unsigned writemask = nir_intrinsic_write_mask(instr);
1859 LLVMValueRef base_data, base_offset;
1860 LLVMValueRef params[6];
1861
1862 if (ctx->stage == MESA_SHADER_FRAGMENT)
1863 ctx->shader_info->fs.writes_memory = true;
1864
1865 params[1] = get_src(ctx, instr->src[1]);
1866 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1867 params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */
1868 params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1869
1870 if (instr->num_components > 1)
1871 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1872
1873 base_data = to_float(ctx, get_src(ctx, instr->src[0]));
1874 base_data = trim_vector(ctx, base_data, instr->num_components);
1875 base_data = LLVMBuildBitCast(ctx->builder, base_data,
1876 data_type, "");
1877 base_offset = get_src(ctx, instr->src[2]); /* voffset */
1878 while (writemask) {
1879 int start, count;
1880 LLVMValueRef data;
1881 LLVMValueRef offset;
1882 LLVMValueRef tmp;
1883 u_bit_scan_consecutive_range(&writemask, &start, &count);
1884
1885 /* Due to an LLVM limitation, split 3-element writes
1886 * into a 2-element and a 1-element write. */
1887 if (count == 3) {
1888 writemask |= 1 << (start + 2);
1889 count = 2;
1890 }
1891
1892 if (count == 4) {
1893 store_name = "llvm.amdgcn.buffer.store.v4f32";
1894 data = base_data;
1895 } else if (count == 2) {
1896 tmp = LLVMBuildExtractElement(ctx->builder,
1897 base_data, LLVMConstInt(ctx->i32, start, false), "");
1898 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
1899 ctx->i32zero, "");
1900
1901 tmp = LLVMBuildExtractElement(ctx->builder,
1902 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
1903 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
1904 ctx->i32one, "");
1905 store_name = "llvm.amdgcn.buffer.store.v2f32";
1906
1907 } else {
1908 assert(count == 1);
1909 if (get_llvm_num_components(base_data) > 1)
1910 data = LLVMBuildExtractElement(ctx->builder, base_data,
1911 LLVMConstInt(ctx->i32, start, false), "");
1912 else
1913 data = base_data;
1914 store_name = "llvm.amdgcn.buffer.store.f32";
1915 }
1916
1917 offset = base_offset;
1918 if (start != 0) {
1919 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
1920 }
1921 params[0] = data;
1922 params[3] = offset;
1923 emit_llvm_intrinsic(ctx, store_name,
1924 LLVMVoidTypeInContext(ctx->context), params, 6, 0);
1925 }
1926 }
1927
1928 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1929 nir_intrinsic_instr *instr)
1930 {
1931 const char *name;
1932 LLVMValueRef params[5];
1933 int arg_count = 0;
1934 if (ctx->stage == MESA_SHADER_FRAGMENT)
1935 ctx->shader_info->fs.writes_memory = true;
1936
1937 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1938 params[arg_count++] = get_src(ctx, instr->src[3]);
1939 }
1940 params[arg_count++] = get_src(ctx, instr->src[2]);
1941 params[arg_count++] = get_src(ctx, instr->src[0]);
1942 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1943 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1944 params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1945
1946 switch (instr->intrinsic) {
1947 case nir_intrinsic_ssbo_atomic_add:
1948 name = "llvm.amdgcn.buffer.atomic.add";
1949 break;
1950 case nir_intrinsic_ssbo_atomic_imin:
1951 name = "llvm.amdgcn.buffer.atomic.smin";
1952 break;
1953 case nir_intrinsic_ssbo_atomic_umin:
1954 name = "llvm.amdgcn.buffer.atomic.umin";
1955 break;
1956 case nir_intrinsic_ssbo_atomic_imax:
1957 name = "llvm.amdgcn.buffer.atomic.smax";
1958 break;
1959 case nir_intrinsic_ssbo_atomic_umax:
1960 name = "llvm.amdgcn.buffer.atomic.umax";
1961 break;
1962 case nir_intrinsic_ssbo_atomic_and:
1963 name = "llvm.amdgcn.buffer.atomic.and";
1964 break;
1965 case nir_intrinsic_ssbo_atomic_or:
1966 name = "llvm.amdgcn.buffer.atomic.or";
1967 break;
1968 case nir_intrinsic_ssbo_atomic_xor:
1969 name = "llvm.amdgcn.buffer.atomic.xor";
1970 break;
1971 case nir_intrinsic_ssbo_atomic_exchange:
1972 name = "llvm.amdgcn.buffer.atomic.swap";
1973 break;
1974 case nir_intrinsic_ssbo_atomic_comp_swap:
1975 name = "llvm.amdgcn.buffer.atomic.cmpswap";
1976 break;
1977 default:
1978 abort();
1979 }
1980
1981 return emit_llvm_intrinsic(ctx, name, ctx->i32, params, arg_count, 0);
1982 }
1983
1984 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
1985 nir_intrinsic_instr *instr)
1986 {
1987 const char *load_name;
1988 LLVMTypeRef data_type = ctx->f32;
1989 if (instr->num_components == 3)
1990 data_type = LLVMVectorType(ctx->f32, 4);
1991 else if (instr->num_components > 1)
1992 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1993
1994 if (instr->num_components == 4 || instr->num_components == 3)
1995 load_name = "llvm.amdgcn.buffer.load.v4f32";
1996 else if (instr->num_components == 2)
1997 load_name = "llvm.amdgcn.buffer.load.v2f32";
1998 else if (instr->num_components == 1)
1999 load_name = "llvm.amdgcn.buffer.load.f32";
2000 else
2001 abort();
2002
2003 LLVMValueRef params[] = {
2004 get_src(ctx, instr->src[0]),
2005 LLVMConstInt(ctx->i32, 0, false),
2006 get_src(ctx, instr->src[1]),
2007 LLVMConstInt(ctx->i1, 0, false),
2008 LLVMConstInt(ctx->i1, 0, false),
2009 };
2010
2011 LLVMValueRef ret =
2012 emit_llvm_intrinsic(ctx, load_name, data_type, params, 5, 0);
2013
2014 if (instr->num_components == 3)
2015 ret = trim_vector(ctx, ret, 3);
2016
2017 return LLVMBuildBitCast(ctx->builder, ret,
2018 get_def_type(ctx, &instr->dest.ssa), "");
2019 }
2020
2021 static void
2022 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
2023 bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
2024 {
2025 unsigned const_offset = 0;
2026 LLVMValueRef offset = NULL;
2027
2028
2029 while (tail->child != NULL) {
2030 const struct glsl_type *parent_type = tail->type;
2031 tail = tail->child;
2032
2033 if (tail->deref_type == nir_deref_type_array) {
2034 nir_deref_array *deref_array = nir_deref_as_array(tail);
2035 LLVMValueRef index, stride, local_offset;
2036 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2037
2038 const_offset += size * deref_array->base_offset;
2039 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2040 continue;
2041
2042 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2043 index = get_src(ctx, deref_array->indirect);
2044 stride = LLVMConstInt(ctx->i32, size, 0);
2045 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2046
2047 if (offset)
2048 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2049 else
2050 offset = local_offset;
2051 } else if (tail->deref_type == nir_deref_type_struct) {
2052 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2053
2054 for (unsigned i = 0; i < deref_struct->index; i++) {
2055 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2056 const_offset += glsl_count_attribute_slots(ft, vs_in);
2057 }
2058 } else
2059 unreachable("unsupported deref type");
2060
2061 }
2062
2063 if (const_offset && offset)
2064 offset = LLVMBuildAdd(ctx->builder, offset,
2065 LLVMConstInt(ctx->i32, const_offset, 0),
2066 "");
2067
2068 *const_out = const_offset;
2069 *indir_out = offset;
2070 }
2071
2072 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2073 nir_intrinsic_instr *instr)
2074 {
2075 LLVMValueRef values[4];
2076 int idx = instr->variables[0]->var->data.driver_location;
2077 int ve = instr->dest.ssa.num_components;
2078 LLVMValueRef indir_index;
2079 unsigned const_index;
2080 switch (instr->variables[0]->var->data.mode) {
2081 case nir_var_shader_in:
2082 radv_get_deref_offset(ctx, &instr->variables[0]->deref,
2083 ctx->stage == MESA_SHADER_VERTEX,
2084 &const_index, &indir_index);
2085 for (unsigned chan = 0; chan < ve; chan++) {
2086 if (indir_index) {
2087 unsigned count = glsl_count_attribute_slots(
2088 instr->variables[0]->var->type,
2089 ctx->stage == MESA_SHADER_VERTEX);
2090 LLVMValueRef tmp_vec = build_gather_values_extended(
2091 ctx, ctx->inputs + idx + chan, count,
2092 4, false);
2093
2094 values[chan] = LLVMBuildExtractElement(ctx->builder,
2095 tmp_vec,
2096 indir_index, "");
2097 } else
2098 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2099 }
2100 return to_integer(ctx, build_gather_values(ctx, values, ve));
2101 break;
2102 case nir_var_local:
2103 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2104 &const_index, &indir_index);
2105 for (unsigned chan = 0; chan < ve; chan++) {
2106 if (indir_index) {
2107 unsigned count = glsl_count_attribute_slots(
2108 instr->variables[0]->var->type, false);
2109 LLVMValueRef tmp_vec = build_gather_values_extended(
2110 ctx, ctx->locals + idx + chan, count,
2111 4, true);
2112
2113 values[chan] = LLVMBuildExtractElement(ctx->builder,
2114 tmp_vec,
2115 indir_index, "");
2116 } else {
2117 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2118 }
2119 }
2120 return to_integer(ctx, build_gather_values(ctx, values, ve));
2121 case nir_var_shader_out:
2122 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2123 &const_index, &indir_index);
2124 for (unsigned chan = 0; chan < ve; chan++) {
2125 if (indir_index) {
2126 unsigned count = glsl_count_attribute_slots(
2127 instr->variables[0]->var->type, false);
2128 LLVMValueRef tmp_vec = build_gather_values_extended(
2129 ctx, ctx->outputs + idx + chan, count,
2130 4, true);
2131
2132 values[chan] = LLVMBuildExtractElement(ctx->builder,
2133 tmp_vec,
2134 indir_index, "");
2135 } else {
2136 values[chan] = LLVMBuildLoad(ctx->builder,
2137 ctx->outputs[idx + chan + const_index * 4],
2138 "");
2139 }
2140 }
2141 return to_integer(ctx, build_gather_values(ctx, values, ve));
2142 case nir_var_shared: {
2143 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2144 &const_index, &indir_index);
2145 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2146 LLVMValueRef derived_ptr;
2147 LLVMValueRef index = ctx->i32zero;
2148 if (indir_index)
2149 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2150 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2151
2152 return to_integer(ctx, LLVMBuildLoad(ctx->builder, derived_ptr, ""));
2153 break;
2154 }
2155 default:
2156 break;
2157 }
2158 return NULL;
2159 }
2160
2161 static void
2162 visit_store_var(struct nir_to_llvm_context *ctx,
2163 nir_intrinsic_instr *instr)
2164 {
2165 LLVMValueRef temp_ptr, value;
2166 int idx = instr->variables[0]->var->data.driver_location;
2167 LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2168 int writemask = instr->const_index[0];
2169 LLVMValueRef indir_index;
2170 unsigned const_index;
2171 switch (instr->variables[0]->var->data.mode) {
2172 case nir_var_shader_out:
2173 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2174 &const_index, &indir_index);
2175 for (unsigned chan = 0; chan < 4; chan++) {
2176 int stride = 4;
2177 if (!(writemask & (1 << chan)))
2178 continue;
2179 if (get_llvm_num_components(src) == 1)
2180 value = src;
2181 else
2182 value = LLVMBuildExtractElement(ctx->builder, src,
2183 LLVMConstInt(ctx->i32,
2184 chan, false),
2185 "");
2186
2187 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2188 instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
2189 stride = 1;
2190 if (indir_index) {
2191 unsigned count = glsl_count_attribute_slots(
2192 instr->variables[0]->var->type, false);
2193 LLVMValueRef tmp_vec = build_gather_values_extended(
2194 ctx, ctx->outputs + idx + chan, count,
2195 stride, true);
2196
2197 if (get_llvm_num_components(tmp_vec) > 1) {
2198 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2199 value, indir_index, "");
2200 } else
2201 tmp_vec = value;
2202 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2203 count, stride, tmp_vec);
2204
2205 } else {
2206 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2207
2208 LLVMBuildStore(ctx->builder, value, temp_ptr);
2209 }
2210 }
2211 break;
2212 case nir_var_local:
2213 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2214 &const_index, &indir_index);
2215 for (unsigned chan = 0; chan < 4; chan++) {
2216 if (!(writemask & (1 << chan)))
2217 continue;
2218
2219 if (get_llvm_num_components(src) == 1)
2220 value = src;
2221 else
2222 value = LLVMBuildExtractElement(ctx->builder, src,
2223 LLVMConstInt(ctx->i32, chan, false), "");
2224 if (indir_index) {
2225 unsigned count = glsl_count_attribute_slots(
2226 instr->variables[0]->var->type, false);
2227 LLVMValueRef tmp_vec = build_gather_values_extended(
2228 ctx, ctx->locals + idx + chan, count,
2229 4, true);
2230
2231 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2232 value, indir_index, "");
2233 build_store_values_extended(ctx, ctx->locals + idx + chan,
2234 count, 4, tmp_vec);
2235 } else {
2236 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2237
2238 LLVMBuildStore(ctx->builder, value, temp_ptr);
2239 }
2240 }
2241 break;
2242 case nir_var_shared: {
2243 LLVMValueRef ptr;
2244 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2245 &const_index, &indir_index);
2246
2247 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2248 LLVMValueRef index = ctx->i32zero;
2249 LLVMValueRef derived_ptr;
2250
2251 if (indir_index)
2252 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2253 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2254 LLVMBuildStore(ctx->builder,
2255 to_integer(ctx, src), derived_ptr);
2256 break;
2257 }
2258 default:
2259 break;
2260 }
2261 }
2262
2263 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2264 {
2265 switch (dim) {
2266 case GLSL_SAMPLER_DIM_BUF:
2267 return 1;
2268 case GLSL_SAMPLER_DIM_1D:
2269 return array ? 2 : 1;
2270 case GLSL_SAMPLER_DIM_2D:
2271 return array ? 3 : 2;
2272 case GLSL_SAMPLER_DIM_3D:
2273 case GLSL_SAMPLER_DIM_CUBE:
2274 return 3;
2275 case GLSL_SAMPLER_DIM_RECT:
2276 case GLSL_SAMPLER_DIM_SUBPASS:
2277 return 2;
2278 default:
2279 break;
2280 }
2281 return 0;
2282 }
2283
2284 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
2285 nir_intrinsic_instr *instr, bool add_frag_pos)
2286 {
2287 const struct glsl_type *type = instr->variables[0]->var->type;
2288 if(instr->variables[0]->deref.child)
2289 type = instr->variables[0]->deref.child->type;
2290
2291 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2292 LLVMValueRef coords[4];
2293 LLVMValueRef masks[] = {
2294 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2295 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2296 };
2297 LLVMValueRef res;
2298 int count;
2299 count = image_type_to_components_count(glsl_get_sampler_dim(type),
2300 glsl_sampler_type_is_array(type));
2301
2302 if (count == 1) {
2303 if (instr->src[0].ssa->num_components)
2304 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
2305 else
2306 res = src0;
2307 } else {
2308 int chan;
2309 for (chan = 0; chan < count; ++chan) {
2310 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
2311 }
2312
2313 if (add_frag_pos) {
2314 for (chan = 0; chan < count; ++chan)
2315 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
2316 }
2317 if (count == 3) {
2318 coords[3] = LLVMGetUndef(ctx->i32);
2319 count = 4;
2320 }
2321 res = build_gather_values(ctx, coords, count);
2322 }
2323 return res;
2324 }
2325
2326 static void build_type_name_for_intr(
2327 LLVMTypeRef type,
2328 char *buf, unsigned bufsize)
2329 {
2330 LLVMTypeRef elem_type = type;
2331
2332 assert(bufsize >= 8);
2333
2334 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
2335 int ret = snprintf(buf, bufsize, "v%u",
2336 LLVMGetVectorSize(type));
2337 if (ret < 0) {
2338 char *type_name = LLVMPrintTypeToString(type);
2339 fprintf(stderr, "Error building type name for: %s\n",
2340 type_name);
2341 return;
2342 }
2343 elem_type = LLVMGetElementType(type);
2344 buf += ret;
2345 bufsize -= ret;
2346 }
2347 switch (LLVMGetTypeKind(elem_type)) {
2348 default: break;
2349 case LLVMIntegerTypeKind:
2350 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
2351 break;
2352 case LLVMFloatTypeKind:
2353 snprintf(buf, bufsize, "f32");
2354 break;
2355 case LLVMDoubleTypeKind:
2356 snprintf(buf, bufsize, "f64");
2357 break;
2358 }
2359 }
2360
2361 static void get_image_intr_name(const char *base_name,
2362 LLVMTypeRef data_type,
2363 LLVMTypeRef coords_type,
2364 LLVMTypeRef rsrc_type,
2365 char *out_name, unsigned out_len)
2366 {
2367 char coords_type_name[8];
2368
2369 build_type_name_for_intr(coords_type, coords_type_name,
2370 sizeof(coords_type_name));
2371
2372 if (HAVE_LLVM <= 0x0309) {
2373 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
2374 } else {
2375 char data_type_name[8];
2376 char rsrc_type_name[8];
2377
2378 build_type_name_for_intr(data_type, data_type_name,
2379 sizeof(data_type_name));
2380 build_type_name_for_intr(rsrc_type, rsrc_type_name,
2381 sizeof(rsrc_type_name));
2382 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
2383 data_type_name, coords_type_name, rsrc_type_name);
2384 }
2385 }
2386
2387 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
2388 nir_intrinsic_instr *instr)
2389 {
2390 LLVMValueRef params[7];
2391 LLVMValueRef res;
2392 char intrinsic_name[64];
2393 const nir_variable *var = instr->variables[0]->var;
2394 const struct glsl_type *type = var->type;
2395 if(instr->variables[0]->deref.child)
2396 type = instr->variables[0]->deref.child->type;
2397
2398 type = glsl_without_array(type);
2399 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2400 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2401 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2402 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2403 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2404 params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */
2405 params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */
2406 res = emit_llvm_intrinsic(ctx, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
2407 params, 5, 0);
2408
2409 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
2410 res = to_integer(ctx, res);
2411 } else {
2412 bool is_da = glsl_sampler_type_is_array(type) ||
2413 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2414 bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
2415 LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
2416 LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
2417 LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
2418
2419 params[0] = get_image_coords(ctx, instr, add_frag_pos);
2420 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2421 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2422 if (HAVE_LLVM <= 0x0309) {
2423 params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */
2424 params[4] = da;
2425 params[5] = glc;
2426 params[6] = slc;
2427 } else {
2428 LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
2429 params[3] = glc;
2430 params[4] = slc;
2431 params[5] = lwe;
2432 params[6] = da;
2433 }
2434
2435 get_image_intr_name("llvm.amdgcn.image.load",
2436 ctx->v4f32, /* vdata */
2437 LLVMTypeOf(params[0]), /* coords */
2438 LLVMTypeOf(params[1]), /* rsrc */
2439 intrinsic_name, sizeof(intrinsic_name));
2440
2441 res = emit_llvm_intrinsic(ctx, intrinsic_name, ctx->v4f32,
2442 params, 7, AC_FUNC_ATTR_READONLY);
2443 }
2444 return to_integer(ctx, res);
2445 }
2446
2447 static void visit_image_store(struct nir_to_llvm_context *ctx,
2448 nir_intrinsic_instr *instr)
2449 {
2450 LLVMValueRef params[8];
2451 char intrinsic_name[64];
2452 const nir_variable *var = instr->variables[0]->var;
2453 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2454 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2455 const struct glsl_type *type = glsl_without_array(var->type);
2456
2457 if (ctx->stage == MESA_SHADER_FRAGMENT)
2458 ctx->shader_info->fs.writes_memory = true;
2459
2460 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2461 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
2462 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2463 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2464 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2465 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2466 params[4] = i1false; /* glc */
2467 params[5] = i1false; /* slc */
2468 emit_llvm_intrinsic(ctx, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
2469 params, 6, 0);
2470 } else {
2471 bool is_da = glsl_sampler_type_is_array(type) ||
2472 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2473 LLVMValueRef da = is_da ? i1true : i1false;
2474 LLVMValueRef glc = i1false;
2475 LLVMValueRef slc = i1false;
2476
2477 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
2478 params[1] = get_image_coords(ctx, instr, false); /* coords */
2479 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2480 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2481 if (HAVE_LLVM <= 0x0309) {
2482 params[4] = i1false; /* r128 */
2483 params[5] = da;
2484 params[6] = glc;
2485 params[7] = slc;
2486 } else {
2487 LLVMValueRef lwe = i1false;
2488 params[4] = glc;
2489 params[5] = slc;
2490 params[6] = lwe;
2491 params[7] = da;
2492 }
2493
2494 get_image_intr_name("llvm.amdgcn.image.store",
2495 LLVMTypeOf(params[0]), /* vdata */
2496 LLVMTypeOf(params[1]), /* coords */
2497 LLVMTypeOf(params[2]), /* rsrc */
2498 intrinsic_name, sizeof(intrinsic_name));
2499
2500 emit_llvm_intrinsic(ctx, intrinsic_name, ctx->voidt,
2501 params, 8, 0);
2502 }
2503
2504 }
2505
2506 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
2507 nir_intrinsic_instr *instr)
2508 {
2509 LLVMValueRef params[6];
2510 int param_count = 0;
2511 const nir_variable *var = instr->variables[0]->var;
2512 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2513 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2514 const char *base_name = "llvm.amdgcn.image.atomic";
2515 const char *atomic_name;
2516 LLVMValueRef coords;
2517 char intrinsic_name[32], coords_type[8];
2518 const struct glsl_type *type = glsl_without_array(var->type);
2519
2520 if (ctx->stage == MESA_SHADER_FRAGMENT)
2521 ctx->shader_info->fs.writes_memory = true;
2522
2523 params[param_count++] = get_src(ctx, instr->src[2]);
2524 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
2525 params[param_count++] = get_src(ctx, instr->src[3]);
2526
2527 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2528 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2529 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2530 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2531 params[param_count++] = ctx->i32zero; /* voffset */
2532 params[param_count++] = i1false; /* glc */
2533 params[param_count++] = i1false; /* slc */
2534 } else {
2535 bool da = glsl_sampler_type_is_array(type) ||
2536 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2537
2538 coords = params[param_count++] = get_image_coords(ctx, instr, false);
2539 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2540 params[param_count++] = i1false; /* r128 */
2541 params[param_count++] = da ? i1true : i1false; /* da */
2542 params[param_count++] = i1false; /* slc */
2543 }
2544
2545 switch (instr->intrinsic) {
2546 case nir_intrinsic_image_atomic_add:
2547 atomic_name = "add";
2548 break;
2549 case nir_intrinsic_image_atomic_min:
2550 atomic_name = "smin";
2551 break;
2552 case nir_intrinsic_image_atomic_max:
2553 atomic_name = "smax";
2554 break;
2555 case nir_intrinsic_image_atomic_and:
2556 atomic_name = "and";
2557 break;
2558 case nir_intrinsic_image_atomic_or:
2559 atomic_name = "or";
2560 break;
2561 case nir_intrinsic_image_atomic_xor:
2562 atomic_name = "xor";
2563 break;
2564 case nir_intrinsic_image_atomic_exchange:
2565 atomic_name = "swap";
2566 break;
2567 case nir_intrinsic_image_atomic_comp_swap:
2568 atomic_name = "cmpswap";
2569 break;
2570 default:
2571 abort();
2572 }
2573 build_int_type_name(LLVMTypeOf(coords),
2574 coords_type, sizeof(coords_type));
2575
2576 snprintf(intrinsic_name, sizeof(intrinsic_name),
2577 "%s.%s.%s", base_name, atomic_name, coords_type);
2578 return emit_llvm_intrinsic(ctx, intrinsic_name, ctx->i32, params, param_count, 0);
2579 }
2580
2581 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
2582 nir_intrinsic_instr *instr)
2583 {
2584 LLVMValueRef res;
2585 LLVMValueRef params[10];
2586 const nir_variable *var = instr->variables[0]->var;
2587 const struct glsl_type *type = instr->variables[0]->var->type;
2588 bool da = glsl_sampler_type_is_array(var->type) ||
2589 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
2590 if(instr->variables[0]->deref.child)
2591 type = instr->variables[0]->deref.child->type;
2592
2593 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
2594 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
2595 params[0] = ctx->i32zero;
2596 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2597 params[2] = LLVMConstInt(ctx->i32, 15, false);
2598 params[3] = ctx->i32zero;
2599 params[4] = ctx->i32zero;
2600 params[5] = da ? ctx->i32one : ctx->i32zero;
2601 params[6] = ctx->i32zero;
2602 params[7] = ctx->i32zero;
2603 params[8] = ctx->i32zero;
2604 params[9] = ctx->i32zero;
2605
2606 res = emit_llvm_intrinsic(ctx, "llvm.SI.getresinfo.i32", ctx->v4i32,
2607 params, 10, AC_FUNC_ATTR_READNONE);
2608
2609 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
2610 glsl_sampler_type_is_array(type)) {
2611 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
2612 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
2613 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
2614 z = LLVMBuildSDiv(ctx->builder, z, six, "");
2615 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
2616 }
2617 return res;
2618 }
2619
2620 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2621 {
2622 LLVMValueRef args[1] = {
2623 LLVMConstInt(ctx->i32, 0xf70, false),
2624 };
2625 emit_llvm_intrinsic(ctx, "llvm.amdgcn.s.waitcnt",
2626 ctx->voidt, args, 1, 0);
2627 }
2628
2629 static void emit_barrier(struct nir_to_llvm_context *ctx)
2630 {
2631 // TODO tess
2632 emit_llvm_intrinsic(ctx, "llvm.amdgcn.s.barrier",
2633 ctx->voidt, NULL, 0, 0);
2634 }
2635
2636 static void emit_discard_if(struct nir_to_llvm_context *ctx,
2637 nir_intrinsic_instr *instr)
2638 {
2639 LLVMValueRef cond;
2640 ctx->shader_info->fs.can_discard = true;
2641
2642 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2643 get_src(ctx, instr->src[0]),
2644 ctx->i32zero, "");
2645
2646 cond = LLVMBuildSelect(ctx->builder, cond,
2647 LLVMConstReal(ctx->f32, -1.0f),
2648 ctx->f32zero, "");
2649 emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kill",
2650 LLVMVoidTypeInContext(ctx->context),
2651 &cond, 1, 0);
2652 }
2653
2654 static LLVMValueRef
2655 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2656 {
2657 LLVMValueRef result;
2658 LLVMValueRef thread_id = get_thread_id(ctx);
2659 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2660 LLVMConstInt(ctx->i32, 0xfc0, false), "");
2661
2662 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
2663 }
2664
2665 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2666 nir_intrinsic_instr *instr)
2667 {
2668 LLVMValueRef ptr, result;
2669 int idx = instr->variables[0]->var->data.driver_location;
2670 LLVMValueRef src = get_src(ctx, instr->src[0]);
2671 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2672
2673 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
2674 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2675 result = LLVMBuildAtomicCmpXchg(ctx->builder,
2676 ptr, src, src1,
2677 LLVMAtomicOrderingSequentiallyConsistent,
2678 LLVMAtomicOrderingSequentiallyConsistent,
2679 false);
2680 } else {
2681 LLVMAtomicRMWBinOp op;
2682 switch (instr->intrinsic) {
2683 case nir_intrinsic_var_atomic_add:
2684 op = LLVMAtomicRMWBinOpAdd;
2685 break;
2686 case nir_intrinsic_var_atomic_umin:
2687 op = LLVMAtomicRMWBinOpUMin;
2688 break;
2689 case nir_intrinsic_var_atomic_umax:
2690 op = LLVMAtomicRMWBinOpUMax;
2691 break;
2692 case nir_intrinsic_var_atomic_imin:
2693 op = LLVMAtomicRMWBinOpMin;
2694 break;
2695 case nir_intrinsic_var_atomic_imax:
2696 op = LLVMAtomicRMWBinOpMax;
2697 break;
2698 case nir_intrinsic_var_atomic_and:
2699 op = LLVMAtomicRMWBinOpAnd;
2700 break;
2701 case nir_intrinsic_var_atomic_or:
2702 op = LLVMAtomicRMWBinOpOr;
2703 break;
2704 case nir_intrinsic_var_atomic_xor:
2705 op = LLVMAtomicRMWBinOpXor;
2706 break;
2707 case nir_intrinsic_var_atomic_exchange:
2708 op = LLVMAtomicRMWBinOpXchg;
2709 break;
2710 default:
2711 return NULL;
2712 }
2713
2714 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2715 LLVMAtomicOrderingSequentiallyConsistent,
2716 false);
2717 }
2718 return result;
2719 }
2720
/* Interpolation locations, used as the `location` argument of
 * lookup_interp_param(). */
#define INTERP_CENTER 0
#define INTERP_CENTROID 1
#define INTERP_SAMPLE 2
2724
2725 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2726 enum glsl_interp_mode interp, unsigned location)
2727 {
2728 switch (interp) {
2729 case INTERP_MODE_FLAT:
2730 default:
2731 return NULL;
2732 case INTERP_MODE_SMOOTH:
2733 case INTERP_MODE_NONE:
2734 if (location == INTERP_CENTER)
2735 return ctx->persp_center;
2736 else if (location == INTERP_CENTROID)
2737 return ctx->persp_centroid;
2738 else if (location == INTERP_SAMPLE)
2739 return ctx->persp_sample;
2740 break;
2741 case INTERP_MODE_NOPERSPECTIVE:
2742 if (location == INTERP_CENTER)
2743 return ctx->linear_center;
2744 else if (location == INTERP_CENTROID)
2745 return ctx->linear_centroid;
2746 else if (location == INTERP_SAMPLE)
2747 return ctx->linear_sample;
2748 break;
2749 }
2750 return NULL;
2751 }
2752
2753 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2754 LLVMValueRef sample_id)
2755 {
2756 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
2757 LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2758 LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2759 LLVMValueRef result[2];
2760
2761 result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2762 result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2763
2764 return build_gather_values(ctx, result, 2);
2765 }
2766
2767 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2768 {
2769 LLVMValueRef values[2];
2770
2771 values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2772 values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2773 return build_gather_values(ctx, values, 2);
2774 }
2775
2776 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2777 nir_intrinsic_instr *instr)
2778 {
2779 LLVMValueRef result[2];
2780 LLVMValueRef interp_param, attr_number;
2781 unsigned location;
2782 unsigned chan;
2783 LLVMValueRef src_c0, src_c1;
2784 const char *intr_name;
2785 LLVMValueRef src0;
2786 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2787 switch (instr->intrinsic) {
2788 case nir_intrinsic_interp_var_at_centroid:
2789 location = INTERP_CENTROID;
2790 break;
2791 case nir_intrinsic_interp_var_at_sample:
2792 case nir_intrinsic_interp_var_at_offset:
2793 location = INTERP_SAMPLE;
2794 src0 = get_src(ctx, instr->src[0]);
2795 break;
2796 default:
2797 break;
2798 }
2799
2800 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2801 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2802 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
2803 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2804 LLVMValueRef sample_position;
2805 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2806
2807 /* fetch sample ID */
2808 sample_position = load_sample_position(ctx, src0);
2809
2810 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2811 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2812 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2813 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2814 }
2815 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2816 attr_number = LLVMConstInt(ctx->i32, input_index, false);
2817
2818 if (location == INTERP_SAMPLE) {
2819 LLVMValueRef ij_out[2];
2820 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2821
2822 /*
2823 * take the I then J parameters, and the DDX/Y for it, and
2824 * calculate the IJ inputs for the interpolator.
2825 * temp1 = ddx * offset/sample.x + I;
2826 * interp_param.I = ddy * offset/sample.y + temp1;
2827 * temp1 = ddx * offset/sample.x + J;
2828 * interp_param.J = ddy * offset/sample.y + temp1;
2829 */
2830 for (unsigned i = 0; i < 2; i++) {
2831 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2832 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2833 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2834 ddxy_out, ix_ll, "");
2835 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2836 ddxy_out, iy_ll, "");
2837 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2838 interp_param, ix_ll, "");
2839 LLVMValueRef temp1, temp2;
2840
2841 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2842 ctx->f32, "");
2843
2844 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2845 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2846
2847 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2848 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
2849
2850 ij_out[i] = LLVMBuildBitCast(ctx->builder,
2851 temp2, ctx->i32, "");
2852 }
2853 interp_param = build_gather_values(ctx, ij_out, 2);
2854
2855 }
2856 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
2857 for (chan = 0; chan < 2; chan++) {
2858 LLVMValueRef args[4];
2859 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2860
2861 args[0] = llvm_chan;
2862 args[1] = attr_number;
2863 args[2] = ctx->prim_mask;
2864 args[3] = interp_param;
2865 result[chan] = emit_llvm_intrinsic(ctx, intr_name,
2866 ctx->f32, args, args[3] ? 4 : 3,
2867 AC_FUNC_ATTR_READNONE);
2868 }
2869 return build_gather_values(ctx, result, 2);
2870 }
2871
2872 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
2873 nir_intrinsic_instr *instr)
2874 {
2875 LLVMValueRef result = NULL;
2876
2877 switch (instr->intrinsic) {
2878 case nir_intrinsic_load_work_group_id: {
2879 result = ctx->workgroup_ids;
2880 break;
2881 }
2882 case nir_intrinsic_load_base_vertex: {
2883 result = ctx->base_vertex;
2884 break;
2885 }
2886 case nir_intrinsic_load_vertex_id_zero_base: {
2887 result = ctx->vertex_id;
2888 break;
2889 }
2890 case nir_intrinsic_load_local_invocation_id: {
2891 result = ctx->local_invocation_ids;
2892 break;
2893 }
2894 case nir_intrinsic_load_base_instance:
2895 result = ctx->start_instance;
2896 break;
2897 case nir_intrinsic_load_sample_id:
2898 result = ctx->ancillary;
2899 break;
2900 case nir_intrinsic_load_sample_pos:
2901 result = load_sample_pos(ctx);
2902 break;
2903 case nir_intrinsic_load_front_face:
2904 result = ctx->front_face;
2905 break;
2906 case nir_intrinsic_load_instance_id:
2907 result = ctx->instance_id;
2908 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
2909 ctx->shader_info->vs.vgpr_comp_cnt);
2910 break;
2911 case nir_intrinsic_load_num_work_groups:
2912 result = ctx->num_work_groups;
2913 break;
2914 case nir_intrinsic_load_local_invocation_index:
2915 result = visit_load_local_invocation_index(ctx);
2916 break;
2917 case nir_intrinsic_load_push_constant:
2918 result = visit_load_push_constant(ctx, instr);
2919 break;
2920 case nir_intrinsic_vulkan_resource_index:
2921 result = visit_vulkan_resource_index(ctx, instr);
2922 break;
2923 case nir_intrinsic_store_ssbo:
2924 visit_store_ssbo(ctx, instr);
2925 break;
2926 case nir_intrinsic_load_ssbo:
2927 result = visit_load_buffer(ctx, instr);
2928 break;
2929 case nir_intrinsic_ssbo_atomic_add:
2930 case nir_intrinsic_ssbo_atomic_imin:
2931 case nir_intrinsic_ssbo_atomic_umin:
2932 case nir_intrinsic_ssbo_atomic_imax:
2933 case nir_intrinsic_ssbo_atomic_umax:
2934 case nir_intrinsic_ssbo_atomic_and:
2935 case nir_intrinsic_ssbo_atomic_or:
2936 case nir_intrinsic_ssbo_atomic_xor:
2937 case nir_intrinsic_ssbo_atomic_exchange:
2938 case nir_intrinsic_ssbo_atomic_comp_swap:
2939 result = visit_atomic_ssbo(ctx, instr);
2940 break;
2941 case nir_intrinsic_load_ubo:
2942 result = visit_load_buffer(ctx, instr);
2943 break;
2944 case nir_intrinsic_get_buffer_size:
2945 result = visit_get_buffer_size(ctx, instr);
2946 break;
2947 case nir_intrinsic_load_var:
2948 result = visit_load_var(ctx, instr);
2949 break;
2950 case nir_intrinsic_store_var:
2951 visit_store_var(ctx, instr);
2952 break;
2953 case nir_intrinsic_image_load:
2954 result = visit_image_load(ctx, instr);
2955 break;
2956 case nir_intrinsic_image_store:
2957 visit_image_store(ctx, instr);
2958 break;
2959 case nir_intrinsic_image_atomic_add:
2960 case nir_intrinsic_image_atomic_min:
2961 case nir_intrinsic_image_atomic_max:
2962 case nir_intrinsic_image_atomic_and:
2963 case nir_intrinsic_image_atomic_or:
2964 case nir_intrinsic_image_atomic_xor:
2965 case nir_intrinsic_image_atomic_exchange:
2966 case nir_intrinsic_image_atomic_comp_swap:
2967 result = visit_image_atomic(ctx, instr);
2968 break;
2969 case nir_intrinsic_image_size:
2970 result = visit_image_size(ctx, instr);
2971 break;
2972 case nir_intrinsic_discard:
2973 ctx->shader_info->fs.can_discard = true;
2974 emit_llvm_intrinsic(ctx, "llvm.AMDGPU.kilp",
2975 LLVMVoidTypeInContext(ctx->context),
2976 NULL, 0, 0);
2977 break;
2978 case nir_intrinsic_discard_if:
2979 emit_discard_if(ctx, instr);
2980 break;
2981 case nir_intrinsic_memory_barrier:
2982 emit_waitcnt(ctx);
2983 break;
2984 case nir_intrinsic_barrier:
2985 emit_barrier(ctx);
2986 break;
2987 case nir_intrinsic_var_atomic_add:
2988 case nir_intrinsic_var_atomic_imin:
2989 case nir_intrinsic_var_atomic_umin:
2990 case nir_intrinsic_var_atomic_imax:
2991 case nir_intrinsic_var_atomic_umax:
2992 case nir_intrinsic_var_atomic_and:
2993 case nir_intrinsic_var_atomic_or:
2994 case nir_intrinsic_var_atomic_xor:
2995 case nir_intrinsic_var_atomic_exchange:
2996 case nir_intrinsic_var_atomic_comp_swap:
2997 result = visit_var_atomic(ctx, instr);
2998 break;
2999 case nir_intrinsic_interp_var_at_centroid:
3000 case nir_intrinsic_interp_var_at_sample:
3001 case nir_intrinsic_interp_var_at_offset:
3002 result = visit_interp(ctx, instr);
3003 break;
3004 default:
3005 fprintf(stderr, "Unknown intrinsic: ");
3006 nir_print_instr(&instr->instr, stderr);
3007 fprintf(stderr, "\n");
3008 break;
3009 }
3010 if (result) {
3011 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3012 }
3013 }
3014
3015 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3016 nir_deref_var *deref,
3017 enum desc_type desc_type)
3018 {
3019 unsigned desc_set = deref->var->data.descriptor_set;
3020 LLVMValueRef list = ctx->descriptor_sets[desc_set];
3021 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3022 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
3023 unsigned offset = binding->offset;
3024 unsigned stride = binding->size;
3025 unsigned type_size;
3026 LLVMBuilderRef builder = ctx->builder;
3027 LLVMTypeRef type;
3028 LLVMValueRef indices[2];
3029 LLVMValueRef index = NULL;
3030
3031 assert(deref->var->data.binding < layout->binding_count);
3032
3033 switch (desc_type) {
3034 case DESC_IMAGE:
3035 type = ctx->v8i32;
3036 type_size = 32;
3037 break;
3038 case DESC_FMASK:
3039 type = ctx->v8i32;
3040 offset += 32;
3041 type_size = 32;
3042 break;
3043 case DESC_SAMPLER:
3044 type = ctx->v4i32;
3045 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
3046 offset += 64;
3047
3048 type_size = 16;
3049 break;
3050 case DESC_BUFFER:
3051 type = ctx->v4i32;
3052 type_size = 16;
3053 break;
3054 }
3055
3056 if (deref->deref.child) {
3057 nir_deref_array *child = (nir_deref_array*)deref->deref.child;
3058
3059 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3060 offset += child->base_offset * stride;
3061 if (child->deref_array_type == nir_deref_array_type_indirect) {
3062 index = get_src(ctx, child->indirect);
3063 }
3064 }
3065
3066 assert(stride % type_size == 0);
3067
3068 if (!index)
3069 index = ctx->i32zero;
3070
3071 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
3072 indices[0] = ctx->i32zero;
3073 indices[1] = LLVMConstInt(ctx->i32, offset, 0);
3074 list = LLVMBuildGEP(builder, list, indices, 2, "");
3075 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
3076
3077 return build_indexed_load_const(ctx, list, index);
3078 }
3079
3080 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3081 struct ac_tex_info *tinfo,
3082 nir_tex_instr *instr,
3083 nir_texop op,
3084 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3085 LLVMValueRef *param, unsigned count,
3086 unsigned dmask)
3087 {
3088 int num_args;
3089 unsigned is_rect = 0;
3090 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
3091
3092 if (op == nir_texop_lod)
3093 da = false;
3094 /* Pad to power of two vector */
3095 while (count < util_next_power_of_two(count))
3096 param[count++] = LLVMGetUndef(ctx->i32);
3097
3098 if (count > 1)
3099 tinfo->args[0] = build_gather_values(ctx, param, count);
3100 else
3101 tinfo->args[0] = param[0];
3102
3103 tinfo->args[1] = res_ptr;
3104 num_args = 2;
3105
3106 if (op == nir_texop_txf ||
3107 op == nir_texop_txf_ms ||
3108 op == nir_texop_query_levels ||
3109 op == nir_texop_texture_samples ||
3110 op == nir_texop_txs)
3111 tinfo->dst_type = ctx->v4i32;
3112 else {
3113 tinfo->dst_type = ctx->v4f32;
3114 tinfo->args[num_args++] = samp_ptr;
3115 }
3116
3117 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
3118 tinfo->args[0] = res_ptr;
3119 tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
3120 tinfo->args[2] = param[0];
3121 tinfo->arg_count = 3;
3122 return;
3123 }
3124
3125 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
3126 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
3127 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
3128 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
3129 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
3130 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
3131 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
3132 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
3133
3134 tinfo->arg_count = num_args;
3135 }
3136
3137 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3138 nir_tex_instr *instr,
3139 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3140 LLVMValueRef *fmask_ptr)
3141 {
3142 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
3143 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3144 else
3145 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3146 if (samp_ptr) {
3147 if (instr->sampler)
3148 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3149 else
3150 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3151 }
3152 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3153 instr->op == nir_texop_samples_identical))
3154 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3155 }
3156
3157 static LLVMValueRef build_cube_intrinsic(struct nir_to_llvm_context *ctx,
3158 LLVMValueRef *in)
3159 {
3160
3161 LLVMValueRef v, cube_vec;
3162
3163 if (1) {
3164 LLVMTypeRef f32 = LLVMTypeOf(in[0]);
3165 LLVMValueRef out[4];
3166
3167 out[0] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubetc",
3168 f32, in, 3, AC_FUNC_ATTR_READNONE);
3169 out[1] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubesc",
3170 f32, in, 3, AC_FUNC_ATTR_READNONE);
3171 out[2] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubema",
3172 f32, in, 3, AC_FUNC_ATTR_READNONE);
3173 out[3] = emit_llvm_intrinsic(ctx, "llvm.amdgcn.cubeid",
3174 f32, in, 3, AC_FUNC_ATTR_READNONE);
3175
3176 return build_gather_values(ctx, out, 4);
3177 } else {
3178 LLVMValueRef c[4];
3179 c[0] = in[0];
3180 c[1] = in[1];
3181 c[2] = in[2];
3182 c[3] = LLVMGetUndef(LLVMTypeOf(in[0]));
3183 cube_vec = build_gather_values(ctx, c, 4);
3184 v = emit_llvm_intrinsic(ctx, "llvm.AMDGPU.cube", LLVMTypeOf(cube_vec),
3185 &cube_vec, 1, AC_FUNC_ATTR_READNONE);
3186 }
3187 return v;
3188 }
3189
3190 static void cube_to_2d_coords(struct nir_to_llvm_context *ctx,
3191 LLVMValueRef *in, LLVMValueRef *out)
3192 {
3193 LLVMValueRef coords[4];
3194 LLVMValueRef mad_args[3];
3195 LLVMValueRef v;
3196 LLVMValueRef tmp;
3197 int i;
3198
3199 v = build_cube_intrinsic(ctx, in);
3200 for (i = 0; i < 4; i++)
3201 coords[i] = LLVMBuildExtractElement(ctx->builder, v,
3202 LLVMConstInt(ctx->i32, i, false), "");
3203
3204 coords[2] = emit_llvm_intrinsic(ctx, "llvm.fabs.f32", ctx->f32,
3205 &coords[2], 1, AC_FUNC_ATTR_READNONE);
3206 coords[2] = emit_fdiv(ctx, ctx->f32one, coords[2]);
3207
3208 mad_args[1] = coords[2];
3209 mad_args[2] = LLVMConstReal(ctx->f32, 1.5);
3210 mad_args[0] = coords[0];
3211
3212 /* emit MAD */
3213 tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], "");
3214 coords[0] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], "");
3215
3216 mad_args[0] = coords[1];
3217
3218 /* emit MAD */
3219 tmp = LLVMBuildFMul(ctx->builder, mad_args[0], mad_args[1], "");
3220 coords[1] = LLVMBuildFAdd(ctx->builder, tmp, mad_args[2], "");
3221
3222 /* apply xyz = yxw swizzle to cooords */
3223 out[0] = coords[1];
3224 out[1] = coords[0];
3225 out[2] = coords[3];
3226 }
3227
3228 static void emit_prepare_cube_coords(struct nir_to_llvm_context *ctx,
3229 LLVMValueRef *coords_arg, int num_coords,
3230 bool is_deriv,
3231 bool is_array, LLVMValueRef *derivs_arg)
3232 {
3233 LLVMValueRef coords[4];
3234 int i;
3235 cube_to_2d_coords(ctx, coords_arg, coords);
3236
3237 if (is_deriv && derivs_arg) {
3238 LLVMValueRef derivs[4];
3239 int axis;
3240
3241 /* Convert cube derivatives to 2D derivatives. */
3242 for (axis = 0; axis < 2; axis++) {
3243 LLVMValueRef shifted_cube_coords[4], shifted_coords[4];
3244
3245 /* Shift the cube coordinates by the derivatives to get
3246 * the cube coordinates of the "neighboring pixel".
3247 */
3248 for (i = 0; i < 3; i++)
3249 shifted_cube_coords[i] =
3250 LLVMBuildFAdd(ctx->builder, coords_arg[i],
3251 derivs_arg[axis*3+i], "");
3252 shifted_cube_coords[3] = LLVMGetUndef(ctx->f32);
3253
3254 /* Project the shifted cube coordinates onto the face. */
3255 cube_to_2d_coords(ctx, shifted_cube_coords,
3256 shifted_coords);
3257
3258 /* Subtract both sets of 2D coordinates to get 2D derivatives.
3259 * This won't work if the shifted coordinates ended up
3260 * in a different face.
3261 */
3262 for (i = 0; i < 2; i++)
3263 derivs[axis * 2 + i] =
3264 LLVMBuildFSub(ctx->builder, shifted_coords[i],
3265 coords[i], "");
3266 }
3267
3268 memcpy(derivs_arg, derivs, sizeof(derivs));
3269 }
3270
3271 if (is_array) {
3272 /* for cube arrays coord.z = coord.w(array_index) * 8 + face */
3273 /* coords_arg.w component - array_index for cube arrays */
3274 LLVMValueRef tmp = LLVMBuildFMul(ctx->builder, coords_arg[3], LLVMConstReal(ctx->f32, 8.0), "");
3275 coords[2] = LLVMBuildFAdd(ctx->builder, tmp, coords[2], "");
3276 }
3277
3278 memcpy(coords_arg, coords, sizeof(coords));
3279 }
3280
3281 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
3282 {
3283 LLVMValueRef result = NULL;
3284 struct ac_tex_info tinfo = { 0 };
3285 unsigned dmask = 0xf;
3286 LLVMValueRef address[16];
3287 LLVMValueRef coords[5];
3288 LLVMValueRef coord = NULL, lod = NULL, comparitor = NULL, bias, offsets = NULL;
3289 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
3290 LLVMValueRef ddx = NULL, ddy = NULL;
3291 LLVMValueRef derivs[6];
3292 unsigned chan, count = 0;
3293 unsigned const_src = 0, num_deriv_comp = 0;
3294
3295 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3296
3297 for (unsigned i = 0; i < instr->num_srcs; i++) {
3298 switch (instr->src[i].src_type) {
3299 case nir_tex_src_coord:
3300 coord = get_src(ctx, instr->src[i].src);
3301 break;
3302 case nir_tex_src_projector:
3303 break;
3304 case nir_tex_src_comparitor:
3305 comparitor = get_src(ctx, instr->src[i].src);
3306 break;
3307 case nir_tex_src_offset:
3308 offsets = get_src(ctx, instr->src[i].src);
3309 const_src = i;
3310 break;
3311 case nir_tex_src_bias:
3312 bias = get_src(ctx, instr->src[i].src);
3313 break;
3314 case nir_tex_src_lod:
3315 lod = get_src(ctx, instr->src[i].src);
3316 break;
3317 case nir_tex_src_ms_index:
3318 sample_index = get_src(ctx, instr->src[i].src);
3319 break;
3320 case nir_tex_src_ms_mcs:
3321 break;
3322 case nir_tex_src_ddx:
3323 ddx = get_src(ctx, instr->src[i].src);
3324 num_deriv_comp = instr->src[i].src.ssa->num_components;
3325 break;
3326 case nir_tex_src_ddy:
3327 ddy = get_src(ctx, instr->src[i].src);
3328 break;
3329 case nir_tex_src_texture_offset:
3330 case nir_tex_src_sampler_offset:
3331 case nir_tex_src_plane:
3332 default:
3333 break;
3334 }
3335 }
3336
3337 if (instr->op == nir_texop_texture_samples) {
3338 LLVMValueRef res, samples, is_msaa;
3339 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
3340 samples = LLVMBuildExtractElement(ctx->builder, res,
3341 LLVMConstInt(ctx->i32, 3, false), "");
3342 is_msaa = LLVMBuildLShr(ctx->builder, samples,
3343 LLVMConstInt(ctx->i32, 28, false), "");
3344 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
3345 LLVMConstInt(ctx->i32, 0xe, false), "");
3346 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
3347 LLVMConstInt(ctx->i32, 0xe, false), "");
3348
3349 samples = LLVMBuildLShr(ctx->builder, samples,
3350 LLVMConstInt(ctx->i32, 16, false), "");
3351 samples = LLVMBuildAnd(ctx->builder, samples,
3352 LLVMConstInt(ctx->i32, 0xf, false), "");
3353 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
3354 samples, "");
3355 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
3356 ctx->i32one, "");
3357 result = samples;
3358 goto write_result;
3359 }
3360
3361 if (coord)
3362 for (chan = 0; chan < instr->coord_components; chan++)
3363 coords[chan] = llvm_extract_elem(ctx, coord, chan);
3364
3365 if (offsets && instr->op != nir_texop_txf) {
3366 LLVMValueRef offset[3], pack;
3367 for (chan = 0; chan < 3; ++chan)
3368 offset[chan] = ctx->i32zero;
3369
3370 tinfo.has_offset = true;
3371 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
3372 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
3373 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
3374 LLVMConstInt(ctx->i32, 0x3f, false), "");
3375 if (chan)
3376 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
3377 LLVMConstInt(ctx->i32, chan * 8, false), "");
3378 }
3379 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
3380 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
3381 address[count++] = pack;
3382
3383 }
3384 /* pack LOD bias value */
3385 if (instr->op == nir_texop_txb && bias) {
3386 address[count++] = bias;
3387 }
3388
3389 /* Pack depth comparison value */
3390 if (instr->is_shadow && comparitor) {
3391 address[count++] = llvm_extract_elem(ctx, comparitor, 0);
3392 }
3393
3394 /* pack derivatives */
3395 if (ddx || ddy) {
3396 switch (instr->sampler_dim) {
3397 case GLSL_SAMPLER_DIM_3D:
3398 case GLSL_SAMPLER_DIM_CUBE:
3399 num_deriv_comp = 3;
3400 break;
3401 case GLSL_SAMPLER_DIM_2D:
3402 default:
3403 num_deriv_comp = 2;
3404 break;
3405 case GLSL_SAMPLER_DIM_1D:
3406 num_deriv_comp = 1;
3407 break;
3408 }
3409
3410 for (unsigned i = 0; i < num_deriv_comp; i++) {
3411 derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
3412 derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
3413 }
3414 }
3415
3416 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
3417 for (chan = 0; chan < instr->coord_components; chan++)
3418 coords[chan] = to_float(ctx, coords[chan]);
3419 if (instr->coord_components == 3)
3420 coords[3] = LLVMGetUndef(ctx->f32);
3421 emit_prepare_cube_coords(ctx, coords, instr->coord_components, instr->op == nir_texop_txd, instr->is_array, derivs);
3422 if (num_deriv_comp)
3423 num_deriv_comp--;
3424 }
3425
3426 if (ddx || ddy) {
3427 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
3428 address[count++] = derivs[i];
3429 }
3430
3431 /* Pack texture coordinates */
3432 if (coord) {
3433 address[count++] = coords[0];
3434 if (instr->coord_components > 1)
3435 address[count++] = coords[1];
3436 if (instr->coord_components > 2) {
3437 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
3438 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
3439 coords[2] = to_float(ctx, coords[2]);
3440 coords[2] = emit_llvm_intrinsic(ctx, "llvm.rint.f32", ctx->f32, &coords[2],
3441 1, 0);
3442 coords[2] = to_integer(ctx, coords[2]);
3443 }
3444 address[count++] = coords[2];
3445 }
3446 }
3447
3448 /* Pack LOD */
3449 if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
3450 address[count++] = lod;
3451 } else if (instr->op == nir_texop_txf_ms && sample_index) {
3452 address[count++] = sample_index;
3453 } else if(instr->op == nir_texop_txs) {
3454 count = 0;
3455 if (lod)
3456 address[count++] = lod;
3457 else
3458 address[count++] = ctx->i32zero;
3459 }
3460
3461 for (chan = 0; chan < count; chan++) {
3462 address[chan] = LLVMBuildBitCast(ctx->builder,
3463 address[chan], ctx->i32, "");
3464 }
3465
3466 if (instr->op == nir_texop_samples_identical) {
3467 LLVMValueRef txf_address[4];
3468 struct ac_tex_info txf_info = { 0 };
3469 unsigned txf_count = count;
3470 memcpy(txf_address, address, sizeof(txf_address));
3471
3472 if (!instr->is_array)
3473 txf_address[2] = ctx->i32zero;
3474 txf_address[3] = ctx->i32zero;
3475
3476 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3477 fmask_ptr, NULL,
3478 txf_address, txf_count, 0xf);
3479
3480 result = build_tex_intrinsic(ctx, instr, &txf_info);
3481
3482 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3483 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
3484 goto write_result;
3485 }
3486
3487 /* Adjust the sample index according to FMASK.
3488 *
3489 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3490 * which is the identity mapping. Each nibble says which physical sample
3491 * should be fetched to get that sample.
3492 *
3493 * For example, 0x11111100 means there are only 2 samples stored and
3494 * the second sample covers 3/4 of the pixel. When reading samples 0
3495 * and 1, return physical sample 0 (determined by the first two 0s
3496 * in FMASK), otherwise return physical sample 1.
3497 *
3498 * The sample index should be adjusted as follows:
3499 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3500 */
3501 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
3502 LLVMValueRef txf_address[4];
3503 struct ac_tex_info txf_info = { 0 };
3504 unsigned txf_count = count;
3505 memcpy(txf_address, address, sizeof(txf_address));
3506
3507 if (!instr->is_array)
3508 txf_address[2] = ctx->i32zero;
3509 txf_address[3] = ctx->i32zero;
3510
3511 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3512 fmask_ptr, NULL,
3513 txf_address, txf_count, 0xf);
3514
3515 result = build_tex_intrinsic(ctx, instr, &txf_info);
3516 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3517 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3518
3519 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3520 result,
3521 ctx->i32zero, "");
3522
3523 unsigned sample_chan = instr->is_array ? 3 : 2;
3524
3525 LLVMValueRef sample_index4 =
3526 LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
3527 LLVMValueRef shifted_fmask =
3528 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3529 LLVMValueRef final_sample =
3530 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3531
3532 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3533 * resource descriptor is 0 (invalid),
3534 */
3535 LLVMValueRef fmask_desc =
3536 LLVMBuildBitCast(ctx->builder, fmask_ptr,
3537 ctx->v8i32, "");
3538
3539 LLVMValueRef fmask_word1 =
3540 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3541 ctx->i32one, "");
3542
3543 LLVMValueRef word1_is_nonzero =
3544 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3545 fmask_word1, ctx->i32zero, "");
3546
3547 /* Replace the MSAA sample index. */
3548 address[sample_chan] =
3549 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3550 final_sample, address[sample_chan], "");
3551 }
3552
3553 if (offsets && instr->op == nir_texop_txf) {
3554 nir_const_value *const_offset =
3555 nir_src_as_const_value(instr->src[const_src].src);
3556
3557 assert(const_offset);
3558 if (instr->coord_components > 2)
3559 address[2] = LLVMBuildAdd(ctx->builder,
3560 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
3561 if (instr->coord_components > 1)
3562 address[1] = LLVMBuildAdd(ctx->builder,
3563 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
3564 address[0] = LLVMBuildAdd(ctx->builder,
3565 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
3566
3567 }
3568
3569 /* TODO TG4 support */
3570 if (instr->op == nir_texop_tg4) {
3571 if (instr->is_shadow)
3572 dmask = 1;
3573 else
3574 dmask = 1 << instr->component;
3575 }
3576 set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
3577 res_ptr, samp_ptr, address, count, dmask);
3578
3579 result = build_tex_intrinsic(ctx, instr, &tinfo);
3580
3581 if (instr->op == nir_texop_query_levels)
3582 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
3583 else if (instr->op == nir_texop_txs &&
3584 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
3585 instr->is_array) {
3586 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3587 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3588 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
3589 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3590 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
3591 } else if (instr->dest.ssa.num_components != 4)
3592 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
3593
3594 write_result:
3595 if (result) {
3596 assert(instr->dest.is_ssa);
3597 result = to_integer(ctx, result);
3598 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3599 }
3600 }
3601
3602
3603 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3604 {
3605 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3606 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3607
3608 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3609 _mesa_hash_table_insert(ctx->phis, instr, result);
3610 }
3611
3612 static void visit_post_phi(struct nir_to_llvm_context *ctx,
3613 nir_phi_instr *instr,
3614 LLVMValueRef llvm_phi)
3615 {
3616 nir_foreach_phi_src(src, instr) {
3617 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3618 LLVMValueRef llvm_src = get_src(ctx, src->src);
3619
3620 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3621 }
3622 }
3623
3624 static void phi_post_pass(struct nir_to_llvm_context *ctx)
3625 {
3626 struct hash_entry *entry;
3627 hash_table_foreach(ctx->phis, entry) {
3628 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3629 (LLVMValueRef)entry->data);
3630 }
3631 }
3632
3633
3634 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3635 nir_ssa_undef_instr *instr)
3636 {
3637 unsigned num_components = instr->def.num_components;
3638 LLVMValueRef undef;
3639
3640 if (num_components == 1)
3641 undef = LLVMGetUndef(ctx->i32);
3642 else {
3643 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3644 }
3645 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3646 }
3647
3648 static void visit_jump(struct nir_to_llvm_context *ctx,
3649 nir_jump_instr *instr)
3650 {
3651 switch (instr->type) {
3652 case nir_jump_break:
3653 LLVMBuildBr(ctx->builder, ctx->break_block);
3654 LLVMClearInsertionPosition(ctx->builder);
3655 break;
3656 case nir_jump_continue:
3657 LLVMBuildBr(ctx->builder, ctx->continue_block);
3658 LLVMClearInsertionPosition(ctx->builder);
3659 break;
3660 default:
3661 fprintf(stderr, "Unknown NIR jump instr: ");
3662 nir_print_instr(&instr->instr, stderr);
3663 fprintf(stderr, "\n");
3664 abort();
3665 }
3666 }
3667
3668 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3669 struct exec_list *list);
3670
3671 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
3672 {
3673 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
3674 nir_foreach_instr(instr, block)
3675 {
3676 switch (instr->type) {
3677 case nir_instr_type_alu:
3678 visit_alu(ctx, nir_instr_as_alu(instr));
3679 break;
3680 case nir_instr_type_load_const:
3681 visit_load_const(ctx, nir_instr_as_load_const(instr));
3682 break;
3683 case nir_instr_type_intrinsic:
3684 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3685 break;
3686 case nir_instr_type_tex:
3687 visit_tex(ctx, nir_instr_as_tex(instr));
3688 break;
3689 case nir_instr_type_phi:
3690 visit_phi(ctx, nir_instr_as_phi(instr));
3691 break;
3692 case nir_instr_type_ssa_undef:
3693 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3694 break;
3695 case nir_instr_type_jump:
3696 visit_jump(ctx, nir_instr_as_jump(instr));
3697 break;
3698 default:
3699 fprintf(stderr, "Unknown NIR instr type: ");
3700 nir_print_instr(instr, stderr);
3701 fprintf(stderr, "\n");
3702 abort();
3703 }
3704 }
3705
3706 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3707 }
3708
3709 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
3710 {
3711 LLVMValueRef value = get_src(ctx, if_stmt->condition);
3712
3713 LLVMBasicBlockRef merge_block =
3714 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3715 LLVMBasicBlockRef if_block =
3716 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3717 LLVMBasicBlockRef else_block = merge_block;
3718 if (!exec_list_is_empty(&if_stmt->else_list))
3719 else_block = LLVMAppendBasicBlockInContext(
3720 ctx->context, ctx->main_function, "");
3721
3722 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
3723 LLVMConstInt(ctx->i32, 0, false), "");
3724 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
3725
3726 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3727 visit_cf_list(ctx, &if_stmt->then_list);
3728 if (LLVMGetInsertBlock(ctx->builder))
3729 LLVMBuildBr(ctx->builder, merge_block);
3730
3731 if (!exec_list_is_empty(&if_stmt->else_list)) {
3732 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
3733 visit_cf_list(ctx, &if_stmt->else_list);
3734 if (LLVMGetInsertBlock(ctx->builder))
3735 LLVMBuildBr(ctx->builder, merge_block);
3736 }
3737
3738 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
3739 }
3740
3741 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
3742 {
3743 LLVMBasicBlockRef continue_parent = ctx->continue_block;
3744 LLVMBasicBlockRef break_parent = ctx->break_block;
3745
3746 ctx->continue_block =
3747 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3748 ctx->break_block =
3749 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3750
3751 LLVMBuildBr(ctx->builder, ctx->continue_block);
3752 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
3753 visit_cf_list(ctx, &loop->body);
3754
3755 if (LLVMGetInsertBlock(ctx->builder))
3756 LLVMBuildBr(ctx->builder, ctx->continue_block);
3757 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
3758
3759 ctx->continue_block = continue_parent;
3760 ctx->break_block = break_parent;
3761 }
3762
3763 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3764 struct exec_list *list)
3765 {
3766 foreach_list_typed(nir_cf_node, node, node, list)
3767 {
3768 switch (node->type) {
3769 case nir_cf_node_block:
3770 visit_block(ctx, nir_cf_node_as_block(node));
3771 break;
3772
3773 case nir_cf_node_if:
3774 visit_if(ctx, nir_cf_node_as_if(node));
3775 break;
3776
3777 case nir_cf_node_loop:
3778 visit_loop(ctx, nir_cf_node_as_loop(node));
3779 break;
3780
3781 default:
3782 assert(0);
3783 }
3784 }
3785 }
3786
3787 static void
3788 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
3789 struct nir_variable *variable)
3790 {
3791 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
3792 LLVMValueRef t_offset;
3793 LLVMValueRef t_list;
3794 LLVMValueRef args[3];
3795 LLVMValueRef input;
3796 LLVMValueRef buffer_index;
3797 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
3798 int idx = variable->data.location;
3799 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
3800
3801 variable->data.driver_location = idx * 4;
3802
3803 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
3804 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
3805 ctx->start_instance, "");
3806 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3807 ctx->shader_info->vs.vgpr_comp_cnt);
3808 } else
3809 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
3810 ctx->base_vertex, "");
3811
3812 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
3813 t_offset = LLVMConstInt(ctx->i32, index + i, false);
3814
3815 t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
3816 args[0] = t_list;
3817 args[1] = LLVMConstInt(ctx->i32, 0, false);
3818 args[2] = buffer_index;
3819 input = emit_llvm_intrinsic(ctx,
3820 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
3821 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3822
3823 for (unsigned chan = 0; chan < 4; chan++) {
3824 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3825 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
3826 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
3827 input, llvm_chan, ""));
3828 }
3829 }
3830 }
3831
3832
3833 static void interp_fs_input(struct nir_to_llvm_context *ctx,
3834 unsigned attr,
3835 LLVMValueRef interp_param,
3836 LLVMValueRef prim_mask,
3837 LLVMValueRef result[4])
3838 {
3839 const char *intr_name;
3840 LLVMValueRef attr_number;
3841 unsigned chan;
3842
3843 attr_number = LLVMConstInt(ctx->i32, attr, false);
3844
3845 /* fs.constant returns the param from the middle vertex, so it's not
3846 * really useful for flat shading. It's meant to be used for custom
3847 * interpolation (but the intrinsic can't fetch from the other two
3848 * vertices).
3849 *
3850 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
3851 * to do the right thing. The only reason we use fs.constant is that
3852 * fs.interp cannot be used on integers, because they can be equal
3853 * to NaN.
3854 */
3855 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
3856
3857 for (chan = 0; chan < 4; chan++) {
3858 LLVMValueRef args[4];
3859 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3860
3861 args[0] = llvm_chan;
3862 args[1] = attr_number;
3863 args[2] = prim_mask;
3864 args[3] = interp_param;
3865 result[chan] = emit_llvm_intrinsic(ctx, intr_name,
3866 ctx->f32, args, args[3] ? 4 : 3,
3867 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3868 }
3869 }
3870
3871 static void
3872 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
3873 struct nir_variable *variable)
3874 {
3875 int idx = variable->data.location;
3876 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3877 LLVMValueRef interp;
3878
3879 variable->data.driver_location = idx * 4;
3880 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
3881
3882 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT)
3883 interp = lookup_interp_param(ctx, variable->data.interpolation, INTERP_CENTER);
3884 else
3885 interp = NULL;
3886
3887 for (unsigned i = 0; i < attrib_count; ++i)
3888 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
3889
3890 }
3891
3892 static void
3893 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
3894 struct nir_variable *variable)
3895 {
3896 switch (ctx->stage) {
3897 case MESA_SHADER_VERTEX:
3898 handle_vs_input_decl(ctx, variable);
3899 break;
3900 case MESA_SHADER_FRAGMENT:
3901 handle_fs_input_decl(ctx, variable);
3902 break;
3903 default:
3904 break;
3905 }
3906
3907 }
3908
3909 static void
3910 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
3911 struct nir_shader *nir)
3912 {
3913 unsigned index = 0;
3914 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
3915 LLVMValueRef interp_param;
3916 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
3917
3918 if (!(ctx->input_mask & (1ull << i)))
3919 continue;
3920
3921 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
3922 interp_param = *inputs;
3923 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
3924 inputs);
3925
3926 if (!interp_param)
3927 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
3928 ++index;
3929 } else if (i == VARYING_SLOT_POS) {
3930 for(int i = 0; i < 3; ++i)
3931 inputs[i] = ctx->frag_pos[i];
3932
3933 inputs[3] = emit_fdiv(ctx, ctx->f32one, ctx->frag_pos[3]);
3934 }
3935 }
3936 ctx->shader_info->fs.num_interp = index;
3937 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
3938 ctx->shader_info->fs.has_pcoord = true;
3939 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
3940 }
3941
3942 static LLVMValueRef
3943 ac_build_alloca(struct nir_to_llvm_context *ctx,
3944 LLVMTypeRef type,
3945 const char *name)
3946 {
3947 LLVMBuilderRef builder = ctx->builder;
3948 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
3949 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
3950 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
3951 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
3952 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
3953 LLVMValueRef res;
3954
3955 if (first_instr) {
3956 LLVMPositionBuilderBefore(first_builder, first_instr);
3957 } else {
3958 LLVMPositionBuilderAtEnd(first_builder, first_block);
3959 }
3960
3961 res = LLVMBuildAlloca(first_builder, type, name);
3962 LLVMBuildStore(builder, LLVMConstNull(type), res);
3963
3964 LLVMDisposeBuilder(first_builder);
3965
3966 return res;
3967 }
3968
3969 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
3970 LLVMTypeRef type,
3971 const char *name)
3972 {
3973 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
3974 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
3975 return ptr;
3976 }
3977
3978 static void
3979 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
3980 struct nir_variable *variable)
3981 {
3982 int idx = variable->data.location;
3983 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3984
3985 variable->data.driver_location = idx * 4;
3986
3987 if (ctx->stage == MESA_SHADER_VERTEX) {
3988
3989 if (idx == VARYING_SLOT_CLIP_DIST0 ||
3990 idx == VARYING_SLOT_CULL_DIST0) {
3991 int length = glsl_get_length(variable->type);
3992 if (idx == VARYING_SLOT_CLIP_DIST0) {
3993 ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
3994 ctx->num_clips = length;
3995 } else if (idx == VARYING_SLOT_CULL_DIST0) {
3996 ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
3997 ctx->num_culls = length;
3998 }
3999 if (length > 4)
4000 attrib_count = 2;
4001 else
4002 attrib_count = 1;
4003 }
4004 }
4005
4006 for (unsigned i = 0; i < attrib_count; ++i) {
4007 for (unsigned chan = 0; chan < 4; chan++) {
4008 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
4009 si_build_alloca_undef(ctx, ctx->f32, "");
4010 }
4011 }
4012 ctx->output_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
4013 }
4014
4015 static void
4016 setup_locals(struct nir_to_llvm_context *ctx,
4017 struct nir_function *func)
4018 {
4019 int i, j;
4020 ctx->num_locals = 0;
4021 nir_foreach_variable(variable, &func->impl->locals) {
4022 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
4023 variable->data.driver_location = ctx->num_locals * 4;
4024 ctx->num_locals += attrib_count;
4025 }
4026 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
4027 if (!ctx->locals)
4028 return;
4029
4030 for (i = 0; i < ctx->num_locals; i++) {
4031 for (j = 0; j < 4; j++) {
4032 ctx->locals[i * 4 + j] =
4033 si_build_alloca_undef(ctx, ctx->f32, "temp");
4034 }
4035 }
4036 }
4037
4038 static LLVMValueRef
4039 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
4040 {
4041 v = to_float(ctx, v);
4042 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
4043 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
4044 }
4045
4046
4047 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
4048 LLVMValueRef src0, LLVMValueRef src1)
4049 {
4050 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
4051 LLVMValueRef comp[2];
4052
4053 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
4054 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
4055 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
4056 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
4057 }
4058
4059 /* Initialize arguments for the shader export intrinsic */
4060 static void
4061 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
4062 LLVMValueRef *values,
4063 unsigned target,
4064 LLVMValueRef *args)
4065 {
4066 /* Default is 0xf. Adjusted below depending on the format. */
4067 args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
4068 /* Specify whether the EXEC mask represents the valid mask */
4069 args[1] = LLVMConstInt(ctx->i32, 0, false);
4070
4071 /* Specify whether this is the last export */
4072 args[2] = LLVMConstInt(ctx->i32, 0, false);
4073 /* Specify the target we are exporting */
4074 args[3] = LLVMConstInt(ctx->i32, target, false);
4075
4076 args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
4077 args[5] = LLVMGetUndef(ctx->f32);
4078 args[6] = LLVMGetUndef(ctx->f32);
4079 args[7] = LLVMGetUndef(ctx->f32);
4080 args[8] = LLVMGetUndef(ctx->f32);
4081
4082 if (!values)
4083 return;
4084
4085 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4086 LLVMValueRef val[4];
4087 unsigned index = target - V_008DFC_SQ_EXP_MRT;
4088 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4089 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4090
4091 switch(col_format) {
4092 case V_028714_SPI_SHADER_ZERO:
4093 args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
4094 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
4095 break;
4096
4097 case V_028714_SPI_SHADER_32_R:
4098 args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
4099 args[5] = values[0];
4100 break;
4101
4102 case V_028714_SPI_SHADER_32_GR:
4103 args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
4104 args[5] = values[0];
4105 args[6] = values[1];
4106 break;
4107
4108 case V_028714_SPI_SHADER_32_AR:
4109 args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
4110 args[5] = values[0];
4111 args[8] = values[3];
4112 break;
4113
4114 case V_028714_SPI_SHADER_FP16_ABGR:
4115 args[4] = ctx->i32one;
4116
4117 for (unsigned chan = 0; chan < 2; chan++) {
4118 LLVMValueRef pack_args[2] = {
4119 values[2 * chan],
4120 values[2 * chan + 1]
4121 };
4122 LLVMValueRef packed;
4123
4124 packed = emit_llvm_intrinsic(ctx, "llvm.SI.packf16",
4125 ctx->i32, pack_args, 2,
4126 AC_FUNC_ATTR_READNONE);
4127 args[chan + 5] = packed;
4128 }
4129 break;
4130
4131 case V_028714_SPI_SHADER_UNORM16_ABGR:
4132 for (unsigned chan = 0; chan < 4; chan++) {
4133 val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
4134 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4135 LLVMConstReal(ctx->f32, 65535), "");
4136 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4137 LLVMConstReal(ctx->f32, 0.5), "");
4138 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4139 ctx->i32, "");
4140 }
4141
4142 args[4] = ctx->i32one;
4143 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4144 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4145 break;
4146
4147 case V_028714_SPI_SHADER_SNORM16_ABGR:
4148 for (unsigned chan = 0; chan < 4; chan++) {
4149 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4150 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4151 LLVMConstReal(ctx->f32, 32767), "");
4152
4153 /* If positive, add 0.5, else add -0.5. */
4154 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4155 LLVMBuildSelect(ctx->builder,
4156 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4157 val[chan], ctx->f32zero, ""),
4158 LLVMConstReal(ctx->f32, 0.5),
4159 LLVMConstReal(ctx->f32, -0.5), ""), "");
4160 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4161 }
4162
4163 args[4] = ctx->i32one;
4164 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4165 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4166 break;
4167
4168 case V_028714_SPI_SHADER_UINT16_ABGR: {
4169 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4170
4171 for (unsigned chan = 0; chan < 4; chan++) {
4172 val[chan] = to_integer(ctx, values[chan]);
4173 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4174 }
4175
4176 args[4] = ctx->i32one;
4177 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4178 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4179 break;
4180 }
4181
4182 case V_028714_SPI_SHADER_SINT16_ABGR: {
4183 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4184 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4185
4186 /* Clamp. */
4187 for (unsigned chan = 0; chan < 4; chan++) {
4188 val[chan] = to_integer(ctx, values[chan]);
4189 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4190 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4191 }
4192
4193 args[4] = ctx->i32one;
4194 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4195 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4196 break;
4197 }
4198
4199 default:
4200 case V_028714_SPI_SHADER_32_ABGR:
4201 memcpy(&args[5], values, sizeof(values[0]) * 4);
4202 break;
4203 }
4204 } else
4205 memcpy(&args[5], values, sizeof(values[0]) * 4);
4206
4207 for (unsigned i = 5; i < 9; ++i)
4208 args[i] = to_float(ctx, args[i]);
4209 }
4210
4211 static void
4212 handle_vs_outputs_post(struct nir_to_llvm_context *ctx,
4213 struct nir_shader *nir)
4214 {
4215 uint32_t param_count = 0;
4216 unsigned target;
4217 unsigned pos_idx, num_pos_exports = 0;
4218 LLVMValueRef args[9];
4219 LLVMValueRef pos_args[4][9] = { { 0 } };
4220 LLVMValueRef psize_value = 0;
4221 int i;
4222 const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
4223 (1ull << VARYING_SLOT_CLIP_DIST1) |
4224 (1ull << VARYING_SLOT_CULL_DIST0) |
4225 (1ull << VARYING_SLOT_CULL_DIST1));
4226
4227 if (clip_mask) {
4228 LLVMValueRef slots[8];
4229 unsigned j;
4230
4231 if (ctx->shader_info->vs.cull_dist_mask)
4232 ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
4233
4234 i = VARYING_SLOT_CLIP_DIST0;
4235 for (j = 0; j < ctx->num_clips; j++)
4236 slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4237 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4238 i = VARYING_SLOT_CULL_DIST0;
4239 for (j = 0; j < ctx->num_culls; j++)
4240 slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4241 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4242
4243 for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
4244 slots[i] = LLVMGetUndef(ctx->f32);
4245
4246 if (ctx->num_clips + ctx->num_culls > 4) {
4247 target = V_008DFC_SQ_EXP_POS + 3;
4248 si_llvm_init_export_args(ctx, &slots[4], target, args);
4249 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4250 args, sizeof(args));
4251 }
4252
4253 target = V_008DFC_SQ_EXP_POS + 2;
4254 si_llvm_init_export_args(ctx, &slots[0], target, args);
4255 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4256 args, sizeof(args));
4257
4258 }
4259
4260 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4261 LLVMValueRef values[4];
4262 if (!(ctx->output_mask & (1ull << i)))
4263 continue;
4264
4265 for (unsigned j = 0; j < 4; j++)
4266 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4267 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4268
4269 if (i == VARYING_SLOT_POS) {
4270 target = V_008DFC_SQ_EXP_POS;
4271 } else if (i == VARYING_SLOT_CLIP_DIST0 ||
4272 i == VARYING_SLOT_CLIP_DIST1 ||
4273 i == VARYING_SLOT_CULL_DIST0 ||
4274 i == VARYING_SLOT_CULL_DIST1) {
4275 continue;
4276 } else if (i == VARYING_SLOT_PSIZ) {
4277 ctx->shader_info->vs.writes_pointsize = true;
4278 psize_value = values[0];
4279 continue;
4280 } else if (i >= VARYING_SLOT_VAR0) {
4281 ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
4282 target = V_008DFC_SQ_EXP_PARAM + param_count;
4283 param_count++;
4284 }
4285
4286 si_llvm_init_export_args(ctx, values, target, args);
4287
4288 if (target >= V_008DFC_SQ_EXP_POS &&
4289 target <= (V_008DFC_SQ_EXP_POS + 3)) {
4290 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4291 args, sizeof(args));
4292 } else {
4293 emit_llvm_intrinsic(ctx,
4294 "llvm.SI.export",
4295 LLVMVoidTypeInContext(ctx->context),
4296 args, 9, 0);
4297 }
4298 }
4299
4300 /* We need to add the position output manually if it's missing. */
4301 if (!pos_args[0][0]) {
4302 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
4303 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
4304 pos_args[0][2] = ctx->i32zero; /* last export? */
4305 pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
4306 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
4307 pos_args[0][5] = ctx->f32zero; /* X */
4308 pos_args[0][6] = ctx->f32zero; /* Y */
4309 pos_args[0][7] = ctx->f32zero; /* Z */
4310 pos_args[0][8] = ctx->f32one; /* W */
4311 }
4312
4313 if (ctx->shader_info->vs.writes_pointsize == true) {
4314 pos_args[1][0] = LLVMConstInt(ctx->i32, (ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */
4315 pos_args[1][1] = ctx->i32zero; /* EXEC mask */
4316 pos_args[1][2] = ctx->i32zero; /* last export? */
4317 pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
4318 pos_args[1][4] = ctx->i32zero; /* COMPR flag */
4319 pos_args[1][5] = ctx->f32zero; /* X */
4320 pos_args[1][6] = ctx->f32zero; /* Y */
4321 pos_args[1][7] = ctx->f32zero; /* Z */
4322 pos_args[1][8] = ctx->f32zero; /* W */
4323
4324 if (ctx->shader_info->vs.writes_pointsize == true)
4325 pos_args[1][5] = psize_value;
4326 }
4327 for (i = 0; i < 4; i++) {
4328 if (pos_args[i][0])
4329 num_pos_exports++;
4330 }
4331
4332 pos_idx = 0;
4333 for (i = 0; i < 4; i++) {
4334 if (!pos_args[i][0])
4335 continue;
4336
4337 /* Specify the target we are exporting */
4338 pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
4339 if (pos_idx == num_pos_exports)
4340 pos_args[i][2] = ctx->i32one;
4341 emit_llvm_intrinsic(ctx,
4342 "llvm.SI.export",
4343 LLVMVoidTypeInContext(ctx->context),
4344 pos_args[i], 9, 0);
4345 }
4346
4347 ctx->shader_info->vs.pos_exports = num_pos_exports;
4348 ctx->shader_info->vs.param_exports = param_count;
4349 }
4350
4351 static void
4352 si_export_mrt_color(struct nir_to_llvm_context *ctx,
4353 LLVMValueRef *color, unsigned param, bool is_last)
4354 {
4355 LLVMValueRef args[9];
4356 /* Export */
4357 si_llvm_init_export_args(ctx, color, param,
4358 args);
4359
4360 if (is_last) {
4361 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4362 args[2] = ctx->i32one; /* DONE bit */
4363 } else if (args[0] == ctx->i32zero)
4364 return; /* unnecessary NULL export */
4365
4366 emit_llvm_intrinsic(ctx, "llvm.SI.export",
4367 ctx->voidt, args, 9, 0);
4368 }
4369
4370 static void
4371 si_export_mrt_z(struct nir_to_llvm_context *ctx,
4372 LLVMValueRef depth, LLVMValueRef stencil,
4373 LLVMValueRef samplemask)
4374 {
4375 LLVMValueRef args[9];
4376 unsigned mask = 0;
4377 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4378 args[2] = ctx->i32one; /* DONE bit */
4379 /* Specify the target we are exporting */
4380 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4381
4382 args[4] = ctx->i32zero; /* COMP flag */
4383 args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4384 args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4385 args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4386 args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4387
4388 if (depth) {
4389 args[5] = depth;
4390 mask |= 0x1;
4391 }
4392
4393 if (stencil) {
4394 args[6] = stencil;
4395 mask |= 0x2;
4396 }
4397
4398 if (samplemask) {
4399 args[7] = samplemask;
4400 mask |= 0x04;
4401 }
4402
4403 /* SI (except OLAND) has a bug that it only looks
4404 * at the X writemask component. */
4405 if (ctx->options->chip_class == SI &&
4406 ctx->options->family != CHIP_OLAND)
4407 mask |= 0x01;
4408
4409 args[0] = LLVMConstInt(ctx->i32, mask, false);
4410 emit_llvm_intrinsic(ctx, "llvm.SI.export",
4411 ctx->voidt, args, 9, 0);
4412 }
4413
4414 static void
4415 handle_fs_outputs_post(struct nir_to_llvm_context *ctx,
4416 struct nir_shader *nir)
4417 {
4418 unsigned index = 0;
4419 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
4420
4421 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4422 LLVMValueRef values[4];
4423
4424 if (!(ctx->output_mask & (1ull << i)))
4425 continue;
4426
4427 if (i == FRAG_RESULT_DEPTH) {
4428 ctx->shader_info->fs.writes_z = true;
4429 depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
4430 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4431 } else if (i == FRAG_RESULT_STENCIL) {
4432 ctx->shader_info->fs.writes_stencil = true;
4433 stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
4434 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4435 } else {
4436 bool last = false;
4437 for (unsigned j = 0; j < 4; j++)
4438 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4439 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4440
4441 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
4442 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
4443
4444 si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
4445 index++;
4446 }
4447 }
4448
4449 if (depth || stencil)
4450 si_export_mrt_z(ctx, depth, stencil, samplemask);
4451 else if (!index)
4452 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
4453
4454 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
4455 }
4456
4457 static void
4458 handle_shader_outputs_post(struct nir_to_llvm_context *ctx,
4459 struct nir_shader *nir)
4460 {
4461 switch (ctx->stage) {
4462 case MESA_SHADER_VERTEX:
4463 handle_vs_outputs_post(ctx, nir);
4464 break;
4465 case MESA_SHADER_FRAGMENT:
4466 handle_fs_outputs_post(ctx, nir);
4467 break;
4468 default:
4469 break;
4470 }
4471 }
4472
4473 static void
4474 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4475 struct nir_variable *variable, uint32_t *offset, int idx)
4476 {
4477 unsigned size = glsl_count_attribute_slots(variable->type, false);
4478 variable->data.driver_location = *offset;
4479 *offset += size;
4480 }
4481
4482 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
4483 {
4484 LLVMPassManagerRef passmgr;
4485 /* Create the pass manager */
4486 passmgr = LLVMCreateFunctionPassManagerForModule(
4487 ctx->module);
4488
4489 /* This pass should eliminate all the load and store instructions */
4490 LLVMAddPromoteMemoryToRegisterPass(passmgr);
4491
4492 /* Add some optimization passes */
4493 LLVMAddScalarReplAggregatesPass(passmgr);
4494 LLVMAddLICMPass(passmgr);
4495 LLVMAddAggressiveDCEPass(passmgr);
4496 LLVMAddCFGSimplificationPass(passmgr);
4497 LLVMAddInstructionCombiningPass(passmgr);
4498
4499 /* Run the pass */
4500 LLVMInitializeFunctionPassManager(passmgr);
4501 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
4502 LLVMFinalizeFunctionPassManager(passmgr);
4503
4504 LLVMDisposeBuilder(ctx->builder);
4505 LLVMDisposePassManager(passmgr);
4506 }
4507
4508 static
4509 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
4510 struct nir_shader *nir,
4511 struct ac_shader_variant_info *shader_info,
4512 const struct ac_nir_compiler_options *options)
4513 {
4514 struct nir_to_llvm_context ctx = {0};
4515 struct nir_function *func;
4516 ctx.options = options;
4517 ctx.shader_info = shader_info;
4518 ctx.context = LLVMContextCreate();
4519 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
4520
4521 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
4522
4523 memset(shader_info, 0, sizeof(*shader_info));
4524
4525 LLVMSetTarget(ctx.module, "amdgcn--");
4526 setup_types(&ctx);
4527
4528 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
4529 ctx.stage = nir->stage;
4530
4531 create_function(&ctx, nir);
4532
4533 if (nir->stage == MESA_SHADER_COMPUTE) {
4534 int num_shared = 0;
4535 nir_foreach_variable(variable, &nir->shared)
4536 num_shared++;
4537 if (num_shared) {
4538 int idx = 0;
4539 uint32_t shared_size = 0;
4540 LLVMValueRef var;
4541 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
4542 nir_foreach_variable(variable, &nir->shared) {
4543 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
4544 idx++;
4545 }
4546
4547 shared_size *= 4;
4548 var = LLVMAddGlobalInAddressSpace(ctx.module,
4549 LLVMArrayType(ctx.i8, shared_size),
4550 "compute_lds",
4551 LOCAL_ADDR_SPACE);
4552 LLVMSetAlignment(var, 4);
4553 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
4554 }
4555 }
4556
4557 nir_foreach_variable(variable, &nir->inputs)
4558 handle_shader_input_decl(&ctx, variable);
4559
4560 if (nir->stage == MESA_SHADER_FRAGMENT)
4561 handle_fs_inputs_pre(&ctx, nir);
4562
4563 nir_foreach_variable(variable, &nir->outputs)
4564 handle_shader_output_decl(&ctx, variable);
4565
4566 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4567 _mesa_key_pointer_equal);
4568 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4569 _mesa_key_pointer_equal);
4570
4571 func = (struct nir_function *)exec_list_get_head(&nir->functions);
4572
4573 setup_locals(&ctx, func);
4574
4575 visit_cf_list(&ctx, &func->impl->body);
4576 phi_post_pass(&ctx);
4577
4578 handle_shader_outputs_post(&ctx, nir);
4579 LLVMBuildRetVoid(ctx.builder);
4580
4581 ac_llvm_finalize_module(&ctx);
4582 free(ctx.locals);
4583 ralloc_free(ctx.defs);
4584 ralloc_free(ctx.phis);
4585
4586 return ctx.module;
4587 }
4588
4589 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
4590 {
4591 unsigned *retval = (unsigned *)context;
4592 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
4593 char *description = LLVMGetDiagInfoDescription(di);
4594
4595 if (severity == LLVMDSError) {
4596 *retval = 1;
4597 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
4598 description);
4599 }
4600
4601 LLVMDisposeMessage(description);
4602 }
4603
4604 static unsigned ac_llvm_compile(LLVMModuleRef M,
4605 struct ac_shader_binary *binary,
4606 LLVMTargetMachineRef tm)
4607 {
4608 unsigned retval = 0;
4609 char *err;
4610 LLVMContextRef llvm_ctx;
4611 LLVMMemoryBufferRef out_buffer;
4612 unsigned buffer_size;
4613 const char *buffer_data;
4614 LLVMBool mem_err;
4615
4616 /* Setup Diagnostic Handler*/
4617 llvm_ctx = LLVMGetModuleContext(M);
4618
4619 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
4620 &retval);
4621
4622 /* Compile IR*/
4623 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
4624 &err, &out_buffer);
4625
4626 /* Process Errors/Warnings */
4627 if (mem_err) {
4628 fprintf(stderr, "%s: %s", __FUNCTION__, err);
4629 free(err);
4630 retval = 1;
4631 goto out;
4632 }
4633
4634 /* Extract Shader Code*/
4635 buffer_size = LLVMGetBufferSize(out_buffer);
4636 buffer_data = LLVMGetBufferStart(out_buffer);
4637
4638 ac_elf_read(buffer_data, buffer_size, binary);
4639
4640 /* Clean up */
4641 LLVMDisposeMemoryBuffer(out_buffer);
4642
4643 out:
4644 return retval;
4645 }
4646
4647 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
4648 struct ac_shader_binary *binary,
4649 struct ac_shader_config *config,
4650 struct ac_shader_variant_info *shader_info,
4651 struct nir_shader *nir,
4652 const struct ac_nir_compiler_options *options,
4653 bool dump_shader)
4654 {
4655
4656 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
4657 options);
4658 if (dump_shader)
4659 LLVMDumpModule(llvm_module);
4660
4661 memset(binary, 0, sizeof(*binary));
4662 int v = ac_llvm_compile(llvm_module, binary, tm);
4663 if (v) {
4664 fprintf(stderr, "compile failed\n");
4665 }
4666
4667 if (dump_shader)
4668 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
4669
4670 ac_shader_binary_read_config(binary, config, 0);
4671
4672 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4673 LLVMDisposeModule(llvm_module);
4674 LLVMContextDispose(ctx);
4675
4676 if (nir->stage == MESA_SHADER_FRAGMENT) {
4677 shader_info->num_input_vgprs = 0;
4678 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
4679 shader_info->num_input_vgprs += 2;
4680 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
4681 shader_info->num_input_vgprs += 2;
4682 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
4683 shader_info->num_input_vgprs += 2;
4684 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
4685 shader_info->num_input_vgprs += 3;
4686 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
4687 shader_info->num_input_vgprs += 2;
4688 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
4689 shader_info->num_input_vgprs += 2;
4690 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
4691 shader_info->num_input_vgprs += 2;
4692 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
4693 shader_info->num_input_vgprs += 1;
4694 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
4695 shader_info->num_input_vgprs += 1;
4696 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
4697 shader_info->num_input_vgprs += 1;
4698 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
4699 shader_info->num_input_vgprs += 1;
4700 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
4701 shader_info->num_input_vgprs += 1;
4702 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
4703 shader_info->num_input_vgprs += 1;
4704 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
4705 shader_info->num_input_vgprs += 1;
4706 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
4707 shader_info->num_input_vgprs += 1;
4708 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
4709 shader_info->num_input_vgprs += 1;
4710 }
4711 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
4712
4713 /* +3 for scratch wave offset and VCC */
4714 config->num_sgprs = MAX2(config->num_sgprs,
4715 shader_info->num_input_sgprs + 3);
4716 if (nir->stage == MESA_SHADER_COMPUTE) {
4717 for (int i = 0; i < 3; ++i)
4718 shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
4719 }
4720
4721 if (nir->stage == MESA_SHADER_FRAGMENT)
4722 shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
4723 }