ac/nir: use ac_emit_llvm_intrinsic throughout
[mesa.git] / src / amd / common / ac_nir_to_llvm.c
1 /*
2 * Copyright © 2016 Bas Nieuwenhuizen
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include "ac_nir_to_llvm.h"
25 #include "ac_llvm_util.h"
26 #include "ac_binary.h"
27 #include "sid.h"
28 #include "nir/nir.h"
29 #include "../vulkan/radv_descriptor_set.h"
30 #include "util/bitscan.h"
31 #include <llvm-c/Transforms/Scalar.h>
32
/* Numeric calling-convention IDs handed to LLVMSetFunctionCallConv(); one
 * value per shader kind selected in set_llvm_calling_convention(). */
enum radeon_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};
39
/* Address-space numbers used when building LLVM pointer types; const_array()
 * uses CONST_ADDR_SPACE. */
#define CONST_ADDR_SPACE 2
#define LOCAL_ADDR_SPACE 3

/* Number of input/output slots tracked per shader; the context keeps four
 * LLVM values per slot (see the inputs[] and outputs[] arrays). */
#define RADEON_LLVM_MAX_INPUTS (VARYING_SLOT_VAR31 + 1)
#define RADEON_LLVM_MAX_OUTPUTS (VARYING_SLOT_VAR31 + 1)
45
/* Kind of descriptor requested from get_sampler_desc(). */
enum desc_type {
	DESC_IMAGE,
	DESC_FMASK,
	DESC_SAMPLER,
	DESC_BUFFER,
};
52
/* Translation state threaded through every helper in this file. */
struct nir_to_llvm_context {
	/* Shared helper context; passed to ac_emit_llvm_intrinsic(). */
	struct ac_llvm_context ac;
	const struct ac_nir_compiler_options *options;
	struct ac_shader_variant_info *shader_info;

	/* LLVM objects the shader is built into. */
	LLVMContextRef context;
	LLVMModuleRef module;
	LLVMBuilderRef builder;
	LLVMValueRef main_function;

	/* get_src() and get_block() look their results up in defs. */
	struct hash_table *defs;
	struct hash_table *phis;

	/* Parameters of main_function, bound in create_function(). */
	LLVMValueRef descriptor_sets[AC_UD_MAX_SETS];
	LLVMValueRef push_constants;
	/* Bound for MESA_SHADER_COMPUTE only. */
	LLVMValueRef num_work_groups;
	LLVMValueRef workgroup_ids;
	LLVMValueRef local_invocation_ids;
	LLVMValueRef tg_size;

	/* Bound for MESA_SHADER_VERTEX only. */
	LLVMValueRef vertex_buffers;
	LLVMValueRef base_vertex;
	LLVMValueRef start_instance;
	LLVMValueRef vertex_id;
	LLVMValueRef rel_auto_id;
	LLVMValueRef vs_prim_id;
	LLVMValueRef instance_id;

	/* Bound for MESA_SHADER_FRAGMENT only. */
	LLVMValueRef prim_mask;
	LLVMValueRef sample_positions;
	LLVMValueRef persp_sample, persp_center, persp_centroid;
	LLVMValueRef linear_sample, linear_center, linear_centroid;
	LLVMValueRef front_face;
	LLVMValueRef ancillary;
	LLVMValueRef frag_pos[4];

	LLVMBasicBlockRef continue_block;
	LLVMBasicBlockRef break_block;

	/* Cached LLVM types, filled in by setup_types(). */
	LLVMTypeRef i1;
	LLVMTypeRef i8;
	LLVMTypeRef i16;
	LLVMTypeRef i32;
	LLVMTypeRef i64;
	LLVMTypeRef v2i32;
	LLVMTypeRef v3i32;
	LLVMTypeRef v4i32;
	LLVMTypeRef v8i32;
	LLVMTypeRef f32;
	LLVMTypeRef f16;
	LLVMTypeRef v2f32;
	LLVMTypeRef v4f32;
	LLVMTypeRef v16i8;
	LLVMTypeRef voidt;

	/* Cached constants, filled in by setup_types(); v4f32empty is the
	 * vector (0, 0, 0, 1). */
	LLVMValueRef i32zero;
	LLVMValueRef i32one;
	LLVMValueRef f32zero;
	LLVMValueRef f32one;
	LLVMValueRef v4f32empty;

	/* Metadata kind IDs and nodes, filled in by setup_types(). */
	unsigned range_md_kind;
	unsigned uniform_md_kind;
	unsigned fpmath_md_kind;
	unsigned invariant_load_md_kind;
	LLVMValueRef empty_md;
	LLVMValueRef fpmath_md_2p5_ulp;
	/* Shader stage being compiled; selects the parameter layout and the
	 * calling convention. */
	gl_shader_stage stage;

	LLVMValueRef lds;
	/* Four entries per slot. */
	LLVMValueRef inputs[RADEON_LLVM_MAX_INPUTS * 4];
	LLVMValueRef outputs[RADEON_LLVM_MAX_OUTPUTS * 4];

	/* Base pointer indexed by get_shared_memory_ptr(). */
	LLVMValueRef shared_memory;
	uint64_t input_mask;
	uint64_t output_mask;
	int num_locals;
	LLVMValueRef *locals;
	bool has_ddxy;
	unsigned num_clips;
	unsigned num_culls;

	bool has_ds_bpermute;
};
137
/* NOTE(review): no user of this struct is visible in this part of the file;
 * judging by the field names it bundles the arguments and result type of a
 * texture operation -- confirm against the code that fills it in. */
struct ac_tex_info {
	LLVMValueRef args[12];
	int arg_count;
	LLVMTypeRef dst_type;
	bool has_offset;
};
144
/* Forward declaration; the definition appears later in this file. */
static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
				     nir_deref_var *deref,
				     enum desc_type desc_type);
/* Flatten an (index, channel) pair into one slot number, with four channel
 * slots reserved per index. */
static unsigned radeon_llvm_reg_index_soa(unsigned index, unsigned chan)
{
	const unsigned channels_per_index = 4;

	return chan + index * channels_per_index;
}
152
153 static unsigned llvm_get_type_size(LLVMTypeRef type)
154 {
155 LLVMTypeKind kind = LLVMGetTypeKind(type);
156
157 switch (kind) {
158 case LLVMIntegerTypeKind:
159 return LLVMGetIntTypeWidth(type) / 8;
160 case LLVMFloatTypeKind:
161 return 4;
162 case LLVMPointerTypeKind:
163 return 8;
164 case LLVMVectorTypeKind:
165 return LLVMGetVectorSize(type) *
166 llvm_get_type_size(LLVMGetElementType(type));
167 default:
168 assert(0);
169 return 0;
170 }
171 }
172
173 static void set_llvm_calling_convention(LLVMValueRef func,
174 gl_shader_stage stage)
175 {
176 enum radeon_llvm_calling_convention calling_conv;
177
178 switch (stage) {
179 case MESA_SHADER_VERTEX:
180 case MESA_SHADER_TESS_CTRL:
181 case MESA_SHADER_TESS_EVAL:
182 calling_conv = RADEON_LLVM_AMDGPU_VS;
183 break;
184 case MESA_SHADER_GEOMETRY:
185 calling_conv = RADEON_LLVM_AMDGPU_GS;
186 break;
187 case MESA_SHADER_FRAGMENT:
188 calling_conv = RADEON_LLVM_AMDGPU_PS;
189 break;
190 case MESA_SHADER_COMPUTE:
191 calling_conv = RADEON_LLVM_AMDGPU_CS;
192 break;
193 default:
194 unreachable("Unhandle shader type");
195 }
196
197 LLVMSetFunctionCallConv(func, calling_conv);
198 }
199
200 static LLVMValueRef
201 create_llvm_function(LLVMContextRef ctx, LLVMModuleRef module,
202 LLVMBuilderRef builder, LLVMTypeRef *return_types,
203 unsigned num_return_elems, LLVMTypeRef *param_types,
204 unsigned param_count, unsigned array_params_mask,
205 unsigned sgpr_params, bool unsafe_math)
206 {
207 LLVMTypeRef main_function_type, ret_type;
208 LLVMBasicBlockRef main_function_body;
209
210 if (num_return_elems)
211 ret_type = LLVMStructTypeInContext(ctx, return_types,
212 num_return_elems, true);
213 else
214 ret_type = LLVMVoidTypeInContext(ctx);
215
216 /* Setup the function */
217 main_function_type =
218 LLVMFunctionType(ret_type, param_types, param_count, 0);
219 LLVMValueRef main_function =
220 LLVMAddFunction(module, "main", main_function_type);
221 main_function_body =
222 LLVMAppendBasicBlockInContext(ctx, main_function, "main_body");
223 LLVMPositionBuilderAtEnd(builder, main_function_body);
224
225 LLVMSetFunctionCallConv(main_function, RADEON_LLVM_AMDGPU_CS);
226 for (unsigned i = 0; i < sgpr_params; ++i) {
227 if (array_params_mask & (1 << i)) {
228 LLVMValueRef P = LLVMGetParam(main_function, i);
229 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_BYVAL);
230 ac_add_attr_dereferenceable(P, UINT64_MAX);
231 }
232 else {
233 ac_add_function_attr(main_function, i + 1, AC_FUNC_ATTR_INREG);
234 }
235 }
236
237 if (unsafe_math) {
238 /* These were copied from some LLVM test. */
239 LLVMAddTargetDependentFunctionAttr(main_function,
240 "less-precise-fpmad",
241 "true");
242 LLVMAddTargetDependentFunctionAttr(main_function,
243 "no-infs-fp-math",
244 "true");
245 LLVMAddTargetDependentFunctionAttr(main_function,
246 "no-nans-fp-math",
247 "true");
248 LLVMAddTargetDependentFunctionAttr(main_function,
249 "unsafe-fp-math",
250 "true");
251 }
252 return main_function;
253 }
254
255 static LLVMTypeRef const_array(LLVMTypeRef elem_type, int num_elements)
256 {
257 return LLVMPointerType(LLVMArrayType(elem_type, num_elements),
258 CONST_ADDR_SPACE);
259 }
260
261 static LLVMValueRef get_shared_memory_ptr(struct nir_to_llvm_context *ctx,
262 int idx,
263 LLVMTypeRef type)
264 {
265 LLVMValueRef offset;
266 LLVMValueRef ptr;
267 int addr_space;
268
269 offset = LLVMConstInt(ctx->i32, idx, false);
270
271 ptr = ctx->shared_memory;
272 ptr = LLVMBuildGEP(ctx->builder, ptr, &offset, 1, "");
273 addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
274 ptr = LLVMBuildBitCast(ctx->builder, ptr, LLVMPointerType(type, addr_space), "");
275 return ptr;
276 }
277
278 static LLVMValueRef to_integer(struct nir_to_llvm_context *ctx, LLVMValueRef v)
279 {
280 LLVMTypeRef type = LLVMTypeOf(v);
281 if (type == ctx->f32) {
282 return LLVMBuildBitCast(ctx->builder, v, ctx->i32, "");
283 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
284 LLVMTypeRef elem_type = LLVMGetElementType(type);
285 if (elem_type == ctx->f32) {
286 LLVMTypeRef nt = LLVMVectorType(ctx->i32, LLVMGetVectorSize(type));
287 return LLVMBuildBitCast(ctx->builder, v, nt, "");
288 }
289 }
290 return v;
291 }
292
293 static LLVMValueRef to_float(struct nir_to_llvm_context *ctx, LLVMValueRef v)
294 {
295 LLVMTypeRef type = LLVMTypeOf(v);
296 if (type == ctx->i32) {
297 return LLVMBuildBitCast(ctx->builder, v, ctx->f32, "");
298 } else if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
299 LLVMTypeRef elem_type = LLVMGetElementType(type);
300 if (elem_type == ctx->i32) {
301 LLVMTypeRef nt = LLVMVectorType(ctx->f32, LLVMGetVectorSize(type));
302 return LLVMBuildBitCast(ctx->builder, v, nt, "");
303 }
304 }
305 return v;
306 }
307
308 static LLVMValueRef unpack_param(struct nir_to_llvm_context *ctx,
309 LLVMValueRef param, unsigned rshift,
310 unsigned bitwidth)
311 {
312 LLVMValueRef value = param;
313 if (rshift)
314 value = LLVMBuildLShr(ctx->builder, value,
315 LLVMConstInt(ctx->i32, rshift, false), "");
316
317 if (rshift + bitwidth < 32) {
318 unsigned mask = (1 << bitwidth) - 1;
319 value = LLVMBuildAnd(ctx->builder, value,
320 LLVMConstInt(ctx->i32, mask, false), "");
321 }
322 return value;
323 }
324
325 static LLVMValueRef build_gep0(struct nir_to_llvm_context *ctx,
326 LLVMValueRef base_ptr, LLVMValueRef index)
327 {
328 LLVMValueRef indices[2] = {
329 ctx->i32zero,
330 index,
331 };
332 return LLVMBuildGEP(ctx->builder, base_ptr,
333 indices, 2, "");
334 }
335
336 static LLVMValueRef build_indexed_load(struct nir_to_llvm_context *ctx,
337 LLVMValueRef base_ptr, LLVMValueRef index,
338 bool uniform)
339 {
340 LLVMValueRef pointer;
341 pointer = build_gep0(ctx, base_ptr, index);
342 if (uniform)
343 LLVMSetMetadata(pointer, ctx->uniform_md_kind, ctx->empty_md);
344 return LLVMBuildLoad(ctx->builder, pointer, "");
345 }
346
347 static LLVMValueRef build_indexed_load_const(struct nir_to_llvm_context *ctx,
348 LLVMValueRef base_ptr, LLVMValueRef index)
349 {
350 LLVMValueRef result = build_indexed_load(ctx, base_ptr, index, true);
351 LLVMSetMetadata(result, ctx->invariant_load_md_kind, ctx->empty_md);
352 return result;
353 }
354
355 static void set_userdata_location(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs)
356 {
357 ud_info->sgpr_idx = sgpr_idx;
358 ud_info->num_sgprs = num_sgprs;
359 ud_info->indirect = false;
360 ud_info->indirect_offset = 0;
361 }
362
363 static void set_userdata_location_shader(struct nir_to_llvm_context *ctx,
364 int idx, uint8_t sgpr_idx, uint8_t num_sgprs)
365 {
366 set_userdata_location(&ctx->shader_info->user_sgprs_locs.shader_data[idx], sgpr_idx, num_sgprs);
367 }
368
/* Compiled out: variant of set_userdata_location() that marks the entry as
 * indirect and stores the given indirect offset. */
#if 0
static void set_userdata_location_indirect(struct ac_userdata_info *ud_info, uint8_t sgpr_idx, uint8_t num_sgprs,
					   uint32_t indirect_offset)
{
	ud_info->sgpr_idx = sgpr_idx;
	ud_info->num_sgprs = num_sgprs;
	ud_info->indirect = true;
	ud_info->indirect_offset = indirect_offset;
}
#endif
379
380 static void create_function(struct nir_to_llvm_context *ctx)
381 {
382 LLVMTypeRef arg_types[23];
383 unsigned arg_idx = 0;
384 unsigned array_params_mask = 0;
385 unsigned sgpr_count = 0, user_sgpr_count;
386 unsigned i;
387 unsigned num_sets = ctx->options->layout ? ctx->options->layout->num_sets : 0;
388 unsigned user_sgpr_idx;
389 bool need_push_constants;
390
391 need_push_constants = true;
392 if (!ctx->options->layout)
393 need_push_constants = false;
394 else if (!ctx->options->layout->push_constant_size &&
395 !ctx->options->layout->dynamic_offset_count)
396 need_push_constants = false;
397
398 /* 1 for each descriptor set */
399 for (unsigned i = 0; i < num_sets; ++i) {
400 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
401 array_params_mask |= (1 << arg_idx);
402 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
403 }
404 }
405
406 if (need_push_constants) {
407 /* 1 for push constants and dynamic descriptors */
408 array_params_mask |= (1 << arg_idx);
409 arg_types[arg_idx++] = const_array(ctx->i8, 1024 * 1024);
410 }
411
412 switch (ctx->stage) {
413 case MESA_SHADER_COMPUTE:
414 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3); /* grid size */
415 user_sgpr_count = arg_idx;
416 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
417 arg_types[arg_idx++] = ctx->i32;
418 sgpr_count = arg_idx;
419
420 arg_types[arg_idx++] = LLVMVectorType(ctx->i32, 3);
421 break;
422 case MESA_SHADER_VERTEX:
423 arg_types[arg_idx++] = const_array(ctx->v16i8, 16); /* vertex buffers */
424 arg_types[arg_idx++] = ctx->i32; // base vertex
425 arg_types[arg_idx++] = ctx->i32; // start instance
426 user_sgpr_count = sgpr_count = arg_idx;
427 arg_types[arg_idx++] = ctx->i32; // vertex id
428 arg_types[arg_idx++] = ctx->i32; // rel auto id
429 arg_types[arg_idx++] = ctx->i32; // vs prim id
430 arg_types[arg_idx++] = ctx->i32; // instance id
431 break;
432 case MESA_SHADER_FRAGMENT:
433 arg_types[arg_idx++] = const_array(ctx->f32, 32); /* sample positions */
434 user_sgpr_count = arg_idx;
435 arg_types[arg_idx++] = ctx->i32; /* prim mask */
436 sgpr_count = arg_idx;
437 arg_types[arg_idx++] = ctx->v2i32; /* persp sample */
438 arg_types[arg_idx++] = ctx->v2i32; /* persp center */
439 arg_types[arg_idx++] = ctx->v2i32; /* persp centroid */
440 arg_types[arg_idx++] = ctx->v3i32; /* persp pull model */
441 arg_types[arg_idx++] = ctx->v2i32; /* linear sample */
442 arg_types[arg_idx++] = ctx->v2i32; /* linear center */
443 arg_types[arg_idx++] = ctx->v2i32; /* linear centroid */
444 arg_types[arg_idx++] = ctx->f32; /* line stipple tex */
445 arg_types[arg_idx++] = ctx->f32; /* pos x float */
446 arg_types[arg_idx++] = ctx->f32; /* pos y float */
447 arg_types[arg_idx++] = ctx->f32; /* pos z float */
448 arg_types[arg_idx++] = ctx->f32; /* pos w float */
449 arg_types[arg_idx++] = ctx->i32; /* front face */
450 arg_types[arg_idx++] = ctx->i32; /* ancillary */
451 arg_types[arg_idx++] = ctx->f32; /* sample coverage */
452 arg_types[arg_idx++] = ctx->i32; /* fixed pt */
453 break;
454 default:
455 unreachable("Shader stage not implemented");
456 }
457
458 ctx->main_function = create_llvm_function(
459 ctx->context, ctx->module, ctx->builder, NULL, 0, arg_types,
460 arg_idx, array_params_mask, sgpr_count, ctx->options->unsafe_math);
461 set_llvm_calling_convention(ctx->main_function, ctx->stage);
462
463
464 ctx->shader_info->num_input_sgprs = 0;
465 ctx->shader_info->num_input_vgprs = 0;
466
467 for (i = 0; i < user_sgpr_count; i++)
468 ctx->shader_info->num_user_sgprs += llvm_get_type_size(arg_types[i]) / 4;
469
470 ctx->shader_info->num_input_sgprs = ctx->shader_info->num_user_sgprs;
471 for (; i < sgpr_count; i++)
472 ctx->shader_info->num_input_sgprs += llvm_get_type_size(arg_types[i]) / 4;
473
474 if (ctx->stage != MESA_SHADER_FRAGMENT)
475 for (; i < arg_idx; ++i)
476 ctx->shader_info->num_input_vgprs += llvm_get_type_size(arg_types[i]) / 4;
477
478 arg_idx = 0;
479 user_sgpr_idx = 0;
480 for (unsigned i = 0; i < num_sets; ++i) {
481 if (ctx->options->layout->set[i].layout->shader_stages & (1 << ctx->stage)) {
482 set_userdata_location(&ctx->shader_info->user_sgprs_locs.descriptor_sets[i], user_sgpr_idx, 2);
483 user_sgpr_idx += 2;
484 ctx->descriptor_sets[i] =
485 LLVMGetParam(ctx->main_function, arg_idx++);
486 } else
487 ctx->descriptor_sets[i] = NULL;
488 }
489
490 if (need_push_constants) {
491 ctx->push_constants = LLVMGetParam(ctx->main_function, arg_idx++);
492 set_userdata_location_shader(ctx, AC_UD_PUSH_CONSTANTS, user_sgpr_idx, 2);
493 user_sgpr_idx += 2;
494 }
495
496 switch (ctx->stage) {
497 case MESA_SHADER_COMPUTE:
498 set_userdata_location_shader(ctx, AC_UD_CS_GRID_SIZE, user_sgpr_idx, 3);
499 user_sgpr_idx += 3;
500 ctx->num_work_groups =
501 LLVMGetParam(ctx->main_function, arg_idx++);
502 ctx->workgroup_ids =
503 LLVMGetParam(ctx->main_function, arg_idx++);
504 ctx->tg_size =
505 LLVMGetParam(ctx->main_function, arg_idx++);
506 ctx->local_invocation_ids =
507 LLVMGetParam(ctx->main_function, arg_idx++);
508 break;
509 case MESA_SHADER_VERTEX:
510 set_userdata_location_shader(ctx, AC_UD_VS_VERTEX_BUFFERS, user_sgpr_idx, 2);
511 user_sgpr_idx += 2;
512 ctx->vertex_buffers = LLVMGetParam(ctx->main_function, arg_idx++);
513 set_userdata_location_shader(ctx, AC_UD_VS_BASE_VERTEX_START_INSTANCE, user_sgpr_idx, 2);
514 user_sgpr_idx += 2;
515 ctx->base_vertex = LLVMGetParam(ctx->main_function, arg_idx++);
516 ctx->start_instance = LLVMGetParam(ctx->main_function, arg_idx++);
517 ctx->vertex_id = LLVMGetParam(ctx->main_function, arg_idx++);
518 ctx->rel_auto_id = LLVMGetParam(ctx->main_function, arg_idx++);
519 ctx->vs_prim_id = LLVMGetParam(ctx->main_function, arg_idx++);
520 ctx->instance_id = LLVMGetParam(ctx->main_function, arg_idx++);
521 break;
522 case MESA_SHADER_FRAGMENT:
523 set_userdata_location_shader(ctx, AC_UD_PS_SAMPLE_POS, user_sgpr_idx, 2);
524 user_sgpr_idx += 2;
525 ctx->sample_positions = LLVMGetParam(ctx->main_function, arg_idx++);
526 ctx->prim_mask = LLVMGetParam(ctx->main_function, arg_idx++);
527 ctx->persp_sample = LLVMGetParam(ctx->main_function, arg_idx++);
528 ctx->persp_center = LLVMGetParam(ctx->main_function, arg_idx++);
529 ctx->persp_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
530 arg_idx++;
531 ctx->linear_sample = LLVMGetParam(ctx->main_function, arg_idx++);
532 ctx->linear_center = LLVMGetParam(ctx->main_function, arg_idx++);
533 ctx->linear_centroid = LLVMGetParam(ctx->main_function, arg_idx++);
534 arg_idx++; /* line stipple */
535 ctx->frag_pos[0] = LLVMGetParam(ctx->main_function, arg_idx++);
536 ctx->frag_pos[1] = LLVMGetParam(ctx->main_function, arg_idx++);
537 ctx->frag_pos[2] = LLVMGetParam(ctx->main_function, arg_idx++);
538 ctx->frag_pos[3] = LLVMGetParam(ctx->main_function, arg_idx++);
539 ctx->front_face = LLVMGetParam(ctx->main_function, arg_idx++);
540 ctx->ancillary = LLVMGetParam(ctx->main_function, arg_idx++);
541 break;
542 default:
543 unreachable("Shader stage not implemented");
544 }
545 }
546
547 static void setup_types(struct nir_to_llvm_context *ctx)
548 {
549 LLVMValueRef args[4];
550
551 ctx->voidt = LLVMVoidTypeInContext(ctx->context);
552 ctx->i1 = LLVMIntTypeInContext(ctx->context, 1);
553 ctx->i8 = LLVMIntTypeInContext(ctx->context, 8);
554 ctx->i16 = LLVMIntTypeInContext(ctx->context, 16);
555 ctx->i32 = LLVMIntTypeInContext(ctx->context, 32);
556 ctx->i64 = LLVMIntTypeInContext(ctx->context, 64);
557 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
558 ctx->v3i32 = LLVMVectorType(ctx->i32, 3);
559 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
560 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
561 ctx->f32 = LLVMFloatTypeInContext(ctx->context);
562 ctx->f16 = LLVMHalfTypeInContext(ctx->context);
563 ctx->v2f32 = LLVMVectorType(ctx->f32, 2);
564 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
565 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
566
567 ctx->i32zero = LLVMConstInt(ctx->i32, 0, false);
568 ctx->i32one = LLVMConstInt(ctx->i32, 1, false);
569 ctx->f32zero = LLVMConstReal(ctx->f32, 0.0);
570 ctx->f32one = LLVMConstReal(ctx->f32, 1.0);
571
572 args[0] = ctx->f32zero;
573 args[1] = ctx->f32zero;
574 args[2] = ctx->f32zero;
575 args[3] = ctx->f32one;
576 ctx->v4f32empty = LLVMConstVector(args, 4);
577
578 ctx->range_md_kind = LLVMGetMDKindIDInContext(ctx->context,
579 "range", 5);
580 ctx->invariant_load_md_kind = LLVMGetMDKindIDInContext(ctx->context,
581 "invariant.load", 14);
582 ctx->uniform_md_kind =
583 LLVMGetMDKindIDInContext(ctx->context, "amdgpu.uniform", 14);
584 ctx->empty_md = LLVMMDNodeInContext(ctx->context, NULL, 0);
585
586 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->context, "fpmath", 6);
587
588 args[0] = LLVMConstReal(ctx->f32, 2.5);
589 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->context, args, 1);
590 }
591
592 static int get_llvm_num_components(LLVMValueRef value)
593 {
594 LLVMTypeRef type = LLVMTypeOf(value);
595 unsigned num_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
596 ? LLVMGetVectorSize(type)
597 : 1;
598 return num_components;
599 }
600
601 static LLVMValueRef llvm_extract_elem(struct nir_to_llvm_context *ctx,
602 LLVMValueRef value,
603 int index)
604 {
605 int count = get_llvm_num_components(value);
606
607 assert(index < count);
608 if (count == 1)
609 return value;
610
611 return LLVMBuildExtractElement(ctx->builder, value,
612 LLVMConstInt(ctx->i32, index, false), "");
613 }
614
615 static LLVMValueRef trim_vector(struct nir_to_llvm_context *ctx,
616 LLVMValueRef value, unsigned count)
617 {
618 unsigned num_components = get_llvm_num_components(value);
619 if (count == num_components)
620 return value;
621
622 LLVMValueRef masks[] = {
623 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
624 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false)};
625
626 if (count == 1)
627 return LLVMBuildExtractElement(ctx->builder, value, masks[0],
628 "");
629
630 LLVMValueRef swizzle = LLVMConstVector(masks, count);
631 return LLVMBuildShuffleVector(ctx->builder, value, value, swizzle, "");
632 }
633
634 static LLVMValueRef
635 build_gather_values_extended(struct nir_to_llvm_context *ctx,
636 LLVMValueRef *values,
637 unsigned value_count,
638 unsigned value_stride,
639 bool load)
640 {
641 LLVMBuilderRef builder = ctx->builder;
642 LLVMValueRef vec;
643 unsigned i;
644
645
646 if (value_count == 1) {
647 if (load)
648 return LLVMBuildLoad(builder, values[0], "");
649 return values[0];
650 } else if (!value_count)
651 unreachable("value_count is 0");
652
653 for (i = 0; i < value_count; i++) {
654 LLVMValueRef value = values[i * value_stride];
655 if (load)
656 value = LLVMBuildLoad(builder, value, "");
657
658 if (!i)
659 vec = LLVMGetUndef( LLVMVectorType(LLVMTypeOf(value), value_count));
660 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
661 vec = LLVMBuildInsertElement(builder, vec, value, index, "");
662 }
663 return vec;
664 }
665
666
667 static void
668 build_store_values_extended(struct nir_to_llvm_context *ctx,
669 LLVMValueRef *values,
670 unsigned value_count,
671 unsigned value_stride,
672 LLVMValueRef vec)
673 {
674 LLVMBuilderRef builder = ctx->builder;
675 unsigned i;
676
677 if (value_count == 1) {
678 LLVMBuildStore(builder, vec, values[0]);
679 return;
680 }
681
682 for (i = 0; i < value_count; i++) {
683 LLVMValueRef ptr = values[i * value_stride];
684 LLVMValueRef index = LLVMConstInt(ctx->i32, i, false);
685 LLVMValueRef value = LLVMBuildExtractElement(builder, vec, index, "");
686 LLVMBuildStore(builder, value, ptr);
687 }
688 }
689
690 static LLVMValueRef
691 build_gather_values(struct nir_to_llvm_context *ctx,
692 LLVMValueRef *values,
693 unsigned value_count)
694 {
695 return build_gather_values_extended(ctx, values, value_count, 1, false);
696 }
697
698 static LLVMTypeRef get_def_type(struct nir_to_llvm_context *ctx,
699 nir_ssa_def *def)
700 {
701 LLVMTypeRef type = LLVMIntTypeInContext(ctx->context, def->bit_size);
702 if (def->num_components > 1) {
703 type = LLVMVectorType(type, def->num_components);
704 }
705 return type;
706 }
707
708 static LLVMValueRef get_src(struct nir_to_llvm_context *ctx, nir_src src)
709 {
710 assert(src.is_ssa);
711 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, src.ssa);
712 return (LLVMValueRef)entry->data;
713 }
714
715
716 static LLVMBasicBlockRef get_block(struct nir_to_llvm_context *ctx,
717 struct nir_block *b)
718 {
719 struct hash_entry *entry = _mesa_hash_table_search(ctx->defs, b);
720 return (LLVMBasicBlockRef)entry->data;
721 }
722
723 static LLVMValueRef get_alu_src(struct nir_to_llvm_context *ctx,
724 nir_alu_src src,
725 unsigned num_components)
726 {
727 LLVMValueRef value = get_src(ctx, src.src);
728 bool need_swizzle = false;
729
730 assert(value);
731 LLVMTypeRef type = LLVMTypeOf(value);
732 unsigned src_components = LLVMGetTypeKind(type) == LLVMVectorTypeKind
733 ? LLVMGetVectorSize(type)
734 : 1;
735
736 for (unsigned i = 0; i < num_components; ++i) {
737 assert(src.swizzle[i] < src_components);
738 if (src.swizzle[i] != i)
739 need_swizzle = true;
740 }
741
742 if (need_swizzle || num_components != src_components) {
743 LLVMValueRef masks[] = {
744 LLVMConstInt(ctx->i32, src.swizzle[0], false),
745 LLVMConstInt(ctx->i32, src.swizzle[1], false),
746 LLVMConstInt(ctx->i32, src.swizzle[2], false),
747 LLVMConstInt(ctx->i32, src.swizzle[3], false)};
748
749 if (src_components > 1 && num_components == 1) {
750 value = LLVMBuildExtractElement(ctx->builder, value,
751 masks[0], "");
752 } else if (src_components == 1 && num_components > 1) {
753 LLVMValueRef values[] = {value, value, value, value};
754 value = build_gather_values(ctx, values, num_components);
755 } else {
756 LLVMValueRef swizzle = LLVMConstVector(masks, num_components);
757 value = LLVMBuildShuffleVector(ctx->builder, value, value,
758 swizzle, "");
759 }
760 }
761 assert(!src.negate);
762 assert(!src.abs);
763 return value;
764 }
765
766 static LLVMValueRef emit_int_cmp(struct nir_to_llvm_context *ctx,
767 LLVMIntPredicate pred, LLVMValueRef src0,
768 LLVMValueRef src1)
769 {
770 LLVMValueRef result = LLVMBuildICmp(ctx->builder, pred, src0, src1, "");
771 return LLVMBuildSelect(ctx->builder, result,
772 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
773 LLVMConstInt(ctx->i32, 0, false), "");
774 }
775
776 static LLVMValueRef emit_float_cmp(struct nir_to_llvm_context *ctx,
777 LLVMRealPredicate pred, LLVMValueRef src0,
778 LLVMValueRef src1)
779 {
780 LLVMValueRef result;
781 src0 = to_float(ctx, src0);
782 src1 = to_float(ctx, src1);
783 result = LLVMBuildFCmp(ctx->builder, pred, src0, src1, "");
784 return LLVMBuildSelect(ctx->builder, result,
785 LLVMConstInt(ctx->i32, 0xFFFFFFFF, false),
786 LLVMConstInt(ctx->i32, 0, false), "");
787 }
788
789 static LLVMValueRef emit_intrin_1f_param(struct nir_to_llvm_context *ctx,
790 const char *intrin,
791 LLVMValueRef src0)
792 {
793 LLVMValueRef params[] = {
794 to_float(ctx, src0),
795 };
796 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 1, AC_FUNC_ATTR_READNONE);
797 }
798
799 static LLVMValueRef emit_intrin_2f_param(struct nir_to_llvm_context *ctx,
800 const char *intrin,
801 LLVMValueRef src0, LLVMValueRef src1)
802 {
803 LLVMValueRef params[] = {
804 to_float(ctx, src0),
805 to_float(ctx, src1),
806 };
807 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 2, AC_FUNC_ATTR_READNONE);
808 }
809
810 static LLVMValueRef emit_intrin_3f_param(struct nir_to_llvm_context *ctx,
811 const char *intrin,
812 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
813 {
814 LLVMValueRef params[] = {
815 to_float(ctx, src0),
816 to_float(ctx, src1),
817 to_float(ctx, src2),
818 };
819 return ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->f32, params, 3, AC_FUNC_ATTR_READNONE);
820 }
821
822 static LLVMValueRef emit_bcsel(struct nir_to_llvm_context *ctx,
823 LLVMValueRef src0, LLVMValueRef src1, LLVMValueRef src2)
824 {
825 LLVMValueRef v = LLVMBuildICmp(ctx->builder, LLVMIntNE, src0,
826 ctx->i32zero, "");
827 return LLVMBuildSelect(ctx->builder, v, src1, src2, "");
828 }
829
830 static LLVMValueRef emit_find_lsb(struct nir_to_llvm_context *ctx,
831 LLVMValueRef src0)
832 {
833 LLVMValueRef params[2] = {
834 src0,
835
836 /* The value of 1 means that ffs(x=0) = undef, so LLVM won't
837 * add special code to check for x=0. The reason is that
838 * the LLVM behavior for x=0 is different from what we
839 * need here.
840 *
841 * The hardware already implements the correct behavior.
842 */
843 LLVMConstInt(ctx->i32, 1, false),
844 };
845 return ac_emit_llvm_intrinsic(&ctx->ac, "llvm.cttz.i32", ctx->i32, params, 2, AC_FUNC_ATTR_READNONE);
846 }
847
848 static LLVMValueRef emit_ifind_msb(struct nir_to_llvm_context *ctx,
849 LLVMValueRef src0)
850 {
851 LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.flbit.i32",
852 ctx->i32, &src0, 1,
853 AC_FUNC_ATTR_READNONE);
854
855 /* The HW returns the last bit index from MSB, but NIR wants
856 * the index from LSB. Invert it by doing "31 - msb". */
857 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
858 msb, "");
859
860 LLVMValueRef all_ones = LLVMConstInt(ctx->i32, -1, true);
861 LLVMValueRef cond = LLVMBuildOr(ctx->builder,
862 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
863 src0, ctx->i32zero, ""),
864 LLVMBuildICmp(ctx->builder, LLVMIntEQ,
865 src0, all_ones, ""), "");
866
867 return LLVMBuildSelect(ctx->builder, cond, all_ones, msb, "");
868 }
869
870 static LLVMValueRef emit_ufind_msb(struct nir_to_llvm_context *ctx,
871 LLVMValueRef src0)
872 {
873 LLVMValueRef args[2] = {
874 src0,
875 ctx->i32one,
876 };
877 LLVMValueRef msb = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctlz.i32",
878 ctx->i32, args, ARRAY_SIZE(args),
879 AC_FUNC_ATTR_READNONE);
880
881 /* The HW returns the last bit index from MSB, but NIR wants
882 * the index from LSB. Invert it by doing "31 - msb". */
883 msb = LLVMBuildSub(ctx->builder, LLVMConstInt(ctx->i32, 31, false),
884 msb, "");
885
886 return LLVMBuildSelect(ctx->builder,
887 LLVMBuildICmp(ctx->builder, LLVMIntEQ, src0,
888 ctx->i32zero, ""),
889 LLVMConstInt(ctx->i32, -1, true), msb, "");
890 }
891
892 static LLVMValueRef emit_minmax_int(struct nir_to_llvm_context *ctx,
893 LLVMIntPredicate pred,
894 LLVMValueRef src0, LLVMValueRef src1)
895 {
896 return LLVMBuildSelect(ctx->builder,
897 LLVMBuildICmp(ctx->builder, pred, src0, src1, ""),
898 src0,
899 src1, "");
900
901 }
902 static LLVMValueRef emit_iabs(struct nir_to_llvm_context *ctx,
903 LLVMValueRef src0)
904 {
905 return emit_minmax_int(ctx, LLVMIntSGT, src0,
906 LLVMBuildNeg(ctx->builder, src0, ""));
907 }
908
909 static LLVMValueRef emit_fsign(struct nir_to_llvm_context *ctx,
910 LLVMValueRef src0)
911 {
912 LLVMValueRef cmp, val;
913
914 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGT, src0, ctx->f32zero, "");
915 val = LLVMBuildSelect(ctx->builder, cmp, ctx->f32one, src0, "");
916 cmp = LLVMBuildFCmp(ctx->builder, LLVMRealOGE, val, ctx->f32zero, "");
917 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstReal(ctx->f32, -1.0), "");
918 return val;
919 }
920
921 static LLVMValueRef emit_isign(struct nir_to_llvm_context *ctx,
922 LLVMValueRef src0)
923 {
924 LLVMValueRef cmp, val;
925
926 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGT, src0, ctx->i32zero, "");
927 val = LLVMBuildSelect(ctx->builder, cmp, ctx->i32one, src0, "");
928 cmp = LLVMBuildICmp(ctx->builder, LLVMIntSGE, val, ctx->i32zero, "");
929 val = LLVMBuildSelect(ctx->builder, cmp, val, LLVMConstInt(ctx->i32, -1, true), "");
930 return val;
931 }
932
933 static LLVMValueRef emit_ffract(struct nir_to_llvm_context *ctx,
934 LLVMValueRef src0)
935 {
936 const char *intr = "llvm.floor.f32";
937 LLVMValueRef fsrc0 = to_float(ctx, src0);
938 LLVMValueRef params[] = {
939 fsrc0,
940 };
941 LLVMValueRef floor = ac_emit_llvm_intrinsic(&ctx->ac, intr,
942 ctx->f32, params, 1,
943 AC_FUNC_ATTR_READNONE);
944 return LLVMBuildFSub(ctx->builder, fsrc0, floor, "");
945 }
946
947 static LLVMValueRef emit_uint_carry(struct nir_to_llvm_context *ctx,
948 const char *intrin,
949 LLVMValueRef src0, LLVMValueRef src1)
950 {
951 LLVMTypeRef ret_type;
952 LLVMTypeRef types[] = { ctx->i32, ctx->i1 };
953 LLVMValueRef res;
954 LLVMValueRef params[] = { src0, src1 };
955 ret_type = LLVMStructTypeInContext(ctx->context, types,
956 2, true);
957
958 res = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ret_type,
959 params, 2, AC_FUNC_ATTR_READNONE);
960
961 res = LLVMBuildExtractValue(ctx->builder, res, 1, "");
962 res = LLVMBuildZExt(ctx->builder, res, ctx->i32, "");
963 return res;
964 }
965
966 static LLVMValueRef emit_b2f(struct nir_to_llvm_context *ctx,
967 LLVMValueRef src0)
968 {
969 return LLVMBuildAnd(ctx->builder, src0, LLVMBuildBitCast(ctx->builder, LLVMConstReal(ctx->f32, 1.0), ctx->i32, ""), "");
970 }
971
972 static LLVMValueRef emit_umul_high(struct nir_to_llvm_context *ctx,
973 LLVMValueRef src0, LLVMValueRef src1)
974 {
975 LLVMValueRef dst64, result;
976 src0 = LLVMBuildZExt(ctx->builder, src0, ctx->i64, "");
977 src1 = LLVMBuildZExt(ctx->builder, src1, ctx->i64, "");
978
979 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
980 dst64 = LLVMBuildLShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
981 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
982 return result;
983 }
984
985 static LLVMValueRef emit_imul_high(struct nir_to_llvm_context *ctx,
986 LLVMValueRef src0, LLVMValueRef src1)
987 {
988 LLVMValueRef dst64, result;
989 src0 = LLVMBuildSExt(ctx->builder, src0, ctx->i64, "");
990 src1 = LLVMBuildSExt(ctx->builder, src1, ctx->i64, "");
991
992 dst64 = LLVMBuildMul(ctx->builder, src0, src1, "");
993 dst64 = LLVMBuildAShr(ctx->builder, dst64, LLVMConstInt(ctx->i64, 32, false), "");
994 result = LLVMBuildTrunc(ctx->builder, dst64, ctx->i32, "");
995 return result;
996 }
997
998 static LLVMValueRef emit_bitfield_extract(struct nir_to_llvm_context *ctx,
999 const char *intrin,
1000 LLVMValueRef srcs[3])
1001 {
1002 LLVMValueRef result;
1003 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, srcs[2], LLVMConstInt(ctx->i32, 32, false), "");
1004 result = ac_emit_llvm_intrinsic(&ctx->ac, intrin, ctx->i32, srcs, 3, AC_FUNC_ATTR_READNONE);
1005
1006 result = LLVMBuildSelect(ctx->builder, icond, srcs[0], result, "");
1007 return result;
1008 }
1009
1010 static LLVMValueRef emit_bitfield_insert(struct nir_to_llvm_context *ctx,
1011 LLVMValueRef src0, LLVMValueRef src1,
1012 LLVMValueRef src2, LLVMValueRef src3)
1013 {
1014 LLVMValueRef bfi_args[3], result;
1015
1016 bfi_args[0] = LLVMBuildShl(ctx->builder,
1017 LLVMBuildSub(ctx->builder,
1018 LLVMBuildShl(ctx->builder,
1019 ctx->i32one,
1020 src3, ""),
1021 ctx->i32one, ""),
1022 src2, "");
1023 bfi_args[1] = LLVMBuildShl(ctx->builder, src1, src2, "");
1024 bfi_args[2] = src0;
1025
1026 LLVMValueRef icond = LLVMBuildICmp(ctx->builder, LLVMIntEQ, src3, LLVMConstInt(ctx->i32, 32, false), "");
1027
1028 /* Calculate:
1029 * (arg0 & arg1) | (~arg0 & arg2) = arg2 ^ (arg0 & (arg1 ^ arg2)
1030 * Use the right-hand side, which the LLVM backend can convert to V_BFI.
1031 */
1032 result = LLVMBuildXor(ctx->builder, bfi_args[2],
1033 LLVMBuildAnd(ctx->builder, bfi_args[0],
1034 LLVMBuildXor(ctx->builder, bfi_args[1], bfi_args[2], ""), ""), "");
1035
1036 result = LLVMBuildSelect(ctx->builder, icond, src1, result, "");
1037 return result;
1038 }
1039
1040 static LLVMValueRef emit_pack_half_2x16(struct nir_to_llvm_context *ctx,
1041 LLVMValueRef src0)
1042 {
1043 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1044 int i;
1045 LLVMValueRef comp[2];
1046
1047 src0 = to_float(ctx, src0);
1048 comp[0] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, "");
1049 comp[1] = LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, "");
1050 for (i = 0; i < 2; i++) {
1051 comp[i] = LLVMBuildFPTrunc(ctx->builder, comp[i], ctx->f16, "");
1052 comp[i] = LLVMBuildBitCast(ctx->builder, comp[i], ctx->i16, "");
1053 comp[i] = LLVMBuildZExt(ctx->builder, comp[i], ctx->i32, "");
1054 }
1055
1056 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
1057 comp[0] = LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
1058
1059 return comp[0];
1060 }
1061
1062 static LLVMValueRef emit_unpack_half_2x16(struct nir_to_llvm_context *ctx,
1063 LLVMValueRef src0)
1064 {
1065 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
1066 LLVMValueRef temps[2], result, val;
1067 int i;
1068
1069 for (i = 0; i < 2; i++) {
1070 val = i == 1 ? LLVMBuildLShr(ctx->builder, src0, const16, "") : src0;
1071 val = LLVMBuildTrunc(ctx->builder, val, ctx->i16, "");
1072 val = LLVMBuildBitCast(ctx->builder, val, ctx->f16, "");
1073 temps[i] = LLVMBuildFPExt(ctx->builder, val, ctx->f32, "");
1074 }
1075
1076 result = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), temps[0],
1077 ctx->i32zero, "");
1078 result = LLVMBuildInsertElement(ctx->builder, result, temps[1],
1079 ctx->i32one, "");
1080 return result;
1081 }
1082
1083 /**
1084 * Set range metadata on an instruction. This can only be used on load and
1085 * call instructions. If you know an instruction can only produce the values
1086 * 0, 1, 2, you would do set_range_metadata(value, 0, 3);
1087 * \p lo is the minimum value inclusive.
1088 * \p hi is the maximum value exclusive.
1089 */
1090 static void set_range_metadata(struct nir_to_llvm_context *ctx,
1091 LLVMValueRef value, unsigned lo, unsigned hi)
1092 {
1093 LLVMValueRef range_md, md_args[2];
1094 LLVMTypeRef type = LLVMTypeOf(value);
1095 LLVMContextRef context = LLVMGetTypeContext(type);
1096
1097 md_args[0] = LLVMConstInt(type, lo, false);
1098 md_args[1] = LLVMConstInt(type, hi, false);
1099 range_md = LLVMMDNodeInContext(context, md_args, 2);
1100 LLVMSetMetadata(value, ctx->range_md_kind, range_md);
1101 }
1102
1103 static LLVMValueRef get_thread_id(struct nir_to_llvm_context *ctx)
1104 {
1105 LLVMValueRef tid;
1106 LLVMValueRef tid_args[2];
1107 tid_args[0] = LLVMConstInt(ctx->i32, 0xffffffff, false);
1108 tid_args[1] = ctx->i32zero;
1109 tid_args[1] = ac_emit_llvm_intrinsic(&ctx->ac,
1110 "llvm.amdgcn.mbcnt.lo", ctx->i32,
1111 tid_args, 2, AC_FUNC_ATTR_READNONE);
1112
1113 tid = ac_emit_llvm_intrinsic(&ctx->ac,
1114 "llvm.amdgcn.mbcnt.hi", ctx->i32,
1115 tid_args, 2, AC_FUNC_ATTR_READNONE);
1116 set_range_metadata(ctx, tid, 0, 64);
1117 return tid;
1118 }
1119
1120 /*
1121 * SI implements derivatives using the local data store (LDS)
1122 * All writes to the LDS happen in all executing threads at
1123 * the same time. TID is the Thread ID for the current
1124 * thread and is a value between 0 and 63, representing
1125 * the thread's position in the wavefront.
1126 *
1127 * For the pixel shader threads are grouped into quads of four pixels.
1128 * The TIDs of the pixels of a quad are:
1129 *
1130 * +------+------+
1131 * |4n + 0|4n + 1|
1132 * +------+------+
1133 * |4n + 2|4n + 3|
1134 * +------+------+
1135 *
1136 * So, masking the TID with 0xfffffffc yields the TID of the top left pixel
1137 * of the quad, masking with 0xfffffffd yields the TID of the top pixel of
1138 * the current pixel's column, and masking with 0xfffffffe yields the TID
1139 * of the left pixel of the current pixel's row.
1140 *
1141 * Adding 1 yields the TID of the pixel to the right of the left pixel, and
1142 * adding 2 yields the TID of the pixel below the top pixel.
1143 */
1144 /* masks for thread ID. */
1145 #define TID_MASK_TOP_LEFT 0xfffffffc
1146 #define TID_MASK_TOP 0xfffffffd
1147 #define TID_MASK_LEFT 0xfffffffe
1148 static LLVMValueRef emit_ddxy(struct nir_to_llvm_context *ctx,
1149 nir_op op,
1150 LLVMValueRef src0)
1151 {
1152 LLVMValueRef tl, trbl, result;
1153 LLVMValueRef tl_tid, trbl_tid;
1154 LLVMValueRef args[2];
1155 LLVMValueRef thread_id;
1156 unsigned mask;
1157 int idx;
1158 ctx->has_ddxy = true;
1159
1160 if (!ctx->lds && !ctx->has_ds_bpermute)
1161 ctx->lds = LLVMAddGlobalInAddressSpace(ctx->module,
1162 LLVMArrayType(ctx->i32, 64),
1163 "ddxy_lds", LOCAL_ADDR_SPACE);
1164
1165 thread_id = get_thread_id(ctx);
1166 if (op == nir_op_fddx_fine || op == nir_op_fddx)
1167 mask = TID_MASK_LEFT;
1168 else if (op == nir_op_fddy_fine || op == nir_op_fddy)
1169 mask = TID_MASK_TOP;
1170 else
1171 mask = TID_MASK_TOP_LEFT;
1172
1173 tl_tid = LLVMBuildAnd(ctx->builder, thread_id,
1174 LLVMConstInt(ctx->i32, mask, false), "");
1175 /* for DDX we want to next X pixel, DDY next Y pixel. */
1176 if (op == nir_op_fddx_fine ||
1177 op == nir_op_fddx_coarse ||
1178 op == nir_op_fddx)
1179 idx = 1;
1180 else
1181 idx = 2;
1182
1183 trbl_tid = LLVMBuildAdd(ctx->builder, tl_tid,
1184 LLVMConstInt(ctx->i32, idx, false), "");
1185
1186 if (ctx->has_ds_bpermute) {
1187 args[0] = LLVMBuildMul(ctx->builder, tl_tid,
1188 LLVMConstInt(ctx->i32, 4, false), "");
1189 args[1] = src0;
1190 tl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1191 ctx->i32, args, 2,
1192 AC_FUNC_ATTR_READNONE);
1193
1194 args[0] = LLVMBuildMul(ctx->builder, trbl_tid,
1195 LLVMConstInt(ctx->i32, 4, false), "");
1196 trbl = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.ds.bpermute",
1197 ctx->i32, args, 2,
1198 AC_FUNC_ATTR_READNONE);
1199 } else {
1200 LLVMValueRef store_ptr, load_ptr0, load_ptr1;
1201
1202 store_ptr = build_gep0(ctx, ctx->lds, thread_id);
1203 load_ptr0 = build_gep0(ctx, ctx->lds, tl_tid);
1204 load_ptr1 = build_gep0(ctx, ctx->lds, trbl_tid);
1205
1206 LLVMBuildStore(ctx->builder, src0, store_ptr);
1207 tl = LLVMBuildLoad(ctx->builder, load_ptr0, "");
1208 trbl = LLVMBuildLoad(ctx->builder, load_ptr1, "");
1209 }
1210 tl = LLVMBuildBitCast(ctx->builder, tl, ctx->f32, "");
1211 trbl = LLVMBuildBitCast(ctx->builder, trbl, ctx->f32, "");
1212 result = LLVMBuildFSub(ctx->builder, trbl, tl, "");
1213 return result;
1214 }
1215
1216 /*
1217 * this takes an I,J coordinate pair,
1218 * and works out the X and Y derivatives.
1219 * it returns DDX(I), DDX(J), DDY(I), DDY(J).
1220 */
1221 static LLVMValueRef emit_ddxy_interp(
1222 struct nir_to_llvm_context *ctx,
1223 LLVMValueRef interp_ij)
1224 {
1225 LLVMValueRef result[4], a;
1226 unsigned i;
1227
1228 for (i = 0; i < 2; i++) {
1229 a = LLVMBuildExtractElement(ctx->builder, interp_ij,
1230 LLVMConstInt(ctx->i32, i, false), "");
1231 result[i] = emit_ddxy(ctx, nir_op_fddx, a);
1232 result[2+i] = emit_ddxy(ctx, nir_op_fddy, a);
1233 }
1234 return build_gather_values(ctx, result, 4);
1235 }
1236
1237 static LLVMValueRef emit_fdiv(struct nir_to_llvm_context *ctx,
1238 LLVMValueRef num,
1239 LLVMValueRef den)
1240 {
1241 LLVMValueRef ret = LLVMBuildFDiv(ctx->builder, num, den, "");
1242
1243 if (!LLVMIsConstant(ret))
1244 LLVMSetMetadata(ret, ctx->fpmath_md_kind, ctx->fpmath_md_2p5_ulp);
1245 return ret;
1246 }
1247
1248 static void visit_alu(struct nir_to_llvm_context *ctx, nir_alu_instr *instr)
1249 {
1250 LLVMValueRef src[4], result = NULL;
1251 unsigned num_components = instr->dest.dest.ssa.num_components;
1252 unsigned src_components;
1253
1254 assert(nir_op_infos[instr->op].num_inputs <= ARRAY_SIZE(src));
1255 switch (instr->op) {
1256 case nir_op_vec2:
1257 case nir_op_vec3:
1258 case nir_op_vec4:
1259 src_components = 1;
1260 break;
1261 case nir_op_pack_half_2x16:
1262 src_components = 2;
1263 break;
1264 case nir_op_unpack_half_2x16:
1265 src_components = 1;
1266 break;
1267 default:
1268 src_components = num_components;
1269 break;
1270 }
1271 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1272 src[i] = get_alu_src(ctx, instr->src[i], src_components);
1273
1274 switch (instr->op) {
1275 case nir_op_fmov:
1276 case nir_op_imov:
1277 result = src[0];
1278 break;
1279 case nir_op_fneg:
1280 src[0] = to_float(ctx, src[0]);
1281 result = LLVMBuildFNeg(ctx->builder, src[0], "");
1282 break;
1283 case nir_op_ineg:
1284 result = LLVMBuildNeg(ctx->builder, src[0], "");
1285 break;
1286 case nir_op_inot:
1287 result = LLVMBuildNot(ctx->builder, src[0], "");
1288 break;
1289 case nir_op_iadd:
1290 result = LLVMBuildAdd(ctx->builder, src[0], src[1], "");
1291 break;
1292 case nir_op_fadd:
1293 src[0] = to_float(ctx, src[0]);
1294 src[1] = to_float(ctx, src[1]);
1295 result = LLVMBuildFAdd(ctx->builder, src[0], src[1], "");
1296 break;
1297 case nir_op_fsub:
1298 src[0] = to_float(ctx, src[0]);
1299 src[1] = to_float(ctx, src[1]);
1300 result = LLVMBuildFSub(ctx->builder, src[0], src[1], "");
1301 break;
1302 case nir_op_isub:
1303 result = LLVMBuildSub(ctx->builder, src[0], src[1], "");
1304 break;
1305 case nir_op_imul:
1306 result = LLVMBuildMul(ctx->builder, src[0], src[1], "");
1307 break;
1308 case nir_op_imod:
1309 result = LLVMBuildSRem(ctx->builder, src[0], src[1], "");
1310 break;
1311 case nir_op_umod:
1312 result = LLVMBuildURem(ctx->builder, src[0], src[1], "");
1313 break;
1314 case nir_op_fmod:
1315 src[0] = to_float(ctx, src[0]);
1316 src[1] = to_float(ctx, src[1]);
1317 result = emit_fdiv(ctx, src[0], src[1]);
1318 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", result);
1319 result = LLVMBuildFMul(ctx->builder, src[1] , result, "");
1320 result = LLVMBuildFSub(ctx->builder, src[0], result, "");
1321 break;
1322 case nir_op_frem:
1323 src[0] = to_float(ctx, src[0]);
1324 src[1] = to_float(ctx, src[1]);
1325 result = LLVMBuildFRem(ctx->builder, src[0], src[1], "");
1326 break;
1327 case nir_op_idiv:
1328 result = LLVMBuildSDiv(ctx->builder, src[0], src[1], "");
1329 break;
1330 case nir_op_udiv:
1331 result = LLVMBuildUDiv(ctx->builder, src[0], src[1], "");
1332 break;
1333 case nir_op_fmul:
1334 src[0] = to_float(ctx, src[0]);
1335 src[1] = to_float(ctx, src[1]);
1336 result = LLVMBuildFMul(ctx->builder, src[0], src[1], "");
1337 break;
1338 case nir_op_fdiv:
1339 src[0] = to_float(ctx, src[0]);
1340 src[1] = to_float(ctx, src[1]);
1341 result = emit_fdiv(ctx, src[0], src[1]);
1342 break;
1343 case nir_op_frcp:
1344 src[0] = to_float(ctx, src[0]);
1345 result = emit_fdiv(ctx, ctx->f32one, src[0]);
1346 break;
1347 case nir_op_iand:
1348 result = LLVMBuildAnd(ctx->builder, src[0], src[1], "");
1349 break;
1350 case nir_op_ior:
1351 result = LLVMBuildOr(ctx->builder, src[0], src[1], "");
1352 break;
1353 case nir_op_ixor:
1354 result = LLVMBuildXor(ctx->builder, src[0], src[1], "");
1355 break;
1356 case nir_op_ishl:
1357 result = LLVMBuildShl(ctx->builder, src[0], src[1], "");
1358 break;
1359 case nir_op_ishr:
1360 result = LLVMBuildAShr(ctx->builder, src[0], src[1], "");
1361 break;
1362 case nir_op_ushr:
1363 result = LLVMBuildLShr(ctx->builder, src[0], src[1], "");
1364 break;
1365 case nir_op_ilt:
1366 result = emit_int_cmp(ctx, LLVMIntSLT, src[0], src[1]);
1367 break;
1368 case nir_op_ine:
1369 result = emit_int_cmp(ctx, LLVMIntNE, src[0], src[1]);
1370 break;
1371 case nir_op_ieq:
1372 result = emit_int_cmp(ctx, LLVMIntEQ, src[0], src[1]);
1373 break;
1374 case nir_op_ige:
1375 result = emit_int_cmp(ctx, LLVMIntSGE, src[0], src[1]);
1376 break;
1377 case nir_op_ult:
1378 result = emit_int_cmp(ctx, LLVMIntULT, src[0], src[1]);
1379 break;
1380 case nir_op_uge:
1381 result = emit_int_cmp(ctx, LLVMIntUGE, src[0], src[1]);
1382 break;
1383 case nir_op_feq:
1384 result = emit_float_cmp(ctx, LLVMRealUEQ, src[0], src[1]);
1385 break;
1386 case nir_op_fne:
1387 result = emit_float_cmp(ctx, LLVMRealUNE, src[0], src[1]);
1388 break;
1389 case nir_op_flt:
1390 result = emit_float_cmp(ctx, LLVMRealULT, src[0], src[1]);
1391 break;
1392 case nir_op_fge:
1393 result = emit_float_cmp(ctx, LLVMRealUGE, src[0], src[1]);
1394 break;
1395 case nir_op_fabs:
1396 result = emit_intrin_1f_param(ctx, "llvm.fabs.f32", src[0]);
1397 break;
1398 case nir_op_iabs:
1399 result = emit_iabs(ctx, src[0]);
1400 break;
1401 case nir_op_imax:
1402 result = emit_minmax_int(ctx, LLVMIntSGT, src[0], src[1]);
1403 break;
1404 case nir_op_imin:
1405 result = emit_minmax_int(ctx, LLVMIntSLT, src[0], src[1]);
1406 break;
1407 case nir_op_umax:
1408 result = emit_minmax_int(ctx, LLVMIntUGT, src[0], src[1]);
1409 break;
1410 case nir_op_umin:
1411 result = emit_minmax_int(ctx, LLVMIntULT, src[0], src[1]);
1412 break;
1413 case nir_op_isign:
1414 result = emit_isign(ctx, src[0]);
1415 break;
1416 case nir_op_fsign:
1417 src[0] = to_float(ctx, src[0]);
1418 result = emit_fsign(ctx, src[0]);
1419 break;
1420 case nir_op_ffloor:
1421 result = emit_intrin_1f_param(ctx, "llvm.floor.f32", src[0]);
1422 break;
1423 case nir_op_ftrunc:
1424 result = emit_intrin_1f_param(ctx, "llvm.trunc.f32", src[0]);
1425 break;
1426 case nir_op_fceil:
1427 result = emit_intrin_1f_param(ctx, "llvm.ceil.f32", src[0]);
1428 break;
1429 case nir_op_fround_even:
1430 result = emit_intrin_1f_param(ctx, "llvm.rint.f32", src[0]);
1431 break;
1432 case nir_op_ffract:
1433 result = emit_ffract(ctx, src[0]);
1434 break;
1435 case nir_op_fsin:
1436 result = emit_intrin_1f_param(ctx, "llvm.sin.f32", src[0]);
1437 break;
1438 case nir_op_fcos:
1439 result = emit_intrin_1f_param(ctx, "llvm.cos.f32", src[0]);
1440 break;
1441 case nir_op_fsqrt:
1442 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1443 break;
1444 case nir_op_fexp2:
1445 result = emit_intrin_1f_param(ctx, "llvm.exp2.f32", src[0]);
1446 break;
1447 case nir_op_flog2:
1448 result = emit_intrin_1f_param(ctx, "llvm.log2.f32", src[0]);
1449 break;
1450 case nir_op_frsq:
1451 result = emit_intrin_1f_param(ctx, "llvm.sqrt.f32", src[0]);
1452 result = emit_fdiv(ctx, ctx->f32one, result);
1453 break;
1454 case nir_op_fpow:
1455 result = emit_intrin_2f_param(ctx, "llvm.pow.f32", src[0], src[1]);
1456 break;
1457 case nir_op_fmax:
1458 result = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", src[0], src[1]);
1459 break;
1460 case nir_op_fmin:
1461 result = emit_intrin_2f_param(ctx, "llvm.minnum.f32", src[0], src[1]);
1462 break;
1463 case nir_op_ffma:
1464 result = emit_intrin_3f_param(ctx, "llvm.fma.f32", src[0], src[1], src[2]);
1465 break;
1466 case nir_op_ibitfield_extract:
1467 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.i32", src);
1468 break;
1469 case nir_op_ubitfield_extract:
1470 result = emit_bitfield_extract(ctx, "llvm.AMDGPU.bfe.u32", src);
1471 break;
1472 case nir_op_bitfield_insert:
1473 result = emit_bitfield_insert(ctx, src[0], src[1], src[2], src[3]);
1474 break;
1475 case nir_op_bitfield_reverse:
1476 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.bitreverse.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1477 break;
1478 case nir_op_bit_count:
1479 result = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.ctpop.i32", ctx->i32, src, 1, AC_FUNC_ATTR_READNONE);
1480 break;
1481 case nir_op_vec2:
1482 case nir_op_vec3:
1483 case nir_op_vec4:
1484 for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
1485 src[i] = to_integer(ctx, src[i]);
1486 result = build_gather_values(ctx, src, num_components);
1487 break;
1488 case nir_op_f2i:
1489 src[0] = to_float(ctx, src[0]);
1490 result = LLVMBuildFPToSI(ctx->builder, src[0], ctx->i32, "");
1491 break;
1492 case nir_op_f2u:
1493 src[0] = to_float(ctx, src[0]);
1494 result = LLVMBuildFPToUI(ctx->builder, src[0], ctx->i32, "");
1495 break;
1496 case nir_op_i2f:
1497 result = LLVMBuildSIToFP(ctx->builder, src[0], ctx->f32, "");
1498 break;
1499 case nir_op_u2f:
1500 result = LLVMBuildUIToFP(ctx->builder, src[0], ctx->f32, "");
1501 break;
1502 case nir_op_bcsel:
1503 result = emit_bcsel(ctx, src[0], src[1], src[2]);
1504 break;
1505 case nir_op_find_lsb:
1506 result = emit_find_lsb(ctx, src[0]);
1507 break;
1508 case nir_op_ufind_msb:
1509 result = emit_ufind_msb(ctx, src[0]);
1510 break;
1511 case nir_op_ifind_msb:
1512 result = emit_ifind_msb(ctx, src[0]);
1513 break;
1514 case nir_op_uadd_carry:
1515 result = emit_uint_carry(ctx, "llvm.uadd.with.overflow.i32", src[0], src[1]);
1516 break;
1517 case nir_op_usub_borrow:
1518 result = emit_uint_carry(ctx, "llvm.usub.with.overflow.i32", src[0], src[1]);
1519 break;
1520 case nir_op_b2f:
1521 result = emit_b2f(ctx, src[0]);
1522 break;
1523 case nir_op_fquantize2f16:
1524 src[0] = to_float(ctx, src[0]);
1525 result = LLVMBuildFPTrunc(ctx->builder, src[0], ctx->f16, "");
1526 /* need to convert back up to f32 */
1527 result = LLVMBuildFPExt(ctx->builder, result, ctx->f32, "");
1528 break;
1529 case nir_op_umul_high:
1530 result = emit_umul_high(ctx, src[0], src[1]);
1531 break;
1532 case nir_op_imul_high:
1533 result = emit_imul_high(ctx, src[0], src[1]);
1534 break;
1535 case nir_op_pack_half_2x16:
1536 result = emit_pack_half_2x16(ctx, src[0]);
1537 break;
1538 case nir_op_unpack_half_2x16:
1539 result = emit_unpack_half_2x16(ctx, src[0]);
1540 break;
1541 case nir_op_fddx:
1542 case nir_op_fddy:
1543 case nir_op_fddx_fine:
1544 case nir_op_fddy_fine:
1545 case nir_op_fddx_coarse:
1546 case nir_op_fddy_coarse:
1547 result = emit_ddxy(ctx, instr->op, src[0]);
1548 break;
1549 default:
1550 fprintf(stderr, "Unknown NIR alu instr: ");
1551 nir_print_instr(&instr->instr, stderr);
1552 fprintf(stderr, "\n");
1553 abort();
1554 }
1555
1556 if (result) {
1557 assert(instr->dest.dest.is_ssa);
1558 result = to_integer(ctx, result);
1559 _mesa_hash_table_insert(ctx->defs, &instr->dest.dest.ssa,
1560 result);
1561 }
1562 }
1563
1564 static void visit_load_const(struct nir_to_llvm_context *ctx,
1565 nir_load_const_instr *instr)
1566 {
1567 LLVMValueRef values[4], value = NULL;
1568 LLVMTypeRef element_type =
1569 LLVMIntTypeInContext(ctx->context, instr->def.bit_size);
1570
1571 for (unsigned i = 0; i < instr->def.num_components; ++i) {
1572 switch (instr->def.bit_size) {
1573 case 32:
1574 values[i] = LLVMConstInt(element_type,
1575 instr->value.u32[i], false);
1576 break;
1577 case 64:
1578 values[i] = LLVMConstInt(element_type,
1579 instr->value.u64[i], false);
1580 break;
1581 default:
1582 fprintf(stderr,
1583 "unsupported nir load_const bit_size: %d\n",
1584 instr->def.bit_size);
1585 abort();
1586 }
1587 }
1588 if (instr->def.num_components > 1) {
1589 value = LLVMConstVector(values, instr->def.num_components);
1590 } else
1591 value = values[0];
1592
1593 _mesa_hash_table_insert(ctx->defs, &instr->def, value);
1594 }
1595
1596 static LLVMValueRef cast_ptr(struct nir_to_llvm_context *ctx, LLVMValueRef ptr,
1597 LLVMTypeRef type)
1598 {
1599 int addr_space = LLVMGetPointerAddressSpace(LLVMTypeOf(ptr));
1600 return LLVMBuildBitCast(ctx->builder, ptr,
1601 LLVMPointerType(type, addr_space), "");
1602 }
1603
1604 static LLVMValueRef
1605 get_buffer_size(struct nir_to_llvm_context *ctx, LLVMValueRef descriptor, bool in_elements)
1606 {
1607 LLVMValueRef size =
1608 LLVMBuildExtractElement(ctx->builder, descriptor,
1609 LLVMConstInt(ctx->i32, 2, false), "");
1610
1611 /* VI only */
1612 if (ctx->options->chip_class >= VI && in_elements) {
1613 /* On VI, the descriptor contains the size in bytes,
1614 * but TXQ must return the size in elements.
1615 * The stride is always non-zero for resources using TXQ.
1616 */
1617 LLVMValueRef stride =
1618 LLVMBuildExtractElement(ctx->builder, descriptor,
1619 LLVMConstInt(ctx->i32, 1, false), "");
1620 stride = LLVMBuildLShr(ctx->builder, stride,
1621 LLVMConstInt(ctx->i32, 16, false), "");
1622 stride = LLVMBuildAnd(ctx->builder, stride,
1623 LLVMConstInt(ctx->i32, 0x3fff, false), "");
1624
1625 size = LLVMBuildUDiv(ctx->builder, size, stride, "");
1626 }
1627 return size;
1628 }
1629
1630 /**
1631 * Given the i32 or vNi32 \p type, generate the textual name (e.g. for use with
1632 * intrinsic names).
1633 */
1634 static void build_int_type_name(
1635 LLVMTypeRef type,
1636 char *buf, unsigned bufsize)
1637 {
1638 assert(bufsize >= 6);
1639
1640 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind)
1641 snprintf(buf, bufsize, "v%ui32",
1642 LLVMGetVectorSize(type));
1643 else
1644 strcpy(buf, "i32");
1645 }
1646
1647 static LLVMValueRef radv_lower_gather4_integer(struct nir_to_llvm_context *ctx,
1648 struct ac_tex_info *tinfo,
1649 nir_tex_instr *instr,
1650 const char *intr_name,
1651 unsigned coord_vgpr_index)
1652 {
1653 LLVMValueRef coord = tinfo->args[0];
1654 LLVMValueRef half_texel[2];
1655 int c;
1656
1657 //TODO Rect
1658 {
1659 LLVMValueRef txq_args[10];
1660 int txq_arg_count = 0;
1661 LLVMValueRef size;
1662 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
1663 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, false);
1664 txq_args[txq_arg_count++] = tinfo->args[1];
1665 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0xf, 0); /* dmask */
1666 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* unorm */
1667 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
1668 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
1669 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
1670 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
1671 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
1672 txq_args[txq_arg_count++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
1673 size = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
1674 txq_args, txq_arg_count,
1675 AC_FUNC_ATTR_READNONE);
1676
1677 for (c = 0; c < 2; c++) {
1678 half_texel[c] = LLVMBuildExtractElement(ctx->builder, size,
1679 LLVMConstInt(ctx->i32, c, false), "");
1680 half_texel[c] = LLVMBuildUIToFP(ctx->builder, half_texel[c], ctx->f32, "");
1681 half_texel[c] = emit_fdiv(ctx, ctx->f32one, half_texel[c]);
1682 half_texel[c] = LLVMBuildFMul(ctx->builder, half_texel[c],
1683 LLVMConstReal(ctx->f32, -0.5), "");
1684 }
1685 }
1686
1687 for (c = 0; c < 2; c++) {
1688 LLVMValueRef tmp;
1689 LLVMValueRef index = LLVMConstInt(ctx->i32, coord_vgpr_index + c, 0);
1690 tmp = LLVMBuildExtractElement(ctx->builder, coord, index, "");
1691 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->f32, "");
1692 tmp = LLVMBuildFAdd(ctx->builder, tmp, half_texel[c], "");
1693 tmp = LLVMBuildBitCast(ctx->builder, tmp, ctx->i32, "");
1694 coord = LLVMBuildInsertElement(ctx->builder, coord, tmp, index, "");
1695 }
1696
1697 tinfo->args[0] = coord;
1698 return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1699 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1700
1701 }
1702
1703 static LLVMValueRef build_tex_intrinsic(struct nir_to_llvm_context *ctx,
1704 nir_tex_instr *instr,
1705 struct ac_tex_info *tinfo)
1706 {
1707 const char *name = "llvm.SI.image.sample";
1708 const char *infix = "";
1709 char intr_name[127];
1710 char type[64];
1711 bool is_shadow = instr->is_shadow;
1712 bool has_offset = tinfo->has_offset;
1713 switch (instr->op) {
1714 case nir_texop_txf:
1715 case nir_texop_txf_ms:
1716 case nir_texop_samples_identical:
1717 name = instr->sampler_dim == GLSL_SAMPLER_DIM_MS ? "llvm.SI.image.load" :
1718 instr->sampler_dim == GLSL_SAMPLER_DIM_BUF ? "llvm.SI.vs.load.input" :
1719 "llvm.SI.image.load.mip";
1720 is_shadow = false;
1721 has_offset = false;
1722 break;
1723 case nir_texop_txb:
1724 infix = ".b";
1725 break;
1726 case nir_texop_txl:
1727 infix = ".l";
1728 break;
1729 case nir_texop_txs:
1730 name = "llvm.SI.getresinfo";
1731 break;
1732 case nir_texop_query_levels:
1733 name = "llvm.SI.getresinfo";
1734 break;
1735 case nir_texop_tex:
1736 if (ctx->stage != MESA_SHADER_FRAGMENT)
1737 infix = ".lz";
1738 break;
1739 case nir_texop_txd:
1740 infix = ".d";
1741 break;
1742 case nir_texop_tg4:
1743 name = "llvm.SI.gather4";
1744 infix = ".lz";
1745 break;
1746 case nir_texop_lod:
1747 name = "llvm.SI.getlod";
1748 is_shadow = false;
1749 has_offset = false;
1750 break;
1751 default:
1752 break;
1753 }
1754
1755 build_int_type_name(LLVMTypeOf(tinfo->args[0]), type, sizeof(type));
1756 sprintf(intr_name, "%s%s%s%s.%s", name, is_shadow ? ".c" : "", infix,
1757 has_offset ? ".o" : "", type);
1758
1759 if (instr->op == nir_texop_tg4) {
1760 enum glsl_base_type stype = glsl_get_sampler_result_type(instr->texture->var->type);
1761 if (stype == GLSL_TYPE_UINT || stype == GLSL_TYPE_INT) {
1762 return radv_lower_gather4_integer(ctx, tinfo, instr, intr_name,
1763 (int)has_offset + (int)is_shadow);
1764 }
1765 }
1766 return ac_emit_llvm_intrinsic(&ctx->ac, intr_name, tinfo->dst_type, tinfo->args, tinfo->arg_count,
1767 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
1768
1769 }
1770
1771 static LLVMValueRef visit_vulkan_resource_index(struct nir_to_llvm_context *ctx,
1772 nir_intrinsic_instr *instr)
1773 {
1774 LLVMValueRef index = get_src(ctx, instr->src[0]);
1775 unsigned desc_set = nir_intrinsic_desc_set(instr);
1776 unsigned binding = nir_intrinsic_binding(instr);
1777 LLVMValueRef desc_ptr = ctx->descriptor_sets[desc_set];
1778 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
1779 unsigned base_offset = layout->binding[binding].offset;
1780 LLVMValueRef offset, stride;
1781
1782 if (layout->binding[binding].type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC ||
1783 layout->binding[binding].type == VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC) {
1784 desc_ptr = ctx->push_constants;
1785 base_offset = ctx->options->layout->push_constant_size;
1786 base_offset += 16 * layout->binding[binding].dynamic_offset_offset;
1787 stride = LLVMConstInt(ctx->i32, 16, false);
1788 } else
1789 stride = LLVMConstInt(ctx->i32, layout->binding[binding].size, false);
1790
1791 offset = LLVMConstInt(ctx->i32, base_offset, false);
1792 index = LLVMBuildMul(ctx->builder, index, stride, "");
1793 offset = LLVMBuildAdd(ctx->builder, offset, index, "");
1794
1795 desc_ptr = build_gep0(ctx, desc_ptr, offset);
1796 desc_ptr = cast_ptr(ctx, desc_ptr, ctx->v4i32);
1797 LLVMSetMetadata(desc_ptr, ctx->uniform_md_kind, ctx->empty_md);
1798
1799 return LLVMBuildLoad(ctx->builder, desc_ptr, "");
1800 }
1801
1802 static LLVMValueRef visit_load_push_constant(struct nir_to_llvm_context *ctx,
1803 nir_intrinsic_instr *instr)
1804 {
1805 LLVMValueRef ptr;
1806
1807 ptr = build_gep0(ctx, ctx->push_constants, get_src(ctx, instr->src[0]));
1808 ptr = cast_ptr(ctx, ptr, get_def_type(ctx, &instr->dest.ssa));
1809
1810 return LLVMBuildLoad(ctx->builder, ptr, "");
1811 }
1812
1813 static LLVMValueRef visit_get_buffer_size(struct nir_to_llvm_context *ctx,
1814 nir_intrinsic_instr *instr)
1815 {
1816 LLVMValueRef desc = get_src(ctx, instr->src[0]);
1817
1818 return get_buffer_size(ctx, desc, false);
1819 }
1820 static void visit_store_ssbo(struct nir_to_llvm_context *ctx,
1821 nir_intrinsic_instr *instr)
1822 {
1823 const char *store_name;
1824 LLVMTypeRef data_type = ctx->f32;
1825 unsigned writemask = nir_intrinsic_write_mask(instr);
1826 LLVMValueRef base_data, base_offset;
1827 LLVMValueRef params[6];
1828
1829 if (ctx->stage == MESA_SHADER_FRAGMENT)
1830 ctx->shader_info->fs.writes_memory = true;
1831
1832 params[1] = get_src(ctx, instr->src[1]);
1833 params[2] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1834 params[4] = LLVMConstInt(ctx->i1, 0, false); /* glc */
1835 params[5] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1836
1837 if (instr->num_components > 1)
1838 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1839
1840 base_data = to_float(ctx, get_src(ctx, instr->src[0]));
1841 base_data = trim_vector(ctx, base_data, instr->num_components);
1842 base_data = LLVMBuildBitCast(ctx->builder, base_data,
1843 data_type, "");
1844 base_offset = get_src(ctx, instr->src[2]); /* voffset */
1845 while (writemask) {
1846 int start, count;
1847 LLVMValueRef data;
1848 LLVMValueRef offset;
1849 LLVMValueRef tmp;
1850 u_bit_scan_consecutive_range(&writemask, &start, &count);
1851
1852 /* Due to an LLVM limitation, split 3-element writes
1853 * into a 2-element and a 1-element write. */
1854 if (count == 3) {
1855 writemask |= 1 << (start + 2);
1856 count = 2;
1857 }
1858
1859 if (count == 4) {
1860 store_name = "llvm.amdgcn.buffer.store.v4f32";
1861 data = base_data;
1862 } else if (count == 2) {
1863 tmp = LLVMBuildExtractElement(ctx->builder,
1864 base_data, LLVMConstInt(ctx->i32, start, false), "");
1865 data = LLVMBuildInsertElement(ctx->builder, LLVMGetUndef(ctx->v2f32), tmp,
1866 ctx->i32zero, "");
1867
1868 tmp = LLVMBuildExtractElement(ctx->builder,
1869 base_data, LLVMConstInt(ctx->i32, start + 1, false), "");
1870 data = LLVMBuildInsertElement(ctx->builder, data, tmp,
1871 ctx->i32one, "");
1872 store_name = "llvm.amdgcn.buffer.store.v2f32";
1873
1874 } else {
1875 assert(count == 1);
1876 if (get_llvm_num_components(base_data) > 1)
1877 data = LLVMBuildExtractElement(ctx->builder, base_data,
1878 LLVMConstInt(ctx->i32, start, false), "");
1879 else
1880 data = base_data;
1881 store_name = "llvm.amdgcn.buffer.store.f32";
1882 }
1883
1884 offset = base_offset;
1885 if (start != 0) {
1886 offset = LLVMBuildAdd(ctx->builder, offset, LLVMConstInt(ctx->i32, start * 4, false), "");
1887 }
1888 params[0] = data;
1889 params[3] = offset;
1890 ac_emit_llvm_intrinsic(&ctx->ac, store_name,
1891 LLVMVoidTypeInContext(ctx->context), params, 6, 0);
1892 }
1893 }
1894
1895 static LLVMValueRef visit_atomic_ssbo(struct nir_to_llvm_context *ctx,
1896 nir_intrinsic_instr *instr)
1897 {
1898 const char *name;
1899 LLVMValueRef params[6];
1900 int arg_count = 0;
1901 if (ctx->stage == MESA_SHADER_FRAGMENT)
1902 ctx->shader_info->fs.writes_memory = true;
1903
1904 if (instr->intrinsic == nir_intrinsic_ssbo_atomic_comp_swap) {
1905 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[3]), 0);
1906 }
1907 params[arg_count++] = llvm_extract_elem(ctx, get_src(ctx, instr->src[2]), 0);
1908 params[arg_count++] = get_src(ctx, instr->src[0]);
1909 params[arg_count++] = LLVMConstInt(ctx->i32, 0, false); /* vindex */
1910 params[arg_count++] = get_src(ctx, instr->src[1]); /* voffset */
1911 params[arg_count++] = LLVMConstInt(ctx->i1, 0, false); /* slc */
1912
1913 switch (instr->intrinsic) {
1914 case nir_intrinsic_ssbo_atomic_add:
1915 name = "llvm.amdgcn.buffer.atomic.add";
1916 break;
1917 case nir_intrinsic_ssbo_atomic_imin:
1918 name = "llvm.amdgcn.buffer.atomic.smin";
1919 break;
1920 case nir_intrinsic_ssbo_atomic_umin:
1921 name = "llvm.amdgcn.buffer.atomic.umin";
1922 break;
1923 case nir_intrinsic_ssbo_atomic_imax:
1924 name = "llvm.amdgcn.buffer.atomic.smax";
1925 break;
1926 case nir_intrinsic_ssbo_atomic_umax:
1927 name = "llvm.amdgcn.buffer.atomic.umax";
1928 break;
1929 case nir_intrinsic_ssbo_atomic_and:
1930 name = "llvm.amdgcn.buffer.atomic.and";
1931 break;
1932 case nir_intrinsic_ssbo_atomic_or:
1933 name = "llvm.amdgcn.buffer.atomic.or";
1934 break;
1935 case nir_intrinsic_ssbo_atomic_xor:
1936 name = "llvm.amdgcn.buffer.atomic.xor";
1937 break;
1938 case nir_intrinsic_ssbo_atomic_exchange:
1939 name = "llvm.amdgcn.buffer.atomic.swap";
1940 break;
1941 case nir_intrinsic_ssbo_atomic_comp_swap:
1942 name = "llvm.amdgcn.buffer.atomic.cmpswap";
1943 break;
1944 default:
1945 abort();
1946 }
1947
1948 return ac_emit_llvm_intrinsic(&ctx->ac, name, ctx->i32, params, arg_count, 0);
1949 }
1950
1951 static LLVMValueRef visit_load_buffer(struct nir_to_llvm_context *ctx,
1952 nir_intrinsic_instr *instr)
1953 {
1954 const char *load_name;
1955 LLVMTypeRef data_type = ctx->f32;
1956 if (instr->num_components == 3)
1957 data_type = LLVMVectorType(ctx->f32, 4);
1958 else if (instr->num_components > 1)
1959 data_type = LLVMVectorType(ctx->f32, instr->num_components);
1960
1961 if (instr->num_components == 4 || instr->num_components == 3)
1962 load_name = "llvm.amdgcn.buffer.load.v4f32";
1963 else if (instr->num_components == 2)
1964 load_name = "llvm.amdgcn.buffer.load.v2f32";
1965 else if (instr->num_components == 1)
1966 load_name = "llvm.amdgcn.buffer.load.f32";
1967 else
1968 abort();
1969
1970 LLVMValueRef params[] = {
1971 get_src(ctx, instr->src[0]),
1972 LLVMConstInt(ctx->i32, 0, false),
1973 get_src(ctx, instr->src[1]),
1974 LLVMConstInt(ctx->i1, 0, false),
1975 LLVMConstInt(ctx->i1, 0, false),
1976 };
1977
1978 LLVMValueRef ret =
1979 ac_emit_llvm_intrinsic(&ctx->ac, load_name, data_type, params, 5, 0);
1980
1981 if (instr->num_components == 3)
1982 ret = trim_vector(ctx, ret, 3);
1983
1984 return LLVMBuildBitCast(ctx->builder, ret,
1985 get_def_type(ctx, &instr->dest.ssa), "");
1986 }
1987
1988 static LLVMValueRef visit_load_ubo_buffer(struct nir_to_llvm_context *ctx,
1989 nir_intrinsic_instr *instr)
1990 {
1991 LLVMValueRef results[4], ret;
1992 LLVMValueRef rsrc = get_src(ctx, instr->src[0]);
1993 LLVMValueRef offset = get_src(ctx, instr->src[1]);
1994
1995 rsrc = LLVMBuildBitCast(ctx->builder, rsrc, LLVMVectorType(ctx->i8, 16), "");
1996
1997 for (unsigned i = 0; i < instr->num_components; ++i) {
1998 LLVMValueRef params[] = {
1999 rsrc,
2000 LLVMBuildAdd(ctx->builder, LLVMConstInt(ctx->i32, 4 * i, 0),
2001 offset, "")
2002 };
2003 results[i] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.load.const", ctx->f32,
2004 params, 2, AC_FUNC_ATTR_READNONE);
2005 }
2006
2007
2008 ret = build_gather_values(ctx, results, instr->num_components);
2009 return LLVMBuildBitCast(ctx->builder, ret,
2010 get_def_type(ctx, &instr->dest.ssa), "");
2011 }
2012
2013 static void
2014 radv_get_deref_offset(struct nir_to_llvm_context *ctx, nir_deref *tail,
2015 bool vs_in, unsigned *const_out, LLVMValueRef *indir_out)
2016 {
2017 unsigned const_offset = 0;
2018 LLVMValueRef offset = NULL;
2019
2020
2021 while (tail->child != NULL) {
2022 const struct glsl_type *parent_type = tail->type;
2023 tail = tail->child;
2024
2025 if (tail->deref_type == nir_deref_type_array) {
2026 nir_deref_array *deref_array = nir_deref_as_array(tail);
2027 LLVMValueRef index, stride, local_offset;
2028 unsigned size = glsl_count_attribute_slots(tail->type, vs_in);
2029
2030 const_offset += size * deref_array->base_offset;
2031 if (deref_array->deref_array_type == nir_deref_array_type_direct)
2032 continue;
2033
2034 assert(deref_array->deref_array_type == nir_deref_array_type_indirect);
2035 index = get_src(ctx, deref_array->indirect);
2036 stride = LLVMConstInt(ctx->i32, size, 0);
2037 local_offset = LLVMBuildMul(ctx->builder, stride, index, "");
2038
2039 if (offset)
2040 offset = LLVMBuildAdd(ctx->builder, offset, local_offset, "");
2041 else
2042 offset = local_offset;
2043 } else if (tail->deref_type == nir_deref_type_struct) {
2044 nir_deref_struct *deref_struct = nir_deref_as_struct(tail);
2045
2046 for (unsigned i = 0; i < deref_struct->index; i++) {
2047 const struct glsl_type *ft = glsl_get_struct_field(parent_type, i);
2048 const_offset += glsl_count_attribute_slots(ft, vs_in);
2049 }
2050 } else
2051 unreachable("unsupported deref type");
2052
2053 }
2054
2055 if (const_offset && offset)
2056 offset = LLVMBuildAdd(ctx->builder, offset,
2057 LLVMConstInt(ctx->i32, const_offset, 0),
2058 "");
2059
2060 *const_out = const_offset;
2061 *indir_out = offset;
2062 }
2063
2064 static LLVMValueRef visit_load_var(struct nir_to_llvm_context *ctx,
2065 nir_intrinsic_instr *instr)
2066 {
2067 LLVMValueRef values[4];
2068 int idx = instr->variables[0]->var->data.driver_location;
2069 int ve = instr->dest.ssa.num_components;
2070 LLVMValueRef indir_index;
2071 unsigned const_index;
2072 switch (instr->variables[0]->var->data.mode) {
2073 case nir_var_shader_in:
2074 radv_get_deref_offset(ctx, &instr->variables[0]->deref,
2075 ctx->stage == MESA_SHADER_VERTEX,
2076 &const_index, &indir_index);
2077 for (unsigned chan = 0; chan < ve; chan++) {
2078 if (indir_index) {
2079 unsigned count = glsl_count_attribute_slots(
2080 instr->variables[0]->var->type,
2081 ctx->stage == MESA_SHADER_VERTEX);
2082 LLVMValueRef tmp_vec = build_gather_values_extended(
2083 ctx, ctx->inputs + idx + chan, count,
2084 4, false);
2085
2086 values[chan] = LLVMBuildExtractElement(ctx->builder,
2087 tmp_vec,
2088 indir_index, "");
2089 } else
2090 values[chan] = ctx->inputs[idx + chan + const_index * 4];
2091 }
2092 return to_integer(ctx, build_gather_values(ctx, values, ve));
2093 break;
2094 case nir_var_local:
2095 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2096 &const_index, &indir_index);
2097 for (unsigned chan = 0; chan < ve; chan++) {
2098 if (indir_index) {
2099 unsigned count = glsl_count_attribute_slots(
2100 instr->variables[0]->var->type, false);
2101 LLVMValueRef tmp_vec = build_gather_values_extended(
2102 ctx, ctx->locals + idx + chan, count,
2103 4, true);
2104
2105 values[chan] = LLVMBuildExtractElement(ctx->builder,
2106 tmp_vec,
2107 indir_index, "");
2108 } else {
2109 values[chan] = LLVMBuildLoad(ctx->builder, ctx->locals[idx + chan + const_index * 4], "");
2110 }
2111 }
2112 return to_integer(ctx, build_gather_values(ctx, values, ve));
2113 case nir_var_shader_out:
2114 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2115 &const_index, &indir_index);
2116 for (unsigned chan = 0; chan < ve; chan++) {
2117 if (indir_index) {
2118 unsigned count = glsl_count_attribute_slots(
2119 instr->variables[0]->var->type, false);
2120 LLVMValueRef tmp_vec = build_gather_values_extended(
2121 ctx, ctx->outputs + idx + chan, count,
2122 4, true);
2123
2124 values[chan] = LLVMBuildExtractElement(ctx->builder,
2125 tmp_vec,
2126 indir_index, "");
2127 } else {
2128 values[chan] = LLVMBuildLoad(ctx->builder,
2129 ctx->outputs[idx + chan + const_index * 4],
2130 "");
2131 }
2132 }
2133 return to_integer(ctx, build_gather_values(ctx, values, ve));
2134 case nir_var_shared: {
2135 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2136 &const_index, &indir_index);
2137 LLVMValueRef ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2138 LLVMValueRef derived_ptr;
2139
2140 for (unsigned chan = 0; chan < ve; chan++) {
2141 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2142 if (indir_index)
2143 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2144 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2145 values[chan] = LLVMBuildLoad(ctx->builder, derived_ptr, "");
2146 }
2147 return to_integer(ctx, build_gather_values(ctx, values, ve));
2148 }
2149 default:
2150 break;
2151 }
2152 return NULL;
2153 }
2154
2155 static void
2156 visit_store_var(struct nir_to_llvm_context *ctx,
2157 nir_intrinsic_instr *instr)
2158 {
2159 LLVMValueRef temp_ptr, value;
2160 int idx = instr->variables[0]->var->data.driver_location;
2161 LLVMValueRef src = to_float(ctx, get_src(ctx, instr->src[0]));
2162 int writemask = instr->const_index[0];
2163 LLVMValueRef indir_index;
2164 unsigned const_index;
2165 switch (instr->variables[0]->var->data.mode) {
2166 case nir_var_shader_out:
2167 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2168 &const_index, &indir_index);
2169 for (unsigned chan = 0; chan < 4; chan++) {
2170 int stride = 4;
2171 if (!(writemask & (1 << chan)))
2172 continue;
2173 if (get_llvm_num_components(src) == 1)
2174 value = src;
2175 else
2176 value = LLVMBuildExtractElement(ctx->builder, src,
2177 LLVMConstInt(ctx->i32,
2178 chan, false),
2179 "");
2180
2181 if (instr->variables[0]->var->data.location == VARYING_SLOT_CLIP_DIST0 ||
2182 instr->variables[0]->var->data.location == VARYING_SLOT_CULL_DIST0)
2183 stride = 1;
2184 if (indir_index) {
2185 unsigned count = glsl_count_attribute_slots(
2186 instr->variables[0]->var->type, false);
2187 LLVMValueRef tmp_vec = build_gather_values_extended(
2188 ctx, ctx->outputs + idx + chan, count,
2189 stride, true);
2190
2191 if (get_llvm_num_components(tmp_vec) > 1) {
2192 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2193 value, indir_index, "");
2194 } else
2195 tmp_vec = value;
2196 build_store_values_extended(ctx, ctx->outputs + idx + chan,
2197 count, stride, tmp_vec);
2198
2199 } else {
2200 temp_ptr = ctx->outputs[idx + chan + const_index * stride];
2201
2202 LLVMBuildStore(ctx->builder, value, temp_ptr);
2203 }
2204 }
2205 break;
2206 case nir_var_local:
2207 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2208 &const_index, &indir_index);
2209 for (unsigned chan = 0; chan < 4; chan++) {
2210 if (!(writemask & (1 << chan)))
2211 continue;
2212
2213 if (get_llvm_num_components(src) == 1)
2214 value = src;
2215 else
2216 value = LLVMBuildExtractElement(ctx->builder, src,
2217 LLVMConstInt(ctx->i32, chan, false), "");
2218 if (indir_index) {
2219 unsigned count = glsl_count_attribute_slots(
2220 instr->variables[0]->var->type, false);
2221 LLVMValueRef tmp_vec = build_gather_values_extended(
2222 ctx, ctx->locals + idx + chan, count,
2223 4, true);
2224
2225 tmp_vec = LLVMBuildInsertElement(ctx->builder, tmp_vec,
2226 value, indir_index, "");
2227 build_store_values_extended(ctx, ctx->locals + idx + chan,
2228 count, 4, tmp_vec);
2229 } else {
2230 temp_ptr = ctx->locals[idx + chan + const_index * 4];
2231
2232 LLVMBuildStore(ctx->builder, value, temp_ptr);
2233 }
2234 }
2235 break;
2236 case nir_var_shared: {
2237 LLVMValueRef ptr;
2238 radv_get_deref_offset(ctx, &instr->variables[0]->deref, false,
2239 &const_index, &indir_index);
2240
2241 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2242 LLVMValueRef derived_ptr;
2243
2244 for (unsigned chan = 0; chan < 4; chan++) {
2245 if (!(writemask & (1 << chan)))
2246 continue;
2247
2248 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, false);
2249
2250 if (get_llvm_num_components(src) == 1)
2251 value = src;
2252 else
2253 value = LLVMBuildExtractElement(ctx->builder, src,
2254 LLVMConstInt(ctx->i32,
2255 chan, false),
2256 "");
2257
2258 if (indir_index)
2259 index = LLVMBuildAdd(ctx->builder, index, indir_index, "");
2260
2261 derived_ptr = LLVMBuildGEP(ctx->builder, ptr, &index, 1, "");
2262 LLVMBuildStore(ctx->builder,
2263 to_integer(ctx, value), derived_ptr);
2264 }
2265 break;
2266 }
2267 default:
2268 break;
2269 }
2270 }
2271
2272 static int image_type_to_components_count(enum glsl_sampler_dim dim, bool array)
2273 {
2274 switch (dim) {
2275 case GLSL_SAMPLER_DIM_BUF:
2276 return 1;
2277 case GLSL_SAMPLER_DIM_1D:
2278 return array ? 2 : 1;
2279 case GLSL_SAMPLER_DIM_2D:
2280 return array ? 3 : 2;
2281 case GLSL_SAMPLER_DIM_MS:
2282 return array ? 4 : 3;
2283 case GLSL_SAMPLER_DIM_3D:
2284 case GLSL_SAMPLER_DIM_CUBE:
2285 return 3;
2286 case GLSL_SAMPLER_DIM_RECT:
2287 case GLSL_SAMPLER_DIM_SUBPASS:
2288 return 2;
2289 case GLSL_SAMPLER_DIM_SUBPASS_MS:
2290 return 3;
2291 default:
2292 break;
2293 }
2294 return 0;
2295 }
2296
2297 static LLVMValueRef get_image_coords(struct nir_to_llvm_context *ctx,
2298 nir_intrinsic_instr *instr, bool add_frag_pos)
2299 {
2300 const struct glsl_type *type = instr->variables[0]->var->type;
2301 if(instr->variables[0]->deref.child)
2302 type = instr->variables[0]->deref.child->type;
2303
2304 LLVMValueRef src0 = get_src(ctx, instr->src[0]);
2305 LLVMValueRef coords[4];
2306 LLVMValueRef masks[] = {
2307 LLVMConstInt(ctx->i32, 0, false), LLVMConstInt(ctx->i32, 1, false),
2308 LLVMConstInt(ctx->i32, 2, false), LLVMConstInt(ctx->i32, 3, false),
2309 };
2310 LLVMValueRef res;
2311 int count;
2312 enum glsl_sampler_dim dim = glsl_get_sampler_dim(type);
2313 bool is_ms = (dim == GLSL_SAMPLER_DIM_MS ||
2314 dim == GLSL_SAMPLER_DIM_SUBPASS_MS);
2315
2316 count = image_type_to_components_count(dim,
2317 glsl_sampler_type_is_array(type));
2318
2319 if (count == 1) {
2320 if (instr->src[0].ssa->num_components)
2321 res = LLVMBuildExtractElement(ctx->builder, src0, masks[0], "");
2322 else
2323 res = src0;
2324 } else {
2325 int chan;
2326 if (is_ms)
2327 count--;
2328 for (chan = 0; chan < count; ++chan) {
2329 coords[chan] = LLVMBuildExtractElement(ctx->builder, src0, masks[chan], "");
2330 }
2331
2332 if (add_frag_pos) {
2333 for (chan = 0; chan < count; ++chan)
2334 coords[chan] = LLVMBuildAdd(ctx->builder, coords[chan], LLVMBuildFPToUI(ctx->builder, ctx->frag_pos[chan], ctx->i32, ""), "");
2335 }
2336 if (is_ms) {
2337 coords[count] = llvm_extract_elem(ctx, get_src(ctx, instr->src[1]), 0);
2338 count++;
2339 }
2340
2341 if (count == 3) {
2342 coords[3] = LLVMGetUndef(ctx->i32);
2343 count = 4;
2344 }
2345 res = build_gather_values(ctx, coords, count);
2346 }
2347 return res;
2348 }
2349
2350 static void build_type_name_for_intr(
2351 LLVMTypeRef type,
2352 char *buf, unsigned bufsize)
2353 {
2354 LLVMTypeRef elem_type = type;
2355
2356 assert(bufsize >= 8);
2357
2358 if (LLVMGetTypeKind(type) == LLVMVectorTypeKind) {
2359 int ret = snprintf(buf, bufsize, "v%u",
2360 LLVMGetVectorSize(type));
2361 if (ret < 0) {
2362 char *type_name = LLVMPrintTypeToString(type);
2363 fprintf(stderr, "Error building type name for: %s\n",
2364 type_name);
2365 return;
2366 }
2367 elem_type = LLVMGetElementType(type);
2368 buf += ret;
2369 bufsize -= ret;
2370 }
2371 switch (LLVMGetTypeKind(elem_type)) {
2372 default: break;
2373 case LLVMIntegerTypeKind:
2374 snprintf(buf, bufsize, "i%d", LLVMGetIntTypeWidth(elem_type));
2375 break;
2376 case LLVMFloatTypeKind:
2377 snprintf(buf, bufsize, "f32");
2378 break;
2379 case LLVMDoubleTypeKind:
2380 snprintf(buf, bufsize, "f64");
2381 break;
2382 }
2383 }
2384
2385 static void get_image_intr_name(const char *base_name,
2386 LLVMTypeRef data_type,
2387 LLVMTypeRef coords_type,
2388 LLVMTypeRef rsrc_type,
2389 char *out_name, unsigned out_len)
2390 {
2391 char coords_type_name[8];
2392
2393 build_type_name_for_intr(coords_type, coords_type_name,
2394 sizeof(coords_type_name));
2395
2396 if (HAVE_LLVM <= 0x0309) {
2397 snprintf(out_name, out_len, "%s.%s", base_name, coords_type_name);
2398 } else {
2399 char data_type_name[8];
2400 char rsrc_type_name[8];
2401
2402 build_type_name_for_intr(data_type, data_type_name,
2403 sizeof(data_type_name));
2404 build_type_name_for_intr(rsrc_type, rsrc_type_name,
2405 sizeof(rsrc_type_name));
2406 snprintf(out_name, out_len, "%s.%s.%s.%s", base_name,
2407 data_type_name, coords_type_name, rsrc_type_name);
2408 }
2409 }
2410
2411 static LLVMValueRef visit_image_load(struct nir_to_llvm_context *ctx,
2412 nir_intrinsic_instr *instr)
2413 {
2414 LLVMValueRef params[7];
2415 LLVMValueRef res;
2416 char intrinsic_name[64];
2417 const nir_variable *var = instr->variables[0]->var;
2418 const struct glsl_type *type = var->type;
2419 if(instr->variables[0]->deref.child)
2420 type = instr->variables[0]->deref.child->type;
2421
2422 type = glsl_without_array(type);
2423 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2424 params[0] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2425 params[1] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2426 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2427 params[2] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2428 params[3] = LLVMConstInt(ctx->i1, 0, false); /* glc */
2429 params[4] = LLVMConstInt(ctx->i1, 0, false); /* slc */
2430 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.load.format.v4f32", ctx->v4f32,
2431 params, 5, 0);
2432
2433 res = trim_vector(ctx, res, instr->dest.ssa.num_components);
2434 res = to_integer(ctx, res);
2435 } else {
2436 bool is_da = glsl_sampler_type_is_array(type) ||
2437 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2438 bool add_frag_pos = glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_SUBPASS;
2439 LLVMValueRef da = is_da ? ctx->i32one : ctx->i32zero;
2440 LLVMValueRef glc = LLVMConstInt(ctx->i1, 0, false);
2441 LLVMValueRef slc = LLVMConstInt(ctx->i1, 0, false);
2442
2443 params[0] = get_image_coords(ctx, instr, add_frag_pos);
2444 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2445 params[2] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2446 if (HAVE_LLVM <= 0x0309) {
2447 params[3] = LLVMConstInt(ctx->i1, 0, false); /* r128 */
2448 params[4] = da;
2449 params[5] = glc;
2450 params[6] = slc;
2451 } else {
2452 LLVMValueRef lwe = LLVMConstInt(ctx->i1, 0, false);
2453 params[3] = glc;
2454 params[4] = slc;
2455 params[5] = lwe;
2456 params[6] = da;
2457 }
2458
2459 get_image_intr_name("llvm.amdgcn.image.load",
2460 ctx->v4f32, /* vdata */
2461 LLVMTypeOf(params[0]), /* coords */
2462 LLVMTypeOf(params[1]), /* rsrc */
2463 intrinsic_name, sizeof(intrinsic_name));
2464
2465 res = ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->v4f32,
2466 params, 7, AC_FUNC_ATTR_READONLY);
2467 }
2468 return to_integer(ctx, res);
2469 }
2470
2471 static void visit_image_store(struct nir_to_llvm_context *ctx,
2472 nir_intrinsic_instr *instr)
2473 {
2474 LLVMValueRef params[8];
2475 char intrinsic_name[64];
2476 const nir_variable *var = instr->variables[0]->var;
2477 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2478 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2479 const struct glsl_type *type = glsl_without_array(var->type);
2480
2481 if (ctx->stage == MESA_SHADER_FRAGMENT)
2482 ctx->shader_info->fs.writes_memory = true;
2483
2484 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2485 params[0] = to_float(ctx, get_src(ctx, instr->src[2])); /* data */
2486 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2487 params[2] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2488 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2489 params[3] = LLVMConstInt(ctx->i32, 0, false); /* voffset */
2490 params[4] = i1false; /* glc */
2491 params[5] = i1false; /* slc */
2492 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.buffer.store.format.v4f32", ctx->voidt,
2493 params, 6, 0);
2494 } else {
2495 bool is_da = glsl_sampler_type_is_array(type) ||
2496 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2497 LLVMValueRef da = is_da ? i1true : i1false;
2498 LLVMValueRef glc = i1false;
2499 LLVMValueRef slc = i1false;
2500
2501 params[0] = to_float(ctx, get_src(ctx, instr->src[2]));
2502 params[1] = get_image_coords(ctx, instr, false); /* coords */
2503 params[2] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2504 params[3] = LLVMConstInt(ctx->i32, 15, false); /* dmask */
2505 if (HAVE_LLVM <= 0x0309) {
2506 params[4] = i1false; /* r128 */
2507 params[5] = da;
2508 params[6] = glc;
2509 params[7] = slc;
2510 } else {
2511 LLVMValueRef lwe = i1false;
2512 params[4] = glc;
2513 params[5] = slc;
2514 params[6] = lwe;
2515 params[7] = da;
2516 }
2517
2518 get_image_intr_name("llvm.amdgcn.image.store",
2519 LLVMTypeOf(params[0]), /* vdata */
2520 LLVMTypeOf(params[1]), /* coords */
2521 LLVMTypeOf(params[2]), /* rsrc */
2522 intrinsic_name, sizeof(intrinsic_name));
2523
2524 ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->voidt,
2525 params, 8, 0);
2526 }
2527
2528 }
2529
2530 static LLVMValueRef visit_image_atomic(struct nir_to_llvm_context *ctx,
2531 nir_intrinsic_instr *instr)
2532 {
2533 LLVMValueRef params[6];
2534 int param_count = 0;
2535 const nir_variable *var = instr->variables[0]->var;
2536 LLVMValueRef i1false = LLVMConstInt(ctx->i1, 0, 0);
2537 LLVMValueRef i1true = LLVMConstInt(ctx->i1, 1, 0);
2538 const char *base_name = "llvm.amdgcn.image.atomic";
2539 const char *atomic_name;
2540 LLVMValueRef coords;
2541 char intrinsic_name[32], coords_type[8];
2542 const struct glsl_type *type = glsl_without_array(var->type);
2543
2544 if (ctx->stage == MESA_SHADER_FRAGMENT)
2545 ctx->shader_info->fs.writes_memory = true;
2546
2547 params[param_count++] = get_src(ctx, instr->src[2]);
2548 if (instr->intrinsic == nir_intrinsic_image_atomic_comp_swap)
2549 params[param_count++] = get_src(ctx, instr->src[3]);
2550
2551 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF) {
2552 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER);
2553 coords = params[param_count++] = LLVMBuildExtractElement(ctx->builder, get_src(ctx, instr->src[0]),
2554 LLVMConstInt(ctx->i32, 0, false), ""); /* vindex */
2555 params[param_count++] = ctx->i32zero; /* voffset */
2556 params[param_count++] = i1false; /* glc */
2557 params[param_count++] = i1false; /* slc */
2558 } else {
2559 bool da = glsl_sampler_type_is_array(type) ||
2560 glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE;
2561
2562 coords = params[param_count++] = get_image_coords(ctx, instr, false);
2563 params[param_count++] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2564 params[param_count++] = i1false; /* r128 */
2565 params[param_count++] = da ? i1true : i1false; /* da */
2566 params[param_count++] = i1false; /* slc */
2567 }
2568
2569 switch (instr->intrinsic) {
2570 case nir_intrinsic_image_atomic_add:
2571 atomic_name = "add";
2572 break;
2573 case nir_intrinsic_image_atomic_min:
2574 atomic_name = "smin";
2575 break;
2576 case nir_intrinsic_image_atomic_max:
2577 atomic_name = "smax";
2578 break;
2579 case nir_intrinsic_image_atomic_and:
2580 atomic_name = "and";
2581 break;
2582 case nir_intrinsic_image_atomic_or:
2583 atomic_name = "or";
2584 break;
2585 case nir_intrinsic_image_atomic_xor:
2586 atomic_name = "xor";
2587 break;
2588 case nir_intrinsic_image_atomic_exchange:
2589 atomic_name = "swap";
2590 break;
2591 case nir_intrinsic_image_atomic_comp_swap:
2592 atomic_name = "cmpswap";
2593 break;
2594 default:
2595 abort();
2596 }
2597 build_int_type_name(LLVMTypeOf(coords),
2598 coords_type, sizeof(coords_type));
2599
2600 snprintf(intrinsic_name, sizeof(intrinsic_name),
2601 "%s.%s.%s", base_name, atomic_name, coords_type);
2602 return ac_emit_llvm_intrinsic(&ctx->ac, intrinsic_name, ctx->i32, params, param_count, 0);
2603 }
2604
2605 static LLVMValueRef visit_image_size(struct nir_to_llvm_context *ctx,
2606 nir_intrinsic_instr *instr)
2607 {
2608 LLVMValueRef res;
2609 LLVMValueRef params[10];
2610 const nir_variable *var = instr->variables[0]->var;
2611 const struct glsl_type *type = instr->variables[0]->var->type;
2612 bool da = glsl_sampler_type_is_array(var->type) ||
2613 glsl_get_sampler_dim(var->type) == GLSL_SAMPLER_DIM_CUBE;
2614 if(instr->variables[0]->deref.child)
2615 type = instr->variables[0]->deref.child->type;
2616
2617 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_BUF)
2618 return get_buffer_size(ctx, get_sampler_desc(ctx, instr->variables[0], DESC_BUFFER), true);
2619 params[0] = ctx->i32zero;
2620 params[1] = get_sampler_desc(ctx, instr->variables[0], DESC_IMAGE);
2621 params[2] = LLVMConstInt(ctx->i32, 15, false);
2622 params[3] = ctx->i32zero;
2623 params[4] = ctx->i32zero;
2624 params[5] = da ? ctx->i32one : ctx->i32zero;
2625 params[6] = ctx->i32zero;
2626 params[7] = ctx->i32zero;
2627 params[8] = ctx->i32zero;
2628 params[9] = ctx->i32zero;
2629
2630 res = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.getresinfo.i32", ctx->v4i32,
2631 params, 10, AC_FUNC_ATTR_READNONE);
2632
2633 if (glsl_get_sampler_dim(type) == GLSL_SAMPLER_DIM_CUBE &&
2634 glsl_sampler_type_is_array(type)) {
2635 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
2636 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
2637 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, res, two, "");
2638 z = LLVMBuildSDiv(ctx->builder, z, six, "");
2639 res = LLVMBuildInsertElement(ctx->builder, res, z, two, "");
2640 }
2641 return res;
2642 }
2643
2644 static void emit_waitcnt(struct nir_to_llvm_context *ctx)
2645 {
2646 LLVMValueRef args[1] = {
2647 LLVMConstInt(ctx->i32, 0xf70, false),
2648 };
2649 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.waitcnt",
2650 ctx->voidt, args, 1, 0);
2651 }
2652
2653 static void emit_barrier(struct nir_to_llvm_context *ctx)
2654 {
2655 // TODO tess
2656 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.amdgcn.s.barrier",
2657 ctx->voidt, NULL, 0, 0);
2658 }
2659
2660 static void emit_discard_if(struct nir_to_llvm_context *ctx,
2661 nir_intrinsic_instr *instr)
2662 {
2663 LLVMValueRef cond;
2664 ctx->shader_info->fs.can_discard = true;
2665
2666 cond = LLVMBuildICmp(ctx->builder, LLVMIntNE,
2667 get_src(ctx, instr->src[0]),
2668 ctx->i32zero, "");
2669
2670 cond = LLVMBuildSelect(ctx->builder, cond,
2671 LLVMConstReal(ctx->f32, -1.0f),
2672 ctx->f32zero, "");
2673 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kill",
2674 LLVMVoidTypeInContext(ctx->context),
2675 &cond, 1, 0);
2676 }
2677
2678 static LLVMValueRef
2679 visit_load_local_invocation_index(struct nir_to_llvm_context *ctx)
2680 {
2681 LLVMValueRef result;
2682 LLVMValueRef thread_id = get_thread_id(ctx);
2683 result = LLVMBuildAnd(ctx->builder, ctx->tg_size,
2684 LLVMConstInt(ctx->i32, 0xfc0, false), "");
2685
2686 return LLVMBuildAdd(ctx->builder, result, thread_id, "");
2687 }
2688
2689 static LLVMValueRef visit_var_atomic(struct nir_to_llvm_context *ctx,
2690 nir_intrinsic_instr *instr)
2691 {
2692 LLVMValueRef ptr, result;
2693 int idx = instr->variables[0]->var->data.driver_location;
2694 LLVMValueRef src = get_src(ctx, instr->src[0]);
2695 ptr = get_shared_memory_ptr(ctx, idx, ctx->i32);
2696
2697 if (instr->intrinsic == nir_intrinsic_var_atomic_comp_swap) {
2698 LLVMValueRef src1 = get_src(ctx, instr->src[1]);
2699 result = LLVMBuildAtomicCmpXchg(ctx->builder,
2700 ptr, src, src1,
2701 LLVMAtomicOrderingSequentiallyConsistent,
2702 LLVMAtomicOrderingSequentiallyConsistent,
2703 false);
2704 } else {
2705 LLVMAtomicRMWBinOp op;
2706 switch (instr->intrinsic) {
2707 case nir_intrinsic_var_atomic_add:
2708 op = LLVMAtomicRMWBinOpAdd;
2709 break;
2710 case nir_intrinsic_var_atomic_umin:
2711 op = LLVMAtomicRMWBinOpUMin;
2712 break;
2713 case nir_intrinsic_var_atomic_umax:
2714 op = LLVMAtomicRMWBinOpUMax;
2715 break;
2716 case nir_intrinsic_var_atomic_imin:
2717 op = LLVMAtomicRMWBinOpMin;
2718 break;
2719 case nir_intrinsic_var_atomic_imax:
2720 op = LLVMAtomicRMWBinOpMax;
2721 break;
2722 case nir_intrinsic_var_atomic_and:
2723 op = LLVMAtomicRMWBinOpAnd;
2724 break;
2725 case nir_intrinsic_var_atomic_or:
2726 op = LLVMAtomicRMWBinOpOr;
2727 break;
2728 case nir_intrinsic_var_atomic_xor:
2729 op = LLVMAtomicRMWBinOpXor;
2730 break;
2731 case nir_intrinsic_var_atomic_exchange:
2732 op = LLVMAtomicRMWBinOpXchg;
2733 break;
2734 default:
2735 return NULL;
2736 }
2737
2738 result = LLVMBuildAtomicRMW(ctx->builder, op, ptr, to_integer(ctx, src),
2739 LLVMAtomicOrderingSequentiallyConsistent,
2740 false);
2741 }
2742 return result;
2743 }
2744
/* Values of the `location` argument taken by lookup_interp_param(). */
#define INTERP_CENTER 0
#define INTERP_CENTROID 1
#define INTERP_SAMPLE 2
2748
2749 static LLVMValueRef lookup_interp_param(struct nir_to_llvm_context *ctx,
2750 enum glsl_interp_mode interp, unsigned location)
2751 {
2752 switch (interp) {
2753 case INTERP_MODE_FLAT:
2754 default:
2755 return NULL;
2756 case INTERP_MODE_SMOOTH:
2757 case INTERP_MODE_NONE:
2758 if (location == INTERP_CENTER)
2759 return ctx->persp_center;
2760 else if (location == INTERP_CENTROID)
2761 return ctx->persp_centroid;
2762 else if (location == INTERP_SAMPLE)
2763 return ctx->persp_sample;
2764 break;
2765 case INTERP_MODE_NOPERSPECTIVE:
2766 if (location == INTERP_CENTER)
2767 return ctx->linear_center;
2768 else if (location == INTERP_CENTROID)
2769 return ctx->linear_centroid;
2770 else if (location == INTERP_SAMPLE)
2771 return ctx->linear_sample;
2772 break;
2773 }
2774 return NULL;
2775 }
2776
2777 static LLVMValueRef load_sample_position(struct nir_to_llvm_context *ctx,
2778 LLVMValueRef sample_id)
2779 {
2780 /* offset = sample_id * 8 (8 = 2 floats containing samplepos.xy) */
2781 LLVMValueRef offset0 = LLVMBuildMul(ctx->builder, sample_id, LLVMConstInt(ctx->i32, 8, false), "");
2782 LLVMValueRef offset1 = LLVMBuildAdd(ctx->builder, offset0, LLVMConstInt(ctx->i32, 4, false), "");
2783 LLVMValueRef result[2];
2784
2785 result[0] = build_indexed_load_const(ctx, ctx->sample_positions, offset0);
2786 result[1] = build_indexed_load_const(ctx, ctx->sample_positions, offset1);
2787
2788 return build_gather_values(ctx, result, 2);
2789 }
2790
2791 static LLVMValueRef load_sample_pos(struct nir_to_llvm_context *ctx)
2792 {
2793 LLVMValueRef values[2];
2794
2795 values[0] = emit_ffract(ctx, ctx->frag_pos[0]);
2796 values[1] = emit_ffract(ctx, ctx->frag_pos[1]);
2797 return build_gather_values(ctx, values, 2);
2798 }
2799
2800 static LLVMValueRef visit_interp(struct nir_to_llvm_context *ctx,
2801 nir_intrinsic_instr *instr)
2802 {
2803 LLVMValueRef result[2];
2804 LLVMValueRef interp_param, attr_number;
2805 unsigned location;
2806 unsigned chan;
2807 LLVMValueRef src_c0, src_c1;
2808 const char *intr_name;
2809 LLVMValueRef src0;
2810 int input_index = instr->variables[0]->var->data.location - VARYING_SLOT_VAR0;
2811 switch (instr->intrinsic) {
2812 case nir_intrinsic_interp_var_at_centroid:
2813 location = INTERP_CENTROID;
2814 break;
2815 case nir_intrinsic_interp_var_at_sample:
2816 case nir_intrinsic_interp_var_at_offset:
2817 location = INTERP_SAMPLE;
2818 src0 = get_src(ctx, instr->src[0]);
2819 break;
2820 default:
2821 break;
2822 }
2823
2824 if (instr->intrinsic == nir_intrinsic_interp_var_at_offset) {
2825 src_c0 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32zero, ""));
2826 src_c1 = to_float(ctx, LLVMBuildExtractElement(ctx->builder, src0, ctx->i32one, ""));
2827 } else if (instr->intrinsic == nir_intrinsic_interp_var_at_sample) {
2828 LLVMValueRef sample_position;
2829 LLVMValueRef halfval = LLVMConstReal(ctx->f32, 0.5f);
2830
2831 /* fetch sample ID */
2832 sample_position = load_sample_position(ctx, src0);
2833
2834 src_c0 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32zero, "");
2835 src_c0 = LLVMBuildFSub(ctx->builder, src_c0, halfval, "");
2836 src_c1 = LLVMBuildExtractElement(ctx->builder, sample_position, ctx->i32one, "");
2837 src_c1 = LLVMBuildFSub(ctx->builder, src_c1, halfval, "");
2838 }
2839 interp_param = lookup_interp_param(ctx, instr->variables[0]->var->data.interpolation, location);
2840 attr_number = LLVMConstInt(ctx->i32, input_index, false);
2841
2842 if (location == INTERP_SAMPLE) {
2843 LLVMValueRef ij_out[2];
2844 LLVMValueRef ddxy_out = emit_ddxy_interp(ctx, interp_param);
2845
2846 /*
2847 * take the I then J parameters, and the DDX/Y for it, and
2848 * calculate the IJ inputs for the interpolator.
2849 * temp1 = ddx * offset/sample.x + I;
2850 * interp_param.I = ddy * offset/sample.y + temp1;
2851 * temp1 = ddx * offset/sample.x + J;
2852 * interp_param.J = ddy * offset/sample.y + temp1;
2853 */
2854 for (unsigned i = 0; i < 2; i++) {
2855 LLVMValueRef ix_ll = LLVMConstInt(ctx->i32, i, false);
2856 LLVMValueRef iy_ll = LLVMConstInt(ctx->i32, i + 2, false);
2857 LLVMValueRef ddx_el = LLVMBuildExtractElement(ctx->builder,
2858 ddxy_out, ix_ll, "");
2859 LLVMValueRef ddy_el = LLVMBuildExtractElement(ctx->builder,
2860 ddxy_out, iy_ll, "");
2861 LLVMValueRef interp_el = LLVMBuildExtractElement(ctx->builder,
2862 interp_param, ix_ll, "");
2863 LLVMValueRef temp1, temp2;
2864
2865 interp_el = LLVMBuildBitCast(ctx->builder, interp_el,
2866 ctx->f32, "");
2867
2868 temp1 = LLVMBuildFMul(ctx->builder, ddx_el, src_c0, "");
2869 temp1 = LLVMBuildFAdd(ctx->builder, temp1, interp_el, "");
2870
2871 temp2 = LLVMBuildFMul(ctx->builder, ddy_el, src_c1, "");
2872 temp2 = LLVMBuildFAdd(ctx->builder, temp2, temp1, "");
2873
2874 ij_out[i] = LLVMBuildBitCast(ctx->builder,
2875 temp2, ctx->i32, "");
2876 }
2877 interp_param = build_gather_values(ctx, ij_out, 2);
2878
2879 }
2880 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
2881 for (chan = 0; chan < 2; chan++) {
2882 LLVMValueRef args[4];
2883 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
2884
2885 args[0] = llvm_chan;
2886 args[1] = attr_number;
2887 args[2] = ctx->prim_mask;
2888 args[3] = interp_param;
2889 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
2890 ctx->f32, args, args[3] ? 4 : 3,
2891 AC_FUNC_ATTR_READNONE);
2892 }
2893 return build_gather_values(ctx, result, 2);
2894 }
2895
2896 static void visit_intrinsic(struct nir_to_llvm_context *ctx,
2897 nir_intrinsic_instr *instr)
2898 {
2899 LLVMValueRef result = NULL;
2900
2901 switch (instr->intrinsic) {
2902 case nir_intrinsic_load_work_group_id: {
2903 result = ctx->workgroup_ids;
2904 break;
2905 }
2906 case nir_intrinsic_load_base_vertex: {
2907 result = ctx->base_vertex;
2908 break;
2909 }
2910 case nir_intrinsic_load_vertex_id_zero_base: {
2911 result = ctx->vertex_id;
2912 break;
2913 }
2914 case nir_intrinsic_load_local_invocation_id: {
2915 result = ctx->local_invocation_ids;
2916 break;
2917 }
2918 case nir_intrinsic_load_base_instance:
2919 result = ctx->start_instance;
2920 break;
2921 case nir_intrinsic_load_sample_id:
2922 ctx->shader_info->fs.force_persample = true;
2923 result = unpack_param(ctx, ctx->ancillary, 8, 4);
2924 break;
2925 case nir_intrinsic_load_sample_pos:
2926 ctx->shader_info->fs.force_persample = true;
2927 result = load_sample_pos(ctx);
2928 break;
2929 case nir_intrinsic_load_front_face:
2930 result = ctx->front_face;
2931 break;
2932 case nir_intrinsic_load_instance_id:
2933 result = ctx->instance_id;
2934 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
2935 ctx->shader_info->vs.vgpr_comp_cnt);
2936 break;
2937 case nir_intrinsic_load_num_work_groups:
2938 result = ctx->num_work_groups;
2939 break;
2940 case nir_intrinsic_load_local_invocation_index:
2941 result = visit_load_local_invocation_index(ctx);
2942 break;
2943 case nir_intrinsic_load_push_constant:
2944 result = visit_load_push_constant(ctx, instr);
2945 break;
2946 case nir_intrinsic_vulkan_resource_index:
2947 result = visit_vulkan_resource_index(ctx, instr);
2948 break;
2949 case nir_intrinsic_store_ssbo:
2950 visit_store_ssbo(ctx, instr);
2951 break;
2952 case nir_intrinsic_load_ssbo:
2953 result = visit_load_buffer(ctx, instr);
2954 break;
2955 case nir_intrinsic_ssbo_atomic_add:
2956 case nir_intrinsic_ssbo_atomic_imin:
2957 case nir_intrinsic_ssbo_atomic_umin:
2958 case nir_intrinsic_ssbo_atomic_imax:
2959 case nir_intrinsic_ssbo_atomic_umax:
2960 case nir_intrinsic_ssbo_atomic_and:
2961 case nir_intrinsic_ssbo_atomic_or:
2962 case nir_intrinsic_ssbo_atomic_xor:
2963 case nir_intrinsic_ssbo_atomic_exchange:
2964 case nir_intrinsic_ssbo_atomic_comp_swap:
2965 result = visit_atomic_ssbo(ctx, instr);
2966 break;
2967 case nir_intrinsic_load_ubo:
2968 result = visit_load_ubo_buffer(ctx, instr);
2969 break;
2970 case nir_intrinsic_get_buffer_size:
2971 result = visit_get_buffer_size(ctx, instr);
2972 break;
2973 case nir_intrinsic_load_var:
2974 result = visit_load_var(ctx, instr);
2975 break;
2976 case nir_intrinsic_store_var:
2977 visit_store_var(ctx, instr);
2978 break;
2979 case nir_intrinsic_image_load:
2980 result = visit_image_load(ctx, instr);
2981 break;
2982 case nir_intrinsic_image_store:
2983 visit_image_store(ctx, instr);
2984 break;
2985 case nir_intrinsic_image_atomic_add:
2986 case nir_intrinsic_image_atomic_min:
2987 case nir_intrinsic_image_atomic_max:
2988 case nir_intrinsic_image_atomic_and:
2989 case nir_intrinsic_image_atomic_or:
2990 case nir_intrinsic_image_atomic_xor:
2991 case nir_intrinsic_image_atomic_exchange:
2992 case nir_intrinsic_image_atomic_comp_swap:
2993 result = visit_image_atomic(ctx, instr);
2994 break;
2995 case nir_intrinsic_image_size:
2996 result = visit_image_size(ctx, instr);
2997 break;
2998 case nir_intrinsic_discard:
2999 ctx->shader_info->fs.can_discard = true;
3000 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.AMDGPU.kilp",
3001 LLVMVoidTypeInContext(ctx->context),
3002 NULL, 0, 0);
3003 break;
3004 case nir_intrinsic_discard_if:
3005 emit_discard_if(ctx, instr);
3006 break;
3007 case nir_intrinsic_memory_barrier:
3008 emit_waitcnt(ctx);
3009 break;
3010 case nir_intrinsic_barrier:
3011 emit_barrier(ctx);
3012 break;
3013 case nir_intrinsic_var_atomic_add:
3014 case nir_intrinsic_var_atomic_imin:
3015 case nir_intrinsic_var_atomic_umin:
3016 case nir_intrinsic_var_atomic_imax:
3017 case nir_intrinsic_var_atomic_umax:
3018 case nir_intrinsic_var_atomic_and:
3019 case nir_intrinsic_var_atomic_or:
3020 case nir_intrinsic_var_atomic_xor:
3021 case nir_intrinsic_var_atomic_exchange:
3022 case nir_intrinsic_var_atomic_comp_swap:
3023 result = visit_var_atomic(ctx, instr);
3024 break;
3025 case nir_intrinsic_interp_var_at_centroid:
3026 case nir_intrinsic_interp_var_at_sample:
3027 case nir_intrinsic_interp_var_at_offset:
3028 result = visit_interp(ctx, instr);
3029 break;
3030 default:
3031 fprintf(stderr, "Unknown intrinsic: ");
3032 nir_print_instr(&instr->instr, stderr);
3033 fprintf(stderr, "\n");
3034 break;
3035 }
3036 if (result) {
3037 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3038 }
3039 }
3040
3041 static LLVMValueRef get_sampler_desc(struct nir_to_llvm_context *ctx,
3042 nir_deref_var *deref,
3043 enum desc_type desc_type)
3044 {
3045 unsigned desc_set = deref->var->data.descriptor_set;
3046 LLVMValueRef list = ctx->descriptor_sets[desc_set];
3047 struct radv_descriptor_set_layout *layout = ctx->options->layout->set[desc_set].layout;
3048 struct radv_descriptor_set_binding_layout *binding = layout->binding + deref->var->data.binding;
3049 unsigned offset = binding->offset;
3050 unsigned stride = binding->size;
3051 unsigned type_size;
3052 LLVMBuilderRef builder = ctx->builder;
3053 LLVMTypeRef type;
3054 LLVMValueRef index = NULL;
3055
3056 assert(deref->var->data.binding < layout->binding_count);
3057
3058 switch (desc_type) {
3059 case DESC_IMAGE:
3060 type = ctx->v8i32;
3061 type_size = 32;
3062 break;
3063 case DESC_FMASK:
3064 type = ctx->v8i32;
3065 offset += 32;
3066 type_size = 32;
3067 break;
3068 case DESC_SAMPLER:
3069 type = ctx->v4i32;
3070 if (binding->type == VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER)
3071 offset += 64;
3072
3073 type_size = 16;
3074 break;
3075 case DESC_BUFFER:
3076 type = ctx->v4i32;
3077 type_size = 16;
3078 break;
3079 default:
3080 unreachable("invalid desc_type\n");
3081 }
3082
3083 if (deref->deref.child) {
3084 nir_deref_array *child = (nir_deref_array*)deref->deref.child;
3085
3086 assert(child->deref_array_type != nir_deref_array_type_wildcard);
3087 offset += child->base_offset * stride;
3088 if (child->deref_array_type == nir_deref_array_type_indirect) {
3089 index = get_src(ctx, child->indirect);
3090 }
3091 }
3092
3093 assert(stride % type_size == 0);
3094
3095 if (!index)
3096 index = ctx->i32zero;
3097
3098 index = LLVMBuildMul(builder, index, LLVMConstInt(ctx->i32, stride / type_size, 0), "");
3099
3100 list = build_gep0(ctx, list, LLVMConstInt(ctx->i32, offset, 0));
3101 list = LLVMBuildPointerCast(builder, list, const_array(type, 0), "");
3102
3103 return build_indexed_load_const(ctx, list, index);
3104 }
3105
3106 static void set_tex_fetch_args(struct nir_to_llvm_context *ctx,
3107 struct ac_tex_info *tinfo,
3108 nir_tex_instr *instr,
3109 nir_texop op,
3110 LLVMValueRef res_ptr, LLVMValueRef samp_ptr,
3111 LLVMValueRef *param, unsigned count,
3112 unsigned dmask)
3113 {
3114 int num_args;
3115 unsigned is_rect = 0;
3116 bool da = instr->is_array || instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE;
3117
3118 if (op == nir_texop_lod)
3119 da = false;
3120 /* Pad to power of two vector */
3121 while (count < util_next_power_of_two(count))
3122 param[count++] = LLVMGetUndef(ctx->i32);
3123
3124 if (count > 1)
3125 tinfo->args[0] = build_gather_values(ctx, param, count);
3126 else
3127 tinfo->args[0] = param[0];
3128
3129 tinfo->args[1] = res_ptr;
3130 num_args = 2;
3131
3132 if (op == nir_texop_txf ||
3133 op == nir_texop_txf_ms ||
3134 op == nir_texop_query_levels ||
3135 op == nir_texop_texture_samples ||
3136 op == nir_texop_txs)
3137 tinfo->dst_type = ctx->v4i32;
3138 else {
3139 tinfo->dst_type = ctx->v4f32;
3140 tinfo->args[num_args++] = samp_ptr;
3141 }
3142
3143 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF && op == nir_texop_txf) {
3144 tinfo->args[0] = res_ptr;
3145 tinfo->args[1] = LLVMConstInt(ctx->i32, 0, false);
3146 tinfo->args[2] = param[0];
3147 tinfo->arg_count = 3;
3148 return;
3149 }
3150
3151 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, dmask, 0);
3152 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, is_rect, 0); /* unorm */
3153 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* r128 */
3154 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, da ? 1 : 0, 0);
3155 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* glc */
3156 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* slc */
3157 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* tfe */
3158 tinfo->args[num_args++] = LLVMConstInt(ctx->i32, 0, 0); /* lwe */
3159
3160 tinfo->arg_count = num_args;
3161 }
3162
3163 /* Disable anisotropic filtering if BASE_LEVEL == LAST_LEVEL.
3164 *
3165 * SI-CI:
3166 * If BASE_LEVEL == LAST_LEVEL, the shader must disable anisotropic
3167 * filtering manually. The driver sets img7 to a mask clearing
3168 * MAX_ANISO_RATIO if BASE_LEVEL == LAST_LEVEL. The shader must do:
3169 * s_and_b32 samp0, samp0, img7
3170 *
3171 * VI:
3172 * The ANISO_OVERRIDE sampler field enables this fix in TA.
3173 */
3174 static LLVMValueRef sici_fix_sampler_aniso(struct nir_to_llvm_context *ctx,
3175 LLVMValueRef res, LLVMValueRef samp)
3176 {
3177 LLVMBuilderRef builder = ctx->builder;
3178 LLVMValueRef img7, samp0;
3179
3180 if (ctx->options->chip_class >= VI)
3181 return samp;
3182
3183 img7 = LLVMBuildExtractElement(builder, res,
3184 LLVMConstInt(ctx->i32, 7, 0), "");
3185 samp0 = LLVMBuildExtractElement(builder, samp,
3186 LLVMConstInt(ctx->i32, 0, 0), "");
3187 samp0 = LLVMBuildAnd(builder, samp0, img7, "");
3188 return LLVMBuildInsertElement(builder, samp, samp0,
3189 LLVMConstInt(ctx->i32, 0, 0), "");
3190 }
3191
3192 static void tex_fetch_ptrs(struct nir_to_llvm_context *ctx,
3193 nir_tex_instr *instr,
3194 LLVMValueRef *res_ptr, LLVMValueRef *samp_ptr,
3195 LLVMValueRef *fmask_ptr)
3196 {
3197 if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
3198 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_BUFFER);
3199 else
3200 *res_ptr = get_sampler_desc(ctx, instr->texture, DESC_IMAGE);
3201 if (samp_ptr) {
3202 if (instr->sampler)
3203 *samp_ptr = get_sampler_desc(ctx, instr->sampler, DESC_SAMPLER);
3204 else
3205 *samp_ptr = get_sampler_desc(ctx, instr->texture, DESC_SAMPLER);
3206 if (instr->sampler_dim < GLSL_SAMPLER_DIM_RECT)
3207 *samp_ptr = sici_fix_sampler_aniso(ctx, *res_ptr, *samp_ptr);
3208 }
3209 if (fmask_ptr && !instr->sampler && (instr->op == nir_texop_txf_ms ||
3210 instr->op == nir_texop_samples_identical))
3211 *fmask_ptr = get_sampler_desc(ctx, instr->texture, DESC_FMASK);
3212 }
3213
3214 static void visit_tex(struct nir_to_llvm_context *ctx, nir_tex_instr *instr)
3215 {
3216 LLVMValueRef result = NULL;
3217 struct ac_tex_info tinfo = { 0 };
3218 unsigned dmask = 0xf;
3219 LLVMValueRef address[16];
3220 LLVMValueRef coords[5];
3221 LLVMValueRef coord = NULL, lod = NULL, comparator = NULL;
3222 LLVMValueRef bias = NULL, offsets = NULL;
3223 LLVMValueRef res_ptr, samp_ptr, fmask_ptr = NULL, sample_index = NULL;
3224 LLVMValueRef ddx = NULL, ddy = NULL;
3225 LLVMValueRef derivs[6];
3226 unsigned chan, count = 0;
3227 unsigned const_src = 0, num_deriv_comp = 0;
3228
3229 tex_fetch_ptrs(ctx, instr, &res_ptr, &samp_ptr, &fmask_ptr);
3230
3231 for (unsigned i = 0; i < instr->num_srcs; i++) {
3232 switch (instr->src[i].src_type) {
3233 case nir_tex_src_coord:
3234 coord = get_src(ctx, instr->src[i].src);
3235 break;
3236 case nir_tex_src_projector:
3237 break;
3238 case nir_tex_src_comparator:
3239 comparator = get_src(ctx, instr->src[i].src);
3240 break;
3241 case nir_tex_src_offset:
3242 offsets = get_src(ctx, instr->src[i].src);
3243 const_src = i;
3244 break;
3245 case nir_tex_src_bias:
3246 bias = get_src(ctx, instr->src[i].src);
3247 break;
3248 case nir_tex_src_lod:
3249 lod = get_src(ctx, instr->src[i].src);
3250 break;
3251 case nir_tex_src_ms_index:
3252 sample_index = get_src(ctx, instr->src[i].src);
3253 break;
3254 case nir_tex_src_ms_mcs:
3255 break;
3256 case nir_tex_src_ddx:
3257 ddx = get_src(ctx, instr->src[i].src);
3258 num_deriv_comp = instr->src[i].src.ssa->num_components;
3259 break;
3260 case nir_tex_src_ddy:
3261 ddy = get_src(ctx, instr->src[i].src);
3262 break;
3263 case nir_tex_src_texture_offset:
3264 case nir_tex_src_sampler_offset:
3265 case nir_tex_src_plane:
3266 default:
3267 break;
3268 }
3269 }
3270
3271 if (instr->op == nir_texop_texture_samples) {
3272 LLVMValueRef res, samples, is_msaa;
3273 res = LLVMBuildBitCast(ctx->builder, res_ptr, ctx->v8i32, "");
3274 samples = LLVMBuildExtractElement(ctx->builder, res,
3275 LLVMConstInt(ctx->i32, 3, false), "");
3276 is_msaa = LLVMBuildLShr(ctx->builder, samples,
3277 LLVMConstInt(ctx->i32, 28, false), "");
3278 is_msaa = LLVMBuildAnd(ctx->builder, is_msaa,
3279 LLVMConstInt(ctx->i32, 0xe, false), "");
3280 is_msaa = LLVMBuildICmp(ctx->builder, LLVMIntEQ, is_msaa,
3281 LLVMConstInt(ctx->i32, 0xe, false), "");
3282
3283 samples = LLVMBuildLShr(ctx->builder, samples,
3284 LLVMConstInt(ctx->i32, 16, false), "");
3285 samples = LLVMBuildAnd(ctx->builder, samples,
3286 LLVMConstInt(ctx->i32, 0xf, false), "");
3287 samples = LLVMBuildShl(ctx->builder, ctx->i32one,
3288 samples, "");
3289 samples = LLVMBuildSelect(ctx->builder, is_msaa, samples,
3290 ctx->i32one, "");
3291 result = samples;
3292 goto write_result;
3293 }
3294
3295 if (coord)
3296 for (chan = 0; chan < instr->coord_components; chan++)
3297 coords[chan] = llvm_extract_elem(ctx, coord, chan);
3298
3299 if (offsets && instr->op != nir_texop_txf) {
3300 LLVMValueRef offset[3], pack;
3301 for (chan = 0; chan < 3; ++chan)
3302 offset[chan] = ctx->i32zero;
3303
3304 tinfo.has_offset = true;
3305 for (chan = 0; chan < get_llvm_num_components(offsets); chan++) {
3306 offset[chan] = llvm_extract_elem(ctx, offsets, chan);
3307 offset[chan] = LLVMBuildAnd(ctx->builder, offset[chan],
3308 LLVMConstInt(ctx->i32, 0x3f, false), "");
3309 if (chan)
3310 offset[chan] = LLVMBuildShl(ctx->builder, offset[chan],
3311 LLVMConstInt(ctx->i32, chan * 8, false), "");
3312 }
3313 pack = LLVMBuildOr(ctx->builder, offset[0], offset[1], "");
3314 pack = LLVMBuildOr(ctx->builder, pack, offset[2], "");
3315 address[count++] = pack;
3316
3317 }
3318 /* pack LOD bias value */
3319 if (instr->op == nir_texop_txb && bias) {
3320 address[count++] = bias;
3321 }
3322
3323 /* Pack depth comparison value */
3324 if (instr->is_shadow && comparator) {
3325 address[count++] = llvm_extract_elem(ctx, comparator, 0);
3326 }
3327
3328 /* pack derivatives */
3329 if (ddx || ddy) {
3330 switch (instr->sampler_dim) {
3331 case GLSL_SAMPLER_DIM_3D:
3332 case GLSL_SAMPLER_DIM_CUBE:
3333 num_deriv_comp = 3;
3334 break;
3335 case GLSL_SAMPLER_DIM_2D:
3336 default:
3337 num_deriv_comp = 2;
3338 break;
3339 case GLSL_SAMPLER_DIM_1D:
3340 num_deriv_comp = 1;
3341 break;
3342 }
3343
3344 for (unsigned i = 0; i < num_deriv_comp; i++) {
3345 derivs[i * 2] = to_float(ctx, llvm_extract_elem(ctx, ddx, i));
3346 derivs[i * 2 + 1] = to_float(ctx, llvm_extract_elem(ctx, ddy, i));
3347 }
3348 }
3349
3350 if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE && coord) {
3351 for (chan = 0; chan < instr->coord_components; chan++)
3352 coords[chan] = to_float(ctx, coords[chan]);
3353 if (instr->coord_components == 3)
3354 coords[3] = LLVMGetUndef(ctx->f32);
3355 ac_prepare_cube_coords(&ctx->ac,
3356 instr->op == nir_texop_txd, instr->is_array,
3357 coords, derivs);
3358 if (num_deriv_comp)
3359 num_deriv_comp--;
3360 }
3361
3362 if (ddx || ddy) {
3363 for (unsigned i = 0; i < num_deriv_comp * 2; i++)
3364 address[count++] = derivs[i];
3365 }
3366
3367 /* Pack texture coordinates */
3368 if (coord) {
3369 address[count++] = coords[0];
3370 if (instr->coord_components > 1)
3371 address[count++] = coords[1];
3372 if (instr->coord_components > 2) {
3373 /* This seems like a bit of a hack - but it passes Vulkan CTS with it */
3374 if (instr->sampler_dim != GLSL_SAMPLER_DIM_3D && instr->op != nir_texop_txf) {
3375 coords[2] = to_float(ctx, coords[2]);
3376 coords[2] = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.rint.f32", ctx->f32, &coords[2],
3377 1, 0);
3378 coords[2] = to_integer(ctx, coords[2]);
3379 }
3380 address[count++] = coords[2];
3381 }
3382 }
3383
3384 /* Pack LOD */
3385 if ((instr->op == nir_texop_txl || instr->op == nir_texop_txf) && lod) {
3386 address[count++] = lod;
3387 } else if (instr->op == nir_texop_txf_ms && sample_index) {
3388 address[count++] = sample_index;
3389 } else if(instr->op == nir_texop_txs) {
3390 count = 0;
3391 if (lod)
3392 address[count++] = lod;
3393 else
3394 address[count++] = ctx->i32zero;
3395 }
3396
3397 for (chan = 0; chan < count; chan++) {
3398 address[chan] = LLVMBuildBitCast(ctx->builder,
3399 address[chan], ctx->i32, "");
3400 }
3401
3402 if (instr->op == nir_texop_samples_identical) {
3403 LLVMValueRef txf_address[4];
3404 struct ac_tex_info txf_info = { 0 };
3405 unsigned txf_count = count;
3406 memcpy(txf_address, address, sizeof(txf_address));
3407
3408 if (!instr->is_array)
3409 txf_address[2] = ctx->i32zero;
3410 txf_address[3] = ctx->i32zero;
3411
3412 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3413 fmask_ptr, NULL,
3414 txf_address, txf_count, 0xf);
3415
3416 result = build_tex_intrinsic(ctx, instr, &txf_info);
3417
3418 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3419 result = emit_int_cmp(ctx, LLVMIntEQ, result, ctx->i32zero);
3420 goto write_result;
3421 }
3422
3423 /* Adjust the sample index according to FMASK.
3424 *
3425 * For uncompressed MSAA surfaces, FMASK should return 0x76543210,
3426 * which is the identity mapping. Each nibble says which physical sample
3427 * should be fetched to get that sample.
3428 *
3429 * For example, 0x11111100 means there are only 2 samples stored and
3430 * the second sample covers 3/4 of the pixel. When reading samples 0
3431 * and 1, return physical sample 0 (determined by the first two 0s
3432 * in FMASK), otherwise return physical sample 1.
3433 *
3434 * The sample index should be adjusted as follows:
3435 * sample_index = (fmask >> (sample_index * 4)) & 0xF;
3436 */
3437 if (instr->sampler_dim == GLSL_SAMPLER_DIM_MS) {
3438 LLVMValueRef txf_address[4];
3439 struct ac_tex_info txf_info = { 0 };
3440 unsigned txf_count = count;
3441 memcpy(txf_address, address, sizeof(txf_address));
3442
3443 if (!instr->is_array)
3444 txf_address[2] = ctx->i32zero;
3445 txf_address[3] = ctx->i32zero;
3446
3447 set_tex_fetch_args(ctx, &txf_info, instr, nir_texop_txf,
3448 fmask_ptr, NULL,
3449 txf_address, txf_count, 0xf);
3450
3451 result = build_tex_intrinsic(ctx, instr, &txf_info);
3452 LLVMValueRef four = LLVMConstInt(ctx->i32, 4, false);
3453 LLVMValueRef F = LLVMConstInt(ctx->i32, 0xf, false);
3454
3455 LLVMValueRef fmask = LLVMBuildExtractElement(ctx->builder,
3456 result,
3457 ctx->i32zero, "");
3458
3459 unsigned sample_chan = instr->is_array ? 3 : 2;
3460
3461 LLVMValueRef sample_index4 =
3462 LLVMBuildMul(ctx->builder, address[sample_chan], four, "");
3463 LLVMValueRef shifted_fmask =
3464 LLVMBuildLShr(ctx->builder, fmask, sample_index4, "");
3465 LLVMValueRef final_sample =
3466 LLVMBuildAnd(ctx->builder, shifted_fmask, F, "");
3467
3468 /* Don't rewrite the sample index if WORD1.DATA_FORMAT of the FMASK
3469 * resource descriptor is 0 (invalid),
3470 */
3471 LLVMValueRef fmask_desc =
3472 LLVMBuildBitCast(ctx->builder, fmask_ptr,
3473 ctx->v8i32, "");
3474
3475 LLVMValueRef fmask_word1 =
3476 LLVMBuildExtractElement(ctx->builder, fmask_desc,
3477 ctx->i32one, "");
3478
3479 LLVMValueRef word1_is_nonzero =
3480 LLVMBuildICmp(ctx->builder, LLVMIntNE,
3481 fmask_word1, ctx->i32zero, "");
3482
3483 /* Replace the MSAA sample index. */
3484 address[sample_chan] =
3485 LLVMBuildSelect(ctx->builder, word1_is_nonzero,
3486 final_sample, address[sample_chan], "");
3487 }
3488
3489 if (offsets && instr->op == nir_texop_txf) {
3490 nir_const_value *const_offset =
3491 nir_src_as_const_value(instr->src[const_src].src);
3492 int num_offsets = instr->src[const_src].src.ssa->num_components;
3493 assert(const_offset);
3494 num_offsets = MIN2(num_offsets, instr->coord_components);
3495 if (num_offsets > 2)
3496 address[2] = LLVMBuildAdd(ctx->builder,
3497 address[2], LLVMConstInt(ctx->i32, const_offset->i32[2], false), "");
3498 if (num_offsets > 1)
3499 address[1] = LLVMBuildAdd(ctx->builder,
3500 address[1], LLVMConstInt(ctx->i32, const_offset->i32[1], false), "");
3501 address[0] = LLVMBuildAdd(ctx->builder,
3502 address[0], LLVMConstInt(ctx->i32, const_offset->i32[0], false), "");
3503
3504 }
3505
3506 /* TODO TG4 support */
3507 if (instr->op == nir_texop_tg4) {
3508 if (instr->is_shadow)
3509 dmask = 1;
3510 else
3511 dmask = 1 << instr->component;
3512 }
3513 set_tex_fetch_args(ctx, &tinfo, instr, instr->op,
3514 res_ptr, samp_ptr, address, count, dmask);
3515
3516 result = build_tex_intrinsic(ctx, instr, &tinfo);
3517
3518 if (instr->op == nir_texop_query_levels)
3519 result = LLVMBuildExtractElement(ctx->builder, result, LLVMConstInt(ctx->i32, 3, false), "");
3520 else if (instr->is_shadow && instr->op != nir_texop_txs && instr->op != nir_texop_lod && instr->op != nir_texop_tg4)
3521 result = LLVMBuildExtractElement(ctx->builder, result, ctx->i32zero, "");
3522 else if (instr->op == nir_texop_txs &&
3523 instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
3524 instr->is_array) {
3525 LLVMValueRef two = LLVMConstInt(ctx->i32, 2, false);
3526 LLVMValueRef six = LLVMConstInt(ctx->i32, 6, false);
3527 LLVMValueRef z = LLVMBuildExtractElement(ctx->builder, result, two, "");
3528 z = LLVMBuildSDiv(ctx->builder, z, six, "");
3529 result = LLVMBuildInsertElement(ctx->builder, result, z, two, "");
3530 } else if (instr->dest.ssa.num_components != 4)
3531 result = trim_vector(ctx, result, instr->dest.ssa.num_components);
3532
3533 write_result:
3534 if (result) {
3535 assert(instr->dest.is_ssa);
3536 result = to_integer(ctx, result);
3537 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3538 }
3539 }
3540
3541
3542 static void visit_phi(struct nir_to_llvm_context *ctx, nir_phi_instr *instr)
3543 {
3544 LLVMTypeRef type = get_def_type(ctx, &instr->dest.ssa);
3545 LLVMValueRef result = LLVMBuildPhi(ctx->builder, type, "");
3546
3547 _mesa_hash_table_insert(ctx->defs, &instr->dest.ssa, result);
3548 _mesa_hash_table_insert(ctx->phis, instr, result);
3549 }
3550
3551 static void visit_post_phi(struct nir_to_llvm_context *ctx,
3552 nir_phi_instr *instr,
3553 LLVMValueRef llvm_phi)
3554 {
3555 nir_foreach_phi_src(src, instr) {
3556 LLVMBasicBlockRef block = get_block(ctx, src->pred);
3557 LLVMValueRef llvm_src = get_src(ctx, src->src);
3558
3559 LLVMAddIncoming(llvm_phi, &llvm_src, &block, 1);
3560 }
3561 }
3562
3563 static void phi_post_pass(struct nir_to_llvm_context *ctx)
3564 {
3565 struct hash_entry *entry;
3566 hash_table_foreach(ctx->phis, entry) {
3567 visit_post_phi(ctx, (nir_phi_instr*)entry->key,
3568 (LLVMValueRef)entry->data);
3569 }
3570 }
3571
3572
3573 static void visit_ssa_undef(struct nir_to_llvm_context *ctx,
3574 nir_ssa_undef_instr *instr)
3575 {
3576 unsigned num_components = instr->def.num_components;
3577 LLVMValueRef undef;
3578
3579 if (num_components == 1)
3580 undef = LLVMGetUndef(ctx->i32);
3581 else {
3582 undef = LLVMGetUndef(LLVMVectorType(ctx->i32, num_components));
3583 }
3584 _mesa_hash_table_insert(ctx->defs, &instr->def, undef);
3585 }
3586
3587 static void visit_jump(struct nir_to_llvm_context *ctx,
3588 nir_jump_instr *instr)
3589 {
3590 switch (instr->type) {
3591 case nir_jump_break:
3592 LLVMBuildBr(ctx->builder, ctx->break_block);
3593 LLVMClearInsertionPosition(ctx->builder);
3594 break;
3595 case nir_jump_continue:
3596 LLVMBuildBr(ctx->builder, ctx->continue_block);
3597 LLVMClearInsertionPosition(ctx->builder);
3598 break;
3599 default:
3600 fprintf(stderr, "Unknown NIR jump instr: ");
3601 nir_print_instr(&instr->instr, stderr);
3602 fprintf(stderr, "\n");
3603 abort();
3604 }
3605 }
3606
3607 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3608 struct exec_list *list);
3609
3610 static void visit_block(struct nir_to_llvm_context *ctx, nir_block *block)
3611 {
3612 LLVMBasicBlockRef llvm_block = LLVMGetInsertBlock(ctx->builder);
3613 nir_foreach_instr(instr, block)
3614 {
3615 switch (instr->type) {
3616 case nir_instr_type_alu:
3617 visit_alu(ctx, nir_instr_as_alu(instr));
3618 break;
3619 case nir_instr_type_load_const:
3620 visit_load_const(ctx, nir_instr_as_load_const(instr));
3621 break;
3622 case nir_instr_type_intrinsic:
3623 visit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
3624 break;
3625 case nir_instr_type_tex:
3626 visit_tex(ctx, nir_instr_as_tex(instr));
3627 break;
3628 case nir_instr_type_phi:
3629 visit_phi(ctx, nir_instr_as_phi(instr));
3630 break;
3631 case nir_instr_type_ssa_undef:
3632 visit_ssa_undef(ctx, nir_instr_as_ssa_undef(instr));
3633 break;
3634 case nir_instr_type_jump:
3635 visit_jump(ctx, nir_instr_as_jump(instr));
3636 break;
3637 default:
3638 fprintf(stderr, "Unknown NIR instr type: ");
3639 nir_print_instr(instr, stderr);
3640 fprintf(stderr, "\n");
3641 abort();
3642 }
3643 }
3644
3645 _mesa_hash_table_insert(ctx->defs, block, llvm_block);
3646 }
3647
3648 static void visit_if(struct nir_to_llvm_context *ctx, nir_if *if_stmt)
3649 {
3650 LLVMValueRef value = get_src(ctx, if_stmt->condition);
3651
3652 LLVMBasicBlockRef merge_block =
3653 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3654 LLVMBasicBlockRef if_block =
3655 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3656 LLVMBasicBlockRef else_block = merge_block;
3657 if (!exec_list_is_empty(&if_stmt->else_list))
3658 else_block = LLVMAppendBasicBlockInContext(
3659 ctx->context, ctx->main_function, "");
3660
3661 LLVMValueRef cond = LLVMBuildICmp(ctx->builder, LLVMIntNE, value,
3662 LLVMConstInt(ctx->i32, 0, false), "");
3663 LLVMBuildCondBr(ctx->builder, cond, if_block, else_block);
3664
3665 LLVMPositionBuilderAtEnd(ctx->builder, if_block);
3666 visit_cf_list(ctx, &if_stmt->then_list);
3667 if (LLVMGetInsertBlock(ctx->builder))
3668 LLVMBuildBr(ctx->builder, merge_block);
3669
3670 if (!exec_list_is_empty(&if_stmt->else_list)) {
3671 LLVMPositionBuilderAtEnd(ctx->builder, else_block);
3672 visit_cf_list(ctx, &if_stmt->else_list);
3673 if (LLVMGetInsertBlock(ctx->builder))
3674 LLVMBuildBr(ctx->builder, merge_block);
3675 }
3676
3677 LLVMPositionBuilderAtEnd(ctx->builder, merge_block);
3678 }
3679
3680 static void visit_loop(struct nir_to_llvm_context *ctx, nir_loop *loop)
3681 {
3682 LLVMBasicBlockRef continue_parent = ctx->continue_block;
3683 LLVMBasicBlockRef break_parent = ctx->break_block;
3684
3685 ctx->continue_block =
3686 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3687 ctx->break_block =
3688 LLVMAppendBasicBlockInContext(ctx->context, ctx->main_function, "");
3689
3690 LLVMBuildBr(ctx->builder, ctx->continue_block);
3691 LLVMPositionBuilderAtEnd(ctx->builder, ctx->continue_block);
3692 visit_cf_list(ctx, &loop->body);
3693
3694 if (LLVMGetInsertBlock(ctx->builder))
3695 LLVMBuildBr(ctx->builder, ctx->continue_block);
3696 LLVMPositionBuilderAtEnd(ctx->builder, ctx->break_block);
3697
3698 ctx->continue_block = continue_parent;
3699 ctx->break_block = break_parent;
3700 }
3701
3702 static void visit_cf_list(struct nir_to_llvm_context *ctx,
3703 struct exec_list *list)
3704 {
3705 foreach_list_typed(nir_cf_node, node, node, list)
3706 {
3707 switch (node->type) {
3708 case nir_cf_node_block:
3709 visit_block(ctx, nir_cf_node_as_block(node));
3710 break;
3711
3712 case nir_cf_node_if:
3713 visit_if(ctx, nir_cf_node_as_if(node));
3714 break;
3715
3716 case nir_cf_node_loop:
3717 visit_loop(ctx, nir_cf_node_as_loop(node));
3718 break;
3719
3720 default:
3721 assert(0);
3722 }
3723 }
3724 }
3725
3726 static void
3727 handle_vs_input_decl(struct nir_to_llvm_context *ctx,
3728 struct nir_variable *variable)
3729 {
3730 LLVMValueRef t_list_ptr = ctx->vertex_buffers;
3731 LLVMValueRef t_offset;
3732 LLVMValueRef t_list;
3733 LLVMValueRef args[3];
3734 LLVMValueRef input;
3735 LLVMValueRef buffer_index;
3736 int index = variable->data.location - VERT_ATTRIB_GENERIC0;
3737 int idx = variable->data.location;
3738 unsigned attrib_count = glsl_count_attribute_slots(variable->type, true);
3739
3740 variable->data.driver_location = idx * 4;
3741
3742 if (ctx->options->key.vs.instance_rate_inputs & (1u << index)) {
3743 buffer_index = LLVMBuildAdd(ctx->builder, ctx->instance_id,
3744 ctx->start_instance, "");
3745 ctx->shader_info->vs.vgpr_comp_cnt = MAX2(3,
3746 ctx->shader_info->vs.vgpr_comp_cnt);
3747 } else
3748 buffer_index = LLVMBuildAdd(ctx->builder, ctx->vertex_id,
3749 ctx->base_vertex, "");
3750
3751 for (unsigned i = 0; i < attrib_count; ++i, ++idx) {
3752 t_offset = LLVMConstInt(ctx->i32, index + i, false);
3753
3754 t_list = build_indexed_load_const(ctx, t_list_ptr, t_offset);
3755 args[0] = t_list;
3756 args[1] = LLVMConstInt(ctx->i32, 0, false);
3757 args[2] = buffer_index;
3758 input = ac_emit_llvm_intrinsic(&ctx->ac,
3759 "llvm.SI.vs.load.input", ctx->v4f32, args, 3,
3760 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3761
3762 for (unsigned chan = 0; chan < 4; chan++) {
3763 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3764 ctx->inputs[radeon_llvm_reg_index_soa(idx, chan)] =
3765 to_integer(ctx, LLVMBuildExtractElement(ctx->builder,
3766 input, llvm_chan, ""));
3767 }
3768 }
3769 }
3770
3771
3772 static void interp_fs_input(struct nir_to_llvm_context *ctx,
3773 unsigned attr,
3774 LLVMValueRef interp_param,
3775 LLVMValueRef prim_mask,
3776 LLVMValueRef result[4])
3777 {
3778 const char *intr_name;
3779 LLVMValueRef attr_number;
3780 unsigned chan;
3781
3782 attr_number = LLVMConstInt(ctx->i32, attr, false);
3783
3784 /* fs.constant returns the param from the middle vertex, so it's not
3785 * really useful for flat shading. It's meant to be used for custom
3786 * interpolation (but the intrinsic can't fetch from the other two
3787 * vertices).
3788 *
3789 * Luckily, it doesn't matter, because we rely on the FLAT_SHADE state
3790 * to do the right thing. The only reason we use fs.constant is that
3791 * fs.interp cannot be used on integers, because they can be equal
3792 * to NaN.
3793 */
3794 intr_name = interp_param ? "llvm.SI.fs.interp" : "llvm.SI.fs.constant";
3795
3796 for (chan = 0; chan < 4; chan++) {
3797 LLVMValueRef args[4];
3798 LLVMValueRef llvm_chan = LLVMConstInt(ctx->i32, chan, false);
3799
3800 args[0] = llvm_chan;
3801 args[1] = attr_number;
3802 args[2] = prim_mask;
3803 args[3] = interp_param;
3804 result[chan] = ac_emit_llvm_intrinsic(&ctx->ac, intr_name,
3805 ctx->f32, args, args[3] ? 4 : 3,
3806 AC_FUNC_ATTR_READNONE | AC_FUNC_ATTR_NOUNWIND);
3807 }
3808 }
3809
3810 static void
3811 handle_fs_input_decl(struct nir_to_llvm_context *ctx,
3812 struct nir_variable *variable)
3813 {
3814 int idx = variable->data.location;
3815 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3816 LLVMValueRef interp;
3817
3818 variable->data.driver_location = idx * 4;
3819 ctx->input_mask |= ((1ull << attrib_count) - 1) << variable->data.location;
3820
3821 if (glsl_get_base_type(glsl_without_array(variable->type)) == GLSL_TYPE_FLOAT) {
3822 unsigned interp_type;
3823 if (variable->data.sample) {
3824 interp_type = INTERP_SAMPLE;
3825 ctx->shader_info->fs.force_persample = true;
3826 } else if (variable->data.centroid)
3827 interp_type = INTERP_CENTROID;
3828 else
3829 interp_type = INTERP_CENTER;
3830
3831 interp = lookup_interp_param(ctx, variable->data.interpolation, interp_type);
3832 } else
3833 interp = NULL;
3834
3835 for (unsigned i = 0; i < attrib_count; ++i)
3836 ctx->inputs[radeon_llvm_reg_index_soa(idx + i, 0)] = interp;
3837
3838 }
3839
3840 static void
3841 handle_shader_input_decl(struct nir_to_llvm_context *ctx,
3842 struct nir_variable *variable)
3843 {
3844 switch (ctx->stage) {
3845 case MESA_SHADER_VERTEX:
3846 handle_vs_input_decl(ctx, variable);
3847 break;
3848 case MESA_SHADER_FRAGMENT:
3849 handle_fs_input_decl(ctx, variable);
3850 break;
3851 default:
3852 break;
3853 }
3854
3855 }
3856
3857 static void
3858 handle_fs_inputs_pre(struct nir_to_llvm_context *ctx,
3859 struct nir_shader *nir)
3860 {
3861 unsigned index = 0;
3862 for (unsigned i = 0; i < RADEON_LLVM_MAX_INPUTS; ++i) {
3863 LLVMValueRef interp_param;
3864 LLVMValueRef *inputs = ctx->inputs +radeon_llvm_reg_index_soa(i, 0);
3865
3866 if (!(ctx->input_mask & (1ull << i)))
3867 continue;
3868
3869 if (i >= VARYING_SLOT_VAR0 || i == VARYING_SLOT_PNTC) {
3870 interp_param = *inputs;
3871 interp_fs_input(ctx, index, interp_param, ctx->prim_mask,
3872 inputs);
3873
3874 if (!interp_param)
3875 ctx->shader_info->fs.flat_shaded_mask |= 1u << index;
3876 ++index;
3877 } else if (i == VARYING_SLOT_POS) {
3878 for(int i = 0; i < 3; ++i)
3879 inputs[i] = ctx->frag_pos[i];
3880
3881 inputs[3] = emit_fdiv(ctx, ctx->f32one, ctx->frag_pos[3]);
3882 }
3883 }
3884 ctx->shader_info->fs.num_interp = index;
3885 if (ctx->input_mask & (1 << VARYING_SLOT_PNTC))
3886 ctx->shader_info->fs.has_pcoord = true;
3887 ctx->shader_info->fs.input_mask = ctx->input_mask >> VARYING_SLOT_VAR0;
3888 }
3889
3890 static LLVMValueRef
3891 ac_build_alloca(struct nir_to_llvm_context *ctx,
3892 LLVMTypeRef type,
3893 const char *name)
3894 {
3895 LLVMBuilderRef builder = ctx->builder;
3896 LLVMBasicBlockRef current_block = LLVMGetInsertBlock(builder);
3897 LLVMValueRef function = LLVMGetBasicBlockParent(current_block);
3898 LLVMBasicBlockRef first_block = LLVMGetEntryBasicBlock(function);
3899 LLVMValueRef first_instr = LLVMGetFirstInstruction(first_block);
3900 LLVMBuilderRef first_builder = LLVMCreateBuilderInContext(ctx->context);
3901 LLVMValueRef res;
3902
3903 if (first_instr) {
3904 LLVMPositionBuilderBefore(first_builder, first_instr);
3905 } else {
3906 LLVMPositionBuilderAtEnd(first_builder, first_block);
3907 }
3908
3909 res = LLVMBuildAlloca(first_builder, type, name);
3910 LLVMBuildStore(builder, LLVMConstNull(type), res);
3911
3912 LLVMDisposeBuilder(first_builder);
3913
3914 return res;
3915 }
3916
3917 static LLVMValueRef si_build_alloca_undef(struct nir_to_llvm_context *ctx,
3918 LLVMTypeRef type,
3919 const char *name)
3920 {
3921 LLVMValueRef ptr = ac_build_alloca(ctx, type, name);
3922 LLVMBuildStore(ctx->builder, LLVMGetUndef(type), ptr);
3923 return ptr;
3924 }
3925
3926 static void
3927 handle_shader_output_decl(struct nir_to_llvm_context *ctx,
3928 struct nir_variable *variable)
3929 {
3930 int idx = variable->data.location + variable->data.index;
3931 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3932
3933 variable->data.driver_location = idx * 4;
3934
3935 if (ctx->stage == MESA_SHADER_VERTEX) {
3936
3937 if (idx == VARYING_SLOT_CLIP_DIST0 ||
3938 idx == VARYING_SLOT_CULL_DIST0) {
3939 int length = glsl_get_length(variable->type);
3940 if (idx == VARYING_SLOT_CLIP_DIST0) {
3941 ctx->shader_info->vs.clip_dist_mask = (1 << length) - 1;
3942 ctx->num_clips = length;
3943 } else if (idx == VARYING_SLOT_CULL_DIST0) {
3944 ctx->shader_info->vs.cull_dist_mask = (1 << length) - 1;
3945 ctx->num_culls = length;
3946 }
3947 if (length > 4)
3948 attrib_count = 2;
3949 else
3950 attrib_count = 1;
3951 }
3952 }
3953
3954 for (unsigned i = 0; i < attrib_count; ++i) {
3955 for (unsigned chan = 0; chan < 4; chan++) {
3956 ctx->outputs[radeon_llvm_reg_index_soa(idx + i, chan)] =
3957 si_build_alloca_undef(ctx, ctx->f32, "");
3958 }
3959 }
3960 ctx->output_mask |= ((1ull << attrib_count) - 1) << idx;
3961 }
3962
3963 static void
3964 setup_locals(struct nir_to_llvm_context *ctx,
3965 struct nir_function *func)
3966 {
3967 int i, j;
3968 ctx->num_locals = 0;
3969 nir_foreach_variable(variable, &func->impl->locals) {
3970 unsigned attrib_count = glsl_count_attribute_slots(variable->type, false);
3971 variable->data.driver_location = ctx->num_locals * 4;
3972 ctx->num_locals += attrib_count;
3973 }
3974 ctx->locals = malloc(4 * ctx->num_locals * sizeof(LLVMValueRef));
3975 if (!ctx->locals)
3976 return;
3977
3978 for (i = 0; i < ctx->num_locals; i++) {
3979 for (j = 0; j < 4; j++) {
3980 ctx->locals[i * 4 + j] =
3981 si_build_alloca_undef(ctx, ctx->f32, "temp");
3982 }
3983 }
3984 }
3985
3986 static LLVMValueRef
3987 emit_float_saturate(struct nir_to_llvm_context *ctx, LLVMValueRef v, float lo, float hi)
3988 {
3989 v = to_float(ctx, v);
3990 v = emit_intrin_2f_param(ctx, "llvm.maxnum.f32", v, LLVMConstReal(ctx->f32, lo));
3991 return emit_intrin_2f_param(ctx, "llvm.minnum.f32", v, LLVMConstReal(ctx->f32, hi));
3992 }
3993
3994
3995 static LLVMValueRef emit_pack_int16(struct nir_to_llvm_context *ctx,
3996 LLVMValueRef src0, LLVMValueRef src1)
3997 {
3998 LLVMValueRef const16 = LLVMConstInt(ctx->i32, 16, false);
3999 LLVMValueRef comp[2];
4000
4001 comp[0] = LLVMBuildAnd(ctx->builder, src0, LLVMConstInt(ctx-> i32, 65535, 0), "");
4002 comp[1] = LLVMBuildAnd(ctx->builder, src1, LLVMConstInt(ctx-> i32, 65535, 0), "");
4003 comp[1] = LLVMBuildShl(ctx->builder, comp[1], const16, "");
4004 return LLVMBuildOr(ctx->builder, comp[0], comp[1], "");
4005 }
4006
4007 /* Initialize arguments for the shader export intrinsic */
4008 static void
4009 si_llvm_init_export_args(struct nir_to_llvm_context *ctx,
4010 LLVMValueRef *values,
4011 unsigned target,
4012 LLVMValueRef *args)
4013 {
4014 /* Default is 0xf. Adjusted below depending on the format. */
4015 args[0] = LLVMConstInt(ctx->i32, target != V_008DFC_SQ_EXP_NULL ? 0xf : 0, false);
4016 /* Specify whether the EXEC mask represents the valid mask */
4017 args[1] = LLVMConstInt(ctx->i32, 0, false);
4018
4019 /* Specify whether this is the last export */
4020 args[2] = LLVMConstInt(ctx->i32, 0, false);
4021 /* Specify the target we are exporting */
4022 args[3] = LLVMConstInt(ctx->i32, target, false);
4023
4024 args[4] = LLVMConstInt(ctx->i32, 0, false); /* COMPR flag */
4025 args[5] = LLVMGetUndef(ctx->f32);
4026 args[6] = LLVMGetUndef(ctx->f32);
4027 args[7] = LLVMGetUndef(ctx->f32);
4028 args[8] = LLVMGetUndef(ctx->f32);
4029
4030 if (!values)
4031 return;
4032
4033 if (ctx->stage == MESA_SHADER_FRAGMENT && target >= V_008DFC_SQ_EXP_MRT) {
4034 LLVMValueRef val[4];
4035 unsigned index = target - V_008DFC_SQ_EXP_MRT;
4036 unsigned col_format = (ctx->options->key.fs.col_format >> (4 * index)) & 0xf;
4037 bool is_int8 = (ctx->options->key.fs.is_int8 >> index) & 1;
4038
4039 switch(col_format) {
4040 case V_028714_SPI_SHADER_ZERO:
4041 args[0] = LLVMConstInt(ctx->i32, 0x0, 0);
4042 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_NULL, 0);
4043 break;
4044
4045 case V_028714_SPI_SHADER_32_R:
4046 args[0] = LLVMConstInt(ctx->i32, 0x1, 0);
4047 args[5] = values[0];
4048 break;
4049
4050 case V_028714_SPI_SHADER_32_GR:
4051 args[0] = LLVMConstInt(ctx->i32, 0x3, 0);
4052 args[5] = values[0];
4053 args[6] = values[1];
4054 break;
4055
4056 case V_028714_SPI_SHADER_32_AR:
4057 args[0] = LLVMConstInt(ctx->i32, 0x9, 0);
4058 args[5] = values[0];
4059 args[8] = values[3];
4060 break;
4061
4062 case V_028714_SPI_SHADER_FP16_ABGR:
4063 args[4] = ctx->i32one;
4064
4065 for (unsigned chan = 0; chan < 2; chan++) {
4066 LLVMValueRef pack_args[2] = {
4067 values[2 * chan],
4068 values[2 * chan + 1]
4069 };
4070 LLVMValueRef packed;
4071
4072 packed = ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.packf16",
4073 ctx->i32, pack_args, 2,
4074 AC_FUNC_ATTR_READNONE);
4075 args[chan + 5] = packed;
4076 }
4077 break;
4078
4079 case V_028714_SPI_SHADER_UNORM16_ABGR:
4080 for (unsigned chan = 0; chan < 4; chan++) {
4081 val[chan] = emit_float_saturate(ctx, values[chan], 0, 1);
4082 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4083 LLVMConstReal(ctx->f32, 65535), "");
4084 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4085 LLVMConstReal(ctx->f32, 0.5), "");
4086 val[chan] = LLVMBuildFPToUI(ctx->builder, val[chan],
4087 ctx->i32, "");
4088 }
4089
4090 args[4] = ctx->i32one;
4091 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4092 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4093 break;
4094
4095 case V_028714_SPI_SHADER_SNORM16_ABGR:
4096 for (unsigned chan = 0; chan < 4; chan++) {
4097 val[chan] = emit_float_saturate(ctx, values[chan], -1, 1);
4098 val[chan] = LLVMBuildFMul(ctx->builder, val[chan],
4099 LLVMConstReal(ctx->f32, 32767), "");
4100
4101 /* If positive, add 0.5, else add -0.5. */
4102 val[chan] = LLVMBuildFAdd(ctx->builder, val[chan],
4103 LLVMBuildSelect(ctx->builder,
4104 LLVMBuildFCmp(ctx->builder, LLVMRealOGE,
4105 val[chan], ctx->f32zero, ""),
4106 LLVMConstReal(ctx->f32, 0.5),
4107 LLVMConstReal(ctx->f32, -0.5), ""), "");
4108 val[chan] = LLVMBuildFPToSI(ctx->builder, val[chan], ctx->i32, "");
4109 }
4110
4111 args[4] = ctx->i32one;
4112 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4113 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4114 break;
4115
4116 case V_028714_SPI_SHADER_UINT16_ABGR: {
4117 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 255 : 65535, 0);
4118
4119 for (unsigned chan = 0; chan < 4; chan++) {
4120 val[chan] = to_integer(ctx, values[chan]);
4121 val[chan] = emit_minmax_int(ctx, LLVMIntULT, val[chan], max);
4122 }
4123
4124 args[4] = ctx->i32one;
4125 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4126 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4127 break;
4128 }
4129
4130 case V_028714_SPI_SHADER_SINT16_ABGR: {
4131 LLVMValueRef max = LLVMConstInt(ctx->i32, is_int8 ? 127 : 32767, 0);
4132 LLVMValueRef min = LLVMConstInt(ctx->i32, is_int8 ? -128 : -32768, 0);
4133
4134 /* Clamp. */
4135 for (unsigned chan = 0; chan < 4; chan++) {
4136 val[chan] = to_integer(ctx, values[chan]);
4137 val[chan] = emit_minmax_int(ctx, LLVMIntSLT, val[chan], max);
4138 val[chan] = emit_minmax_int(ctx, LLVMIntSGT, val[chan], min);
4139 }
4140
4141 args[4] = ctx->i32one;
4142 args[5] = emit_pack_int16(ctx, val[0], val[1]);
4143 args[6] = emit_pack_int16(ctx, val[2], val[3]);
4144 break;
4145 }
4146
4147 default:
4148 case V_028714_SPI_SHADER_32_ABGR:
4149 memcpy(&args[5], values, sizeof(values[0]) * 4);
4150 break;
4151 }
4152 } else
4153 memcpy(&args[5], values, sizeof(values[0]) * 4);
4154
4155 for (unsigned i = 5; i < 9; ++i)
4156 args[i] = to_float(ctx, args[i]);
4157 }
4158
4159 static void
4160 handle_vs_outputs_post(struct nir_to_llvm_context *ctx)
4161 {
4162 uint32_t param_count = 0;
4163 unsigned target;
4164 unsigned pos_idx, num_pos_exports = 0;
4165 LLVMValueRef args[9];
4166 LLVMValueRef pos_args[4][9] = { { 0 } };
4167 LLVMValueRef psize_value = 0;
4168 int i;
4169 const uint64_t clip_mask = ctx->output_mask & ((1ull << VARYING_SLOT_CLIP_DIST0) |
4170 (1ull << VARYING_SLOT_CLIP_DIST1) |
4171 (1ull << VARYING_SLOT_CULL_DIST0) |
4172 (1ull << VARYING_SLOT_CULL_DIST1));
4173
4174 if (clip_mask) {
4175 LLVMValueRef slots[8];
4176 unsigned j;
4177
4178 if (ctx->shader_info->vs.cull_dist_mask)
4179 ctx->shader_info->vs.cull_dist_mask <<= ctx->num_clips;
4180
4181 i = VARYING_SLOT_CLIP_DIST0;
4182 for (j = 0; j < ctx->num_clips; j++)
4183 slots[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4184 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4185 i = VARYING_SLOT_CULL_DIST0;
4186 for (j = 0; j < ctx->num_culls; j++)
4187 slots[ctx->num_clips + j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4188 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4189
4190 for (i = ctx->num_clips + ctx->num_culls; i < 8; i++)
4191 slots[i] = LLVMGetUndef(ctx->f32);
4192
4193 if (ctx->num_clips + ctx->num_culls > 4) {
4194 target = V_008DFC_SQ_EXP_POS + 3;
4195 si_llvm_init_export_args(ctx, &slots[4], target, args);
4196 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4197 args, sizeof(args));
4198 }
4199
4200 target = V_008DFC_SQ_EXP_POS + 2;
4201 si_llvm_init_export_args(ctx, &slots[0], target, args);
4202 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4203 args, sizeof(args));
4204
4205 }
4206
4207 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4208 LLVMValueRef values[4];
4209 if (!(ctx->output_mask & (1ull << i)))
4210 continue;
4211
4212 for (unsigned j = 0; j < 4; j++)
4213 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4214 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4215
4216 if (i == VARYING_SLOT_POS) {
4217 target = V_008DFC_SQ_EXP_POS;
4218 } else if (i == VARYING_SLOT_CLIP_DIST0 ||
4219 i == VARYING_SLOT_CLIP_DIST1 ||
4220 i == VARYING_SLOT_CULL_DIST0 ||
4221 i == VARYING_SLOT_CULL_DIST1) {
4222 continue;
4223 } else if (i == VARYING_SLOT_PSIZ) {
4224 ctx->shader_info->vs.writes_pointsize = true;
4225 psize_value = values[0];
4226 continue;
4227 } else if (i >= VARYING_SLOT_VAR0) {
4228 ctx->shader_info->vs.export_mask |= 1u << (i - VARYING_SLOT_VAR0);
4229 target = V_008DFC_SQ_EXP_PARAM + param_count;
4230 param_count++;
4231 }
4232
4233 si_llvm_init_export_args(ctx, values, target, args);
4234
4235 if (target >= V_008DFC_SQ_EXP_POS &&
4236 target <= (V_008DFC_SQ_EXP_POS + 3)) {
4237 memcpy(pos_args[target - V_008DFC_SQ_EXP_POS],
4238 args, sizeof(args));
4239 } else {
4240 ac_emit_llvm_intrinsic(&ctx->ac,
4241 "llvm.SI.export",
4242 LLVMVoidTypeInContext(ctx->context),
4243 args, 9, 0);
4244 }
4245 }
4246
4247 /* We need to add the position output manually if it's missing. */
4248 if (!pos_args[0][0]) {
4249 pos_args[0][0] = LLVMConstInt(ctx->i32, 0xf, false);
4250 pos_args[0][1] = ctx->i32zero; /* EXEC mask */
4251 pos_args[0][2] = ctx->i32zero; /* last export? */
4252 pos_args[0][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS, false);
4253 pos_args[0][4] = ctx->i32zero; /* COMPR flag */
4254 pos_args[0][5] = ctx->f32zero; /* X */
4255 pos_args[0][6] = ctx->f32zero; /* Y */
4256 pos_args[0][7] = ctx->f32zero; /* Z */
4257 pos_args[0][8] = ctx->f32one; /* W */
4258 }
4259
4260 if (ctx->shader_info->vs.writes_pointsize == true) {
4261 pos_args[1][0] = LLVMConstInt(ctx->i32, (ctx->shader_info->vs.writes_pointsize == true), false); /* writemask */
4262 pos_args[1][1] = ctx->i32zero; /* EXEC mask */
4263 pos_args[1][2] = ctx->i32zero; /* last export? */
4264 pos_args[1][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + 1, false);
4265 pos_args[1][4] = ctx->i32zero; /* COMPR flag */
4266 pos_args[1][5] = ctx->f32zero; /* X */
4267 pos_args[1][6] = ctx->f32zero; /* Y */
4268 pos_args[1][7] = ctx->f32zero; /* Z */
4269 pos_args[1][8] = ctx->f32zero; /* W */
4270
4271 if (ctx->shader_info->vs.writes_pointsize == true)
4272 pos_args[1][5] = psize_value;
4273 }
4274 for (i = 0; i < 4; i++) {
4275 if (pos_args[i][0])
4276 num_pos_exports++;
4277 }
4278
4279 pos_idx = 0;
4280 for (i = 0; i < 4; i++) {
4281 if (!pos_args[i][0])
4282 continue;
4283
4284 /* Specify the target we are exporting */
4285 pos_args[i][3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_POS + pos_idx++, false);
4286 if (pos_idx == num_pos_exports)
4287 pos_args[i][2] = ctx->i32one;
4288 ac_emit_llvm_intrinsic(&ctx->ac,
4289 "llvm.SI.export",
4290 LLVMVoidTypeInContext(ctx->context),
4291 pos_args[i], 9, 0);
4292 }
4293
4294 ctx->shader_info->vs.pos_exports = num_pos_exports;
4295 ctx->shader_info->vs.param_exports = param_count;
4296 }
4297
4298 static void
4299 si_export_mrt_color(struct nir_to_llvm_context *ctx,
4300 LLVMValueRef *color, unsigned param, bool is_last)
4301 {
4302 LLVMValueRef args[9];
4303 /* Export */
4304 si_llvm_init_export_args(ctx, color, param,
4305 args);
4306
4307 if (is_last) {
4308 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4309 args[2] = ctx->i32one; /* DONE bit */
4310 } else if (args[0] == ctx->i32zero)
4311 return; /* unnecessary NULL export */
4312
4313 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4314 ctx->voidt, args, 9, 0);
4315 }
4316
4317 static void
4318 si_export_mrt_z(struct nir_to_llvm_context *ctx,
4319 LLVMValueRef depth, LLVMValueRef stencil,
4320 LLVMValueRef samplemask)
4321 {
4322 LLVMValueRef args[9];
4323 unsigned mask = 0;
4324 args[1] = ctx->i32one; /* whether the EXEC mask is valid */
4325 args[2] = ctx->i32one; /* DONE bit */
4326 /* Specify the target we are exporting */
4327 args[3] = LLVMConstInt(ctx->i32, V_008DFC_SQ_EXP_MRTZ, false);
4328
4329 args[4] = ctx->i32zero; /* COMP flag */
4330 args[5] = LLVMGetUndef(ctx->f32); /* R, depth */
4331 args[6] = LLVMGetUndef(ctx->f32); /* G, stencil test val[0:7], stencil op val[8:15] */
4332 args[7] = LLVMGetUndef(ctx->f32); /* B, sample mask */
4333 args[8] = LLVMGetUndef(ctx->f32); /* A, alpha to mask */
4334
4335 if (depth) {
4336 args[5] = depth;
4337 mask |= 0x1;
4338 }
4339
4340 if (stencil) {
4341 args[6] = stencil;
4342 mask |= 0x2;
4343 }
4344
4345 if (samplemask) {
4346 args[7] = samplemask;
4347 mask |= 0x04;
4348 }
4349
4350 /* SI (except OLAND) has a bug that it only looks
4351 * at the X writemask component. */
4352 if (ctx->options->chip_class == SI &&
4353 ctx->options->family != CHIP_OLAND)
4354 mask |= 0x01;
4355
4356 args[0] = LLVMConstInt(ctx->i32, mask, false);
4357 ac_emit_llvm_intrinsic(&ctx->ac, "llvm.SI.export",
4358 ctx->voidt, args, 9, 0);
4359 }
4360
4361 static void
4362 handle_fs_outputs_post(struct nir_to_llvm_context *ctx)
4363 {
4364 unsigned index = 0;
4365 LLVMValueRef depth = NULL, stencil = NULL, samplemask = NULL;
4366
4367 for (unsigned i = 0; i < RADEON_LLVM_MAX_OUTPUTS; ++i) {
4368 LLVMValueRef values[4];
4369
4370 if (!(ctx->output_mask & (1ull << i)))
4371 continue;
4372
4373 if (i == FRAG_RESULT_DEPTH) {
4374 ctx->shader_info->fs.writes_z = true;
4375 depth = to_float(ctx, LLVMBuildLoad(ctx->builder,
4376 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4377 } else if (i == FRAG_RESULT_STENCIL) {
4378 ctx->shader_info->fs.writes_stencil = true;
4379 stencil = to_float(ctx, LLVMBuildLoad(ctx->builder,
4380 ctx->outputs[radeon_llvm_reg_index_soa(i, 0)], ""));
4381 } else {
4382 bool last = false;
4383 for (unsigned j = 0; j < 4; j++)
4384 values[j] = to_float(ctx, LLVMBuildLoad(ctx->builder,
4385 ctx->outputs[radeon_llvm_reg_index_soa(i, j)], ""));
4386
4387 if (!ctx->shader_info->fs.writes_z && !ctx->shader_info->fs.writes_stencil)
4388 last = ctx->output_mask <= ((1ull << (i + 1)) - 1);
4389
4390 si_export_mrt_color(ctx, values, V_008DFC_SQ_EXP_MRT + index, last);
4391 index++;
4392 }
4393 }
4394
4395 if (depth || stencil)
4396 si_export_mrt_z(ctx, depth, stencil, samplemask);
4397 else if (!index)
4398 si_export_mrt_color(ctx, NULL, V_008DFC_SQ_EXP_NULL, true);
4399
4400 ctx->shader_info->fs.output_mask = index ? ((1ull << index) - 1) : 0;
4401 }
4402
4403 static void
4404 handle_shader_outputs_post(struct nir_to_llvm_context *ctx)
4405 {
4406 switch (ctx->stage) {
4407 case MESA_SHADER_VERTEX:
4408 handle_vs_outputs_post(ctx);
4409 break;
4410 case MESA_SHADER_FRAGMENT:
4411 handle_fs_outputs_post(ctx);
4412 break;
4413 default:
4414 break;
4415 }
4416 }
4417
4418 static void
4419 handle_shared_compute_var(struct nir_to_llvm_context *ctx,
4420 struct nir_variable *variable, uint32_t *offset, int idx)
4421 {
4422 unsigned size = glsl_count_attribute_slots(variable->type, false);
4423 variable->data.driver_location = *offset;
4424 *offset += size;
4425 }
4426
4427 static void ac_llvm_finalize_module(struct nir_to_llvm_context * ctx)
4428 {
4429 LLVMPassManagerRef passmgr;
4430 /* Create the pass manager */
4431 passmgr = LLVMCreateFunctionPassManagerForModule(
4432 ctx->module);
4433
4434 /* This pass should eliminate all the load and store instructions */
4435 LLVMAddPromoteMemoryToRegisterPass(passmgr);
4436
4437 /* Add some optimization passes */
4438 LLVMAddScalarReplAggregatesPass(passmgr);
4439 LLVMAddLICMPass(passmgr);
4440 LLVMAddAggressiveDCEPass(passmgr);
4441 LLVMAddCFGSimplificationPass(passmgr);
4442 LLVMAddInstructionCombiningPass(passmgr);
4443
4444 /* Run the pass */
4445 LLVMInitializeFunctionPassManager(passmgr);
4446 LLVMRunFunctionPassManager(passmgr, ctx->main_function);
4447 LLVMFinalizeFunctionPassManager(passmgr);
4448
4449 LLVMDisposeBuilder(ctx->builder);
4450 LLVMDisposePassManager(passmgr);
4451 }
4452
4453 static
4454 LLVMModuleRef ac_translate_nir_to_llvm(LLVMTargetMachineRef tm,
4455 struct nir_shader *nir,
4456 struct ac_shader_variant_info *shader_info,
4457 const struct ac_nir_compiler_options *options)
4458 {
4459 struct nir_to_llvm_context ctx = {0};
4460 struct nir_function *func;
4461 unsigned i;
4462 ctx.options = options;
4463 ctx.shader_info = shader_info;
4464 ctx.context = LLVMContextCreate();
4465 ctx.module = LLVMModuleCreateWithNameInContext("shader", ctx.context);
4466
4467 ac_llvm_context_init(&ctx.ac, ctx.context);
4468 ctx.ac.module = ctx.module;
4469
4470 ctx.has_ds_bpermute = ctx.options->chip_class >= VI;
4471
4472 memset(shader_info, 0, sizeof(*shader_info));
4473
4474 LLVMSetTarget(ctx.module, "amdgcn--");
4475 setup_types(&ctx);
4476
4477 ctx.builder = LLVMCreateBuilderInContext(ctx.context);
4478 ctx.ac.builder = ctx.builder;
4479 ctx.stage = nir->stage;
4480
4481 for (i = 0; i < AC_UD_MAX_SETS; i++)
4482 shader_info->user_sgprs_locs.descriptor_sets[i].sgpr_idx = -1;
4483 for (i = 0; i < AC_UD_MAX_UD; i++)
4484 shader_info->user_sgprs_locs.shader_data[i].sgpr_idx = -1;
4485
4486 create_function(&ctx);
4487
4488 if (nir->stage == MESA_SHADER_COMPUTE) {
4489 int num_shared = 0;
4490 nir_foreach_variable(variable, &nir->shared)
4491 num_shared++;
4492 if (num_shared) {
4493 int idx = 0;
4494 uint32_t shared_size = 0;
4495 LLVMValueRef var;
4496 LLVMTypeRef i8p = LLVMPointerType(ctx.i8, LOCAL_ADDR_SPACE);
4497 nir_foreach_variable(variable, &nir->shared) {
4498 handle_shared_compute_var(&ctx, variable, &shared_size, idx);
4499 idx++;
4500 }
4501
4502 shared_size *= 4;
4503 var = LLVMAddGlobalInAddressSpace(ctx.module,
4504 LLVMArrayType(ctx.i8, shared_size),
4505 "compute_lds",
4506 LOCAL_ADDR_SPACE);
4507 LLVMSetAlignment(var, 4);
4508 ctx.shared_memory = LLVMBuildBitCast(ctx.builder, var, i8p, "");
4509 }
4510 }
4511
4512 nir_foreach_variable(variable, &nir->inputs)
4513 handle_shader_input_decl(&ctx, variable);
4514
4515 if (nir->stage == MESA_SHADER_FRAGMENT)
4516 handle_fs_inputs_pre(&ctx, nir);
4517
4518 nir_foreach_variable(variable, &nir->outputs)
4519 handle_shader_output_decl(&ctx, variable);
4520
4521 ctx.defs = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4522 _mesa_key_pointer_equal);
4523 ctx.phis = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
4524 _mesa_key_pointer_equal);
4525
4526 func = (struct nir_function *)exec_list_get_head(&nir->functions);
4527
4528 setup_locals(&ctx, func);
4529
4530 visit_cf_list(&ctx, &func->impl->body);
4531 phi_post_pass(&ctx);
4532
4533 handle_shader_outputs_post(&ctx);
4534 LLVMBuildRetVoid(ctx.builder);
4535
4536 ac_llvm_finalize_module(&ctx);
4537 free(ctx.locals);
4538 ralloc_free(ctx.defs);
4539 ralloc_free(ctx.phis);
4540
4541 return ctx.module;
4542 }
4543
4544 static void ac_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
4545 {
4546 unsigned *retval = (unsigned *)context;
4547 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
4548 char *description = LLVMGetDiagInfoDescription(di);
4549
4550 if (severity == LLVMDSError) {
4551 *retval = 1;
4552 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n",
4553 description);
4554 }
4555
4556 LLVMDisposeMessage(description);
4557 }
4558
4559 static unsigned ac_llvm_compile(LLVMModuleRef M,
4560 struct ac_shader_binary *binary,
4561 LLVMTargetMachineRef tm)
4562 {
4563 unsigned retval = 0;
4564 char *err;
4565 LLVMContextRef llvm_ctx;
4566 LLVMMemoryBufferRef out_buffer;
4567 unsigned buffer_size;
4568 const char *buffer_data;
4569 LLVMBool mem_err;
4570
4571 /* Setup Diagnostic Handler*/
4572 llvm_ctx = LLVMGetModuleContext(M);
4573
4574 LLVMContextSetDiagnosticHandler(llvm_ctx, ac_diagnostic_handler,
4575 &retval);
4576
4577 /* Compile IR*/
4578 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile,
4579 &err, &out_buffer);
4580
4581 /* Process Errors/Warnings */
4582 if (mem_err) {
4583 fprintf(stderr, "%s: %s", __FUNCTION__, err);
4584 free(err);
4585 retval = 1;
4586 goto out;
4587 }
4588
4589 /* Extract Shader Code*/
4590 buffer_size = LLVMGetBufferSize(out_buffer);
4591 buffer_data = LLVMGetBufferStart(out_buffer);
4592
4593 ac_elf_read(buffer_data, buffer_size, binary);
4594
4595 /* Clean up */
4596 LLVMDisposeMemoryBuffer(out_buffer);
4597
4598 out:
4599 return retval;
4600 }
4601
4602 void ac_compile_nir_shader(LLVMTargetMachineRef tm,
4603 struct ac_shader_binary *binary,
4604 struct ac_shader_config *config,
4605 struct ac_shader_variant_info *shader_info,
4606 struct nir_shader *nir,
4607 const struct ac_nir_compiler_options *options,
4608 bool dump_shader)
4609 {
4610
4611 LLVMModuleRef llvm_module = ac_translate_nir_to_llvm(tm, nir, shader_info,
4612 options);
4613 if (dump_shader)
4614 LLVMDumpModule(llvm_module);
4615
4616 memset(binary, 0, sizeof(*binary));
4617 int v = ac_llvm_compile(llvm_module, binary, tm);
4618 if (v) {
4619 fprintf(stderr, "compile failed\n");
4620 }
4621
4622 if (dump_shader)
4623 fprintf(stderr, "disasm:\n%s\n", binary->disasm_string);
4624
4625 ac_shader_binary_read_config(binary, config, 0);
4626
4627 LLVMContextRef ctx = LLVMGetModuleContext(llvm_module);
4628 LLVMDisposeModule(llvm_module);
4629 LLVMContextDispose(ctx);
4630
4631 if (nir->stage == MESA_SHADER_FRAGMENT) {
4632 shader_info->num_input_vgprs = 0;
4633 if (G_0286CC_PERSP_SAMPLE_ENA(config->spi_ps_input_addr))
4634 shader_info->num_input_vgprs += 2;
4635 if (G_0286CC_PERSP_CENTER_ENA(config->spi_ps_input_addr))
4636 shader_info->num_input_vgprs += 2;
4637 if (G_0286CC_PERSP_CENTROID_ENA(config->spi_ps_input_addr))
4638 shader_info->num_input_vgprs += 2;
4639 if (G_0286CC_PERSP_PULL_MODEL_ENA(config->spi_ps_input_addr))
4640 shader_info->num_input_vgprs += 3;
4641 if (G_0286CC_LINEAR_SAMPLE_ENA(config->spi_ps_input_addr))
4642 shader_info->num_input_vgprs += 2;
4643 if (G_0286CC_LINEAR_CENTER_ENA(config->spi_ps_input_addr))
4644 shader_info->num_input_vgprs += 2;
4645 if (G_0286CC_LINEAR_CENTROID_ENA(config->spi_ps_input_addr))
4646 shader_info->num_input_vgprs += 2;
4647 if (G_0286CC_LINE_STIPPLE_TEX_ENA(config->spi_ps_input_addr))
4648 shader_info->num_input_vgprs += 1;
4649 if (G_0286CC_POS_X_FLOAT_ENA(config->spi_ps_input_addr))
4650 shader_info->num_input_vgprs += 1;
4651 if (G_0286CC_POS_Y_FLOAT_ENA(config->spi_ps_input_addr))
4652 shader_info->num_input_vgprs += 1;
4653 if (G_0286CC_POS_Z_FLOAT_ENA(config->spi_ps_input_addr))
4654 shader_info->num_input_vgprs += 1;
4655 if (G_0286CC_POS_W_FLOAT_ENA(config->spi_ps_input_addr))
4656 shader_info->num_input_vgprs += 1;
4657 if (G_0286CC_FRONT_FACE_ENA(config->spi_ps_input_addr))
4658 shader_info->num_input_vgprs += 1;
4659 if (G_0286CC_ANCILLARY_ENA(config->spi_ps_input_addr))
4660 shader_info->num_input_vgprs += 1;
4661 if (G_0286CC_SAMPLE_COVERAGE_ENA(config->spi_ps_input_addr))
4662 shader_info->num_input_vgprs += 1;
4663 if (G_0286CC_POS_FIXED_PT_ENA(config->spi_ps_input_addr))
4664 shader_info->num_input_vgprs += 1;
4665 }
4666 config->num_vgprs = MAX2(config->num_vgprs, shader_info->num_input_vgprs);
4667
4668 /* +3 for scratch wave offset and VCC */
4669 config->num_sgprs = MAX2(config->num_sgprs,
4670 shader_info->num_input_sgprs + 3);
4671 if (nir->stage == MESA_SHADER_COMPUTE) {
4672 for (int i = 0; i < 3; ++i)
4673 shader_info->cs.block_size[i] = nir->info->cs.local_size[i];
4674 }
4675
4676 if (nir->stage == MESA_SHADER_FRAGMENT)
4677 shader_info->fs.early_fragment_test = nir->info->fs.early_fragment_tests;
4678 }