radeonsi/gfx9: set correct LLVM calling conventions for merged shaders
[mesa.git] src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 #include <llvm-c/Support.h>
44
45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
46 */
47 struct si_llvm_flow {
48 /* Loop exit or next part of if/else/endif. */
49 LLVMBasicBlockRef next_block;
50 LLVMBasicBlockRef loop_entry_block;
51 };
52
53 enum si_llvm_calling_convention {
54 RADEON_LLVM_AMDGPU_VS = 87,
55 RADEON_LLVM_AMDGPU_GS = 88,
56 RADEON_LLVM_AMDGPU_PS = 89,
57 RADEON_LLVM_AMDGPU_CS = 90,
58 RADEON_LLVM_AMDGPU_HS = 93,
59 };
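/*
 * These numeric values are expected to mirror LLVM's CallingConv::AMDGPU_*
 * IDs of this era (amdgpu_vs = 87, amdgpu_gs = 88, amdgpu_ps = 89,
 * amdgpu_cs = 90, amdgpu_hs = 93); that correspondence is an assumption
 * based on LLVM's CallingConv.h, not something stated in this file.
 * amdgpu_hs only exists since LLVM 5.0, which is why si_llvm_create_func()
 * below falls back to the VS convention for TCS on older LLVM.
 */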
60
61 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
62 {
63 char str[16];
64
65 snprintf(str, sizeof(str), "%i", value);
66 LLVMAddTargetDependentFunctionAttr(F, name, str);
67 }
68
69 static void init_amdgpu_target()
70 {
71 gallivm_init_llvm_targets();
72 LLVMInitializeAMDGPUTargetInfo();
73 LLVMInitializeAMDGPUTarget();
74 LLVMInitializeAMDGPUTargetMC();
75 LLVMInitializeAMDGPUAsmPrinter();
76
77 /* For inline assembly. */
78 LLVMInitializeAMDGPUAsmParser();
79
80 if (HAVE_LLVM >= 0x0400) {
81 /*
82 * Workaround for bug in llvm 4.0 that causes image intrinsics
83 * to disappear.
84 * https://reviews.llvm.org/D26348
85 */
86 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
87 LLVMParseCommandLineOptions(2, argv, NULL);
88 }
89 }
90
91 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
92
93 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
94 {
95 LLVMTargetRef target = NULL;
96 char *err_message = NULL;
97
98 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
99
100 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
101 fprintf(stderr, "Cannot find target for triple %s ", triple);
102 if (err_message) {
103 fprintf(stderr, "%s\n", err_message);
104 }
105 LLVMDisposeMessage(err_message);
106 return NULL;
107 }
108 return target;
109 }
110
111 struct si_llvm_diagnostics {
112 struct pipe_debug_callback *debug;
113 unsigned retval;
114 };
115
116 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
117 {
118 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
119 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
120 char *description = LLVMGetDiagInfoDescription(di);
121 const char *severity_str = NULL;
122
123 switch (severity) {
124 case LLVMDSError:
125 severity_str = "error";
126 break;
127 case LLVMDSWarning:
128 severity_str = "warning";
129 break;
130 case LLVMDSRemark:
131 severity_str = "remark";
132 break;
133 case LLVMDSNote:
134 severity_str = "note";
135 break;
136 default:
137 severity_str = "unknown";
138 }
139
140 pipe_debug_message(diag->debug, SHADER_INFO,
141 "LLVM diagnostic (%s): %s", severity_str, description);
142
143 if (severity == LLVMDSError) {
144 diag->retval = 1;
145 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
146 }
147
148 LLVMDisposeMessage(description);
149 }
150
151 /**
152 * Compile an LLVM module to machine code.
153 *
154 * @returns 0 for success, 1 for failure
155 */
156 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
157 LLVMTargetMachineRef tm,
158 struct pipe_debug_callback *debug)
159 {
160 struct si_llvm_diagnostics diag;
161 char *err;
162 LLVMContextRef llvm_ctx;
163 LLVMMemoryBufferRef out_buffer;
164 unsigned buffer_size;
165 const char *buffer_data;
166 LLVMBool mem_err;
167
168 diag.debug = debug;
169 diag.retval = 0;
170
171 /* Set up the diagnostic handler. */
172 llvm_ctx = LLVMGetModuleContext(M);
173
174 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
175
176 /* Compile IR */
177 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
178 &out_buffer);
179
180 /* Process Errors/Warnings */
181 if (mem_err) {
182 fprintf(stderr, "%s: %s", __FUNCTION__, err);
183 pipe_debug_message(debug, SHADER_INFO,
184 "LLVM emit error: %s", err);
185 FREE(err);
186 diag.retval = 1;
187 goto out;
188 }
189
190 /* Extract shader code */
191 buffer_size = LLVMGetBufferSize(out_buffer);
192 buffer_data = LLVMGetBufferStart(out_buffer);
193
194 ac_elf_read(buffer_data, buffer_size, binary);
195
196 /* Clean up */
197 LLVMDisposeMemoryBuffer(out_buffer);
198
199 out:
200 if (diag.retval != 0)
201 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
202 return diag.retval;
203 }
204
205 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
206 enum tgsi_opcode_type type)
207 {
208 LLVMContextRef ctx = bld_base->base.gallivm->context;
209
210 switch (type) {
211 case TGSI_TYPE_UNSIGNED:
212 case TGSI_TYPE_SIGNED:
213 return LLVMInt32TypeInContext(ctx);
214 case TGSI_TYPE_UNSIGNED64:
215 case TGSI_TYPE_SIGNED64:
216 return LLVMInt64TypeInContext(ctx);
217 case TGSI_TYPE_DOUBLE:
218 return LLVMDoubleTypeInContext(ctx);
219 case TGSI_TYPE_UNTYPED:
220 case TGSI_TYPE_FLOAT:
221 return LLVMFloatTypeInContext(ctx);
222 default: break;
223 }
224 return 0;
225 }
226
227 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
228 enum tgsi_opcode_type type, LLVMValueRef value)
229 {
230 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
231 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
232
233 if (dst_type)
234 return LLVMBuildBitCast(builder, value, dst_type, "");
235 else
236 return value;
237 }
238
239 /**
240 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
241 * or an undefined value in the same interval otherwise.
242 */
243 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
244 LLVMValueRef index,
245 unsigned num)
246 {
247 struct gallivm_state *gallivm = &ctx->gallivm;
248 LLVMBuilderRef builder = gallivm->builder;
249 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
250 LLVMValueRef cc;
251
252 if (util_is_power_of_two(num)) {
253 index = LLVMBuildAnd(builder, index, c_max, "");
254 } else {
255 /* In theory, this MAX pattern should result in code that is
256 * as good as the bit-wise AND above.
257 *
258 * In practice, LLVM generates worse code (at the time of
259 * writing), because its value tracking is not strong enough.
260 */
261 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
262 index = LLVMBuildSelect(builder, cc, index, c_max, "");
263 }
264
265 return index;
266 }
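/*
 * Worked example for si_llvm_bound_index() (illustrative only):
 *   num = 8 (power of two):      index = index & 7
 *   num = 6 (not a power of two): index = (index <= 5) ? index : 5
 * Either way the result stays inside [0, num), which is all the callers
 * rely on.
 */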
267
268 static struct si_llvm_flow *
269 get_current_flow(struct si_shader_context *ctx)
270 {
271 if (ctx->flow_depth > 0)
272 return &ctx->flow[ctx->flow_depth - 1];
273 return NULL;
274 }
275
276 static struct si_llvm_flow *
277 get_innermost_loop(struct si_shader_context *ctx)
278 {
279 for (unsigned i = ctx->flow_depth; i > 0; --i) {
280 if (ctx->flow[i - 1].loop_entry_block)
281 return &ctx->flow[i - 1];
282 }
283 return NULL;
284 }
285
286 static struct si_llvm_flow *
287 push_flow(struct si_shader_context *ctx)
288 {
289 struct si_llvm_flow *flow;
290
291 if (ctx->flow_depth >= ctx->flow_depth_max) {
292 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
293 ctx->flow = REALLOC(ctx->flow,
294 ctx->flow_depth_max * sizeof(*ctx->flow),
295 new_max * sizeof(*ctx->flow));
296 ctx->flow_depth_max = new_max;
297 }
298
299 flow = &ctx->flow[ctx->flow_depth];
300 ctx->flow_depth++;
301
302 flow->next_block = NULL;
303 flow->loop_entry_block = NULL;
304 return flow;
305 }
306
307 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
308 LLVMValueRef value,
309 unsigned swizzle_x,
310 unsigned swizzle_y,
311 unsigned swizzle_z,
312 unsigned swizzle_w)
313 {
314 LLVMValueRef swizzles[4];
315 LLVMTypeRef i32t =
316 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
317
318 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
319 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
320 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
321 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
322
323 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
324 value,
325 LLVMGetUndef(LLVMTypeOf(value)),
326 LLVMConstVector(swizzles, 4), "");
327 }
328
329 /**
330 * Return the 1-based ID of the array covering the given temporary register
331 * index, or 0 if the register is not covered by any declared array.
332 */
333 static unsigned
334 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
335 unsigned reg_index,
336 const struct tgsi_ind_register *reg)
337 {
338 struct si_shader_context *ctx = si_shader_context(bld_base);
339 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
340 unsigned i;
341
342 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
343 return reg->ArrayID;
344
345 for (i = 0; i < num_arrays; i++) {
346 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
347
348 if (reg_index >= array->range.First && reg_index <= array->range.Last)
349 return i + 1;
350 }
351
352 return 0;
353 }
354
355 static struct tgsi_declaration_range
356 get_array_range(struct lp_build_tgsi_context *bld_base,
357 unsigned File, unsigned reg_index,
358 const struct tgsi_ind_register *reg)
359 {
360 struct si_shader_context *ctx = si_shader_context(bld_base);
361 struct tgsi_declaration_range range;
362
363 if (File == TGSI_FILE_TEMPORARY) {
364 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
365 if (array_id)
366 return ctx->temp_arrays[array_id - 1].range;
367 }
368
369 range.First = 0;
370 range.Last = bld_base->info->file_max[File];
371 return range;
372 }
373
374 static LLVMValueRef
375 emit_array_index(struct si_shader_context *ctx,
376 const struct tgsi_ind_register *reg,
377 unsigned offset)
378 {
379 struct gallivm_state *gallivm = &ctx->gallivm;
380
381 if (!reg) {
382 return LLVMConstInt(ctx->i32, offset, 0);
383 }
384 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
385 return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
386 }
387
388 /**
389 * For indirect registers, construct a pointer directly to the requested
390 * element using getelementptr if possible.
391 *
392 * Returns NULL if the insertelement/extractelement fallback for array access
393 * must be used.
394 */
395 static LLVMValueRef
396 get_pointer_into_array(struct si_shader_context *ctx,
397 unsigned file,
398 unsigned swizzle,
399 unsigned reg_index,
400 const struct tgsi_ind_register *reg_indirect)
401 {
402 unsigned array_id;
403 struct tgsi_array_info *array;
404 struct gallivm_state *gallivm = &ctx->gallivm;
405 LLVMBuilderRef builder = gallivm->builder;
406 LLVMValueRef idxs[2];
407 LLVMValueRef index;
408 LLVMValueRef alloca;
409
410 if (file != TGSI_FILE_TEMPORARY)
411 return NULL;
412
413 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
414 if (!array_id)
415 return NULL;
416
417 alloca = ctx->temp_array_allocas[array_id - 1];
418 if (!alloca)
419 return NULL;
420
421 array = &ctx->temp_arrays[array_id - 1];
422
423 if (!(array->writemask & (1 << swizzle)))
424 return ctx->undef_alloca;
425
426 index = emit_array_index(ctx, reg_indirect,
427 reg_index - ctx->temp_arrays[array_id - 1].range.First);
428
429 /* Ensure that the index is within a valid range, to guard against
430 * VM faults and overwriting critical data (e.g. spilled resource
431 * descriptors).
432 *
433 * TODO It should be possible to avoid the additional instructions
434 * if LLVM is changed so that it guarantees:
435 * 1. the scratch space descriptor isolates the current wave (this
436 * could even save the scratch offset SGPR at the cost of an
437 * additional SALU instruction)
438 * 2. the memory for allocas must be allocated at the _end_ of the
439 * scratch space (after spilled registers)
440 */
441 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
442
443 index = LLVMBuildMul(
444 builder, index,
445 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
446 "");
447 index = LLVMBuildAdd(
448 builder, index,
449 LLVMConstInt(ctx->i32,
450 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
451 "");
452 idxs[0] = ctx->i32_0;
453 idxs[1] = index;
454 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
455 }
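/*
 * For illustration of the packed-array indexing above: if the array's
 * writemask covers only .xz, each element occupies
 * util_bitcount(writemask) = 2 consecutive floats in the alloca, so
 * requesting channel z of relative element i yields the GEP index
 * i * 2 + 1 (the "+ 1" counts the written channels below z, i.e. x).
 * Channels the array never writes (here y and w) get ctx->undef_alloca
 * instead.
 */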
456
457 LLVMValueRef
458 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
459 enum tgsi_opcode_type type,
460 LLVMValueRef ptr,
461 LLVMValueRef ptr2)
462 {
463 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
464 LLVMValueRef result;
465
466 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
467
468 result = LLVMBuildInsertElement(builder,
469 result,
470 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
471 bld_base->int_bld.zero, "");
472 result = LLVMBuildInsertElement(builder,
473 result,
474 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
475 bld_base->int_bld.one, "");
476 return bitcast(bld_base, type, result);
477 }
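/*
 * For illustration: with the scalar SoA setup used here
 * (bld_base->base.type.length == 1), ptr and ptr2 carry the low and high
 * 32-bit halves; they are packed into a <2 x i32> vector and the final
 * bitcast() turns that into i64 or double, depending on 'type'.
 */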
478
479 static LLVMValueRef
480 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
481 unsigned File, enum tgsi_opcode_type type,
482 struct tgsi_declaration_range range,
483 unsigned swizzle)
484 {
485 struct si_shader_context *ctx = si_shader_context(bld_base);
486
487 LLVMBuilderRef builder = ctx->gallivm.builder;
488
489 unsigned i, size = range.Last - range.First + 1;
490 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
491 LLVMValueRef result = LLVMGetUndef(vec);
492
493 struct tgsi_full_src_register tmp_reg = {};
494 tmp_reg.Register.File = File;
495
496 for (i = 0; i < size; ++i) {
497 tmp_reg.Register.Index = i + range.First;
498 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
499 result = LLVMBuildInsertElement(builder, result, temp,
500 LLVMConstInt(ctx->i32, i, 0), "array_vector");
501 }
502 return result;
503 }
504
505 static LLVMValueRef
506 load_value_from_array(struct lp_build_tgsi_context *bld_base,
507 unsigned file,
508 enum tgsi_opcode_type type,
509 unsigned swizzle,
510 unsigned reg_index,
511 const struct tgsi_ind_register *reg_indirect)
512 {
513 struct si_shader_context *ctx = si_shader_context(bld_base);
514 struct gallivm_state *gallivm = &ctx->gallivm;
515 LLVMBuilderRef builder = gallivm->builder;
516 LLVMValueRef ptr;
517
518 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
519 if (ptr) {
520 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
521 if (tgsi_type_is_64bit(type)) {
522 LLVMValueRef ptr_hi, val_hi;
523 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
524 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
525 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
526 }
527
528 return val;
529 } else {
530 struct tgsi_declaration_range range =
531 get_array_range(bld_base, file, reg_index, reg_indirect);
532 LLVMValueRef index =
533 emit_array_index(ctx, reg_indirect, reg_index - range.First);
534 LLVMValueRef array =
535 emit_array_fetch(bld_base, file, type, range, swizzle);
536 return LLVMBuildExtractElement(builder, array, index, "");
537 }
538 }
539
540 static void
541 store_value_to_array(struct lp_build_tgsi_context *bld_base,
542 LLVMValueRef value,
543 unsigned file,
544 unsigned chan_index,
545 unsigned reg_index,
546 const struct tgsi_ind_register *reg_indirect)
547 {
548 struct si_shader_context *ctx = si_shader_context(bld_base);
549 struct gallivm_state *gallivm = &ctx->gallivm;
550 LLVMBuilderRef builder = gallivm->builder;
551 LLVMValueRef ptr;
552
553 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
554 if (ptr) {
555 LLVMBuildStore(builder, value, ptr);
556 } else {
557 unsigned i, size;
558 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
559 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
560 LLVMValueRef array =
561 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
562 LLVMValueRef temp_ptr;
563
564 array = LLVMBuildInsertElement(builder, array, value, index, "");
565
566 size = range.Last - range.First + 1;
567 for (i = 0; i < size; ++i) {
568 switch(file) {
569 case TGSI_FILE_OUTPUT:
570 temp_ptr = ctx->outputs[i + range.First][chan_index];
571 break;
572
573 case TGSI_FILE_TEMPORARY:
574 if (range.First + i >= ctx->temps_count)
575 continue;
576 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
577 break;
578
579 default:
580 continue;
581 }
582 value = LLVMBuildExtractElement(builder, array,
583 LLVMConstInt(ctx->i32, i, 0), "");
584 LLVMBuildStore(builder, value, temp_ptr);
585 }
586 }
587 }
588
589 /* If this returns true, preload FS inputs at the beginning of the shader.
590 * Otherwise, reload them at each use. It must return true if the shader uses
591 * both derivatives and KILL, because KILL can leave whole-quad mode (WQM),
592 * and a lazy input load emitted after that would no longer execute in WQM.
593 */
594 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
595 {
596 struct si_shader_selector *sel = ctx->shader->selector;
597
598 return sel->info.uses_derivatives &&
599 sel->info.uses_kill;
600 }
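/*
 * Example of why this matters (illustrative): a fragment shader that KILLs
 * some pixels and later takes a derivative of an interpolated input must
 * have loaded that input before the KILL; a lazy load emitted afterwards
 * would run outside whole-quad mode, so the helper lanes needed for the
 * derivative would not hold valid data.
 */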
601
602 static LLVMValueRef
603 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
604 unsigned chan)
605 {
606 struct si_shader_context *ctx = si_shader_context(bld_base);
607
608 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
609 return ctx->outputs[index][chan];
610 }
611
612 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
613 const struct tgsi_full_src_register *reg,
614 enum tgsi_opcode_type type,
615 unsigned swizzle)
616 {
617 struct si_shader_context *ctx = si_shader_context(bld_base);
618 LLVMBuilderRef builder = ctx->gallivm.builder;
619 LLVMValueRef result = NULL, ptr, ptr2;
620
621 if (swizzle == ~0) {
622 LLVMValueRef values[TGSI_NUM_CHANNELS];
623 unsigned chan;
624 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
625 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
626 }
627 return lp_build_gather_values(&ctx->gallivm, values,
628 TGSI_NUM_CHANNELS);
629 }
630
631 if (reg->Register.Indirect) {
632 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
633 swizzle, reg->Register.Index, &reg->Indirect);
634 return bitcast(bld_base, type, load);
635 }
636
637 switch(reg->Register.File) {
638 case TGSI_FILE_IMMEDIATE: {
639 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
640 if (tgsi_type_is_64bit(type)) {
641 result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
642 result = LLVMConstInsertElement(result,
643 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
644 ctx->i32_0);
645 result = LLVMConstInsertElement(result,
646 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
647 ctx->i32_1);
648 return LLVMConstBitCast(result, ctype);
649 } else {
650 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
651 }
652 }
653
654 case TGSI_FILE_INPUT: {
655 unsigned index = reg->Register.Index;
656 LLVMValueRef input[4];
657
658 /* I don't think doing this for vertex shaders is beneficial.
659 * For those, we want to make sure the VMEM loads are executed
660 * only once. Fragment shaders don't care much, because
661 * v_interp instructions are much cheaper than VMEM loads.
662 */
663 if (!si_preload_fs_inputs(ctx) &&
664 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
665 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
666 else
667 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
668
669 result = input[swizzle];
670
671 if (tgsi_type_is_64bit(type)) {
672 ptr = result;
673 ptr2 = input[swizzle + 1];
674 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
675 }
676 break;
677 }
678
679 case TGSI_FILE_TEMPORARY:
680 if (reg->Register.Index >= ctx->temps_count)
681 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
682 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
683 if (tgsi_type_is_64bit(type)) {
684 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
685 return si_llvm_emit_fetch_64bit(bld_base, type,
686 LLVMBuildLoad(builder, ptr, ""),
687 LLVMBuildLoad(builder, ptr2, ""));
688 }
689 result = LLVMBuildLoad(builder, ptr, "");
690 break;
691
692 case TGSI_FILE_OUTPUT:
693 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
694 if (tgsi_type_is_64bit(type)) {
695 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
696 return si_llvm_emit_fetch_64bit(bld_base, type,
697 LLVMBuildLoad(builder, ptr, ""),
698 LLVMBuildLoad(builder, ptr2, ""));
699 }
700 result = LLVMBuildLoad(builder, ptr, "");
701 break;
702
703 default:
704 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
705 }
706
707 return bitcast(bld_base, type, result);
708 }
709
710 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
711 const struct tgsi_full_src_register *reg,
712 enum tgsi_opcode_type type,
713 unsigned swizzle)
714 {
715 struct si_shader_context *ctx = si_shader_context(bld_base);
716 LLVMBuilderRef builder = ctx->gallivm.builder;
717 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
718
719 if (tgsi_type_is_64bit(type)) {
720 LLVMValueRef lo, hi;
721
722 assert(swizzle == 0 || swizzle == 2);
723
724 lo = LLVMBuildExtractElement(
725 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
726 hi = LLVMBuildExtractElement(
727 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
728
729 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
730 }
731
732 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
733 cval = LLVMBuildExtractElement(
734 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
735 } else {
736 assert(swizzle == 0);
737 }
738
739 return bitcast(bld_base, type, cval);
740 }
741
742 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
743 const struct tgsi_full_declaration *decl)
744 {
745 struct si_shader_context *ctx = si_shader_context(bld_base);
746 LLVMBuilderRef builder = ctx->gallivm.builder;
747 unsigned first, last, i;
748 switch(decl->Declaration.File) {
749 case TGSI_FILE_ADDRESS:
750 {
751 unsigned idx;
752 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
753 unsigned chan;
754 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
755 ctx->addrs[idx][chan] = lp_build_alloca_undef(
756 &ctx->gallivm,
757 ctx->i32, "");
758 }
759 }
760 break;
761 }
762
763 case TGSI_FILE_TEMPORARY:
764 {
765 char name[16] = "";
766 LLVMValueRef array_alloca = NULL;
767 unsigned decl_size;
768 unsigned writemask = decl->Declaration.UsageMask;
769 first = decl->Range.First;
770 last = decl->Range.Last;
771 decl_size = 4 * ((last - first) + 1);
772
773 if (decl->Declaration.Array) {
774 unsigned id = decl->Array.ArrayID - 1;
775 unsigned array_size;
776
777 writemask &= ctx->temp_arrays[id].writemask;
778 ctx->temp_arrays[id].writemask = writemask;
779 array_size = ((last - first) + 1) * util_bitcount(writemask);
780
781 /* If the array has more than 16 elements, store it
782 * in memory using an alloca that spans the entire
783 * array.
784 *
785 * Otherwise, store each array element individually.
786 * We will then generate vectors (per-channel, up to
787 * <16 x float> if the usagemask is a single bit) for
788 * indirect addressing.
789 *
790 * Note that 16 is the number of vector elements that
791 * LLVM will store in a register, so theoretically an
792 * array with up to 4 * 16 = 64 elements could be
793 * handled this way, but whether that's a good idea
794 * depends on VGPR register pressure elsewhere.
795 *
796 * FIXME: We shouldn't need to have the non-alloca
797 * code path for arrays. LLVM should be smart enough to
798 * promote allocas into registers when profitable.
799 */
800 if (array_size > 16 ||
801 /* TODO: VGPR indexing is buggy on GFX9. */
802 ctx->screen->b.chip_class == GFX9) {
803 array_alloca = LLVMBuildAlloca(builder,
804 LLVMArrayType(ctx->f32,
805 array_size), "array");
806 ctx->temp_array_allocas[id] = array_alloca;
807 }
808 }
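/*
 * Sizing example (illustrative): DCL TEMP[0..7] declared as an array with
 * usage mask .xy gives array_size = 8 * 2 = 16, which still uses the
 * per-channel allocas below on pre-GFX9 chips; the same range with mask
 * .xyzw gives 32 and takes the single-alloca path above.
 */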
809
810 if (!ctx->temps_count) {
811 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
812 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
813 }
814 if (!array_alloca) {
815 for (i = 0; i < decl_size; ++i) {
816 #ifdef DEBUG
817 snprintf(name, sizeof(name), "TEMP%d.%c",
818 first + i / 4, "xyzw"[i % 4]);
819 #endif
820 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
821 lp_build_alloca_undef(&ctx->gallivm,
822 ctx->f32,
823 name);
824 }
825 } else {
826 LLVMValueRef idxs[2] = {
827 ctx->i32_0,
828 NULL
829 };
830 unsigned j = 0;
831
832 if (writemask != TGSI_WRITEMASK_XYZW &&
833 !ctx->undef_alloca) {
834 /* Create a dummy alloca. We use it so that we
835 * have a pointer that is safe to load from if
836 * a shader ever reads from a channel that
837 * it never writes to.
838 */
839 ctx->undef_alloca = lp_build_alloca_undef(
840 &ctx->gallivm,
841 ctx->f32, "undef");
842 }
843
844 for (i = 0; i < decl_size; ++i) {
845 LLVMValueRef ptr;
846 if (writemask & (1 << (i % 4))) {
847 #ifdef DEBUG
848 snprintf(name, sizeof(name), "TEMP%d.%c",
849 first + i / 4, "xyzw"[i % 4]);
850 #endif
851 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
852 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
853 j++;
854 } else {
855 ptr = ctx->undef_alloca;
856 }
857 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
858 }
859 }
860 break;
861 }
862 case TGSI_FILE_INPUT:
863 {
864 unsigned idx;
865 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
866 if (ctx->load_input &&
867 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
868 ctx->input_decls[idx] = *decl;
869 ctx->input_decls[idx].Range.First = idx;
870 ctx->input_decls[idx].Range.Last = idx;
871 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
872
873 if (si_preload_fs_inputs(ctx) ||
874 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
875 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
876 &ctx->inputs[idx * 4]);
877 }
878 }
879 }
880 break;
881
882 case TGSI_FILE_SYSTEM_VALUE:
883 {
884 unsigned idx;
885 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
886 ctx->load_system_value(ctx, idx, decl);
887 }
888 }
889 break;
890
891 case TGSI_FILE_OUTPUT:
892 {
893 char name[16] = "";
894 unsigned idx;
895 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
896 unsigned chan;
897 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
898 if (ctx->outputs[idx][0])
899 continue;
900 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
901 #ifdef DEBUG
902 snprintf(name, sizeof(name), "OUT%d.%c",
903 idx, "xyzw"[chan % 4]);
904 #endif
905 ctx->outputs[idx][chan] = lp_build_alloca_undef(
906 &ctx->gallivm,
907 ctx->f32, name);
908 }
909 }
910 break;
911 }
912
913 case TGSI_FILE_MEMORY:
914 ctx->declare_memory_region(ctx, decl);
915 break;
916
917 default:
918 break;
919 }
920 }
921
922 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
923 const struct tgsi_full_instruction *inst,
924 const struct tgsi_opcode_info *info,
925 LLVMValueRef dst[4])
926 {
927 struct si_shader_context *ctx = si_shader_context(bld_base);
928 struct gallivm_state *gallivm = &ctx->gallivm;
929 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
930 LLVMBuilderRef builder = ctx->gallivm.builder;
931 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
932 unsigned chan, chan_index;
933 bool is_vec_store = false;
934 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
935
936 if (dst[0]) {
937 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
938 is_vec_store = (k == LLVMVectorTypeKind);
939 }
940
941 if (is_vec_store) {
942 LLVMValueRef values[4] = {};
943 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
944 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
945 values[chan] = LLVMBuildExtractElement(gallivm->builder,
946 dst[0], index, "");
947 }
948 bld_base->emit_store(bld_base, inst, info, values);
949 return;
950 }
951
952 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
953 LLVMValueRef value = dst[chan_index];
954
955 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
956 continue;
957 if (inst->Instruction.Saturate)
958 value = ac_build_clamp(&ctx->ac, value);
959
960 if (reg->Register.File == TGSI_FILE_ADDRESS) {
961 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
962 LLVMBuildStore(builder, value, temp_ptr);
963 continue;
964 }
965
966 if (!tgsi_type_is_64bit(dtype))
967 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
968
969 if (reg->Register.Indirect) {
970 unsigned file = reg->Register.File;
971 unsigned reg_index = reg->Register.Index;
972 store_value_to_array(bld_base, value, file, chan_index,
973 reg_index, &reg->Indirect);
974 } else {
975 switch(reg->Register.File) {
976 case TGSI_FILE_OUTPUT:
977 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
978 if (tgsi_type_is_64bit(dtype))
979 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
980 break;
981
982 case TGSI_FILE_TEMPORARY:
983 {
984 if (reg->Register.Index >= ctx->temps_count)
985 continue;
986
987 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
988 if (tgsi_type_is_64bit(dtype))
989 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
990
991 break;
992 }
993 default:
994 return;
995 }
996 if (!tgsi_type_is_64bit(dtype))
997 LLVMBuildStore(builder, value, temp_ptr);
998 else {
999 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1000 LLVMVectorType(ctx->i32, 2), "");
1001 LLVMValueRef val2;
1002 value = LLVMBuildExtractElement(builder, ptr,
1003 ctx->i32_0, "");
1004 val2 = LLVMBuildExtractElement(builder, ptr,
1005 ctx->i32_1, "");
1006
1007 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1008 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1009 }
1010 }
1011 }
1012 }
1013
1014 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1015 {
1016 char buf[32];
1017 /* Subtract 1 so that the number shown is that of the corresponding
1018 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1019 * the instruction number of the corresponding TGSI IF.
1020 */
1021 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1022 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1023 }
1024
1025 /* Append a basic block at the level of the parent flow.
1026 */
1027 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1028 const char *name)
1029 {
1030 struct gallivm_state *gallivm = &ctx->gallivm;
1031
1032 assert(ctx->flow_depth >= 1);
1033
1034 if (ctx->flow_depth >= 2) {
1035 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1036
1037 return LLVMInsertBasicBlockInContext(gallivm->context,
1038 flow->next_block, name);
1039 }
1040
1041 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1042 }
1043
1044 /* Emit a branch to the given default target for the current block if
1045 * applicable -- that is, if the current block does not already contain a
1046 * branch from a break or continue.
1047 */
1048 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1049 {
1050 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1051 LLVMBuildBr(builder, target);
1052 }
1053
1054 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1055 struct lp_build_tgsi_context *bld_base,
1056 struct lp_build_emit_data *emit_data)
1057 {
1058 struct si_shader_context *ctx = si_shader_context(bld_base);
1059 struct gallivm_state *gallivm = &ctx->gallivm;
1060 struct si_llvm_flow *flow = push_flow(ctx);
1061 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1062 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1063 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1064 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1065 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1066 }
1067
1068 static void brk_emit(const struct lp_build_tgsi_action *action,
1069 struct lp_build_tgsi_context *bld_base,
1070 struct lp_build_emit_data *emit_data)
1071 {
1072 struct si_shader_context *ctx = si_shader_context(bld_base);
1073 struct gallivm_state *gallivm = &ctx->gallivm;
1074 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1075
1076 LLVMBuildBr(gallivm->builder, flow->next_block);
1077 }
1078
1079 static void cont_emit(const struct lp_build_tgsi_action *action,
1080 struct lp_build_tgsi_context *bld_base,
1081 struct lp_build_emit_data *emit_data)
1082 {
1083 struct si_shader_context *ctx = si_shader_context(bld_base);
1084 struct gallivm_state *gallivm = &ctx->gallivm;
1085 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1086
1087 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1088 }
1089
1090 static void else_emit(const struct lp_build_tgsi_action *action,
1091 struct lp_build_tgsi_context *bld_base,
1092 struct lp_build_emit_data *emit_data)
1093 {
1094 struct si_shader_context *ctx = si_shader_context(bld_base);
1095 struct gallivm_state *gallivm = &ctx->gallivm;
1096 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1097 LLVMBasicBlockRef endif_block;
1098
1099 assert(!current_branch->loop_entry_block);
1100
1101 endif_block = append_basic_block(ctx, "ENDIF");
1102 emit_default_branch(gallivm->builder, endif_block);
1103
1104 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1105 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1106
1107 current_branch->next_block = endif_block;
1108 }
1109
1110 static void endif_emit(const struct lp_build_tgsi_action *action,
1111 struct lp_build_tgsi_context *bld_base,
1112 struct lp_build_emit_data *emit_data)
1113 {
1114 struct si_shader_context *ctx = si_shader_context(bld_base);
1115 struct gallivm_state *gallivm = &ctx->gallivm;
1116 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1117
1118 assert(!current_branch->loop_entry_block);
1119
1120 emit_default_branch(gallivm->builder, current_branch->next_block);
1121 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1122 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1123
1124 ctx->flow_depth--;
1125 }
1126
1127 static void endloop_emit(const struct lp_build_tgsi_action *action,
1128 struct lp_build_tgsi_context *bld_base,
1129 struct lp_build_emit_data *emit_data)
1130 {
1131 struct si_shader_context *ctx = si_shader_context(bld_base);
1132 struct gallivm_state *gallivm = &ctx->gallivm;
1133 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1134
1135 assert(current_loop->loop_entry_block);
1136
1137 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1138
1139 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1140 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1141 ctx->flow_depth--;
1142 }
1143
1144 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1145 struct lp_build_tgsi_context *bld_base,
1146 struct lp_build_emit_data *emit_data,
1147 LLVMValueRef cond)
1148 {
1149 struct si_shader_context *ctx = si_shader_context(bld_base);
1150 struct gallivm_state *gallivm = &ctx->gallivm;
1151 struct si_llvm_flow *flow = push_flow(ctx);
1152 LLVMBasicBlockRef if_block;
1153
1154 if_block = append_basic_block(ctx, "IF");
1155 flow->next_block = append_basic_block(ctx, "ELSE");
1156 set_basicblock_name(if_block, "if", bld_base->pc);
1157 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1158 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1159 }
1160
1161 static void if_emit(const struct lp_build_tgsi_action *action,
1162 struct lp_build_tgsi_context *bld_base,
1163 struct lp_build_emit_data *emit_data)
1164 {
1165 struct gallivm_state *gallivm = bld_base->base.gallivm;
1166 LLVMValueRef cond;
1167
1168 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1169 emit_data->args[0],
1170 bld_base->base.zero, "");
1171
1172 if_cond_emit(action, bld_base, emit_data, cond);
1173 }
1174
1175 static void uif_emit(const struct lp_build_tgsi_action *action,
1176 struct lp_build_tgsi_context *bld_base,
1177 struct lp_build_emit_data *emit_data)
1178 {
1179 struct gallivm_state *gallivm = bld_base->base.gallivm;
1180 LLVMValueRef cond;
1181
1182 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1183 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1184 bld_base->int_bld.zero, "");
1185
1186 if_cond_emit(action, bld_base, emit_data, cond);
1187 }
1188
1189 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1190 const struct tgsi_full_immediate *imm)
1191 {
1192 unsigned i;
1193 struct si_shader_context *ctx = si_shader_context(bld_base);
1194
1195 for (i = 0; i < 4; ++i) {
1196 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1197 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
1198 }
1199
1200 ctx->imms_num++;
1201 }
1202
1203 void si_llvm_context_init(struct si_shader_context *ctx,
1204 struct si_screen *sscreen,
1205 LLVMTargetMachineRef tm)
1206 {
1207 struct lp_type type;
1208
1209 /* Initialize the gallivm object:
1210 * We are only using the module, context, and builder fields of this struct.
1211 * This should be enough for us to be able to pass our gallivm struct to the
1212 * helper functions in the gallivm module.
1213 */
1214 memset(ctx, 0, sizeof(*ctx));
1215 ctx->screen = sscreen;
1216 ctx->tm = tm;
1217
1218 ctx->gallivm.context = LLVMContextCreate();
1219 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1220 ctx->gallivm.context);
1221 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1222
1223 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1224 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1225 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1226 LLVMDisposeTargetData(data_layout);
1227 LLVMDisposeMessage(data_layout_str);
1228
1229 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1230 enum lp_float_mode float_mode =
1231 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1232 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1233
1234 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1235 float_mode);
1236
1237 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1238 ctx->ac.module = ctx->gallivm.module;
1239 ctx->ac.builder = ctx->gallivm.builder;
1240
1241 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1242
1243 type.floating = true;
1244 type.fixed = false;
1245 type.sign = true;
1246 type.norm = false;
1247 type.width = 32;
1248 type.length = 1;
1249
1250 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1251 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1252 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1253 type.width *= 2;
1254 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1255 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1256 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1257
1258 bld_base->soa = 1;
1259 bld_base->emit_swizzle = emit_swizzle;
1260 bld_base->emit_declaration = emit_declaration;
1261 bld_base->emit_immediate = emit_immediate;
1262
1263 /* metadata allowing 2.5 ULP */
1264 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1265 "fpmath", 6);
1266 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1267 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1268 &arg, 1);
1269
1270 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1271 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1272 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1273 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1274 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1275 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1276 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1277 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1278
1279 si_shader_context_init_alu(&ctx->bld_base);
1280
1281 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1282 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1283 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1284 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1285 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1286 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1287 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1288 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1289 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1290 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1291 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1292
1293 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1294 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1295 }
1296
1297 /* Set the context to a certain TGSI shader. Can be called repeatedly
1298 * to change the shader. */
1299 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1300 struct si_shader *shader)
1301 {
1302 const struct tgsi_shader_info *info = NULL;
1303 const struct tgsi_token *tokens = NULL;
1304
1305 if (shader && shader->selector) {
1306 info = &shader->selector->info;
1307 tokens = shader->selector->tokens;
1308 }
1309
1310 ctx->shader = shader;
1311 ctx->type = info ? info->processor : -1;
1312 ctx->bld_base.info = info;
1313
1314 /* Clean up the old contents. */
1315 FREE(ctx->temp_arrays);
1316 ctx->temp_arrays = NULL;
1317 FREE(ctx->temp_array_allocas);
1318 ctx->temp_array_allocas = NULL;
1319
1320 FREE(ctx->imms);
1321 ctx->imms = NULL;
1322 ctx->imms_num = 0;
1323
1324 FREE(ctx->temps);
1325 ctx->temps = NULL;
1326 ctx->temps_count = 0;
1327
1328 if (!info || !tokens)
1329 return;
1330
1331 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1332 int size = info->array_max[TGSI_FILE_TEMPORARY];
1333
1334 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1335 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1336
1337 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1338 ctx->temp_arrays);
1339 }
1340 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1341 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1342 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1343 }
1344
1345 /* Re-set these to start with a clean slate. */
1346 ctx->bld_base.num_instructions = 0;
1347 ctx->bld_base.pc = 0;
1348 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1349
1350 ctx->bld_base.emit_store = si_llvm_emit_store;
1351 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1352 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1353 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1354 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1355 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1356 }
1357
1358 void si_llvm_create_func(struct si_shader_context *ctx,
1359 const char *name,
1360 LLVMTypeRef *return_types, unsigned num_return_elems,
1361 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1362 {
1363 LLVMTypeRef main_fn_type, ret_type;
1364 LLVMBasicBlockRef main_fn_body;
1365 enum si_llvm_calling_convention call_conv;
1366 unsigned real_shader_type;
1367
1368 if (num_return_elems)
1369 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1370 return_types,
1371 num_return_elems, true);
1372 else
1373 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1374
1375 /* Setup the function */
1376 ctx->return_type = ret_type;
1377 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1378 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1379 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1380 ctx->main_fn, "main_body");
1381 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1382
1383 real_shader_type = ctx->type;
1384
1385 /* LS is merged into HS (TCS), and ES is merged into GS. */
1386 if (ctx->screen->b.chip_class >= GFX9) {
1387 if (ctx->shader->key.as_ls)
1388 real_shader_type = PIPE_SHADER_TESS_CTRL;
1389 else if (ctx->shader->key.as_es)
1390 real_shader_type = PIPE_SHADER_GEOMETRY;
1391 }
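/*
 * Example (matching the commit subject): on GFX9 a vertex shader compiled
 * with key.as_ls is the first half of the merged LS-HS shader, so it must
 * be emitted with the HS calling convention selected below; likewise a VS
 * or TES with key.as_es becomes part of the merged ES-GS shader and uses
 * the GS convention.
 */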
1392
1393 switch (real_shader_type) {
1394 case PIPE_SHADER_VERTEX:
1395 case PIPE_SHADER_TESS_EVAL:
1396 call_conv = RADEON_LLVM_AMDGPU_VS;
1397 break;
1398 case PIPE_SHADER_TESS_CTRL:
1399 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1400 RADEON_LLVM_AMDGPU_VS;
1401 break;
1402 case PIPE_SHADER_GEOMETRY:
1403 call_conv = RADEON_LLVM_AMDGPU_GS;
1404 break;
1405 case PIPE_SHADER_FRAGMENT:
1406 call_conv = RADEON_LLVM_AMDGPU_PS;
1407 break;
1408 case PIPE_SHADER_COMPUTE:
1409 call_conv = RADEON_LLVM_AMDGPU_CS;
1410 break;
1411 default:
1412 unreachable("Unhandled shader type");
1413 }
1414
1415 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1416 }
1417
1418 void si_llvm_optimize_module(struct si_shader_context *ctx)
1419 {
1420 struct gallivm_state *gallivm = &ctx->gallivm;
1421 const char *triple = LLVMGetTarget(gallivm->module);
1422 LLVMTargetLibraryInfoRef target_library_info;
1423
1424 /* Dump LLVM IR before any optimization passes */
1425 if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1426 r600_can_dump_shader(&ctx->screen->b, ctx->type))
1427 LLVMDumpModule(ctx->gallivm.module);
1428
1429 /* Create the pass manager */
1430 gallivm->passmgr = LLVMCreatePassManager();
1431
1432 target_library_info = gallivm_create_target_library_info(triple);
1433 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1434
1435 if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1436 LLVMAddVerifierPass(gallivm->passmgr);
1437
1438 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1439
1440 /* This pass should eliminate all the load and store instructions */
1441 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1442
1443 /* Add some optimization passes */
1444 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1445 LLVMAddLICMPass(gallivm->passmgr);
1446 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1447 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1448 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1449
1450 /* Run the pass */
1451 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1452
1453 LLVMDisposeBuilder(gallivm->builder);
1454 LLVMDisposePassManager(gallivm->passmgr);
1455 gallivm_dispose_target_library_info(target_library_info);
1456 }
1457
1458 void si_llvm_dispose(struct si_shader_context *ctx)
1459 {
1460 LLVMDisposeModule(ctx->gallivm.module);
1461 LLVMContextDispose(ctx->gallivm.context);
1462 FREE(ctx->temp_arrays);
1463 ctx->temp_arrays = NULL;
1464 FREE(ctx->temp_array_allocas);
1465 ctx->temp_array_allocas = NULL;
1466 FREE(ctx->temps);
1467 ctx->temps = NULL;
1468 ctx->temps_count = 0;
1469 FREE(ctx->imms);
1470 ctx->imms = NULL;
1471 ctx->imms_num = 0;
1472 FREE(ctx->flow);
1473 ctx->flow = NULL;
1474 ctx->flow_depth_max = 0;
1475 }
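/*
 * Rough lifecycle sketch for the helpers in this file (hedged; the actual
 * driver flow lives elsewhere in radeonsi, and 'sscreen', 'tm', 'shader',
 * 'binary' and 'debug' below stand for whatever the caller already has):
 *
 *   struct si_shader_context ctx;
 *
 *   si_llvm_context_init(&ctx, sscreen, tm);
 *   si_llvm_context_set_tgsi(&ctx, shader);
 *   si_llvm_create_func(&ctx, "main", return_types, num_returns,
 *                       param_types, num_params);
 *   ... translate the TGSI instructions into LLVM IR ...
 *   si_llvm_optimize_module(&ctx);
 *   si_llvm_compile(ctx.gallivm.module, &binary, tm, debug);
 *   si_llvm_dispose(&ctx);
 */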