radeonsi: inline si_llvm_shader_type into si_llvm_create_func
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 #include <llvm-c/Support.h>
44
45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
46 */
47 struct si_llvm_flow {
48 /* Loop exit or next part of if/else/endif. */
49 LLVMBasicBlockRef next_block;
50 LLVMBasicBlockRef loop_entry_block;
51 };
52
/* Numeric calling-convention IDs, one per AMDGPU shader entry-point kind
 * named by the enumerators (VS, GS, PS, CS). */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
};
59
60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
61 {
62 char str[16];
63
64 snprintf(str, sizeof(str), "%i", value);
65 LLVMAddTargetDependentFunctionAttr(F, name, str);
66 }
67
68 static void init_amdgpu_target()
69 {
70 gallivm_init_llvm_targets();
71 LLVMInitializeAMDGPUTargetInfo();
72 LLVMInitializeAMDGPUTarget();
73 LLVMInitializeAMDGPUTargetMC();
74 LLVMInitializeAMDGPUAsmPrinter();
75
76 /* For inline assembly. */
77 LLVMInitializeAMDGPUAsmParser();
78
79 if (HAVE_LLVM >= 0x0400) {
80 /*
81 * Workaround for bug in llvm 4.0 that causes image intrinsics
82 * to disappear.
83 * https://reviews.llvm.org/D26348
84 */
85 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
86 LLVMParseCommandLineOptions(2, argv, NULL);
87 }
88 }
89
90 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
91
92 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
93 {
94 LLVMTargetRef target = NULL;
95 char *err_message = NULL;
96
97 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
98
99 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
100 fprintf(stderr, "Cannot find target for triple %s ", triple);
101 if (err_message) {
102 fprintf(stderr, "%s\n", err_message);
103 }
104 LLVMDisposeMessage(err_message);
105 return NULL;
106 }
107 return target;
108 }
109
/* Context passed to si_diagnostic_handler while a module is compiled. */
struct si_llvm_diagnostics {
	/* Receiver of the forwarded diagnostic messages. */
	struct pipe_debug_callback *debug;
	/* Set to 1 by the handler when an error-severity diagnostic arrives. */
	unsigned retval;
};
114
115 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
116 {
117 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
118 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
119 char *description = LLVMGetDiagInfoDescription(di);
120 const char *severity_str = NULL;
121
122 switch (severity) {
123 case LLVMDSError:
124 severity_str = "error";
125 break;
126 case LLVMDSWarning:
127 severity_str = "warning";
128 break;
129 case LLVMDSRemark:
130 severity_str = "remark";
131 break;
132 case LLVMDSNote:
133 severity_str = "note";
134 break;
135 default:
136 severity_str = "unknown";
137 }
138
139 pipe_debug_message(diag->debug, SHADER_INFO,
140 "LLVM diagnostic (%s): %s", severity_str, description);
141
142 if (severity == LLVMDSError) {
143 diag->retval = 1;
144 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
145 }
146
147 LLVMDisposeMessage(description);
148 }
149
150 /**
151 * Compile an LLVM module to machine code.
152 *
153 * @returns 0 for success, 1 for failure
154 */
155 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
156 LLVMTargetMachineRef tm,
157 struct pipe_debug_callback *debug)
158 {
159 struct si_llvm_diagnostics diag;
160 char *err;
161 LLVMContextRef llvm_ctx;
162 LLVMMemoryBufferRef out_buffer;
163 unsigned buffer_size;
164 const char *buffer_data;
165 LLVMBool mem_err;
166
167 diag.debug = debug;
168 diag.retval = 0;
169
170 /* Setup Diagnostic Handler*/
171 llvm_ctx = LLVMGetModuleContext(M);
172
173 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
174
175 /* Compile IR*/
176 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
177 &out_buffer);
178
179 /* Process Errors/Warnings */
180 if (mem_err) {
181 fprintf(stderr, "%s: %s", __FUNCTION__, err);
182 pipe_debug_message(debug, SHADER_INFO,
183 "LLVM emit error: %s", err);
184 FREE(err);
185 diag.retval = 1;
186 goto out;
187 }
188
189 /* Extract Shader Code*/
190 buffer_size = LLVMGetBufferSize(out_buffer);
191 buffer_data = LLVMGetBufferStart(out_buffer);
192
193 ac_elf_read(buffer_data, buffer_size, binary);
194
195 /* Clean up */
196 LLVMDisposeMemoryBuffer(out_buffer);
197
198 out:
199 if (diag.retval != 0)
200 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
201 return diag.retval;
202 }
203
204 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
205 enum tgsi_opcode_type type)
206 {
207 LLVMContextRef ctx = bld_base->base.gallivm->context;
208
209 switch (type) {
210 case TGSI_TYPE_UNSIGNED:
211 case TGSI_TYPE_SIGNED:
212 return LLVMInt32TypeInContext(ctx);
213 case TGSI_TYPE_UNSIGNED64:
214 case TGSI_TYPE_SIGNED64:
215 return LLVMInt64TypeInContext(ctx);
216 case TGSI_TYPE_DOUBLE:
217 return LLVMDoubleTypeInContext(ctx);
218 case TGSI_TYPE_UNTYPED:
219 case TGSI_TYPE_FLOAT:
220 return LLVMFloatTypeInContext(ctx);
221 default: break;
222 }
223 return 0;
224 }
225
226 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
227 enum tgsi_opcode_type type, LLVMValueRef value)
228 {
229 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
230 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
231
232 if (dst_type)
233 return LLVMBuildBitCast(builder, value, dst_type, "");
234 else
235 return value;
236 }
237
238 /**
239 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
240 * or an undefined value in the same interval otherwise.
241 */
242 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
243 LLVMValueRef index,
244 unsigned num)
245 {
246 struct gallivm_state *gallivm = &ctx->gallivm;
247 LLVMBuilderRef builder = gallivm->builder;
248 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
249 LLVMValueRef cc;
250
251 if (util_is_power_of_two(num)) {
252 index = LLVMBuildAnd(builder, index, c_max, "");
253 } else {
254 /* In theory, this MAX pattern should result in code that is
255 * as good as the bit-wise AND above.
256 *
257 * In practice, LLVM generates worse code (at the time of
258 * writing), because its value tracking is not strong enough.
259 */
260 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
261 index = LLVMBuildSelect(builder, cc, index, c_max, "");
262 }
263
264 return index;
265 }
266
267 static struct si_llvm_flow *
268 get_current_flow(struct si_shader_context *ctx)
269 {
270 if (ctx->flow_depth > 0)
271 return &ctx->flow[ctx->flow_depth - 1];
272 return NULL;
273 }
274
275 static struct si_llvm_flow *
276 get_innermost_loop(struct si_shader_context *ctx)
277 {
278 for (unsigned i = ctx->flow_depth; i > 0; --i) {
279 if (ctx->flow[i - 1].loop_entry_block)
280 return &ctx->flow[i - 1];
281 }
282 return NULL;
283 }
284
285 static struct si_llvm_flow *
286 push_flow(struct si_shader_context *ctx)
287 {
288 struct si_llvm_flow *flow;
289
290 if (ctx->flow_depth >= ctx->flow_depth_max) {
291 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
292 ctx->flow = REALLOC(ctx->flow,
293 ctx->flow_depth_max * sizeof(*ctx->flow),
294 new_max * sizeof(*ctx->flow));
295 ctx->flow_depth_max = new_max;
296 }
297
298 flow = &ctx->flow[ctx->flow_depth];
299 ctx->flow_depth++;
300
301 flow->next_block = NULL;
302 flow->loop_entry_block = NULL;
303 return flow;
304 }
305
306 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
307 LLVMValueRef value,
308 unsigned swizzle_x,
309 unsigned swizzle_y,
310 unsigned swizzle_z,
311 unsigned swizzle_w)
312 {
313 LLVMValueRef swizzles[4];
314 LLVMTypeRef i32t =
315 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
316
317 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
318 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
319 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
320 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
321
322 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
323 value,
324 LLVMGetUndef(LLVMTypeOf(value)),
325 LLVMConstVector(swizzles, 4), "");
326 }
327
328 /**
329 * Return the description of the array covering the given temporary register
330 * index.
331 */
332 static unsigned
333 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
334 unsigned reg_index,
335 const struct tgsi_ind_register *reg)
336 {
337 struct si_shader_context *ctx = si_shader_context(bld_base);
338 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
339 unsigned i;
340
341 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
342 return reg->ArrayID;
343
344 for (i = 0; i < num_arrays; i++) {
345 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
346
347 if (reg_index >= array->range.First && reg_index <= array->range.Last)
348 return i + 1;
349 }
350
351 return 0;
352 }
353
354 static struct tgsi_declaration_range
355 get_array_range(struct lp_build_tgsi_context *bld_base,
356 unsigned File, unsigned reg_index,
357 const struct tgsi_ind_register *reg)
358 {
359 struct si_shader_context *ctx = si_shader_context(bld_base);
360 struct tgsi_declaration_range range;
361
362 if (File == TGSI_FILE_TEMPORARY) {
363 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
364 if (array_id)
365 return ctx->temp_arrays[array_id - 1].range;
366 }
367
368 range.First = 0;
369 range.Last = bld_base->info->file_max[File];
370 return range;
371 }
372
373 static LLVMValueRef
374 emit_array_index(struct si_shader_context *ctx,
375 const struct tgsi_ind_register *reg,
376 unsigned offset)
377 {
378 struct gallivm_state *gallivm = &ctx->gallivm;
379
380 if (!reg) {
381 return LLVMConstInt(ctx->i32, offset, 0);
382 }
383 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
384 return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
385 }
386
387 /**
388 * For indirect registers, construct a pointer directly to the requested
389 * element using getelementptr if possible.
390 *
391 * Returns NULL if the insertelement/extractelement fallback for array access
392 * must be used.
393 */
394 static LLVMValueRef
395 get_pointer_into_array(struct si_shader_context *ctx,
396 unsigned file,
397 unsigned swizzle,
398 unsigned reg_index,
399 const struct tgsi_ind_register *reg_indirect)
400 {
401 unsigned array_id;
402 struct tgsi_array_info *array;
403 struct gallivm_state *gallivm = &ctx->gallivm;
404 LLVMBuilderRef builder = gallivm->builder;
405 LLVMValueRef idxs[2];
406 LLVMValueRef index;
407 LLVMValueRef alloca;
408
409 if (file != TGSI_FILE_TEMPORARY)
410 return NULL;
411
412 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
413 if (!array_id)
414 return NULL;
415
416 alloca = ctx->temp_array_allocas[array_id - 1];
417 if (!alloca)
418 return NULL;
419
420 array = &ctx->temp_arrays[array_id - 1];
421
422 if (!(array->writemask & (1 << swizzle)))
423 return ctx->undef_alloca;
424
425 index = emit_array_index(ctx, reg_indirect,
426 reg_index - ctx->temp_arrays[array_id - 1].range.First);
427
428 /* Ensure that the index is within a valid range, to guard against
429 * VM faults and overwriting critical data (e.g. spilled resource
430 * descriptors).
431 *
432 * TODO It should be possible to avoid the additional instructions
433 * if LLVM is changed so that it guarantuees:
434 * 1. the scratch space descriptor isolates the current wave (this
435 * could even save the scratch offset SGPR at the cost of an
436 * additional SALU instruction)
437 * 2. the memory for allocas must be allocated at the _end_ of the
438 * scratch space (after spilled registers)
439 */
440 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
441
442 index = LLVMBuildMul(
443 builder, index,
444 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
445 "");
446 index = LLVMBuildAdd(
447 builder, index,
448 LLVMConstInt(ctx->i32,
449 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
450 "");
451 idxs[0] = ctx->i32_0;
452 idxs[1] = index;
453 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
454 }
455
456 LLVMValueRef
457 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
458 enum tgsi_opcode_type type,
459 LLVMValueRef ptr,
460 LLVMValueRef ptr2)
461 {
462 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
463 LLVMValueRef result;
464
465 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
466
467 result = LLVMBuildInsertElement(builder,
468 result,
469 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
470 bld_base->int_bld.zero, "");
471 result = LLVMBuildInsertElement(builder,
472 result,
473 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
474 bld_base->int_bld.one, "");
475 return bitcast(bld_base, type, result);
476 }
477
478 static LLVMValueRef
479 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
480 unsigned File, enum tgsi_opcode_type type,
481 struct tgsi_declaration_range range,
482 unsigned swizzle)
483 {
484 struct si_shader_context *ctx = si_shader_context(bld_base);
485
486 LLVMBuilderRef builder = ctx->gallivm.builder;
487
488 unsigned i, size = range.Last - range.First + 1;
489 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
490 LLVMValueRef result = LLVMGetUndef(vec);
491
492 struct tgsi_full_src_register tmp_reg = {};
493 tmp_reg.Register.File = File;
494
495 for (i = 0; i < size; ++i) {
496 tmp_reg.Register.Index = i + range.First;
497 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
498 result = LLVMBuildInsertElement(builder, result, temp,
499 LLVMConstInt(ctx->i32, i, 0), "array_vector");
500 }
501 return result;
502 }
503
504 static LLVMValueRef
505 load_value_from_array(struct lp_build_tgsi_context *bld_base,
506 unsigned file,
507 enum tgsi_opcode_type type,
508 unsigned swizzle,
509 unsigned reg_index,
510 const struct tgsi_ind_register *reg_indirect)
511 {
512 struct si_shader_context *ctx = si_shader_context(bld_base);
513 struct gallivm_state *gallivm = &ctx->gallivm;
514 LLVMBuilderRef builder = gallivm->builder;
515 LLVMValueRef ptr;
516
517 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
518 if (ptr) {
519 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
520 if (tgsi_type_is_64bit(type)) {
521 LLVMValueRef ptr_hi, val_hi;
522 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
523 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
524 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
525 }
526
527 return val;
528 } else {
529 struct tgsi_declaration_range range =
530 get_array_range(bld_base, file, reg_index, reg_indirect);
531 LLVMValueRef index =
532 emit_array_index(ctx, reg_indirect, reg_index - range.First);
533 LLVMValueRef array =
534 emit_array_fetch(bld_base, file, type, range, swizzle);
535 return LLVMBuildExtractElement(builder, array, index, "");
536 }
537 }
538
539 static void
540 store_value_to_array(struct lp_build_tgsi_context *bld_base,
541 LLVMValueRef value,
542 unsigned file,
543 unsigned chan_index,
544 unsigned reg_index,
545 const struct tgsi_ind_register *reg_indirect)
546 {
547 struct si_shader_context *ctx = si_shader_context(bld_base);
548 struct gallivm_state *gallivm = &ctx->gallivm;
549 LLVMBuilderRef builder = gallivm->builder;
550 LLVMValueRef ptr;
551
552 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
553 if (ptr) {
554 LLVMBuildStore(builder, value, ptr);
555 } else {
556 unsigned i, size;
557 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
558 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
559 LLVMValueRef array =
560 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
561 LLVMValueRef temp_ptr;
562
563 array = LLVMBuildInsertElement(builder, array, value, index, "");
564
565 size = range.Last - range.First + 1;
566 for (i = 0; i < size; ++i) {
567 switch(file) {
568 case TGSI_FILE_OUTPUT:
569 temp_ptr = ctx->outputs[i + range.First][chan_index];
570 break;
571
572 case TGSI_FILE_TEMPORARY:
573 if (range.First + i >= ctx->temps_count)
574 continue;
575 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
576 break;
577
578 default:
579 continue;
580 }
581 value = LLVMBuildExtractElement(builder, array,
582 LLVMConstInt(ctx->i32, i, 0), "");
583 LLVMBuildStore(builder, value, temp_ptr);
584 }
585 }
586 }
587
588 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
589 * reload them at each use. This must be true if the shader is using
590 * derivatives and KILL, because KILL can leave the WQM and then a lazy
591 * input load isn't in the WQM anymore.
592 */
593 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
594 {
595 struct si_shader_selector *sel = ctx->shader->selector;
596
597 return sel->info.uses_derivatives &&
598 sel->info.uses_kill;
599 }
600
601 static LLVMValueRef
602 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
603 unsigned chan)
604 {
605 struct si_shader_context *ctx = si_shader_context(bld_base);
606
607 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
608 return ctx->outputs[index][chan];
609 }
610
611 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
612 const struct tgsi_full_src_register *reg,
613 enum tgsi_opcode_type type,
614 unsigned swizzle)
615 {
616 struct si_shader_context *ctx = si_shader_context(bld_base);
617 LLVMBuilderRef builder = ctx->gallivm.builder;
618 LLVMValueRef result = NULL, ptr, ptr2;
619
620 if (swizzle == ~0) {
621 LLVMValueRef values[TGSI_NUM_CHANNELS];
622 unsigned chan;
623 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
624 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
625 }
626 return lp_build_gather_values(&ctx->gallivm, values,
627 TGSI_NUM_CHANNELS);
628 }
629
630 if (reg->Register.Indirect) {
631 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
632 swizzle, reg->Register.Index, &reg->Indirect);
633 return bitcast(bld_base, type, load);
634 }
635
636 switch(reg->Register.File) {
637 case TGSI_FILE_IMMEDIATE: {
638 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
639 if (tgsi_type_is_64bit(type)) {
640 result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
641 result = LLVMConstInsertElement(result,
642 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
643 ctx->i32_0);
644 result = LLVMConstInsertElement(result,
645 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
646 ctx->i32_1);
647 return LLVMConstBitCast(result, ctype);
648 } else {
649 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
650 }
651 }
652
653 case TGSI_FILE_INPUT: {
654 unsigned index = reg->Register.Index;
655 LLVMValueRef input[4];
656
657 /* I don't think doing this for vertex shaders is beneficial.
658 * For those, we want to make sure the VMEM loads are executed
659 * only once. Fragment shaders don't care much, because
660 * v_interp instructions are much cheaper than VMEM loads.
661 */
662 if (!si_preload_fs_inputs(ctx) &&
663 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
664 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
665 else
666 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
667
668 result = input[swizzle];
669
670 if (tgsi_type_is_64bit(type)) {
671 ptr = result;
672 ptr2 = input[swizzle + 1];
673 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
674 }
675 break;
676 }
677
678 case TGSI_FILE_TEMPORARY:
679 if (reg->Register.Index >= ctx->temps_count)
680 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
681 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
682 if (tgsi_type_is_64bit(type)) {
683 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
684 return si_llvm_emit_fetch_64bit(bld_base, type,
685 LLVMBuildLoad(builder, ptr, ""),
686 LLVMBuildLoad(builder, ptr2, ""));
687 }
688 result = LLVMBuildLoad(builder, ptr, "");
689 break;
690
691 case TGSI_FILE_OUTPUT:
692 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
693 if (tgsi_type_is_64bit(type)) {
694 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
695 return si_llvm_emit_fetch_64bit(bld_base, type,
696 LLVMBuildLoad(builder, ptr, ""),
697 LLVMBuildLoad(builder, ptr2, ""));
698 }
699 result = LLVMBuildLoad(builder, ptr, "");
700 break;
701
702 default:
703 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
704 }
705
706 return bitcast(bld_base, type, result);
707 }
708
709 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
710 const struct tgsi_full_src_register *reg,
711 enum tgsi_opcode_type type,
712 unsigned swizzle)
713 {
714 struct si_shader_context *ctx = si_shader_context(bld_base);
715 LLVMBuilderRef builder = ctx->gallivm.builder;
716 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
717
718 if (tgsi_type_is_64bit(type)) {
719 LLVMValueRef lo, hi;
720
721 assert(swizzle == 0 || swizzle == 2);
722
723 lo = LLVMBuildExtractElement(
724 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
725 hi = LLVMBuildExtractElement(
726 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
727
728 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
729 }
730
731 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
732 cval = LLVMBuildExtractElement(
733 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
734 } else {
735 assert(swizzle == 0);
736 }
737
738 return bitcast(bld_base, type, cval);
739 }
740
741 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
742 const struct tgsi_full_declaration *decl)
743 {
744 struct si_shader_context *ctx = si_shader_context(bld_base);
745 LLVMBuilderRef builder = ctx->gallivm.builder;
746 unsigned first, last, i;
747 switch(decl->Declaration.File) {
748 case TGSI_FILE_ADDRESS:
749 {
750 unsigned idx;
751 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
752 unsigned chan;
753 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
754 ctx->addrs[idx][chan] = lp_build_alloca_undef(
755 &ctx->gallivm,
756 ctx->i32, "");
757 }
758 }
759 break;
760 }
761
762 case TGSI_FILE_TEMPORARY:
763 {
764 char name[16] = "";
765 LLVMValueRef array_alloca = NULL;
766 unsigned decl_size;
767 unsigned writemask = decl->Declaration.UsageMask;
768 first = decl->Range.First;
769 last = decl->Range.Last;
770 decl_size = 4 * ((last - first) + 1);
771
772 if (decl->Declaration.Array) {
773 unsigned id = decl->Array.ArrayID - 1;
774 unsigned array_size;
775
776 writemask &= ctx->temp_arrays[id].writemask;
777 ctx->temp_arrays[id].writemask = writemask;
778 array_size = ((last - first) + 1) * util_bitcount(writemask);
779
780 /* If the array has more than 16 elements, store it
781 * in memory using an alloca that spans the entire
782 * array.
783 *
784 * Otherwise, store each array element individually.
785 * We will then generate vectors (per-channel, up to
786 * <16 x float> if the usagemask is a single bit) for
787 * indirect addressing.
788 *
789 * Note that 16 is the number of vector elements that
790 * LLVM will store in a register, so theoretically an
791 * array with up to 4 * 16 = 64 elements could be
792 * handled this way, but whether that's a good idea
793 * depends on VGPR register pressure elsewhere.
794 *
795 * FIXME: We shouldn't need to have the non-alloca
796 * code path for arrays. LLVM should be smart enough to
797 * promote allocas into registers when profitable.
798 */
799 if (array_size > 16 ||
800 /* TODO: VGPR indexing is buggy on GFX9. */
801 ctx->screen->b.chip_class == GFX9) {
802 array_alloca = LLVMBuildAlloca(builder,
803 LLVMArrayType(ctx->f32,
804 array_size), "array");
805 ctx->temp_array_allocas[id] = array_alloca;
806 }
807 }
808
809 if (!ctx->temps_count) {
810 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
811 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
812 }
813 if (!array_alloca) {
814 for (i = 0; i < decl_size; ++i) {
815 #ifdef DEBUG
816 snprintf(name, sizeof(name), "TEMP%d.%c",
817 first + i / 4, "xyzw"[i % 4]);
818 #endif
819 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
820 lp_build_alloca_undef(&ctx->gallivm,
821 ctx->f32,
822 name);
823 }
824 } else {
825 LLVMValueRef idxs[2] = {
826 ctx->i32_0,
827 NULL
828 };
829 unsigned j = 0;
830
831 if (writemask != TGSI_WRITEMASK_XYZW &&
832 !ctx->undef_alloca) {
833 /* Create a dummy alloca. We use it so that we
834 * have a pointer that is safe to load from if
835 * a shader ever reads from a channel that
836 * it never writes to.
837 */
838 ctx->undef_alloca = lp_build_alloca_undef(
839 &ctx->gallivm,
840 ctx->f32, "undef");
841 }
842
843 for (i = 0; i < decl_size; ++i) {
844 LLVMValueRef ptr;
845 if (writemask & (1 << (i % 4))) {
846 #ifdef DEBUG
847 snprintf(name, sizeof(name), "TEMP%d.%c",
848 first + i / 4, "xyzw"[i % 4]);
849 #endif
850 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
851 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
852 j++;
853 } else {
854 ptr = ctx->undef_alloca;
855 }
856 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
857 }
858 }
859 break;
860 }
861 case TGSI_FILE_INPUT:
862 {
863 unsigned idx;
864 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
865 if (ctx->load_input &&
866 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
867 ctx->input_decls[idx] = *decl;
868 ctx->input_decls[idx].Range.First = idx;
869 ctx->input_decls[idx].Range.Last = idx;
870 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
871
872 if (si_preload_fs_inputs(ctx) ||
873 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
874 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
875 &ctx->inputs[idx * 4]);
876 }
877 }
878 }
879 break;
880
881 case TGSI_FILE_SYSTEM_VALUE:
882 {
883 unsigned idx;
884 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
885 ctx->load_system_value(ctx, idx, decl);
886 }
887 }
888 break;
889
890 case TGSI_FILE_OUTPUT:
891 {
892 char name[16] = "";
893 unsigned idx;
894 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
895 unsigned chan;
896 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
897 if (ctx->outputs[idx][0])
898 continue;
899 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
900 #ifdef DEBUG
901 snprintf(name, sizeof(name), "OUT%d.%c",
902 idx, "xyzw"[chan % 4]);
903 #endif
904 ctx->outputs[idx][chan] = lp_build_alloca_undef(
905 &ctx->gallivm,
906 ctx->f32, name);
907 }
908 }
909 break;
910 }
911
912 case TGSI_FILE_MEMORY:
913 ctx->declare_memory_region(ctx, decl);
914 break;
915
916 default:
917 break;
918 }
919 }
920
921 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
922 const struct tgsi_full_instruction *inst,
923 const struct tgsi_opcode_info *info,
924 LLVMValueRef dst[4])
925 {
926 struct si_shader_context *ctx = si_shader_context(bld_base);
927 struct gallivm_state *gallivm = &ctx->gallivm;
928 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
929 LLVMBuilderRef builder = ctx->gallivm.builder;
930 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
931 unsigned chan, chan_index;
932 bool is_vec_store = false;
933 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
934
935 if (dst[0]) {
936 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
937 is_vec_store = (k == LLVMVectorTypeKind);
938 }
939
940 if (is_vec_store) {
941 LLVMValueRef values[4] = {};
942 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
943 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
944 values[chan] = LLVMBuildExtractElement(gallivm->builder,
945 dst[0], index, "");
946 }
947 bld_base->emit_store(bld_base, inst, info, values);
948 return;
949 }
950
951 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
952 LLVMValueRef value = dst[chan_index];
953
954 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
955 continue;
956 if (inst->Instruction.Saturate)
957 value = ac_build_clamp(&ctx->ac, value);
958
959 if (reg->Register.File == TGSI_FILE_ADDRESS) {
960 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
961 LLVMBuildStore(builder, value, temp_ptr);
962 continue;
963 }
964
965 if (!tgsi_type_is_64bit(dtype))
966 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
967
968 if (reg->Register.Indirect) {
969 unsigned file = reg->Register.File;
970 unsigned reg_index = reg->Register.Index;
971 store_value_to_array(bld_base, value, file, chan_index,
972 reg_index, &reg->Indirect);
973 } else {
974 switch(reg->Register.File) {
975 case TGSI_FILE_OUTPUT:
976 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
977 if (tgsi_type_is_64bit(dtype))
978 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
979 break;
980
981 case TGSI_FILE_TEMPORARY:
982 {
983 if (reg->Register.Index >= ctx->temps_count)
984 continue;
985
986 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
987 if (tgsi_type_is_64bit(dtype))
988 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
989
990 break;
991 }
992 default:
993 return;
994 }
995 if (!tgsi_type_is_64bit(dtype))
996 LLVMBuildStore(builder, value, temp_ptr);
997 else {
998 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
999 LLVMVectorType(ctx->i32, 2), "");
1000 LLVMValueRef val2;
1001 value = LLVMBuildExtractElement(builder, ptr,
1002 ctx->i32_0, "");
1003 val2 = LLVMBuildExtractElement(builder, ptr,
1004 ctx->i32_1, "");
1005
1006 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1007 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1008 }
1009 }
1010 }
1011 }
1012
1013 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1014 {
1015 char buf[32];
1016 /* Subtract 1 so that the number shown is that of the corresponding
1017 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1018 * the instruction number of the corresponding TGSI IF.
1019 */
1020 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1021 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1022 }
1023
1024 /* Append a basic block at the level of the parent flow.
1025 */
1026 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1027 const char *name)
1028 {
1029 struct gallivm_state *gallivm = &ctx->gallivm;
1030
1031 assert(ctx->flow_depth >= 1);
1032
1033 if (ctx->flow_depth >= 2) {
1034 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1035
1036 return LLVMInsertBasicBlockInContext(gallivm->context,
1037 flow->next_block, name);
1038 }
1039
1040 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1041 }
1042
1043 /* Emit a branch to the given default target for the current block if
1044 * applicable -- that is, if the current block does not already contain a
1045 * branch from a break or continue.
1046 */
1047 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1048 {
1049 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1050 LLVMBuildBr(builder, target);
1051 }
1052
1053 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1054 struct lp_build_tgsi_context *bld_base,
1055 struct lp_build_emit_data *emit_data)
1056 {
1057 struct si_shader_context *ctx = si_shader_context(bld_base);
1058 struct gallivm_state *gallivm = &ctx->gallivm;
1059 struct si_llvm_flow *flow = push_flow(ctx);
1060 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1061 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1062 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1063 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1064 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1065 }
1066
1067 static void brk_emit(const struct lp_build_tgsi_action *action,
1068 struct lp_build_tgsi_context *bld_base,
1069 struct lp_build_emit_data *emit_data)
1070 {
1071 struct si_shader_context *ctx = si_shader_context(bld_base);
1072 struct gallivm_state *gallivm = &ctx->gallivm;
1073 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1074
1075 LLVMBuildBr(gallivm->builder, flow->next_block);
1076 }
1077
1078 static void cont_emit(const struct lp_build_tgsi_action *action,
1079 struct lp_build_tgsi_context *bld_base,
1080 struct lp_build_emit_data *emit_data)
1081 {
1082 struct si_shader_context *ctx = si_shader_context(bld_base);
1083 struct gallivm_state *gallivm = &ctx->gallivm;
1084 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1085
1086 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1087 }
1088
1089 static void else_emit(const struct lp_build_tgsi_action *action,
1090 struct lp_build_tgsi_context *bld_base,
1091 struct lp_build_emit_data *emit_data)
1092 {
1093 struct si_shader_context *ctx = si_shader_context(bld_base);
1094 struct gallivm_state *gallivm = &ctx->gallivm;
1095 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1096 LLVMBasicBlockRef endif_block;
1097
1098 assert(!current_branch->loop_entry_block);
1099
1100 endif_block = append_basic_block(ctx, "ENDIF");
1101 emit_default_branch(gallivm->builder, endif_block);
1102
1103 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1104 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1105
1106 current_branch->next_block = endif_block;
1107 }
1108
1109 static void endif_emit(const struct lp_build_tgsi_action *action,
1110 struct lp_build_tgsi_context *bld_base,
1111 struct lp_build_emit_data *emit_data)
1112 {
1113 struct si_shader_context *ctx = si_shader_context(bld_base);
1114 struct gallivm_state *gallivm = &ctx->gallivm;
1115 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1116
1117 assert(!current_branch->loop_entry_block);
1118
1119 emit_default_branch(gallivm->builder, current_branch->next_block);
1120 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1121 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1122
1123 ctx->flow_depth--;
1124 }
1125
1126 static void endloop_emit(const struct lp_build_tgsi_action *action,
1127 struct lp_build_tgsi_context *bld_base,
1128 struct lp_build_emit_data *emit_data)
1129 {
1130 struct si_shader_context *ctx = si_shader_context(bld_base);
1131 struct gallivm_state *gallivm = &ctx->gallivm;
1132 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1133
1134 assert(current_loop->loop_entry_block);
1135
1136 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1137
1138 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1139 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1140 ctx->flow_depth--;
1141 }
1142
1143 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1144 struct lp_build_tgsi_context *bld_base,
1145 struct lp_build_emit_data *emit_data,
1146 LLVMValueRef cond)
1147 {
1148 struct si_shader_context *ctx = si_shader_context(bld_base);
1149 struct gallivm_state *gallivm = &ctx->gallivm;
1150 struct si_llvm_flow *flow = push_flow(ctx);
1151 LLVMBasicBlockRef if_block;
1152
1153 if_block = append_basic_block(ctx, "IF");
1154 flow->next_block = append_basic_block(ctx, "ELSE");
1155 set_basicblock_name(if_block, "if", bld_base->pc);
1156 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1157 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1158 }
1159
1160 static void if_emit(const struct lp_build_tgsi_action *action,
1161 struct lp_build_tgsi_context *bld_base,
1162 struct lp_build_emit_data *emit_data)
1163 {
1164 struct gallivm_state *gallivm = bld_base->base.gallivm;
1165 LLVMValueRef cond;
1166
1167 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1168 emit_data->args[0],
1169 bld_base->base.zero, "");
1170
1171 if_cond_emit(action, bld_base, emit_data, cond);
1172 }
1173
1174 static void uif_emit(const struct lp_build_tgsi_action *action,
1175 struct lp_build_tgsi_context *bld_base,
1176 struct lp_build_emit_data *emit_data)
1177 {
1178 struct gallivm_state *gallivm = bld_base->base.gallivm;
1179 LLVMValueRef cond;
1180
1181 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1182 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1183 bld_base->int_bld.zero, "");
1184
1185 if_cond_emit(action, bld_base, emit_data, cond);
1186 }
1187
1188 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1189 const struct tgsi_full_immediate *imm)
1190 {
1191 unsigned i;
1192 struct si_shader_context *ctx = si_shader_context(bld_base);
1193
1194 for (i = 0; i < 4; ++i) {
1195 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1196 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
1197 }
1198
1199 ctx->imms_num++;
1200 }
1201
1202 void si_llvm_context_init(struct si_shader_context *ctx,
1203 struct si_screen *sscreen,
1204 LLVMTargetMachineRef tm)
1205 {
1206 struct lp_type type;
1207
1208 /* Initialize the gallivm object:
1209 * We are only using the module, context, and builder fields of this struct.
1210 * This should be enough for us to be able to pass our gallivm struct to the
1211 * helper functions in the gallivm module.
1212 */
1213 memset(ctx, 0, sizeof(*ctx));
1214 ctx->screen = sscreen;
1215 ctx->tm = tm;
1216
1217 ctx->gallivm.context = LLVMContextCreate();
1218 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1219 ctx->gallivm.context);
1220 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1221
1222 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1223 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1224 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1225 LLVMDisposeTargetData(data_layout);
1226 LLVMDisposeMessage(data_layout_str);
1227
1228 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1229 enum lp_float_mode float_mode =
1230 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1231 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1232
1233 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1234 float_mode);
1235
1236 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1237 ctx->ac.module = ctx->gallivm.module;
1238 ctx->ac.builder = ctx->gallivm.builder;
1239
1240 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1241
1242 type.floating = true;
1243 type.fixed = false;
1244 type.sign = true;
1245 type.norm = false;
1246 type.width = 32;
1247 type.length = 1;
1248
1249 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1250 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1251 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1252 type.width *= 2;
1253 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1254 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1255 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1256
1257 bld_base->soa = 1;
1258 bld_base->emit_swizzle = emit_swizzle;
1259 bld_base->emit_declaration = emit_declaration;
1260 bld_base->emit_immediate = emit_immediate;
1261
1262 /* metadata allowing 2.5 ULP */
1263 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1264 "fpmath", 6);
1265 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1266 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1267 &arg, 1);
1268
1269 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1270 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1271 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1272 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1273 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1274 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1275 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1276 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1277
1278 si_shader_context_init_alu(&ctx->bld_base);
1279
1280 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1281 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1282 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1283 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1284 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1285 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1286 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1287 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1288 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1289 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1290 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1291
1292 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1293 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1294 }
1295
1296 /* Set the context to a certain TGSI shader. Can be called repeatedly
1297 * to change the shader. */
1298 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1299 struct si_shader *shader)
1300 {
1301 const struct tgsi_shader_info *info = NULL;
1302 const struct tgsi_token *tokens = NULL;
1303
1304 if (shader && shader->selector) {
1305 info = &shader->selector->info;
1306 tokens = shader->selector->tokens;
1307 }
1308
1309 ctx->shader = shader;
1310 ctx->type = info ? info->processor : -1;
1311 ctx->bld_base.info = info;
1312
1313 /* Clean up the old contents. */
1314 FREE(ctx->temp_arrays);
1315 ctx->temp_arrays = NULL;
1316 FREE(ctx->temp_array_allocas);
1317 ctx->temp_array_allocas = NULL;
1318
1319 FREE(ctx->imms);
1320 ctx->imms = NULL;
1321 ctx->imms_num = 0;
1322
1323 FREE(ctx->temps);
1324 ctx->temps = NULL;
1325 ctx->temps_count = 0;
1326
1327 if (!info || !tokens)
1328 return;
1329
1330 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1331 int size = info->array_max[TGSI_FILE_TEMPORARY];
1332
1333 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1334 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1335
1336 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1337 ctx->temp_arrays);
1338 }
1339 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1340 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1341 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1342 }
1343
1344 /* Re-set these to start with a clean slate. */
1345 ctx->bld_base.num_instructions = 0;
1346 ctx->bld_base.pc = 0;
1347 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1348
1349 ctx->bld_base.emit_store = si_llvm_emit_store;
1350 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1351 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1352 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1353 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1354 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1355 }
1356
1357 void si_llvm_create_func(struct si_shader_context *ctx,
1358 const char *name,
1359 LLVMTypeRef *return_types, unsigned num_return_elems,
1360 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1361 {
1362 LLVMTypeRef main_fn_type, ret_type;
1363 LLVMBasicBlockRef main_fn_body;
1364 enum si_llvm_calling_convention call_conv;
1365
1366 if (num_return_elems)
1367 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1368 return_types,
1369 num_return_elems, true);
1370 else
1371 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1372
1373 /* Setup the function */
1374 ctx->return_type = ret_type;
1375 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1376 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1377 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1378 ctx->main_fn, "main_body");
1379 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1380
1381 switch (ctx->type) {
1382 case PIPE_SHADER_VERTEX:
1383 case PIPE_SHADER_TESS_CTRL:
1384 case PIPE_SHADER_TESS_EVAL:
1385 call_conv = RADEON_LLVM_AMDGPU_VS;
1386 break;
1387 case PIPE_SHADER_GEOMETRY:
1388 call_conv = RADEON_LLVM_AMDGPU_GS;
1389 break;
1390 case PIPE_SHADER_FRAGMENT:
1391 call_conv = RADEON_LLVM_AMDGPU_PS;
1392 break;
1393 case PIPE_SHADER_COMPUTE:
1394 call_conv = RADEON_LLVM_AMDGPU_CS;
1395 break;
1396 default:
1397 unreachable("Unhandle shader type");
1398 }
1399
1400 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1401 }
1402
1403 void si_llvm_optimize_module(struct si_shader_context *ctx)
1404 {
1405 struct gallivm_state *gallivm = &ctx->gallivm;
1406 const char *triple = LLVMGetTarget(gallivm->module);
1407 LLVMTargetLibraryInfoRef target_library_info;
1408
1409 /* Dump LLVM IR before any optimization passes */
1410 if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1411 r600_can_dump_shader(&ctx->screen->b, ctx->type))
1412 LLVMDumpModule(ctx->gallivm.module);
1413
1414 /* Create the pass manager */
1415 gallivm->passmgr = LLVMCreatePassManager();
1416
1417 target_library_info = gallivm_create_target_library_info(triple);
1418 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1419
1420 if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1421 LLVMAddVerifierPass(gallivm->passmgr);
1422
1423 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1424
1425 /* This pass should eliminate all the load and store instructions */
1426 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1427
1428 /* Add some optimization passes */
1429 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1430 LLVMAddLICMPass(gallivm->passmgr);
1431 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1432 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1433 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1434
1435 /* Run the pass */
1436 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1437
1438 LLVMDisposeBuilder(gallivm->builder);
1439 LLVMDisposePassManager(gallivm->passmgr);
1440 gallivm_dispose_target_library_info(target_library_info);
1441 }
1442
1443 void si_llvm_dispose(struct si_shader_context *ctx)
1444 {
1445 LLVMDisposeModule(ctx->gallivm.module);
1446 LLVMContextDispose(ctx->gallivm.context);
1447 FREE(ctx->temp_arrays);
1448 ctx->temp_arrays = NULL;
1449 FREE(ctx->temp_array_allocas);
1450 ctx->temp_array_allocas = NULL;
1451 FREE(ctx->temps);
1452 ctx->temps = NULL;
1453 ctx->temps_count = 0;
1454 FREE(ctx->imms);
1455 ctx->imms = NULL;
1456 ctx->imms_num = 0;
1457 FREE(ctx->flow);
1458 ctx->flow = NULL;
1459 ctx->flow_depth_max = 0;
1460 }