1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 #include <llvm-c/Support.h>
44
45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
46 */
47 struct si_llvm_flow {
48 /* Loop exit or next part of if/else/endif. */
49 LLVMBasicBlockRef next_block;
50 LLVMBasicBlockRef loop_entry_block;
51 };
52
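/* These values match the AMDGPU_{VS,GS,PS,CS} calling conventions in LLVM. */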
53 enum si_llvm_calling_convention {
54 RADEON_LLVM_AMDGPU_VS = 87,
55 RADEON_LLVM_AMDGPU_GS = 88,
56 RADEON_LLVM_AMDGPU_PS = 89,
57 RADEON_LLVM_AMDGPU_CS = 90,
58 };
59
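/* Attach an integer-valued, target-dependent function attribute "name" to F. */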
60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
61 {
62 char str[16];
63
64 snprintf(str, sizeof(str), "%i", value);
65 LLVMAddTargetDependentFunctionAttr(F, name, str);
66 }
67
68 /**
69 * Set the shader type we want to compile
70 *
71 * @param type shader type to set
72 */
73 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
74 {
75 enum si_llvm_calling_convention calling_conv;
76
77 switch (type) {
78 case PIPE_SHADER_VERTEX:
79 case PIPE_SHADER_TESS_CTRL:
80 case PIPE_SHADER_TESS_EVAL:
81 calling_conv = RADEON_LLVM_AMDGPU_VS;
82 break;
83 case PIPE_SHADER_GEOMETRY:
84 calling_conv = RADEON_LLVM_AMDGPU_GS;
85 break;
86 case PIPE_SHADER_FRAGMENT:
87 calling_conv = RADEON_LLVM_AMDGPU_PS;
88 break;
89 case PIPE_SHADER_COMPUTE:
90 calling_conv = RADEON_LLVM_AMDGPU_CS;
91 break;
92 default:
93 		unreachable("Unhandled shader type");
94 }
95
96 LLVMSetFunctionCallConv(F, calling_conv);
97 }
98
99 static void init_amdgpu_target()
100 {
101 gallivm_init_llvm_targets();
102 LLVMInitializeAMDGPUTargetInfo();
103 LLVMInitializeAMDGPUTarget();
104 LLVMInitializeAMDGPUTargetMC();
105 LLVMInitializeAMDGPUAsmPrinter();
106
107 /* For inline assembly. */
108 LLVMInitializeAMDGPUAsmParser();
109
110 if (HAVE_LLVM >= 0x0400) {
111 /*
112 * Workaround for bug in llvm 4.0 that causes image intrinsics
113 * to disappear.
114 * https://reviews.llvm.org/D26348
115 */
116 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
117 LLVMParseCommandLineOptions(2, argv, NULL);
118 }
119 }
120
121 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
122
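/* Look up the LLVM target for the given triple, initializing the AMDGPU
 * backend on first use. Returns NULL if the target cannot be found.
 */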
123 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
124 {
125 LLVMTargetRef target = NULL;
126 char *err_message = NULL;
127
128 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
129
130 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
131 fprintf(stderr, "Cannot find target for triple %s ", triple);
132 if (err_message) {
133 fprintf(stderr, "%s\n", err_message);
134 }
135 LLVMDisposeMessage(err_message);
136 return NULL;
137 }
138 return target;
139 }
140
141 struct si_llvm_diagnostics {
142 struct pipe_debug_callback *debug;
143 unsigned retval;
144 };
145
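/* LLVM diagnostic callback: forward messages to the pipe debug callback and
 * remember errors so that compilation is reported as failed.
 */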
146 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
147 {
148 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
149 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
150 char *description = LLVMGetDiagInfoDescription(di);
151 const char *severity_str = NULL;
152
153 switch (severity) {
154 case LLVMDSError:
155 severity_str = "error";
156 break;
157 case LLVMDSWarning:
158 severity_str = "warning";
159 break;
160 case LLVMDSRemark:
161 severity_str = "remark";
162 break;
163 case LLVMDSNote:
164 severity_str = "note";
165 break;
166 default:
167 severity_str = "unknown";
168 }
169
170 pipe_debug_message(diag->debug, SHADER_INFO,
171 "LLVM diagnostic (%s): %s", severity_str, description);
172
173 if (severity == LLVMDSError) {
174 diag->retval = 1;
175 		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
176 }
177
178 LLVMDisposeMessage(description);
179 }
180
181 /**
182 * Compile an LLVM module to machine code.
183 *
184 * @returns 0 for success, 1 for failure
185 */
186 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
187 LLVMTargetMachineRef tm,
188 struct pipe_debug_callback *debug)
189 {
190 struct si_llvm_diagnostics diag;
191 char *err;
192 LLVMContextRef llvm_ctx;
193 LLVMMemoryBufferRef out_buffer;
194 unsigned buffer_size;
195 const char *buffer_data;
196 LLVMBool mem_err;
197
198 diag.debug = debug;
199 diag.retval = 0;
200
201 	/* Set up the diagnostic handler. */
202 llvm_ctx = LLVMGetModuleContext(M);
203
204 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
205
206 	/* Compile the IR. */
207 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
208 &out_buffer);
209
210 /* Process Errors/Warnings */
211 if (mem_err) {
212 fprintf(stderr, "%s: %s", __FUNCTION__, err);
213 pipe_debug_message(debug, SHADER_INFO,
214 "LLVM emit error: %s", err);
215 FREE(err);
216 diag.retval = 1;
217 goto out;
218 }
219
220 	/* Extract the shader code. */
221 buffer_size = LLVMGetBufferSize(out_buffer);
222 buffer_data = LLVMGetBufferStart(out_buffer);
223
224 ac_elf_read(buffer_data, buffer_size, binary);
225
226 /* Clean up */
227 LLVMDisposeMemoryBuffer(out_buffer);
228
229 out:
230 if (diag.retval != 0)
231 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
232 return diag.retval;
233 }
234
235 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
236 enum tgsi_opcode_type type)
237 {
238 LLVMContextRef ctx = bld_base->base.gallivm->context;
239
240 switch (type) {
241 case TGSI_TYPE_UNSIGNED:
242 case TGSI_TYPE_SIGNED:
243 return LLVMInt32TypeInContext(ctx);
244 case TGSI_TYPE_UNSIGNED64:
245 case TGSI_TYPE_SIGNED64:
246 return LLVMInt64TypeInContext(ctx);
247 case TGSI_TYPE_DOUBLE:
248 return LLVMDoubleTypeInContext(ctx);
249 case TGSI_TYPE_UNTYPED:
250 case TGSI_TYPE_FLOAT:
251 return LLVMFloatTypeInContext(ctx);
252 default: break;
253 }
254 return 0;
255 }
256
257 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
258 enum tgsi_opcode_type type, LLVMValueRef value)
259 {
260 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
261 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
262
263 if (dst_type)
264 return LLVMBuildBitCast(builder, value, dst_type, "");
265 else
266 return value;
267 }
268
269 /**
270 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
271 * or an undefined value in the same interval otherwise.
272 */
273 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
274 LLVMValueRef index,
275 unsigned num)
276 {
277 struct gallivm_state *gallivm = &ctx->gallivm;
278 LLVMBuilderRef builder = gallivm->builder;
279 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
280 LLVMValueRef cc;
281
282 if (util_is_power_of_two(num)) {
283 index = LLVMBuildAnd(builder, index, c_max, "");
284 } else {
285 		 * In theory, this clamp-to-the-maximum pattern should result in code that is
286 * as good as the bit-wise AND above.
287 *
288 * In practice, LLVM generates worse code (at the time of
289 * writing), because its value tracking is not strong enough.
290 */
291 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
292 index = LLVMBuildSelect(builder, cc, index, c_max, "");
293 }
294
295 return index;
296 }
297
298 static struct si_llvm_flow *
299 get_current_flow(struct si_shader_context *ctx)
300 {
301 if (ctx->flow_depth > 0)
302 return &ctx->flow[ctx->flow_depth - 1];
303 return NULL;
304 }
305
306 static struct si_llvm_flow *
307 get_innermost_loop(struct si_shader_context *ctx)
308 {
309 for (unsigned i = ctx->flow_depth; i > 0; --i) {
310 if (ctx->flow[i - 1].loop_entry_block)
311 return &ctx->flow[i - 1];
312 }
313 return NULL;
314 }
315
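/* Push a new control-flow entry, growing the flow stack as needed. */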
316 static struct si_llvm_flow *
317 push_flow(struct si_shader_context *ctx)
318 {
319 struct si_llvm_flow *flow;
320
321 if (ctx->flow_depth >= ctx->flow_depth_max) {
322 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
323 ctx->flow = REALLOC(ctx->flow,
324 ctx->flow_depth_max * sizeof(*ctx->flow),
325 new_max * sizeof(*ctx->flow));
326 ctx->flow_depth_max = new_max;
327 }
328
329 flow = &ctx->flow[ctx->flow_depth];
330 ctx->flow_depth++;
331
332 flow->next_block = NULL;
333 flow->loop_entry_block = NULL;
334 return flow;
335 }
336
337 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
338 LLVMValueRef value,
339 unsigned swizzle_x,
340 unsigned swizzle_y,
341 unsigned swizzle_z,
342 unsigned swizzle_w)
343 {
344 LLVMValueRef swizzles[4];
345 LLVMTypeRef i32t =
346 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
347
348 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
349 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
350 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
351 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
352
353 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
354 value,
355 LLVMGetUndef(LLVMTypeOf(value)),
356 LLVMConstVector(swizzles, 4), "");
357 }
358
359 /**
360  * Return the 1-based ID of the array covering the given temporary register
361  * index, or 0 if the register does not belong to any declared array.
362 */
363 static unsigned
364 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
365 unsigned reg_index,
366 const struct tgsi_ind_register *reg)
367 {
368 struct si_shader_context *ctx = si_shader_context(bld_base);
369 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
370 unsigned i;
371
372 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
373 return reg->ArrayID;
374
375 for (i = 0; i < num_arrays; i++) {
376 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
377
378 if (reg_index >= array->range.First && reg_index <= array->range.Last)
379 return i + 1;
380 }
381
382 return 0;
383 }
384
385 static struct tgsi_declaration_range
386 get_array_range(struct lp_build_tgsi_context *bld_base,
387 unsigned File, unsigned reg_index,
388 const struct tgsi_ind_register *reg)
389 {
390 struct si_shader_context *ctx = si_shader_context(bld_base);
391 struct tgsi_declaration_range range;
392
393 if (File == TGSI_FILE_TEMPORARY) {
394 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
395 if (array_id)
396 return ctx->temp_arrays[array_id - 1].range;
397 }
398
399 range.First = 0;
400 range.Last = bld_base->info->file_max[File];
401 return range;
402 }
403
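/* Compute an i32 array index: the loaded address register plus a constant
 * offset, or just the constant offset if there is no indirect register.
 */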
404 static LLVMValueRef
405 emit_array_index(struct si_shader_context *ctx,
406 const struct tgsi_ind_register *reg,
407 unsigned offset)
408 {
409 struct gallivm_state *gallivm = &ctx->gallivm;
410
411 if (!reg) {
412 return LLVMConstInt(ctx->i32, offset, 0);
413 }
414 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
415 return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
416 }
417
418 /**
419 * For indirect registers, construct a pointer directly to the requested
420 * element using getelementptr if possible.
421 *
422 * Returns NULL if the insertelement/extractelement fallback for array access
423 * must be used.
424 */
425 static LLVMValueRef
426 get_pointer_into_array(struct si_shader_context *ctx,
427 unsigned file,
428 unsigned swizzle,
429 unsigned reg_index,
430 const struct tgsi_ind_register *reg_indirect)
431 {
432 unsigned array_id;
433 struct tgsi_array_info *array;
434 struct gallivm_state *gallivm = &ctx->gallivm;
435 LLVMBuilderRef builder = gallivm->builder;
436 LLVMValueRef idxs[2];
437 LLVMValueRef index;
438 LLVMValueRef alloca;
439
440 if (file != TGSI_FILE_TEMPORARY)
441 return NULL;
442
443 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
444 if (!array_id)
445 return NULL;
446
447 alloca = ctx->temp_array_allocas[array_id - 1];
448 if (!alloca)
449 return NULL;
450
451 array = &ctx->temp_arrays[array_id - 1];
452
453 if (!(array->writemask & (1 << swizzle)))
454 return ctx->undef_alloca;
455
456 index = emit_array_index(ctx, reg_indirect,
457 reg_index - ctx->temp_arrays[array_id - 1].range.First);
458
459 /* Ensure that the index is within a valid range, to guard against
460 * VM faults and overwriting critical data (e.g. spilled resource
461 * descriptors).
462 *
463 * TODO It should be possible to avoid the additional instructions
464 	 * if LLVM is changed so that it guarantees:
465 * 1. the scratch space descriptor isolates the current wave (this
466 * could even save the scratch offset SGPR at the cost of an
467 * additional SALU instruction)
468 * 2. the memory for allocas must be allocated at the _end_ of the
469 * scratch space (after spilled registers)
470 */
471 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
472
473 index = LLVMBuildMul(
474 builder, index,
475 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
476 "");
477 index = LLVMBuildAdd(
478 builder, index,
479 LLVMConstInt(ctx->i32,
480 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
481 "");
482 idxs[0] = ctx->i32_0;
483 idxs[1] = index;
484 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
485 }
486
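/* Assemble two 32-bit values into a <2 x i32> vector and bitcast it to the
 * requested 64-bit type.
 */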
487 LLVMValueRef
488 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
489 enum tgsi_opcode_type type,
490 LLVMValueRef ptr,
491 LLVMValueRef ptr2)
492 {
493 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
494 LLVMValueRef result;
495
496 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
497
498 result = LLVMBuildInsertElement(builder,
499 result,
500 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
501 bld_base->int_bld.zero, "");
502 result = LLVMBuildInsertElement(builder,
503 result,
504 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
505 bld_base->int_bld.one, "");
506 return bitcast(bld_base, type, result);
507 }
508
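/* Gather an entire declaration range into a vector, one element per register,
 * for the extractelement/insertelement fallback path.
 */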
509 static LLVMValueRef
510 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
511 unsigned File, enum tgsi_opcode_type type,
512 struct tgsi_declaration_range range,
513 unsigned swizzle)
514 {
515 struct si_shader_context *ctx = si_shader_context(bld_base);
516
517 LLVMBuilderRef builder = ctx->gallivm.builder;
518
519 unsigned i, size = range.Last - range.First + 1;
520 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
521 LLVMValueRef result = LLVMGetUndef(vec);
522
523 struct tgsi_full_src_register tmp_reg = {};
524 tmp_reg.Register.File = File;
525
526 for (i = 0; i < size; ++i) {
527 tmp_reg.Register.Index = i + range.First;
528 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
529 result = LLVMBuildInsertElement(builder, result, temp,
530 LLVMConstInt(ctx->i32, i, 0), "array_vector");
531 }
532 return result;
533 }
534
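/* Load one channel of an indirectly addressed register, either through a
 * pointer into the backing alloca or via the vector-extract fallback.
 */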
535 static LLVMValueRef
536 load_value_from_array(struct lp_build_tgsi_context *bld_base,
537 unsigned file,
538 enum tgsi_opcode_type type,
539 unsigned swizzle,
540 unsigned reg_index,
541 const struct tgsi_ind_register *reg_indirect)
542 {
543 struct si_shader_context *ctx = si_shader_context(bld_base);
544 struct gallivm_state *gallivm = &ctx->gallivm;
545 LLVMBuilderRef builder = gallivm->builder;
546 LLVMValueRef ptr;
547
548 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
549 if (ptr) {
550 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
551 if (tgsi_type_is_64bit(type)) {
552 LLVMValueRef ptr_hi, val_hi;
553 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
554 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
555 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
556 }
557
558 return val;
559 } else {
560 struct tgsi_declaration_range range =
561 get_array_range(bld_base, file, reg_index, reg_indirect);
562 LLVMValueRef index =
563 emit_array_index(ctx, reg_indirect, reg_index - range.First);
564 LLVMValueRef array =
565 emit_array_fetch(bld_base, file, type, range, swizzle);
566 return LLVMBuildExtractElement(builder, array, index, "");
567 }
568 }
569
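/* Store one channel of an indirectly addressed register; without a backing
 * alloca, the whole range is re-fetched as a vector, updated and written back.
 */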
570 static void
571 store_value_to_array(struct lp_build_tgsi_context *bld_base,
572 LLVMValueRef value,
573 unsigned file,
574 unsigned chan_index,
575 unsigned reg_index,
576 const struct tgsi_ind_register *reg_indirect)
577 {
578 struct si_shader_context *ctx = si_shader_context(bld_base);
579 struct gallivm_state *gallivm = &ctx->gallivm;
580 LLVMBuilderRef builder = gallivm->builder;
581 LLVMValueRef ptr;
582
583 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
584 if (ptr) {
585 LLVMBuildStore(builder, value, ptr);
586 } else {
587 unsigned i, size;
588 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
589 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
590 LLVMValueRef array =
591 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
592 LLVMValueRef temp_ptr;
593
594 array = LLVMBuildInsertElement(builder, array, value, index, "");
595
596 size = range.Last - range.First + 1;
597 for (i = 0; i < size; ++i) {
598 switch(file) {
599 case TGSI_FILE_OUTPUT:
600 temp_ptr = ctx->outputs[i + range.First][chan_index];
601 break;
602
603 case TGSI_FILE_TEMPORARY:
604 if (range.First + i >= ctx->temps_count)
605 continue;
606 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
607 break;
608
609 default:
610 continue;
611 }
612 value = LLVMBuildExtractElement(builder, array,
613 LLVMConstInt(ctx->i32, i, 0), "");
614 LLVMBuildStore(builder, value, temp_ptr);
615 }
616 }
617 }
618
619 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
620 * reload them at each use. This must be true if the shader is using
621 * derivatives and KILL, because KILL can leave the WQM and then a lazy
622 * input load isn't in the WQM anymore.
623 */
624 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
625 {
626 struct si_shader_selector *sel = ctx->shader->selector;
627
628 return sel->info.uses_derivatives &&
629 sel->info.uses_kill;
630 }
631
632 static LLVMValueRef
633 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
634 unsigned chan)
635 {
636 struct si_shader_context *ctx = si_shader_context(bld_base);
637
638 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
639 return ctx->outputs[index][chan];
640 }
641
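/* Fetch a source operand. A swizzle of ~0 gathers all four channels into a
 * vector; 64-bit types consume two consecutive 32-bit channels.
 */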
642 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
643 const struct tgsi_full_src_register *reg,
644 enum tgsi_opcode_type type,
645 unsigned swizzle)
646 {
647 struct si_shader_context *ctx = si_shader_context(bld_base);
648 LLVMBuilderRef builder = ctx->gallivm.builder;
649 LLVMValueRef result = NULL, ptr, ptr2;
650
651 if (swizzle == ~0) {
652 LLVMValueRef values[TGSI_NUM_CHANNELS];
653 unsigned chan;
654 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
655 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
656 }
657 return lp_build_gather_values(&ctx->gallivm, values,
658 TGSI_NUM_CHANNELS);
659 }
660
661 if (reg->Register.Indirect) {
662 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
663 swizzle, reg->Register.Index, &reg->Indirect);
664 return bitcast(bld_base, type, load);
665 }
666
667 switch(reg->Register.File) {
668 case TGSI_FILE_IMMEDIATE: {
669 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
670 if (tgsi_type_is_64bit(type)) {
671 result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
672 result = LLVMConstInsertElement(result,
673 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
674 ctx->i32_0);
675 result = LLVMConstInsertElement(result,
676 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
677 ctx->i32_1);
678 return LLVMConstBitCast(result, ctype);
679 } else {
680 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
681 }
682 }
683
684 case TGSI_FILE_INPUT: {
685 unsigned index = reg->Register.Index;
686 LLVMValueRef input[4];
687
688 /* I don't think doing this for vertex shaders is beneficial.
689 * For those, we want to make sure the VMEM loads are executed
690 * only once. Fragment shaders don't care much, because
691 * v_interp instructions are much cheaper than VMEM loads.
692 */
693 if (!si_preload_fs_inputs(ctx) &&
694 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
695 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
696 else
697 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
698
699 result = input[swizzle];
700
701 if (tgsi_type_is_64bit(type)) {
702 ptr = result;
703 ptr2 = input[swizzle + 1];
704 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
705 }
706 break;
707 }
708
709 case TGSI_FILE_TEMPORARY:
710 if (reg->Register.Index >= ctx->temps_count)
711 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
712 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
713 if (tgsi_type_is_64bit(type)) {
714 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
715 return si_llvm_emit_fetch_64bit(bld_base, type,
716 LLVMBuildLoad(builder, ptr, ""),
717 LLVMBuildLoad(builder, ptr2, ""));
718 }
719 result = LLVMBuildLoad(builder, ptr, "");
720 break;
721
722 case TGSI_FILE_OUTPUT:
723 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
724 if (tgsi_type_is_64bit(type)) {
725 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
726 return si_llvm_emit_fetch_64bit(bld_base, type,
727 LLVMBuildLoad(builder, ptr, ""),
728 LLVMBuildLoad(builder, ptr2, ""));
729 }
730 result = LLVMBuildLoad(builder, ptr, "");
731 break;
732
733 default:
734 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
735 }
736
737 return bitcast(bld_base, type, result);
738 }
739
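/* Fetch a system value that was preloaded into ctx->system_values when its
 * declaration was processed.
 */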
740 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
741 const struct tgsi_full_src_register *reg,
742 enum tgsi_opcode_type type,
743 unsigned swizzle)
744 {
745 struct si_shader_context *ctx = si_shader_context(bld_base);
746 LLVMBuilderRef builder = ctx->gallivm.builder;
747 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
748
749 if (tgsi_type_is_64bit(type)) {
750 LLVMValueRef lo, hi;
751
752 assert(swizzle == 0 || swizzle == 2);
753
754 lo = LLVMBuildExtractElement(
755 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
756 hi = LLVMBuildExtractElement(
757 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
758
759 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
760 }
761
762 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
763 cval = LLVMBuildExtractElement(
764 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
765 } else {
766 assert(swizzle == 0);
767 }
768
769 return bitcast(bld_base, type, cval);
770 }
771
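/* Allocate storage (allocas) for TGSI declarations and preload inputs and
 * system values where required.
 */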
772 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
773 const struct tgsi_full_declaration *decl)
774 {
775 struct si_shader_context *ctx = si_shader_context(bld_base);
776 LLVMBuilderRef builder = ctx->gallivm.builder;
777 unsigned first, last, i;
778 switch(decl->Declaration.File) {
779 case TGSI_FILE_ADDRESS:
780 {
781 unsigned idx;
782 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
783 unsigned chan;
784 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
785 ctx->addrs[idx][chan] = lp_build_alloca_undef(
786 &ctx->gallivm,
787 ctx->i32, "");
788 }
789 }
790 break;
791 }
792
793 case TGSI_FILE_TEMPORARY:
794 {
795 char name[16] = "";
796 LLVMValueRef array_alloca = NULL;
797 unsigned decl_size;
798 unsigned writemask = decl->Declaration.UsageMask;
799 first = decl->Range.First;
800 last = decl->Range.Last;
801 decl_size = 4 * ((last - first) + 1);
802
803 if (decl->Declaration.Array) {
804 unsigned id = decl->Array.ArrayID - 1;
805 unsigned array_size;
806
807 writemask &= ctx->temp_arrays[id].writemask;
808 ctx->temp_arrays[id].writemask = writemask;
809 array_size = ((last - first) + 1) * util_bitcount(writemask);
810
811 /* If the array has more than 16 elements, store it
812 * in memory using an alloca that spans the entire
813 * array.
814 *
815 * Otherwise, store each array element individually.
816 * We will then generate vectors (per-channel, up to
817 * <16 x float> if the usagemask is a single bit) for
818 * indirect addressing.
819 *
820 * Note that 16 is the number of vector elements that
821 * LLVM will store in a register, so theoretically an
822 * array with up to 4 * 16 = 64 elements could be
823 * handled this way, but whether that's a good idea
824 * depends on VGPR register pressure elsewhere.
825 *
826 * FIXME: We shouldn't need to have the non-alloca
827 * code path for arrays. LLVM should be smart enough to
828 * promote allocas into registers when profitable.
829 */
830 if (array_size > 16 ||
831 /* TODO: VGPR indexing is buggy on GFX9. */
832 ctx->screen->b.chip_class == GFX9) {
833 array_alloca = LLVMBuildAlloca(builder,
834 LLVMArrayType(ctx->f32,
835 array_size), "array");
836 ctx->temp_array_allocas[id] = array_alloca;
837 }
838 }
839
840 if (!ctx->temps_count) {
841 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
842 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
843 }
844 if (!array_alloca) {
845 for (i = 0; i < decl_size; ++i) {
846 #ifdef DEBUG
847 snprintf(name, sizeof(name), "TEMP%d.%c",
848 first + i / 4, "xyzw"[i % 4]);
849 #endif
850 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
851 lp_build_alloca_undef(&ctx->gallivm,
852 ctx->f32,
853 name);
854 }
855 } else {
856 LLVMValueRef idxs[2] = {
857 ctx->i32_0,
858 NULL
859 };
860 unsigned j = 0;
861
862 if (writemask != TGSI_WRITEMASK_XYZW &&
863 !ctx->undef_alloca) {
864 /* Create a dummy alloca. We use it so that we
865 * have a pointer that is safe to load from if
866 * a shader ever reads from a channel that
867 * it never writes to.
868 */
869 ctx->undef_alloca = lp_build_alloca_undef(
870 &ctx->gallivm,
871 ctx->f32, "undef");
872 }
873
874 for (i = 0; i < decl_size; ++i) {
875 LLVMValueRef ptr;
876 if (writemask & (1 << (i % 4))) {
877 #ifdef DEBUG
878 snprintf(name, sizeof(name), "TEMP%d.%c",
879 first + i / 4, "xyzw"[i % 4]);
880 #endif
881 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
882 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
883 j++;
884 } else {
885 ptr = ctx->undef_alloca;
886 }
887 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
888 }
889 }
890 break;
891 }
892 case TGSI_FILE_INPUT:
893 {
894 unsigned idx;
895 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
896 if (ctx->load_input &&
897 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
898 ctx->input_decls[idx] = *decl;
899 ctx->input_decls[idx].Range.First = idx;
900 ctx->input_decls[idx].Range.Last = idx;
901 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
902
903 if (si_preload_fs_inputs(ctx) ||
904 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
905 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
906 &ctx->inputs[idx * 4]);
907 }
908 }
909 }
910 break;
911
912 case TGSI_FILE_SYSTEM_VALUE:
913 {
914 unsigned idx;
915 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
916 ctx->load_system_value(ctx, idx, decl);
917 }
918 }
919 break;
920
921 case TGSI_FILE_OUTPUT:
922 {
923 char name[16] = "";
924 unsigned idx;
925 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
926 unsigned chan;
927 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
928 if (ctx->outputs[idx][0])
929 continue;
930 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
931 #ifdef DEBUG
932 snprintf(name, sizeof(name), "OUT%d.%c",
933 idx, "xyzw"[chan % 4]);
934 #endif
935 ctx->outputs[idx][chan] = lp_build_alloca_undef(
936 &ctx->gallivm,
937 ctx->f32, name);
938 }
939 }
940 break;
941 }
942
943 case TGSI_FILE_MEMORY:
944 ctx->declare_memory_region(ctx, decl);
945 break;
946
947 default:
948 break;
949 }
950 }
951
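/* Default destination store: handles saturation, address registers, indirect
 * destinations and the splitting of 64-bit values into two channels.
 */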
952 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
953 const struct tgsi_full_instruction *inst,
954 const struct tgsi_opcode_info *info,
955 LLVMValueRef dst[4])
956 {
957 struct si_shader_context *ctx = si_shader_context(bld_base);
958 struct gallivm_state *gallivm = &ctx->gallivm;
959 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
960 LLVMBuilderRef builder = ctx->gallivm.builder;
961 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
962 unsigned chan, chan_index;
963 bool is_vec_store = false;
964 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
965
966 if (dst[0]) {
967 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
968 is_vec_store = (k == LLVMVectorTypeKind);
969 }
970
971 if (is_vec_store) {
972 LLVMValueRef values[4] = {};
973 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
974 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
975 values[chan] = LLVMBuildExtractElement(gallivm->builder,
976 dst[0], index, "");
977 }
978 bld_base->emit_store(bld_base, inst, info, values);
979 return;
980 }
981
982 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
983 LLVMValueRef value = dst[chan_index];
984
985 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
986 continue;
987 if (inst->Instruction.Saturate)
988 value = ac_build_clamp(&ctx->ac, value);
989
990 if (reg->Register.File == TGSI_FILE_ADDRESS) {
991 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
992 LLVMBuildStore(builder, value, temp_ptr);
993 continue;
994 }
995
996 if (!tgsi_type_is_64bit(dtype))
997 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
998
999 if (reg->Register.Indirect) {
1000 unsigned file = reg->Register.File;
1001 unsigned reg_index = reg->Register.Index;
1002 store_value_to_array(bld_base, value, file, chan_index,
1003 reg_index, &reg->Indirect);
1004 } else {
1005 switch(reg->Register.File) {
1006 case TGSI_FILE_OUTPUT:
1007 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
1008 if (tgsi_type_is_64bit(dtype))
1009 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
1010 break;
1011
1012 case TGSI_FILE_TEMPORARY:
1013 {
1014 if (reg->Register.Index >= ctx->temps_count)
1015 continue;
1016
1017 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1018 if (tgsi_type_is_64bit(dtype))
1019 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1020
1021 break;
1022 }
1023 default:
1024 return;
1025 }
1026 if (!tgsi_type_is_64bit(dtype))
1027 LLVMBuildStore(builder, value, temp_ptr);
1028 else {
1029 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1030 LLVMVectorType(ctx->i32, 2), "");
1031 LLVMValueRef val2;
1032 value = LLVMBuildExtractElement(builder, ptr,
1033 ctx->i32_0, "");
1034 val2 = LLVMBuildExtractElement(builder, ptr,
1035 ctx->i32_1, "");
1036
1037 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1038 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1039 }
1040 }
1041 }
1042 }
1043
1044 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1045 {
1046 char buf[32];
1047 /* Subtract 1 so that the number shown is that of the corresponding
1048 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1049 * the instruction number of the corresponding TGSI IF.
1050 */
1051 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1052 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1053 }
1054
1055 /* Append a basic block at the level of the parent flow.
1056 */
1057 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1058 const char *name)
1059 {
1060 struct gallivm_state *gallivm = &ctx->gallivm;
1061
1062 assert(ctx->flow_depth >= 1);
1063
1064 if (ctx->flow_depth >= 2) {
1065 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1066
1067 return LLVMInsertBasicBlockInContext(gallivm->context,
1068 flow->next_block, name);
1069 }
1070
1071 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1072 }
1073
1074 /* Emit a branch to the given default target for the current block if
1075 * applicable -- that is, if the current block does not already contain a
1076 * branch from a break or continue.
1077 */
1078 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1079 {
1080 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1081 LLVMBuildBr(builder, target);
1082 }
1083
1084 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1085 struct lp_build_tgsi_context *bld_base,
1086 struct lp_build_emit_data *emit_data)
1087 {
1088 struct si_shader_context *ctx = si_shader_context(bld_base);
1089 struct gallivm_state *gallivm = &ctx->gallivm;
1090 struct si_llvm_flow *flow = push_flow(ctx);
1091 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1092 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1093 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1094 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1095 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1096 }
1097
1098 static void brk_emit(const struct lp_build_tgsi_action *action,
1099 struct lp_build_tgsi_context *bld_base,
1100 struct lp_build_emit_data *emit_data)
1101 {
1102 struct si_shader_context *ctx = si_shader_context(bld_base);
1103 struct gallivm_state *gallivm = &ctx->gallivm;
1104 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1105
1106 LLVMBuildBr(gallivm->builder, flow->next_block);
1107 }
1108
1109 static void cont_emit(const struct lp_build_tgsi_action *action,
1110 struct lp_build_tgsi_context *bld_base,
1111 struct lp_build_emit_data *emit_data)
1112 {
1113 struct si_shader_context *ctx = si_shader_context(bld_base);
1114 struct gallivm_state *gallivm = &ctx->gallivm;
1115 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1116
1117 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1118 }
1119
1120 static void else_emit(const struct lp_build_tgsi_action *action,
1121 struct lp_build_tgsi_context *bld_base,
1122 struct lp_build_emit_data *emit_data)
1123 {
1124 struct si_shader_context *ctx = si_shader_context(bld_base);
1125 struct gallivm_state *gallivm = &ctx->gallivm;
1126 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1127 LLVMBasicBlockRef endif_block;
1128
1129 assert(!current_branch->loop_entry_block);
1130
1131 endif_block = append_basic_block(ctx, "ENDIF");
1132 emit_default_branch(gallivm->builder, endif_block);
1133
1134 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1135 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1136
1137 current_branch->next_block = endif_block;
1138 }
1139
1140 static void endif_emit(const struct lp_build_tgsi_action *action,
1141 struct lp_build_tgsi_context *bld_base,
1142 struct lp_build_emit_data *emit_data)
1143 {
1144 struct si_shader_context *ctx = si_shader_context(bld_base);
1145 struct gallivm_state *gallivm = &ctx->gallivm;
1146 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1147
1148 assert(!current_branch->loop_entry_block);
1149
1150 emit_default_branch(gallivm->builder, current_branch->next_block);
1151 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1152 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1153
1154 ctx->flow_depth--;
1155 }
1156
1157 static void endloop_emit(const struct lp_build_tgsi_action *action,
1158 struct lp_build_tgsi_context *bld_base,
1159 struct lp_build_emit_data *emit_data)
1160 {
1161 struct si_shader_context *ctx = si_shader_context(bld_base);
1162 struct gallivm_state *gallivm = &ctx->gallivm;
1163 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1164
1165 assert(current_loop->loop_entry_block);
1166
1167 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1168
1169 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1170 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1171 ctx->flow_depth--;
1172 }
1173
1174 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1175 struct lp_build_tgsi_context *bld_base,
1176 struct lp_build_emit_data *emit_data,
1177 LLVMValueRef cond)
1178 {
1179 struct si_shader_context *ctx = si_shader_context(bld_base);
1180 struct gallivm_state *gallivm = &ctx->gallivm;
1181 struct si_llvm_flow *flow = push_flow(ctx);
1182 LLVMBasicBlockRef if_block;
1183
1184 if_block = append_basic_block(ctx, "IF");
1185 flow->next_block = append_basic_block(ctx, "ELSE");
1186 set_basicblock_name(if_block, "if", bld_base->pc);
1187 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1188 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1189 }
1190
1191 static void if_emit(const struct lp_build_tgsi_action *action,
1192 struct lp_build_tgsi_context *bld_base,
1193 struct lp_build_emit_data *emit_data)
1194 {
1195 struct gallivm_state *gallivm = bld_base->base.gallivm;
1196 LLVMValueRef cond;
1197
1198 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1199 emit_data->args[0],
1200 bld_base->base.zero, "");
1201
1202 if_cond_emit(action, bld_base, emit_data, cond);
1203 }
1204
1205 static void uif_emit(const struct lp_build_tgsi_action *action,
1206 struct lp_build_tgsi_context *bld_base,
1207 struct lp_build_emit_data *emit_data)
1208 {
1209 struct gallivm_state *gallivm = bld_base->base.gallivm;
1210 LLVMValueRef cond;
1211
1212 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1213 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1214 bld_base->int_bld.zero, "");
1215
1216 if_cond_emit(action, bld_base, emit_data, cond);
1217 }
1218
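/* Record the four channels of a TGSI immediate as i32 constants. */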
1219 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1220 const struct tgsi_full_immediate *imm)
1221 {
1222 unsigned i;
1223 struct si_shader_context *ctx = si_shader_context(bld_base);
1224
1225 for (i = 0; i < 4; ++i) {
1226 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1227 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
1228 }
1229
1230 ctx->imms_num++;
1231 }
1232
1233 void si_llvm_context_init(struct si_shader_context *ctx,
1234 struct si_screen *sscreen,
1235 LLVMTargetMachineRef tm)
1236 {
1237 struct lp_type type;
1238
1239 /* Initialize the gallivm object:
1240 * We are only using the module, context, and builder fields of this struct.
1241 * This should be enough for us to be able to pass our gallivm struct to the
1242 * helper functions in the gallivm module.
1243 */
1244 memset(ctx, 0, sizeof(*ctx));
1245 ctx->screen = sscreen;
1246 ctx->tm = tm;
1247
1248 ctx->gallivm.context = LLVMContextCreate();
1249 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1250 ctx->gallivm.context);
1251 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1252
1253 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1254 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1255 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1256 LLVMDisposeTargetData(data_layout);
1257 LLVMDisposeMessage(data_layout_str);
1258
1259 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1260 enum lp_float_mode float_mode =
1261 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1262 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1263
1264 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1265 float_mode);
1266
1267 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1268 ctx->ac.module = ctx->gallivm.module;
1269 ctx->ac.builder = ctx->gallivm.builder;
1270
1271 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1272
1273 type.floating = true;
1274 type.fixed = false;
1275 type.sign = true;
1276 type.norm = false;
1277 type.width = 32;
1278 type.length = 1;
1279
1280 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1281 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1282 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1283 type.width *= 2;
1284 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1285 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1286 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1287
1288 bld_base->soa = 1;
1289 bld_base->emit_swizzle = emit_swizzle;
1290 bld_base->emit_declaration = emit_declaration;
1291 bld_base->emit_immediate = emit_immediate;
1292
1293 /* metadata allowing 2.5 ULP */
1294 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1295 "fpmath", 6);
1296 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1297 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1298 &arg, 1);
1299
1300 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1301 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1302 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1303 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1304 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1305 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1306 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1307 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1308
1309 si_shader_context_init_alu(&ctx->bld_base);
1310
1311 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1312 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1313 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1314 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1315 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1316 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1317 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1318 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1319 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1320 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1321 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1322
1323 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1324 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1325 }
1326
1327 /* Set the context to a certain TGSI shader. Can be called repeatedly
1328 * to change the shader. */
1329 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1330 struct si_shader *shader)
1331 {
1332 const struct tgsi_shader_info *info = NULL;
1333 const struct tgsi_token *tokens = NULL;
1334
1335 if (shader && shader->selector) {
1336 info = &shader->selector->info;
1337 tokens = shader->selector->tokens;
1338 }
1339
1340 ctx->shader = shader;
1341 ctx->type = info ? info->processor : -1;
1342 ctx->bld_base.info = info;
1343
1344 /* Clean up the old contents. */
1345 FREE(ctx->temp_arrays);
1346 ctx->temp_arrays = NULL;
1347 FREE(ctx->temp_array_allocas);
1348 ctx->temp_array_allocas = NULL;
1349
1350 FREE(ctx->imms);
1351 ctx->imms = NULL;
1352 ctx->imms_num = 0;
1353
1354 FREE(ctx->temps);
1355 ctx->temps = NULL;
1356 ctx->temps_count = 0;
1357
1358 if (!info || !tokens)
1359 return;
1360
1361 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1362 int size = info->array_max[TGSI_FILE_TEMPORARY];
1363
1364 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1365 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1366
1367 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1368 ctx->temp_arrays);
1369 }
1370 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1371 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1372 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1373 }
1374
1375 /* Re-set these to start with a clean slate. */
1376 ctx->bld_base.num_instructions = 0;
1377 ctx->bld_base.pc = 0;
1378 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1379
1380 ctx->bld_base.emit_store = si_llvm_emit_store;
1381 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1382 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1383 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1384 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1385 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1386 }
1387
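/* Create the main function with the given return and parameter types and
 * position the builder at the start of its body.
 */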
1388 void si_llvm_create_func(struct si_shader_context *ctx,
1389 const char *name,
1390 LLVMTypeRef *return_types, unsigned num_return_elems,
1391 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1392 {
1393 LLVMTypeRef main_fn_type, ret_type;
1394 LLVMBasicBlockRef main_fn_body;
1395
1396 if (num_return_elems)
1397 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1398 return_types,
1399 num_return_elems, true);
1400 else
1401 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1402
1403 	/* Set up the function. */
1404 ctx->return_type = ret_type;
1405 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1406 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1407 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1408 ctx->main_fn, "main_body");
1409 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1410 }
1411
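/* Run the per-shader LLVM optimization passes (always-inline, mem2reg, SROA,
 * LICM, DCE, CFG simplification, instcombine) on the module.
 */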
1412 void si_llvm_optimize_module(struct si_shader_context *ctx)
1413 {
1414 struct gallivm_state *gallivm = &ctx->gallivm;
1415 const char *triple = LLVMGetTarget(gallivm->module);
1416 LLVMTargetLibraryInfoRef target_library_info;
1417
1418 /* Dump LLVM IR before any optimization passes */
1419 if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1420 r600_can_dump_shader(&ctx->screen->b, ctx->type))
1421 LLVMDumpModule(ctx->gallivm.module);
1422
1423 /* Create the pass manager */
1424 gallivm->passmgr = LLVMCreatePassManager();
1425
1426 target_library_info = gallivm_create_target_library_info(triple);
1427 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1428
1429 if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1430 LLVMAddVerifierPass(gallivm->passmgr);
1431
1432 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1433
1434 /* This pass should eliminate all the load and store instructions */
1435 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1436
1437 /* Add some optimization passes */
1438 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1439 LLVMAddLICMPass(gallivm->passmgr);
1440 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1441 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1442 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1443
1444 /* Run the pass */
1445 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1446
1447 LLVMDisposeBuilder(gallivm->builder);
1448 LLVMDisposePassManager(gallivm->passmgr);
1449 gallivm_dispose_target_library_info(target_library_info);
1450 }
1451
1452 void si_llvm_dispose(struct si_shader_context *ctx)
1453 {
1454 LLVMDisposeModule(ctx->gallivm.module);
1455 LLVMContextDispose(ctx->gallivm.context);
1456 FREE(ctx->temp_arrays);
1457 ctx->temp_arrays = NULL;
1458 FREE(ctx->temp_array_allocas);
1459 ctx->temp_array_allocas = NULL;
1460 FREE(ctx->temps);
1461 ctx->temps = NULL;
1462 ctx->temps_count = 0;
1463 FREE(ctx->imms);
1464 ctx->imms = NULL;
1465 ctx->imms_num = 0;
1466 FREE(ctx->flow);
1467 ctx->flow = NULL;
1468 ctx->flow_depth_max = 0;
1469 }