c733f5a812160d3a96ce23b096e9f3bbbee2ab25
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43 #include <llvm-c/Support.h>
44
45 /* Data for if/else/endif and bgnloop/endloop control flow structures.
46 */
47 struct si_llvm_flow {
48 /* Loop exit or next part of if/else/endif. */
49 LLVMBasicBlockRef next_block;
50 LLVMBasicBlockRef loop_entry_block;
51 };
52
53 #define CPU_STRING_LEN 30
54 #define FS_STRING_LEN 30
55 #define TRIPLE_STRING_LEN 7
56
57 /**
58 * Shader types for the LLVM backend.
59 */
60 enum si_llvm_shader_type {
61 RADEON_LLVM_SHADER_PS = 0,
62 RADEON_LLVM_SHADER_VS = 1,
63 RADEON_LLVM_SHADER_GS = 2,
64 RADEON_LLVM_SHADER_CS = 3,
65 };
66
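/* These values match the AMDGPU_* calling conventions defined in LLVM's
 * llvm/IR/CallingConv.h. */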
67 enum si_llvm_calling_convention {
68 RADEON_LLVM_AMDGPU_VS = 87,
69 RADEON_LLVM_AMDGPU_GS = 88,
70 RADEON_LLVM_AMDGPU_PS = 89,
71 RADEON_LLVM_AMDGPU_CS = 90,
72 };
73
74 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
75 {
76 char str[16];
77
78 snprintf(str, sizeof(str), "%i", value);
79 LLVMAddTargetDependentFunctionAttr(F, name, str);
80 }
81
82 /**
83 * Set the shader type we want to compile
84 *
85 * @param type the PIPE_SHADER_* shader type to set
86 */
87 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
88 {
89 enum si_llvm_shader_type llvm_type;
90 enum si_llvm_calling_convention calling_conv;
91
92 switch (type) {
93 case PIPE_SHADER_VERTEX:
94 case PIPE_SHADER_TESS_CTRL:
95 case PIPE_SHADER_TESS_EVAL:
96 llvm_type = RADEON_LLVM_SHADER_VS;
97 calling_conv = RADEON_LLVM_AMDGPU_VS;
98 break;
99 case PIPE_SHADER_GEOMETRY:
100 llvm_type = RADEON_LLVM_SHADER_GS;
101 calling_conv = RADEON_LLVM_AMDGPU_GS;
102 break;
103 case PIPE_SHADER_FRAGMENT:
104 llvm_type = RADEON_LLVM_SHADER_PS;
105 calling_conv = RADEON_LLVM_AMDGPU_PS;
106 break;
107 case PIPE_SHADER_COMPUTE:
108 llvm_type = RADEON_LLVM_SHADER_CS;
109 calling_conv = RADEON_LLVM_AMDGPU_CS;
110 break;
111 default:
112 unreachable("Unhandled shader type");
113 }
114
115 if (HAVE_LLVM >= 0x0309)
116 LLVMSetFunctionCallConv(F, calling_conv);
117 else
118 si_llvm_add_attribute(F, "ShaderType", llvm_type);
119 }
120
121 static void init_amdgpu_target(void)
122 {
123 gallivm_init_llvm_targets();
124 LLVMInitializeAMDGPUTargetInfo();
125 LLVMInitializeAMDGPUTarget();
126 LLVMInitializeAMDGPUTargetMC();
127 LLVMInitializeAMDGPUAsmPrinter();
128
129 /* For inline assembly. */
130 LLVMInitializeAMDGPUAsmParser();
131
132 if (HAVE_LLVM >= 0x0400) {
133 /*
134 * Workaround for bug in llvm 4.0 that causes image intrinsics
135 * to disappear.
136 * https://reviews.llvm.org/D26348
137 */
138 const char *argv[2] = {"mesa", "-simplifycfg-sink-common=false"};
139 LLVMParseCommandLineOptions(2, argv, NULL);
140 }
141 }
142
143 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
144
145 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
146 {
147 LLVMTargetRef target = NULL;
148 char *err_message = NULL;
149
150 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
151
152 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
153 fprintf(stderr, "Cannot find target for triple %s ", triple);
154 if (err_message) {
155 fprintf(stderr, "%s\n", err_message);
156 }
157 LLVMDisposeMessage(err_message);
158 return NULL;
159 }
160 return target;
161 }
162
163 struct si_llvm_diagnostics {
164 struct pipe_debug_callback *debug;
165 unsigned retval;
166 };
167
168 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
169 {
170 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
171 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
172 char *description = LLVMGetDiagInfoDescription(di);
173 const char *severity_str = NULL;
174
175 switch (severity) {
176 case LLVMDSError:
177 severity_str = "error";
178 break;
179 case LLVMDSWarning:
180 severity_str = "warning";
181 break;
182 case LLVMDSRemark:
183 severity_str = "remark";
184 break;
185 case LLVMDSNote:
186 severity_str = "note";
187 break;
188 default:
189 severity_str = "unknown";
190 }
191
192 pipe_debug_message(diag->debug, SHADER_INFO,
193 "LLVM diagnostic (%s): %s", severity_str, description);
194
195 if (severity == LLVMDSError) {
196 diag->retval = 1;
197 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
198 }
199
200 LLVMDisposeMessage(description);
201 }
202
203 /**
204 * Compile an LLVM module to machine code.
205 *
206 * @returns 0 for success, 1 for failure
207 */
208 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
209 LLVMTargetMachineRef tm,
210 struct pipe_debug_callback *debug)
211 {
212 struct si_llvm_diagnostics diag;
213 char *err;
214 LLVMContextRef llvm_ctx;
215 LLVMMemoryBufferRef out_buffer;
216 unsigned buffer_size;
217 const char *buffer_data;
218 LLVMBool mem_err;
219
220 diag.debug = debug;
221 diag.retval = 0;
222
223 /* Set up the diagnostic handler. */
224 llvm_ctx = LLVMGetModuleContext(M);
225
226 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
227
228 /* Compile the IR. */
229 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
230 &out_buffer);
231
232 /* Process Errors/Warnings */
233 if (mem_err) {
234 fprintf(stderr, "%s: %s", __FUNCTION__, err);
235 pipe_debug_message(debug, SHADER_INFO,
236 "LLVM emit error: %s", err);
237 FREE(err);
238 diag.retval = 1;
239 goto out;
240 }
241
242 /* Extract the shader code. */
243 buffer_size = LLVMGetBufferSize(out_buffer);
244 buffer_data = LLVMGetBufferStart(out_buffer);
245
246 ac_elf_read(buffer_data, buffer_size, binary);
247
248 /* Clean up */
249 LLVMDisposeMemoryBuffer(out_buffer);
250
251 out:
252 if (diag.retval != 0)
253 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
254 return diag.retval;
255 }
256
257 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
258 enum tgsi_opcode_type type)
259 {
260 LLVMContextRef ctx = bld_base->base.gallivm->context;
261
262 switch (type) {
263 case TGSI_TYPE_UNSIGNED:
264 case TGSI_TYPE_SIGNED:
265 return LLVMInt32TypeInContext(ctx);
266 case TGSI_TYPE_UNSIGNED64:
267 case TGSI_TYPE_SIGNED64:
268 return LLVMInt64TypeInContext(ctx);
269 case TGSI_TYPE_DOUBLE:
270 return LLVMDoubleTypeInContext(ctx);
271 case TGSI_TYPE_UNTYPED:
272 case TGSI_TYPE_FLOAT:
273 return LLVMFloatTypeInContext(ctx);
274 default: break;
275 }
276 return 0;
277 }
278
279 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
280 enum tgsi_opcode_type type, LLVMValueRef value)
281 {
282 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
283 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
284
285 if (dst_type)
286 return LLVMBuildBitCast(builder, value, dst_type, "");
287 else
288 return value;
289 }
290
291 /**
292 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
293 * or an undefined value in the same interval otherwise.
294 */
295 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
296 LLVMValueRef index,
297 unsigned num)
298 {
299 struct gallivm_state *gallivm = &ctx->gallivm;
300 LLVMBuilderRef builder = gallivm->builder;
301 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
302 LLVMValueRef cc;
303
304 if (util_is_power_of_two(num)) {
305 index = LLVMBuildAnd(builder, index, c_max, "");
306 } else {
307 /* In theory, this MAX pattern should result in code that is
308 * as good as the bit-wise AND above.
309 *
310 * In practice, LLVM generates worse code (at the time of
311 * writing), because its value tracking is not strong enough.
312 */
313 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
314 index = LLVMBuildSelect(builder, cc, index, c_max, "");
315 }
316
317 return index;
318 }
319
320 static struct si_llvm_flow *
321 get_current_flow(struct si_shader_context *ctx)
322 {
323 if (ctx->flow_depth > 0)
324 return &ctx->flow[ctx->flow_depth - 1];
325 return NULL;
326 }
327
328 static struct si_llvm_flow *
329 get_innermost_loop(struct si_shader_context *ctx)
330 {
331 for (unsigned i = ctx->flow_depth; i > 0; --i) {
332 if (ctx->flow[i - 1].loop_entry_block)
333 return &ctx->flow[i - 1];
334 }
335 return NULL;
336 }
337
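/* Push a new control-flow stack entry, growing the flow array geometrically
 * when it is full. */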
338 static struct si_llvm_flow *
339 push_flow(struct si_shader_context *ctx)
340 {
341 struct si_llvm_flow *flow;
342
343 if (ctx->flow_depth >= ctx->flow_depth_max) {
344 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
345 ctx->flow = REALLOC(ctx->flow,
346 ctx->flow_depth_max * sizeof(*ctx->flow),
347 new_max * sizeof(*ctx->flow));
348 ctx->flow_depth_max = new_max;
349 }
350
351 flow = &ctx->flow[ctx->flow_depth];
352 ctx->flow_depth++;
353
354 flow->next_block = NULL;
355 flow->loop_entry_block = NULL;
356 return flow;
357 }
358
359 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
360 LLVMValueRef value,
361 unsigned swizzle_x,
362 unsigned swizzle_y,
363 unsigned swizzle_z,
364 unsigned swizzle_w)
365 {
366 LLVMValueRef swizzles[4];
367 LLVMTypeRef i32t =
368 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
369
370 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
371 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
372 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
373 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
374
375 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
376 value,
377 LLVMGetUndef(LLVMTypeOf(value)),
378 LLVMConstVector(swizzles, 4), "");
379 }
380
381 /**
382 * Return the 1-based ID of the array covering the given temporary register
383 * index, or 0 if the register is not part of any declared array.
384 */
385 static unsigned
386 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
387 unsigned reg_index,
388 const struct tgsi_ind_register *reg)
389 {
390 struct si_shader_context *ctx = si_shader_context(bld_base);
391 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
392 unsigned i;
393
394 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
395 return reg->ArrayID;
396
397 for (i = 0; i < num_arrays; i++) {
398 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
399
400 if (reg_index >= array->range.First && reg_index <= array->range.Last)
401 return i + 1;
402 }
403
404 return 0;
405 }
406
407 static struct tgsi_declaration_range
408 get_array_range(struct lp_build_tgsi_context *bld_base,
409 unsigned File, unsigned reg_index,
410 const struct tgsi_ind_register *reg)
411 {
412 struct si_shader_context *ctx = si_shader_context(bld_base);
413 struct tgsi_declaration_range range;
414
415 if (File == TGSI_FILE_TEMPORARY) {
416 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
417 if (array_id)
418 return ctx->temp_arrays[array_id - 1].range;
419 }
420
421 range.First = 0;
422 range.Last = bld_base->info->file_max[File];
423 return range;
424 }
425
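/* Build an i32 index for indirect addressing: the current value of the TGSI
 * address register (if any) plus a constant offset. */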
426 static LLVMValueRef
427 emit_array_index(struct si_shader_context *ctx,
428 const struct tgsi_ind_register *reg,
429 unsigned offset)
430 {
431 struct gallivm_state *gallivm = &ctx->gallivm;
432
433 if (!reg) {
434 return LLVMConstInt(ctx->i32, offset, 0);
435 }
436 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
437 return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
438 }
439
440 /**
441 * For indirect registers, construct a pointer directly to the requested
442 * element using getelementptr if possible.
443 *
444 * Returns NULL if the insertelement/extractelement fallback for array access
445 * must be used.
446 */
447 static LLVMValueRef
448 get_pointer_into_array(struct si_shader_context *ctx,
449 unsigned file,
450 unsigned swizzle,
451 unsigned reg_index,
452 const struct tgsi_ind_register *reg_indirect)
453 {
454 unsigned array_id;
455 struct tgsi_array_info *array;
456 struct gallivm_state *gallivm = &ctx->gallivm;
457 LLVMBuilderRef builder = gallivm->builder;
458 LLVMValueRef idxs[2];
459 LLVMValueRef index;
460 LLVMValueRef alloca;
461
462 if (file != TGSI_FILE_TEMPORARY)
463 return NULL;
464
465 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
466 if (!array_id)
467 return NULL;
468
469 alloca = ctx->temp_array_allocas[array_id - 1];
470 if (!alloca)
471 return NULL;
472
473 array = &ctx->temp_arrays[array_id - 1];
474
475 if (!(array->writemask & (1 << swizzle)))
476 return ctx->undef_alloca;
477
478 index = emit_array_index(ctx, reg_indirect,
479 reg_index - ctx->temp_arrays[array_id - 1].range.First);
480
481 /* Ensure that the index is within a valid range, to guard against
482 * VM faults and overwriting critical data (e.g. spilled resource
483 * descriptors).
484 *
485 * TODO It should be possible to avoid the additional instructions
486 * if LLVM is changed so that it guarantees:
487 * 1. the scratch space descriptor isolates the current wave (this
488 * could even save the scratch offset SGPR at the cost of an
489 * additional SALU instruction)
490 * 2. the memory for allocas must be allocated at the _end_ of the
491 * scratch space (after spilled registers)
492 */
493 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
494
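/* The alloca only stores channels that are actually written, so scale the
 * element index by the number of written channels and add this channel's
 * position among them. */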
495 index = LLVMBuildMul(
496 builder, index,
497 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
498 "");
499 index = LLVMBuildAdd(
500 builder, index,
501 LLVMConstInt(ctx->i32,
502 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
503 "");
504 idxs[0] = ctx->i32_0;
505 idxs[1] = index;
506 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
507 }
508
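/* Combine two 32-bit values (low and high dwords) into a single 64-bit value
 * of the requested TGSI type by packing them into a 2 x i32 vector and
 * bitcasting. */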
509 LLVMValueRef
510 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
511 enum tgsi_opcode_type type,
512 LLVMValueRef ptr,
513 LLVMValueRef ptr2)
514 {
515 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
516 LLVMValueRef result;
517
518 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
519
520 result = LLVMBuildInsertElement(builder,
521 result,
522 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
523 bld_base->int_bld.zero, "");
524 result = LLVMBuildInsertElement(builder,
525 result,
526 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
527 bld_base->int_bld.one, "");
528 return bitcast(bld_base, type, result);
529 }
530
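/* Fetch one channel of every register in the given declaration range and
 * return the values as a vector, for the extractelement-based
 * indirect-addressing fallback. */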
531 static LLVMValueRef
532 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
533 unsigned File, enum tgsi_opcode_type type,
534 struct tgsi_declaration_range range,
535 unsigned swizzle)
536 {
537 struct si_shader_context *ctx = si_shader_context(bld_base);
538
539 LLVMBuilderRef builder = ctx->gallivm.builder;
540
541 unsigned i, size = range.Last - range.First + 1;
542 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
543 LLVMValueRef result = LLVMGetUndef(vec);
544
545 struct tgsi_full_src_register tmp_reg = {};
546 tmp_reg.Register.File = File;
547
548 for (i = 0; i < size; ++i) {
549 tmp_reg.Register.Index = i + range.First;
550 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
551 result = LLVMBuildInsertElement(builder, result, temp,
552 LLVMConstInt(ctx->i32, i, 0), "array_vector");
553 }
554 return result;
555 }
556
557 static LLVMValueRef
558 load_value_from_array(struct lp_build_tgsi_context *bld_base,
559 unsigned file,
560 enum tgsi_opcode_type type,
561 unsigned swizzle,
562 unsigned reg_index,
563 const struct tgsi_ind_register *reg_indirect)
564 {
565 struct si_shader_context *ctx = si_shader_context(bld_base);
566 struct gallivm_state *gallivm = &ctx->gallivm;
567 LLVMBuilderRef builder = gallivm->builder;
568 LLVMValueRef ptr;
569
570 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
571 if (ptr) {
572 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
573 if (tgsi_type_is_64bit(type)) {
574 LLVMValueRef ptr_hi, val_hi;
575 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
576 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
577 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
578 }
579
580 return val;
581 } else {
582 struct tgsi_declaration_range range =
583 get_array_range(bld_base, file, reg_index, reg_indirect);
584 LLVMValueRef index =
585 emit_array_index(ctx, reg_indirect, reg_index - range.First);
586 LLVMValueRef array =
587 emit_array_fetch(bld_base, file, type, range, swizzle);
588 return LLVMBuildExtractElement(builder, array, index, "");
589 }
590 }
591
592 static void
593 store_value_to_array(struct lp_build_tgsi_context *bld_base,
594 LLVMValueRef value,
595 unsigned file,
596 unsigned chan_index,
597 unsigned reg_index,
598 const struct tgsi_ind_register *reg_indirect)
599 {
600 struct si_shader_context *ctx = si_shader_context(bld_base);
601 struct gallivm_state *gallivm = &ctx->gallivm;
602 LLVMBuilderRef builder = gallivm->builder;
603 LLVMValueRef ptr;
604
605 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
606 if (ptr) {
607 LLVMBuildStore(builder, value, ptr);
608 } else {
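/* No alloca: rebuild the whole range as a vector, insert the new value at
 * the dynamic index, and write every element back to its per-channel
 * pointer. */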
609 unsigned i, size;
610 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
611 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
612 LLVMValueRef array =
613 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
614 LLVMValueRef temp_ptr;
615
616 array = LLVMBuildInsertElement(builder, array, value, index, "");
617
618 size = range.Last - range.First + 1;
619 for (i = 0; i < size; ++i) {
620 switch(file) {
621 case TGSI_FILE_OUTPUT:
622 temp_ptr = ctx->outputs[i + range.First][chan_index];
623 break;
624
625 case TGSI_FILE_TEMPORARY:
626 if (range.First + i >= ctx->temps_count)
627 continue;
628 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
629 break;
630
631 default:
632 continue;
633 }
634 value = LLVMBuildExtractElement(builder, array,
635 LLVMConstInt(ctx->i32, i, 0), "");
636 LLVMBuildStore(builder, value, temp_ptr);
637 }
638 }
639 }
640
641 /* If this returns true, preload FS inputs at the beginning of the shader.
642 * Otherwise, reload them at each use. Preloading is required when the shader
643 * uses both derivatives and KILL, because KILL can leave whole-quad mode (WQM),
644 * and a lazy input load executed afterwards would no longer be in WQM.
645 */
646 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
647 {
648 struct si_shader_selector *sel = ctx->shader->selector;
649
650 return sel->info.uses_derivatives &&
651 sel->info.uses_kill;
652 }
653
654 static LLVMValueRef
655 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
656 unsigned chan)
657 {
658 struct si_shader_context *ctx = si_shader_context(bld_base);
659
660 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
661 return ctx->outputs[index][chan];
662 }
663
664 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
665 const struct tgsi_full_src_register *reg,
666 enum tgsi_opcode_type type,
667 unsigned swizzle)
668 {
669 struct si_shader_context *ctx = si_shader_context(bld_base);
670 LLVMBuilderRef builder = ctx->gallivm.builder;
671 LLVMValueRef result = NULL, ptr, ptr2;
672
673 if (swizzle == ~0) {
674 LLVMValueRef values[TGSI_NUM_CHANNELS];
675 unsigned chan;
676 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
677 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
678 }
679 return lp_build_gather_values(&ctx->gallivm, values,
680 TGSI_NUM_CHANNELS);
681 }
682
683 if (reg->Register.Indirect) {
684 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
685 swizzle, reg->Register.Index, &reg->Indirect);
686 return bitcast(bld_base, type, load);
687 }
688
689 switch(reg->Register.File) {
690 case TGSI_FILE_IMMEDIATE: {
691 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
692 if (tgsi_type_is_64bit(type)) {
693 result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
694 result = LLVMConstInsertElement(result,
695 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
696 ctx->i32_0);
697 result = LLVMConstInsertElement(result,
698 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
699 ctx->i32_1);
700 return LLVMConstBitCast(result, ctype);
701 } else {
702 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
703 }
704 }
705
706 case TGSI_FILE_INPUT: {
707 unsigned index = reg->Register.Index;
708 LLVMValueRef input[4];
709
710 /* I don't think doing this for vertex shaders is beneficial.
711 * For those, we want to make sure the VMEM loads are executed
712 * only once. Fragment shaders don't care much, because
713 * v_interp instructions are much cheaper than VMEM loads.
714 */
715 if (!si_preload_fs_inputs(ctx) &&
716 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
717 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
718 else
719 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
720
721 result = input[swizzle];
722
723 if (tgsi_type_is_64bit(type)) {
724 ptr = result;
725 ptr2 = input[swizzle + 1];
726 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
727 }
728 break;
729 }
730
731 case TGSI_FILE_TEMPORARY:
732 if (reg->Register.Index >= ctx->temps_count)
733 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
734 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
735 if (tgsi_type_is_64bit(type)) {
736 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
737 return si_llvm_emit_fetch_64bit(bld_base, type,
738 LLVMBuildLoad(builder, ptr, ""),
739 LLVMBuildLoad(builder, ptr2, ""));
740 }
741 result = LLVMBuildLoad(builder, ptr, "");
742 break;
743
744 case TGSI_FILE_OUTPUT:
745 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
746 if (tgsi_type_is_64bit(type)) {
747 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
748 return si_llvm_emit_fetch_64bit(bld_base, type,
749 LLVMBuildLoad(builder, ptr, ""),
750 LLVMBuildLoad(builder, ptr2, ""));
751 }
752 result = LLVMBuildLoad(builder, ptr, "");
753 break;
754
755 default:
756 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
757 }
758
759 return bitcast(bld_base, type, result);
760 }
761
762 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
763 const struct tgsi_full_src_register *reg,
764 enum tgsi_opcode_type type,
765 unsigned swizzle)
766 {
767 struct si_shader_context *ctx = si_shader_context(bld_base);
768 LLVMBuilderRef builder = ctx->gallivm.builder;
769 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
770
771 if (tgsi_type_is_64bit(type)) {
772 LLVMValueRef lo, hi;
773
774 assert(swizzle == 0 || swizzle == 2);
775
776 lo = LLVMBuildExtractElement(
777 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
778 hi = LLVMBuildExtractElement(
779 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
780
781 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
782 }
783
784 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
785 cval = LLVMBuildExtractElement(
786 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
787 } else {
788 assert(swizzle == 0);
789 }
790
791 return bitcast(bld_base, type, cval);
792 }
793
794 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
795 const struct tgsi_full_declaration *decl)
796 {
797 struct si_shader_context *ctx = si_shader_context(bld_base);
798 LLVMBuilderRef builder = ctx->gallivm.builder;
799 unsigned first, last, i;
800 switch(decl->Declaration.File) {
801 case TGSI_FILE_ADDRESS:
802 {
803 unsigned idx;
804 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
805 unsigned chan;
806 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
807 ctx->addrs[idx][chan] = lp_build_alloca_undef(
808 &ctx->gallivm,
809 ctx->i32, "");
810 }
811 }
812 break;
813 }
814
815 case TGSI_FILE_TEMPORARY:
816 {
817 char name[16] = "";
818 LLVMValueRef array_alloca = NULL;
819 unsigned decl_size;
820 unsigned writemask = decl->Declaration.UsageMask;
821 first = decl->Range.First;
822 last = decl->Range.Last;
823 decl_size = 4 * ((last - first) + 1);
824
825 if (decl->Declaration.Array) {
826 unsigned id = decl->Array.ArrayID - 1;
827 unsigned array_size;
828
829 writemask &= ctx->temp_arrays[id].writemask;
830 ctx->temp_arrays[id].writemask = writemask;
831 array_size = ((last - first) + 1) * util_bitcount(writemask);
832
833 /* If the array has more than 16 elements, store it
834 * in memory using an alloca that spans the entire
835 * array.
836 *
837 * Otherwise, store each array element individually.
838 * We will then generate vectors (per-channel, up to
839 * <16 x float> if the usagemask is a single bit) for
840 * indirect addressing.
841 *
842 * Note that 16 is the number of vector elements that
843 * LLVM will store in a register, so theoretically an
844 * array with up to 4 * 16 = 64 elements could be
845 * handled this way, but whether that's a good idea
846 * depends on VGPR register pressure elsewhere.
847 *
848 * FIXME: We shouldn't need to have the non-alloca
849 * code path for arrays. LLVM should be smart enough to
850 * promote allocas into registers when profitable.
851 *
852 * LLVM 3.8 crashes with this.
853 */
854 if ((HAVE_LLVM >= 0x0309 && array_size > 16) ||
855 /* TODO: VGPR indexing is buggy on GFX9. */
856 ctx->screen->b.chip_class == GFX9) {
857 array_alloca = LLVMBuildAlloca(builder,
858 LLVMArrayType(ctx->f32,
859 array_size), "array");
860 ctx->temp_array_allocas[id] = array_alloca;
861 }
862 }
863
864 if (!ctx->temps_count) {
865 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
866 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
867 }
868 if (!array_alloca) {
869 for (i = 0; i < decl_size; ++i) {
870 #ifdef DEBUG
871 snprintf(name, sizeof(name), "TEMP%d.%c",
872 first + i / 4, "xyzw"[i % 4]);
873 #endif
874 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
875 lp_build_alloca_undef(&ctx->gallivm,
876 ctx->f32,
877 name);
878 }
879 } else {
880 LLVMValueRef idxs[2] = {
881 ctx->i32_0,
882 NULL
883 };
884 unsigned j = 0;
885
886 if (writemask != TGSI_WRITEMASK_XYZW &&
887 !ctx->undef_alloca) {
888 /* Create a dummy alloca. We use it so that we
889 * have a pointer that is safe to load from if
890 * a shader ever reads from a channel that
891 * it never writes to.
892 */
893 ctx->undef_alloca = lp_build_alloca_undef(
894 &ctx->gallivm,
895 ctx->f32, "undef");
896 }
897
898 for (i = 0; i < decl_size; ++i) {
899 LLVMValueRef ptr;
900 if (writemask & (1 << (i % 4))) {
901 #ifdef DEBUG
902 snprintf(name, sizeof(name), "TEMP%d.%c",
903 first + i / 4, "xyzw"[i % 4]);
904 #endif
905 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
906 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
907 j++;
908 } else {
909 ptr = ctx->undef_alloca;
910 }
911 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
912 }
913 }
914 break;
915 }
916 case TGSI_FILE_INPUT:
917 {
918 unsigned idx;
919 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
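/* Only record each input slot once; slots already seen have
 * File == TGSI_FILE_INPUT. */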
920 if (ctx->load_input &&
921 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
922 ctx->input_decls[idx] = *decl;
923 ctx->input_decls[idx].Range.First = idx;
924 ctx->input_decls[idx].Range.Last = idx;
925 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
926
927 if (si_preload_fs_inputs(ctx) ||
928 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
929 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
930 &ctx->inputs[idx * 4]);
931 }
932 }
933 }
934 break;
935
936 case TGSI_FILE_SYSTEM_VALUE:
937 {
938 unsigned idx;
939 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
940 ctx->load_system_value(ctx, idx, decl);
941 }
942 }
943 break;
944
945 case TGSI_FILE_OUTPUT:
946 {
947 char name[16] = "";
948 unsigned idx;
949 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
950 unsigned chan;
951 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
952 if (ctx->outputs[idx][0])
953 continue;
954 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
955 #ifdef DEBUG
956 snprintf(name, sizeof(name), "OUT%d.%c",
957 idx, "xyzw"[chan % 4]);
958 #endif
959 ctx->outputs[idx][chan] = lp_build_alloca_undef(
960 &ctx->gallivm,
961 ctx->f32, name);
962 }
963 }
964 break;
965 }
966
967 case TGSI_FILE_MEMORY:
968 ctx->declare_memory_region(ctx, decl);
969 break;
970
971 default:
972 break;
973 }
974 }
975
976 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
977 const struct tgsi_full_instruction *inst,
978 const struct tgsi_opcode_info *info,
979 LLVMValueRef dst[4])
980 {
981 struct si_shader_context *ctx = si_shader_context(bld_base);
982 struct gallivm_state *gallivm = &ctx->gallivm;
983 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
984 LLVMBuilderRef builder = ctx->gallivm.builder;
985 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
986 unsigned chan, chan_index;
987 bool is_vec_store = false;
988 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
989
990 if (dst[0]) {
991 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
992 is_vec_store = (k == LLVMVectorTypeKind);
993 }
994
995 if (is_vec_store) {
996 LLVMValueRef values[4] = {};
997 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
998 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
999 values[chan] = LLVMBuildExtractElement(gallivm->builder,
1000 dst[0], index, "");
1001 }
1002 bld_base->emit_store(bld_base, inst, info, values);
1003 return;
1004 }
1005
1006 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
1007 LLVMValueRef value = dst[chan_index];
1008
1009 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1010 continue;
1011 if (inst->Instruction.Saturate)
1012 value = ac_build_clamp(&ctx->ac, value);
1013
1014 if (reg->Register.File == TGSI_FILE_ADDRESS) {
1015 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
1016 LLVMBuildStore(builder, value, temp_ptr);
1017 continue;
1018 }
1019
1020 if (!tgsi_type_is_64bit(dtype))
1021 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
1022
1023 if (reg->Register.Indirect) {
1024 unsigned file = reg->Register.File;
1025 unsigned reg_index = reg->Register.Index;
1026 store_value_to_array(bld_base, value, file, chan_index,
1027 reg_index, &reg->Indirect);
1028 } else {
1029 switch(reg->Register.File) {
1030 case TGSI_FILE_OUTPUT:
1031 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
1032 if (tgsi_type_is_64bit(dtype))
1033 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
1034 break;
1035
1036 case TGSI_FILE_TEMPORARY:
1037 {
1038 if (reg->Register.Index >= ctx->temps_count)
1039 continue;
1040
1041 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1042 if (tgsi_type_is_64bit(dtype))
1043 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1044
1045 break;
1046 }
1047 default:
1048 return;
1049 }
1050 if (!tgsi_type_is_64bit(dtype))
1051 LLVMBuildStore(builder, value, temp_ptr);
1052 else {
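/* 64-bit store: reinterpret the value as two i32 dwords and store them
 * into consecutive channel pointers. */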
1053 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1054 LLVMVectorType(ctx->i32, 2), "");
1055 LLVMValueRef val2;
1056 value = LLVMBuildExtractElement(builder, ptr,
1057 ctx->i32_0, "");
1058 val2 = LLVMBuildExtractElement(builder, ptr,
1059 ctx->i32_1, "");
1060
1061 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1062 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1063 }
1064 }
1065 }
1066 }
1067
1068 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1069 {
1070 char buf[32];
1071 /* Subtract 1 so that the number shown is that of the corresponding
1072 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1073 * the instruction number of the corresponding TGSI IF.
1074 */
1075 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1076 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1077 }
1078
1079 /* Append a basic block at the level of the parent flow.
1080 */
1081 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1082 const char *name)
1083 {
1084 struct gallivm_state *gallivm = &ctx->gallivm;
1085
1086 assert(ctx->flow_depth >= 1);
1087
1088 if (ctx->flow_depth >= 2) {
1089 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1090
1091 return LLVMInsertBasicBlockInContext(gallivm->context,
1092 flow->next_block, name);
1093 }
1094
1095 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1096 }
1097
1098 /* Emit a branch to the given default target for the current block if
1099 * applicable -- that is, if the current block does not already contain a
1100 * branch from a break or continue.
1101 */
1102 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1103 {
1104 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1105 LLVMBuildBr(builder, target);
1106 }
1107
1108 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1109 struct lp_build_tgsi_context *bld_base,
1110 struct lp_build_emit_data *emit_data)
1111 {
1112 struct si_shader_context *ctx = si_shader_context(bld_base);
1113 struct gallivm_state *gallivm = &ctx->gallivm;
1114 struct si_llvm_flow *flow = push_flow(ctx);
1115 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1116 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1117 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1118 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1119 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1120 }
1121
1122 static void brk_emit(const struct lp_build_tgsi_action *action,
1123 struct lp_build_tgsi_context *bld_base,
1124 struct lp_build_emit_data *emit_data)
1125 {
1126 struct si_shader_context *ctx = si_shader_context(bld_base);
1127 struct gallivm_state *gallivm = &ctx->gallivm;
1128 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1129
1130 LLVMBuildBr(gallivm->builder, flow->next_block);
1131 }
1132
1133 static void cont_emit(const struct lp_build_tgsi_action *action,
1134 struct lp_build_tgsi_context *bld_base,
1135 struct lp_build_emit_data *emit_data)
1136 {
1137 struct si_shader_context *ctx = si_shader_context(bld_base);
1138 struct gallivm_state *gallivm = &ctx->gallivm;
1139 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1140
1141 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1142 }
1143
1144 static void else_emit(const struct lp_build_tgsi_action *action,
1145 struct lp_build_tgsi_context *bld_base,
1146 struct lp_build_emit_data *emit_data)
1147 {
1148 struct si_shader_context *ctx = si_shader_context(bld_base);
1149 struct gallivm_state *gallivm = &ctx->gallivm;
1150 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1151 LLVMBasicBlockRef endif_block;
1152
1153 assert(!current_branch->loop_entry_block);
1154
1155 endif_block = append_basic_block(ctx, "ENDIF");
1156 emit_default_branch(gallivm->builder, endif_block);
1157
1158 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1159 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1160
1161 current_branch->next_block = endif_block;
1162 }
1163
1164 static void endif_emit(const struct lp_build_tgsi_action *action,
1165 struct lp_build_tgsi_context *bld_base,
1166 struct lp_build_emit_data *emit_data)
1167 {
1168 struct si_shader_context *ctx = si_shader_context(bld_base);
1169 struct gallivm_state *gallivm = &ctx->gallivm;
1170 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1171
1172 assert(!current_branch->loop_entry_block);
1173
1174 emit_default_branch(gallivm->builder, current_branch->next_block);
1175 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1176 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1177
1178 ctx->flow_depth--;
1179 }
1180
1181 static void endloop_emit(const struct lp_build_tgsi_action *action,
1182 struct lp_build_tgsi_context *bld_base,
1183 struct lp_build_emit_data *emit_data)
1184 {
1185 struct si_shader_context *ctx = si_shader_context(bld_base);
1186 struct gallivm_state *gallivm = &ctx->gallivm;
1187 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1188
1189 assert(current_loop->loop_entry_block);
1190
1191 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1192
1193 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1194 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1195 ctx->flow_depth--;
1196 }
1197
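/* Common code for IF and UIF: branch on the condition into a new IF block.
 * The ELSE block created here doubles as the merge point when no ELSE
 * instruction follows. */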
1198 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1199 struct lp_build_tgsi_context *bld_base,
1200 struct lp_build_emit_data *emit_data,
1201 LLVMValueRef cond)
1202 {
1203 struct si_shader_context *ctx = si_shader_context(bld_base);
1204 struct gallivm_state *gallivm = &ctx->gallivm;
1205 struct si_llvm_flow *flow = push_flow(ctx);
1206 LLVMBasicBlockRef if_block;
1207
1208 if_block = append_basic_block(ctx, "IF");
1209 flow->next_block = append_basic_block(ctx, "ELSE");
1210 set_basicblock_name(if_block, "if", bld_base->pc);
1211 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1212 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1213 }
1214
1215 static void if_emit(const struct lp_build_tgsi_action *action,
1216 struct lp_build_tgsi_context *bld_base,
1217 struct lp_build_emit_data *emit_data)
1218 {
1219 struct gallivm_state *gallivm = bld_base->base.gallivm;
1220 LLVMValueRef cond;
1221
1222 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1223 emit_data->args[0],
1224 bld_base->base.zero, "");
1225
1226 if_cond_emit(action, bld_base, emit_data, cond);
1227 }
1228
1229 static void uif_emit(const struct lp_build_tgsi_action *action,
1230 struct lp_build_tgsi_context *bld_base,
1231 struct lp_build_emit_data *emit_data)
1232 {
1233 struct gallivm_state *gallivm = bld_base->base.gallivm;
1234 LLVMValueRef cond;
1235
1236 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1237 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1238 bld_base->int_bld.zero, "");
1239
1240 if_cond_emit(action, bld_base, emit_data, cond);
1241 }
1242
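/* Record the four channels of an immediate as i32 constants; they are
 * bitcast to the requested type when fetched. */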
1243 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1244 const struct tgsi_full_immediate *imm)
1245 {
1246 unsigned i;
1247 struct si_shader_context *ctx = si_shader_context(bld_base);
1248
1249 for (i = 0; i < 4; ++i) {
1250 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1251 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
1252 }
1253
1254 ctx->imms_num++;
1255 }
1256
1257 void si_llvm_context_init(struct si_shader_context *ctx,
1258 struct si_screen *sscreen,
1259 LLVMTargetMachineRef tm)
1260 {
1261 struct lp_type type;
1262
1263 /* Initialize the gallivm object.
1264 * We only use the module, context, and builder fields of this struct, which
1265 * is enough to be able to pass our gallivm struct to the helper functions
1266 * in the gallivm module.
1267 */
1268 memset(ctx, 0, sizeof(*ctx));
1269 ctx->screen = sscreen;
1270 ctx->tm = tm;
1271
1272 ctx->gallivm.context = LLVMContextCreate();
1273 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1274 ctx->gallivm.context);
1275 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1276
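/* With LLVM >= 3.9, copy the target machine's data layout string into the
 * module so that type sizes and alignments match the backend. */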
1277 #if HAVE_LLVM >= 0x0309
1278 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1279 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1280 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1281 LLVMDisposeTargetData(data_layout);
1282 LLVMDisposeMessage(data_layout_str);
1283 #endif
1284
1285 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1286 enum lp_float_mode float_mode =
1287 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1288 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1289
1290 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1291 float_mode);
1292
1293 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1294 ctx->ac.module = ctx->gallivm.module;
1295 ctx->ac.builder = ctx->gallivm.builder;
1296
1297 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1298
1299 type.floating = true;
1300 type.fixed = false;
1301 type.sign = true;
1302 type.norm = false;
1303 type.width = 32;
1304 type.length = 1;
1305
1306 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1307 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1308 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1309 type.width *= 2;
1310 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1311 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1312 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1313
1314 bld_base->soa = 1;
1315 bld_base->emit_swizzle = emit_swizzle;
1316 bld_base->emit_declaration = emit_declaration;
1317 bld_base->emit_immediate = emit_immediate;
1318
1319 /* !fpmath metadata allowing 2.5 ULP of error */
1320 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1321 "fpmath", 6);
1322 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1323 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1324 &arg, 1);
1325
1326 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1327 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1328 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1329 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1330 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1331 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1332 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1333 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1334
1335 si_shader_context_init_alu(&ctx->bld_base);
1336
1337 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1338 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1339 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1340 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1341 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1342 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1343 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1344 ctx->v16i8 = LLVMVectorType(ctx->i8, 16);
1345 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1346 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1347 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1348 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1349
1350 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1351 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1352 }
1353
1354 /* Set the context to a certain TGSI shader. Can be called repeatedly
1355 * to change the shader. */
1356 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1357 struct si_shader *shader)
1358 {
1359 const struct tgsi_shader_info *info = NULL;
1360 const struct tgsi_token *tokens = NULL;
1361
1362 if (shader && shader->selector) {
1363 info = &shader->selector->info;
1364 tokens = shader->selector->tokens;
1365 }
1366
1367 ctx->shader = shader;
1368 ctx->type = info ? info->processor : -1;
1369 ctx->bld_base.info = info;
1370
1371 /* Clean up the old contents. */
1372 FREE(ctx->temp_arrays);
1373 ctx->temp_arrays = NULL;
1374 FREE(ctx->temp_array_allocas);
1375 ctx->temp_array_allocas = NULL;
1376
1377 FREE(ctx->imms);
1378 ctx->imms = NULL;
1379 ctx->imms_num = 0;
1380
1381 FREE(ctx->temps);
1382 ctx->temps = NULL;
1383 ctx->temps_count = 0;
1384
1385 if (!info || !tokens)
1386 return;
1387
1388 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1389 int size = info->array_max[TGSI_FILE_TEMPORARY];
1390
1391 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1392 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1393
1394 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1395 ctx->temp_arrays);
1396 }
1397 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1398 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1399 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1400 }
1401
1402 /* Re-set these to start with a clean slate. */
1403 ctx->bld_base.num_instructions = 0;
1404 ctx->bld_base.pc = 0;
1405 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1406
1407 ctx->bld_base.emit_store = si_llvm_emit_store;
1408 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1409 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1410 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1411 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1412 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1413 }
1414
1415 void si_llvm_create_func(struct si_shader_context *ctx,
1416 const char *name,
1417 LLVMTypeRef *return_types, unsigned num_return_elems,
1418 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1419 {
1420 LLVMTypeRef main_fn_type, ret_type;
1421 LLVMBasicBlockRef main_fn_body;
1422
1423 if (num_return_elems)
1424 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1425 return_types,
1426 num_return_elems, true);
1427 else
1428 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1429
1430 /* Set up the function. */
1431 ctx->return_type = ret_type;
1432 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1433 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1434 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1435 ctx->main_fn, "main_body");
1436 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1437 }
1438
1439 void si_llvm_finalize_module(struct si_shader_context *ctx,
1440 bool run_verifier)
1441 {
1442 struct gallivm_state *gallivm = &ctx->gallivm;
1443 const char *triple = LLVMGetTarget(gallivm->module);
1444 LLVMTargetLibraryInfoRef target_library_info;
1445
1446 /* Create the pass manager */
1447 gallivm->passmgr = LLVMCreatePassManager();
1448
1449 target_library_info = gallivm_create_target_library_info(triple);
1450 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1451
1452 if (run_verifier)
1453 LLVMAddVerifierPass(gallivm->passmgr);
1454
1455 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1456
1457 /* This pass should eliminate all the load and store instructions */
1458 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1459
1460 /* Add some optimization passes */
1461 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1462 LLVMAddLICMPass(gallivm->passmgr);
1463 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1464 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1465 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1466
1467 /* Run the passes. */
1468 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1469
1470 LLVMDisposeBuilder(gallivm->builder);
1471 LLVMDisposePassManager(gallivm->passmgr);
1472 gallivm_dispose_target_library_info(target_library_info);
1473 }
1474
1475 void si_llvm_dispose(struct si_shader_context *ctx)
1476 {
1477 LLVMDisposeModule(ctx->gallivm.module);
1478 LLVMContextDispose(ctx->gallivm.context);
1479 FREE(ctx->temp_arrays);
1480 ctx->temp_arrays = NULL;
1481 FREE(ctx->temp_array_allocas);
1482 ctx->temp_array_allocas = NULL;
1483 FREE(ctx->temps);
1484 ctx->temps = NULL;
1485 ctx->temps_count = 0;
1486 FREE(ctx->imms);
1487 ctx->imms = NULL;
1488 ctx->imms_num = 0;
1489 FREE(ctx->flow);
1490 ctx->flow = NULL;
1491 ctx->flow_depth_max = 0;
1492 }