radeonsi: add struct si_compiler containing LLVMTargetMachineRef
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27
28 #include "gallivm/lp_bld_const.h"
29 #include "gallivm/lp_bld_gather.h"
30 #include "gallivm/lp_bld_flow.h"
31 #include "gallivm/lp_bld_init.h"
32 #include "gallivm/lp_bld_intr.h"
33 #include "gallivm/lp_bld_misc.h"
34 #include "gallivm/lp_bld_swizzle.h"
35 #include "tgsi/tgsi_info.h"
36 #include "tgsi/tgsi_parse.h"
37 #include "util/u_math.h"
38 #include "util/u_memory.h"
39 #include "util/u_debug.h"
40
41 #include <stdio.h>
42 #include <llvm-c/Transforms/IPO.h>
43 #include <llvm-c/Transforms/Scalar.h>
44 #if HAVE_LLVM >= 0x0700
45 #include <llvm-c/Transforms/Utils.h>
46 #endif
47
/* Numeric calling-convention identifiers, one per shader entry-point kind
 * named in the enumerator (VS, GS, PS, CS, HS). The values are fixed and
 * intentionally spelled out rather than left to implicit enumeration.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,	/* vertex */
	RADEON_LLVM_AMDGPU_GS = 88,	/* geometry */
	RADEON_LLVM_AMDGPU_PS = 89,	/* pixel */
	RADEON_LLVM_AMDGPU_CS = 90,	/* compute */
	RADEON_LLVM_AMDGPU_HS = 93,	/* hull */
};
55
/* State shared between si_llvm_compile() and si_diagnostic_handler(). */
struct si_llvm_diagnostics {
	/* Callback that receives every formatted LLVM diagnostic. */
	struct pipe_debug_callback *debug;
	/* Set to 1 by the handler when an error-severity diagnostic arrives. */
	unsigned retval;
};
60
61 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
62 {
63 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
64 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
65 char *description = LLVMGetDiagInfoDescription(di);
66 const char *severity_str = NULL;
67
68 switch (severity) {
69 case LLVMDSError:
70 severity_str = "error";
71 break;
72 case LLVMDSWarning:
73 severity_str = "warning";
74 break;
75 case LLVMDSRemark:
76 severity_str = "remark";
77 break;
78 case LLVMDSNote:
79 severity_str = "note";
80 break;
81 default:
82 severity_str = "unknown";
83 }
84
85 pipe_debug_message(diag->debug, SHADER_INFO,
86 "LLVM diagnostic (%s): %s", severity_str, description);
87
88 if (severity == LLVMDSError) {
89 diag->retval = 1;
90 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
91 }
92
93 LLVMDisposeMessage(description);
94 }
95
96 /**
97 * Compile an LLVM module to machine code.
98 *
99 * @returns 0 for success, 1 for failure
100 */
101 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
102 struct si_compiler *compiler,
103 struct pipe_debug_callback *debug)
104 {
105 struct si_llvm_diagnostics diag;
106 char *err;
107 LLVMContextRef llvm_ctx;
108 LLVMMemoryBufferRef out_buffer;
109 unsigned buffer_size;
110 const char *buffer_data;
111 LLVMBool mem_err;
112
113 diag.debug = debug;
114 diag.retval = 0;
115
116 /* Setup Diagnostic Handler*/
117 llvm_ctx = LLVMGetModuleContext(M);
118
119 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
120
121 /* Compile IR*/
122 mem_err = LLVMTargetMachineEmitToMemoryBuffer(compiler->tm, M,
123 LLVMObjectFile, &err,
124 &out_buffer);
125
126 /* Process Errors/Warnings */
127 if (mem_err) {
128 fprintf(stderr, "%s: %s", __FUNCTION__, err);
129 pipe_debug_message(debug, SHADER_INFO,
130 "LLVM emit error: %s", err);
131 FREE(err);
132 diag.retval = 1;
133 goto out;
134 }
135
136 /* Extract Shader Code*/
137 buffer_size = LLVMGetBufferSize(out_buffer);
138 buffer_data = LLVMGetBufferStart(out_buffer);
139
140 if (!ac_elf_read(buffer_data, buffer_size, binary)) {
141 fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
142 diag.retval = 1;
143 }
144
145 /* Clean up */
146 LLVMDisposeMemoryBuffer(out_buffer);
147
148 out:
149 if (diag.retval != 0)
150 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
151 return diag.retval;
152 }
153
154 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
155 enum tgsi_opcode_type type)
156 {
157 struct si_shader_context *ctx = si_shader_context(bld_base);
158
159 switch (type) {
160 case TGSI_TYPE_UNSIGNED:
161 case TGSI_TYPE_SIGNED:
162 return ctx->ac.i32;
163 case TGSI_TYPE_UNSIGNED64:
164 case TGSI_TYPE_SIGNED64:
165 return ctx->ac.i64;
166 case TGSI_TYPE_DOUBLE:
167 return ctx->ac.f64;
168 case TGSI_TYPE_UNTYPED:
169 case TGSI_TYPE_FLOAT:
170 return ctx->ac.f32;
171 default: break;
172 }
173 return 0;
174 }
175
176 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
177 enum tgsi_opcode_type type, LLVMValueRef value)
178 {
179 struct si_shader_context *ctx = si_shader_context(bld_base);
180 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
181
182 if (dst_type)
183 return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
184 else
185 return value;
186 }
187
188 /**
189 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
190 * or an undefined value in the same interval otherwise.
191 */
192 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
193 LLVMValueRef index,
194 unsigned num)
195 {
196 LLVMBuilderRef builder = ctx->ac.builder;
197 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
198 LLVMValueRef cc;
199
200 if (util_is_power_of_two_or_zero(num)) {
201 index = LLVMBuildAnd(builder, index, c_max, "");
202 } else {
203 /* In theory, this MAX pattern should result in code that is
204 * as good as the bit-wise AND above.
205 *
206 * In practice, LLVM generates worse code (at the time of
207 * writing), because its value tracking is not strong enough.
208 */
209 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
210 index = LLVMBuildSelect(builder, cc, index, c_max, "");
211 }
212
213 return index;
214 }
215
216 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
217 LLVMValueRef value,
218 unsigned swizzle_x,
219 unsigned swizzle_y,
220 unsigned swizzle_z,
221 unsigned swizzle_w)
222 {
223 struct si_shader_context *ctx = si_shader_context(bld_base);
224 LLVMValueRef swizzles[4];
225
226 swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
227 swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
228 swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
229 swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
230
231 return LLVMBuildShuffleVector(ctx->ac.builder,
232 value,
233 LLVMGetUndef(LLVMTypeOf(value)),
234 LLVMConstVector(swizzles, 4), "");
235 }
236
237 /**
238 * Return the description of the array covering the given temporary register
239 * index.
240 */
241 static unsigned
242 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
243 unsigned reg_index,
244 const struct tgsi_ind_register *reg)
245 {
246 struct si_shader_context *ctx = si_shader_context(bld_base);
247 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
248 unsigned i;
249
250 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
251 return reg->ArrayID;
252
253 for (i = 0; i < num_arrays; i++) {
254 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
255
256 if (reg_index >= array->range.First && reg_index <= array->range.Last)
257 return i + 1;
258 }
259
260 return 0;
261 }
262
263 static struct tgsi_declaration_range
264 get_array_range(struct lp_build_tgsi_context *bld_base,
265 unsigned File, unsigned reg_index,
266 const struct tgsi_ind_register *reg)
267 {
268 struct si_shader_context *ctx = si_shader_context(bld_base);
269 struct tgsi_declaration_range range;
270
271 if (File == TGSI_FILE_TEMPORARY) {
272 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
273 if (array_id)
274 return ctx->temp_arrays[array_id - 1].range;
275 }
276
277 range.First = 0;
278 range.Last = bld_base->info->file_max[File];
279 return range;
280 }
281
282 /**
283 * For indirect registers, construct a pointer directly to the requested
284 * element using getelementptr if possible.
285 *
286 * Returns NULL if the insertelement/extractelement fallback for array access
287 * must be used.
288 */
289 static LLVMValueRef
290 get_pointer_into_array(struct si_shader_context *ctx,
291 unsigned file,
292 unsigned swizzle,
293 unsigned reg_index,
294 const struct tgsi_ind_register *reg_indirect)
295 {
296 unsigned array_id;
297 struct tgsi_array_info *array;
298 LLVMBuilderRef builder = ctx->ac.builder;
299 LLVMValueRef idxs[2];
300 LLVMValueRef index;
301 LLVMValueRef alloca;
302
303 if (file != TGSI_FILE_TEMPORARY)
304 return NULL;
305
306 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
307 if (!array_id)
308 return NULL;
309
310 alloca = ctx->temp_array_allocas[array_id - 1];
311 if (!alloca)
312 return NULL;
313
314 array = &ctx->temp_arrays[array_id - 1];
315
316 if (!(array->writemask & (1 << swizzle)))
317 return ctx->undef_alloca;
318
319 index = si_get_indirect_index(ctx, reg_indirect, 1,
320 reg_index - ctx->temp_arrays[array_id - 1].range.First);
321
322 /* Ensure that the index is within a valid range, to guard against
323 * VM faults and overwriting critical data (e.g. spilled resource
324 * descriptors).
325 *
326 * TODO It should be possible to avoid the additional instructions
327 * if LLVM is changed so that it guarantuees:
328 * 1. the scratch space descriptor isolates the current wave (this
329 * could even save the scratch offset SGPR at the cost of an
330 * additional SALU instruction)
331 * 2. the memory for allocas must be allocated at the _end_ of the
332 * scratch space (after spilled registers)
333 */
334 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
335
336 index = LLVMBuildMul(
337 builder, index,
338 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
339 "");
340 index = LLVMBuildAdd(
341 builder, index,
342 LLVMConstInt(ctx->i32,
343 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
344 "");
345 idxs[0] = ctx->i32_0;
346 idxs[1] = index;
347 return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
348 }
349
350 LLVMValueRef
351 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
352 LLVMTypeRef type,
353 LLVMValueRef ptr,
354 LLVMValueRef ptr2)
355 {
356 struct si_shader_context *ctx = si_shader_context(bld_base);
357 LLVMValueRef result;
358
359 result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
360
361 result = LLVMBuildInsertElement(ctx->ac.builder,
362 result,
363 ac_to_integer(&ctx->ac, ptr),
364 ctx->i32_0, "");
365 result = LLVMBuildInsertElement(ctx->ac.builder,
366 result,
367 ac_to_integer(&ctx->ac, ptr2),
368 ctx->i32_1, "");
369 return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
370 }
371
372 static LLVMValueRef
373 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
374 unsigned File, enum tgsi_opcode_type type,
375 struct tgsi_declaration_range range,
376 unsigned swizzle)
377 {
378 struct si_shader_context *ctx = si_shader_context(bld_base);
379 unsigned i, size = range.Last - range.First + 1;
380 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
381 LLVMValueRef result = LLVMGetUndef(vec);
382
383 struct tgsi_full_src_register tmp_reg = {};
384 tmp_reg.Register.File = File;
385
386 for (i = 0; i < size; ++i) {
387 tmp_reg.Register.Index = i + range.First;
388 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
389 result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
390 LLVMConstInt(ctx->i32, i, 0), "array_vector");
391 }
392 return result;
393 }
394
395 static LLVMValueRef
396 load_value_from_array(struct lp_build_tgsi_context *bld_base,
397 unsigned file,
398 enum tgsi_opcode_type type,
399 unsigned swizzle,
400 unsigned reg_index,
401 const struct tgsi_ind_register *reg_indirect)
402 {
403 struct si_shader_context *ctx = si_shader_context(bld_base);
404 LLVMBuilderRef builder = ctx->ac.builder;
405 LLVMValueRef ptr;
406
407 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
408 if (ptr) {
409 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
410 if (tgsi_type_is_64bit(type)) {
411 LLVMValueRef ptr_hi, val_hi;
412 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
413 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
414 val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
415 val, val_hi);
416 }
417
418 return val;
419 } else {
420 struct tgsi_declaration_range range =
421 get_array_range(bld_base, file, reg_index, reg_indirect);
422 LLVMValueRef index =
423 si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
424 LLVMValueRef array =
425 emit_array_fetch(bld_base, file, type, range, swizzle);
426 return LLVMBuildExtractElement(builder, array, index, "");
427 }
428 }
429
430 static void
431 store_value_to_array(struct lp_build_tgsi_context *bld_base,
432 LLVMValueRef value,
433 unsigned file,
434 unsigned chan_index,
435 unsigned reg_index,
436 const struct tgsi_ind_register *reg_indirect)
437 {
438 struct si_shader_context *ctx = si_shader_context(bld_base);
439 LLVMBuilderRef builder = ctx->ac.builder;
440 LLVMValueRef ptr;
441
442 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
443 if (ptr) {
444 LLVMBuildStore(builder, value, ptr);
445 } else {
446 unsigned i, size;
447 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
448 LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
449 LLVMValueRef array =
450 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
451 LLVMValueRef temp_ptr;
452
453 array = LLVMBuildInsertElement(builder, array, value, index, "");
454
455 size = range.Last - range.First + 1;
456 for (i = 0; i < size; ++i) {
457 switch(file) {
458 case TGSI_FILE_OUTPUT:
459 temp_ptr = ctx->outputs[i + range.First][chan_index];
460 break;
461
462 case TGSI_FILE_TEMPORARY:
463 if (range.First + i >= ctx->temps_count)
464 continue;
465 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
466 break;
467
468 default:
469 continue;
470 }
471 value = LLVMBuildExtractElement(builder, array,
472 LLVMConstInt(ctx->i32, i, 0), "");
473 LLVMBuildStore(builder, value, temp_ptr);
474 }
475 }
476 }
477
478 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
479 * reload them at each use. This must be true if the shader is using
480 * derivatives and KILL, because KILL can leave the WQM and then a lazy
481 * input load isn't in the WQM anymore.
482 */
483 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
484 {
485 struct si_shader_selector *sel = ctx->shader->selector;
486
487 return sel->info.uses_derivatives &&
488 sel->info.uses_kill;
489 }
490
491 static LLVMValueRef
492 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
493 unsigned chan)
494 {
495 struct si_shader_context *ctx = si_shader_context(bld_base);
496
497 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
498 return ctx->outputs[index][chan];
499 }
500
501 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
502 const struct tgsi_full_src_register *reg,
503 enum tgsi_opcode_type type,
504 unsigned swizzle)
505 {
506 struct si_shader_context *ctx = si_shader_context(bld_base);
507 LLVMBuilderRef builder = ctx->ac.builder;
508 LLVMValueRef result = NULL, ptr, ptr2;
509
510 if (swizzle == ~0) {
511 LLVMValueRef values[TGSI_NUM_CHANNELS];
512 unsigned chan;
513 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
514 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
515 }
516 return lp_build_gather_values(&ctx->gallivm, values,
517 TGSI_NUM_CHANNELS);
518 }
519
520 if (reg->Register.Indirect) {
521 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
522 swizzle, reg->Register.Index, &reg->Indirect);
523 return bitcast(bld_base, type, load);
524 }
525
526 switch(reg->Register.File) {
527 case TGSI_FILE_IMMEDIATE: {
528 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
529 if (tgsi_type_is_64bit(type)) {
530 result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
531 result = LLVMConstInsertElement(result,
532 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
533 ctx->i32_0);
534 result = LLVMConstInsertElement(result,
535 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
536 ctx->i32_1);
537 return LLVMConstBitCast(result, ctype);
538 } else {
539 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
540 }
541 }
542
543 case TGSI_FILE_INPUT: {
544 unsigned index = reg->Register.Index;
545 LLVMValueRef input[4];
546
547 /* I don't think doing this for vertex shaders is beneficial.
548 * For those, we want to make sure the VMEM loads are executed
549 * only once. Fragment shaders don't care much, because
550 * v_interp instructions are much cheaper than VMEM loads.
551 */
552 if (!si_preload_fs_inputs(ctx) &&
553 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
554 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
555 else
556 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
557
558 result = input[swizzle];
559
560 if (tgsi_type_is_64bit(type)) {
561 ptr = result;
562 ptr2 = input[swizzle + 1];
563 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
564 ptr, ptr2);
565 }
566 break;
567 }
568
569 case TGSI_FILE_TEMPORARY:
570 if (reg->Register.Index >= ctx->temps_count)
571 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
572 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
573 if (tgsi_type_is_64bit(type)) {
574 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
575 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
576 LLVMBuildLoad(builder, ptr, ""),
577 LLVMBuildLoad(builder, ptr2, ""));
578 }
579 result = LLVMBuildLoad(builder, ptr, "");
580 break;
581
582 case TGSI_FILE_OUTPUT:
583 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
584 if (tgsi_type_is_64bit(type)) {
585 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
586 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
587 LLVMBuildLoad(builder, ptr, ""),
588 LLVMBuildLoad(builder, ptr2, ""));
589 }
590 result = LLVMBuildLoad(builder, ptr, "");
591 break;
592
593 default:
594 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
595 }
596
597 return bitcast(bld_base, type, result);
598 }
599
600 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
601 const struct tgsi_full_src_register *reg,
602 enum tgsi_opcode_type type,
603 unsigned swizzle)
604 {
605 struct si_shader_context *ctx = si_shader_context(bld_base);
606 LLVMBuilderRef builder = ctx->ac.builder;
607 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
608
609 if (tgsi_type_is_64bit(type)) {
610 LLVMValueRef lo, hi;
611
612 assert(swizzle == 0 || swizzle == 2);
613
614 lo = LLVMBuildExtractElement(
615 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
616 hi = LLVMBuildExtractElement(
617 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
618
619 return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
620 lo, hi);
621 }
622
623 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
624 cval = LLVMBuildExtractElement(
625 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
626 } else {
627 assert(swizzle == 0);
628 }
629
630 return bitcast(bld_base, type, cval);
631 }
632
633 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
634 const struct tgsi_full_declaration *decl)
635 {
636 struct si_shader_context *ctx = si_shader_context(bld_base);
637 LLVMBuilderRef builder = ctx->ac.builder;
638 unsigned first, last, i;
639 switch(decl->Declaration.File) {
640 case TGSI_FILE_ADDRESS:
641 {
642 unsigned idx;
643 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
644 unsigned chan;
645 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
646 ctx->addrs[idx][chan] = lp_build_alloca_undef(
647 &ctx->gallivm,
648 ctx->i32, "");
649 }
650 }
651 break;
652 }
653
654 case TGSI_FILE_TEMPORARY:
655 {
656 char name[18] = "";
657 LLVMValueRef array_alloca = NULL;
658 unsigned decl_size;
659 unsigned writemask = decl->Declaration.UsageMask;
660 first = decl->Range.First;
661 last = decl->Range.Last;
662 decl_size = 4 * ((last - first) + 1);
663
664 if (decl->Declaration.Array) {
665 unsigned id = decl->Array.ArrayID - 1;
666 unsigned array_size;
667
668 writemask &= ctx->temp_arrays[id].writemask;
669 ctx->temp_arrays[id].writemask = writemask;
670 array_size = ((last - first) + 1) * util_bitcount(writemask);
671
672 /* If the array has more than 16 elements, store it
673 * in memory using an alloca that spans the entire
674 * array.
675 *
676 * Otherwise, store each array element individually.
677 * We will then generate vectors (per-channel, up to
678 * <16 x float> if the usagemask is a single bit) for
679 * indirect addressing.
680 *
681 * Note that 16 is the number of vector elements that
682 * LLVM will store in a register, so theoretically an
683 * array with up to 4 * 16 = 64 elements could be
684 * handled this way, but whether that's a good idea
685 * depends on VGPR register pressure elsewhere.
686 *
687 * FIXME: We shouldn't need to have the non-alloca
688 * code path for arrays. LLVM should be smart enough to
689 * promote allocas into registers when profitable.
690 */
691 if (array_size > 16 ||
692 !ctx->screen->llvm_has_working_vgpr_indexing) {
693 array_alloca = lp_build_alloca_undef(&ctx->gallivm,
694 LLVMArrayType(ctx->f32,
695 array_size), "array");
696 ctx->temp_array_allocas[id] = array_alloca;
697 }
698 }
699
700 if (!ctx->temps_count) {
701 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
702 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
703 }
704 if (!array_alloca) {
705 for (i = 0; i < decl_size; ++i) {
706 #ifdef DEBUG
707 snprintf(name, sizeof(name), "TEMP%d.%c",
708 first + i / 4, "xyzw"[i % 4]);
709 #endif
710 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
711 lp_build_alloca_undef(&ctx->gallivm,
712 ctx->f32,
713 name);
714 }
715 } else {
716 LLVMValueRef idxs[2] = {
717 ctx->i32_0,
718 NULL
719 };
720 unsigned j = 0;
721
722 if (writemask != TGSI_WRITEMASK_XYZW &&
723 !ctx->undef_alloca) {
724 /* Create a dummy alloca. We use it so that we
725 * have a pointer that is safe to load from if
726 * a shader ever reads from a channel that
727 * it never writes to.
728 */
729 ctx->undef_alloca = lp_build_alloca_undef(
730 &ctx->gallivm,
731 ctx->f32, "undef");
732 }
733
734 for (i = 0; i < decl_size; ++i) {
735 LLVMValueRef ptr;
736 if (writemask & (1 << (i % 4))) {
737 #ifdef DEBUG
738 snprintf(name, sizeof(name), "TEMP%d.%c",
739 first + i / 4, "xyzw"[i % 4]);
740 #endif
741 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
742 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
743 j++;
744 } else {
745 ptr = ctx->undef_alloca;
746 }
747 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
748 }
749 }
750 break;
751 }
752 case TGSI_FILE_INPUT:
753 {
754 unsigned idx;
755 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
756 if (ctx->load_input &&
757 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
758 ctx->input_decls[idx] = *decl;
759 ctx->input_decls[idx].Range.First = idx;
760 ctx->input_decls[idx].Range.Last = idx;
761 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
762
763 if (si_preload_fs_inputs(ctx) ||
764 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
765 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
766 &ctx->inputs[idx * 4]);
767 }
768 }
769 }
770 break;
771
772 case TGSI_FILE_SYSTEM_VALUE:
773 {
774 unsigned idx;
775 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
776 si_load_system_value(ctx, idx, decl);
777 }
778 }
779 break;
780
781 case TGSI_FILE_OUTPUT:
782 {
783 char name[16] = "";
784 unsigned idx;
785 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
786 unsigned chan;
787 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
788 if (ctx->outputs[idx][0])
789 continue;
790 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
791 #ifdef DEBUG
792 snprintf(name, sizeof(name), "OUT%d.%c",
793 idx, "xyzw"[chan % 4]);
794 #endif
795 ctx->outputs[idx][chan] = lp_build_alloca_undef(
796 &ctx->gallivm,
797 ctx->f32, name);
798 }
799 }
800 break;
801 }
802
803 case TGSI_FILE_MEMORY:
804 si_tgsi_declare_compute_memory(ctx, decl);
805 break;
806
807 default:
808 break;
809 }
810 }
811
812 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
813 const struct tgsi_full_instruction *inst,
814 const struct tgsi_opcode_info *info,
815 unsigned index,
816 LLVMValueRef dst[4])
817 {
818 struct si_shader_context *ctx = si_shader_context(bld_base);
819 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
820 LLVMBuilderRef builder = ctx->ac.builder;
821 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
822 bool is_vec_store = false;
823 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
824
825 if (dst[0]) {
826 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
827 is_vec_store = (k == LLVMVectorTypeKind);
828 }
829
830 if (is_vec_store) {
831 LLVMValueRef values[4] = {};
832 uint32_t writemask = reg->Register.WriteMask;
833 while (writemask) {
834 unsigned chan = u_bit_scan(&writemask);
835 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
836 values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
837 dst[0], index, "");
838 }
839 bld_base->emit_store(bld_base, inst, info, index, values);
840 return;
841 }
842
843 uint32_t writemask = reg->Register.WriteMask;
844 while (writemask) {
845 unsigned chan_index = u_bit_scan(&writemask);
846 LLVMValueRef value = dst[chan_index];
847
848 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
849 continue;
850 if (inst->Instruction.Saturate)
851 value = ac_build_clamp(&ctx->ac, value);
852
853 if (reg->Register.File == TGSI_FILE_ADDRESS) {
854 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
855 LLVMBuildStore(builder, value, temp_ptr);
856 continue;
857 }
858
859 if (!tgsi_type_is_64bit(dtype))
860 value = ac_to_float(&ctx->ac, value);
861
862 if (reg->Register.Indirect) {
863 unsigned file = reg->Register.File;
864 unsigned reg_index = reg->Register.Index;
865 store_value_to_array(bld_base, value, file, chan_index,
866 reg_index, &reg->Indirect);
867 } else {
868 switch(reg->Register.File) {
869 case TGSI_FILE_OUTPUT:
870 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
871 if (tgsi_type_is_64bit(dtype))
872 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
873 break;
874
875 case TGSI_FILE_TEMPORARY:
876 {
877 if (reg->Register.Index >= ctx->temps_count)
878 continue;
879
880 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
881 if (tgsi_type_is_64bit(dtype))
882 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
883
884 break;
885 }
886 default:
887 return;
888 }
889 if (!tgsi_type_is_64bit(dtype))
890 LLVMBuildStore(builder, value, temp_ptr);
891 else {
892 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
893 LLVMVectorType(ctx->i32, 2), "");
894 LLVMValueRef val2;
895 value = LLVMBuildExtractElement(builder, ptr,
896 ctx->i32_0, "");
897 val2 = LLVMBuildExtractElement(builder, ptr,
898 ctx->i32_1, "");
899
900 LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
901 LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
902 }
903 }
904 }
905 }
906
/* Convert a program counter into the instruction number used as a label
 * suffix.
 *
 * One is subtracted so that the number matches the corresponding opcode in
 * the TGSI dump, e.g. an if block gets the same suffix as the instruction
 * number of the TGSI IF that opened it.
 */
static int get_line(int pc)
{
	const int tgsi_line = pc - 1;

	return tgsi_line;
}
915
916 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
917 struct lp_build_tgsi_context *bld_base,
918 struct lp_build_emit_data *emit_data)
919 {
920 struct si_shader_context *ctx = si_shader_context(bld_base);
921 ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
922 }
923
924 static void brk_emit(const struct lp_build_tgsi_action *action,
925 struct lp_build_tgsi_context *bld_base,
926 struct lp_build_emit_data *emit_data)
927 {
928 struct si_shader_context *ctx = si_shader_context(bld_base);
929 ac_build_break(&ctx->ac);
930 }
931
932 static void cont_emit(const struct lp_build_tgsi_action *action,
933 struct lp_build_tgsi_context *bld_base,
934 struct lp_build_emit_data *emit_data)
935 {
936 struct si_shader_context *ctx = si_shader_context(bld_base);
937 ac_build_continue(&ctx->ac);
938 }
939
940 static void else_emit(const struct lp_build_tgsi_action *action,
941 struct lp_build_tgsi_context *bld_base,
942 struct lp_build_emit_data *emit_data)
943 {
944 struct si_shader_context *ctx = si_shader_context(bld_base);
945 ac_build_else(&ctx->ac, get_line(bld_base->pc));
946 }
947
948 static void endif_emit(const struct lp_build_tgsi_action *action,
949 struct lp_build_tgsi_context *bld_base,
950 struct lp_build_emit_data *emit_data)
951 {
952 struct si_shader_context *ctx = si_shader_context(bld_base);
953 ac_build_endif(&ctx->ac, get_line(bld_base->pc));
954 }
955
956 static void endloop_emit(const struct lp_build_tgsi_action *action,
957 struct lp_build_tgsi_context *bld_base,
958 struct lp_build_emit_data *emit_data)
959 {
960 struct si_shader_context *ctx = si_shader_context(bld_base);
961 ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
962 }
963
964 static void if_emit(const struct lp_build_tgsi_action *action,
965 struct lp_build_tgsi_context *bld_base,
966 struct lp_build_emit_data *emit_data)
967 {
968 struct si_shader_context *ctx = si_shader_context(bld_base);
969 ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
970 }
971
972 static void uif_emit(const struct lp_build_tgsi_action *action,
973 struct lp_build_tgsi_context *bld_base,
974 struct lp_build_emit_data *emit_data)
975 {
976 struct si_shader_context *ctx = si_shader_context(bld_base);
977 ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
978 }
979
980 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
981 const struct tgsi_full_immediate *imm)
982 {
983 unsigned i;
984 struct si_shader_context *ctx = si_shader_context(bld_base);
985
986 for (i = 0; i < 4; ++i) {
987 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
988 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
989 }
990
991 ctx->imms_num++;
992 }
993
994 void si_llvm_context_init(struct si_shader_context *ctx,
995 struct si_screen *sscreen,
996 struct si_compiler *compiler)
997 {
998 struct lp_type type;
999
1000 /* Initialize the gallivm object:
1001 * We are only using the module, context, and builder fields of this struct.
1002 * This should be enough for us to be able to pass our gallivm struct to the
1003 * helper functions in the gallivm module.
1004 */
1005 memset(ctx, 0, sizeof(*ctx));
1006 ctx->screen = sscreen;
1007 ctx->compiler = compiler;
1008
1009 ctx->gallivm.context = LLVMContextCreate();
1010 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1011 ctx->gallivm.context);
1012 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1013
1014 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(compiler->tm);
1015 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1016 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1017 LLVMDisposeTargetData(data_layout);
1018 LLVMDisposeMessage(data_layout_str);
1019
1020 bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
1021 enum ac_float_mode float_mode =
1022 unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
1023 AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1024
1025 ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
1026 float_mode);
1027
1028 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
1029 sscreen->info.chip_class, sscreen->info.family);
1030 ctx->ac.module = ctx->gallivm.module;
1031 ctx->ac.builder = ctx->gallivm.builder;
1032
1033 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1034
1035 type.floating = true;
1036 type.fixed = false;
1037 type.sign = true;
1038 type.norm = false;
1039 type.width = 32;
1040 type.length = 1;
1041
1042 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1043 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1044 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1045 type.width *= 2;
1046 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1047 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1048 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1049
1050 bld_base->soa = 1;
1051 bld_base->emit_swizzle = emit_swizzle;
1052 bld_base->emit_declaration = emit_declaration;
1053 bld_base->emit_immediate = emit_immediate;
1054
1055 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1056 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1057 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1058 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1059 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1060 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1061 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1062 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1063
1064 si_shader_context_init_alu(&ctx->bld_base);
1065 si_shader_context_init_mem(ctx);
1066
1067 ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
1068 ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
1069 ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
1070 ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
1071 ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
1072 ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
1073 ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
1074 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1075 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1076 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1077 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1078
1079 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1080 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1081 }
1082
1083 /* Set the context to a certain TGSI shader. Can be called repeatedly
1084 * to change the shader. */
1085 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1086 struct si_shader *shader)
1087 {
1088 const struct tgsi_shader_info *info = NULL;
1089 const struct tgsi_token *tokens = NULL;
1090
1091 if (shader && shader->selector) {
1092 info = &shader->selector->info;
1093 tokens = shader->selector->tokens;
1094 }
1095
1096 ctx->shader = shader;
1097 ctx->type = info ? info->processor : -1;
1098 ctx->bld_base.info = info;
1099
1100 /* Clean up the old contents. */
1101 FREE(ctx->temp_arrays);
1102 ctx->temp_arrays = NULL;
1103 FREE(ctx->temp_array_allocas);
1104 ctx->temp_array_allocas = NULL;
1105
1106 FREE(ctx->imms);
1107 ctx->imms = NULL;
1108 ctx->imms_num = 0;
1109
1110 FREE(ctx->temps);
1111 ctx->temps = NULL;
1112 ctx->temps_count = 0;
1113
1114 if (!info)
1115 return;
1116
1117 ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1118 ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1119
1120 ctx->num_samplers = util_last_bit(info->samplers_declared);
1121 ctx->num_images = util_last_bit(info->images_declared);
1122
1123 if (!tokens)
1124 return;
1125
1126 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1127 int size = info->array_max[TGSI_FILE_TEMPORARY];
1128
1129 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1130 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1131
1132 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1133 ctx->temp_arrays);
1134 }
1135 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1136 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1137 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1138 }
1139
1140 /* Re-set these to start with a clean slate. */
1141 ctx->bld_base.num_instructions = 0;
1142 ctx->bld_base.pc = 0;
1143 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1144
1145 ctx->bld_base.emit_store = si_llvm_emit_store;
1146 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1147 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1148 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1149 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1150 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1151 }
1152
1153 void si_llvm_create_func(struct si_shader_context *ctx,
1154 const char *name,
1155 LLVMTypeRef *return_types, unsigned num_return_elems,
1156 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1157 {
1158 LLVMTypeRef main_fn_type, ret_type;
1159 LLVMBasicBlockRef main_fn_body;
1160 enum si_llvm_calling_convention call_conv;
1161 unsigned real_shader_type;
1162
1163 if (num_return_elems)
1164 ret_type = LLVMStructTypeInContext(ctx->ac.context,
1165 return_types,
1166 num_return_elems, true);
1167 else
1168 ret_type = ctx->voidt;
1169
1170 /* Setup the function */
1171 ctx->return_type = ret_type;
1172 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1173 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1174 main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
1175 ctx->main_fn, "main_body");
1176 LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);
1177
1178 real_shader_type = ctx->type;
1179
1180 /* LS is merged into HS (TCS), and ES is merged into GS. */
1181 if (ctx->screen->info.chip_class >= GFX9) {
1182 if (ctx->shader->key.as_ls)
1183 real_shader_type = PIPE_SHADER_TESS_CTRL;
1184 else if (ctx->shader->key.as_es)
1185 real_shader_type = PIPE_SHADER_GEOMETRY;
1186 }
1187
1188 switch (real_shader_type) {
1189 case PIPE_SHADER_VERTEX:
1190 case PIPE_SHADER_TESS_EVAL:
1191 call_conv = RADEON_LLVM_AMDGPU_VS;
1192 break;
1193 case PIPE_SHADER_TESS_CTRL:
1194 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1195 RADEON_LLVM_AMDGPU_VS;
1196 break;
1197 case PIPE_SHADER_GEOMETRY:
1198 call_conv = RADEON_LLVM_AMDGPU_GS;
1199 break;
1200 case PIPE_SHADER_FRAGMENT:
1201 call_conv = RADEON_LLVM_AMDGPU_PS;
1202 break;
1203 case PIPE_SHADER_COMPUTE:
1204 call_conv = RADEON_LLVM_AMDGPU_CS;
1205 break;
1206 default:
1207 unreachable("Unhandle shader type");
1208 }
1209
1210 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1211 }
1212
1213 void si_llvm_optimize_module(struct si_shader_context *ctx)
1214 {
1215 struct gallivm_state *gallivm = &ctx->gallivm;
1216 const char *triple = LLVMGetTarget(gallivm->module);
1217 LLVMTargetLibraryInfoRef target_library_info;
1218
1219 /* Dump LLVM IR before any optimization passes */
1220 if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
1221 si_can_dump_shader(ctx->screen, ctx->type))
1222 LLVMDumpModule(ctx->gallivm.module);
1223
1224 /* Create the pass manager */
1225 gallivm->passmgr = LLVMCreatePassManager();
1226
1227 target_library_info = gallivm_create_target_library_info(triple);
1228 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1229
1230 if (si_extra_shader_checks(ctx->screen, ctx->type))
1231 LLVMAddVerifierPass(gallivm->passmgr);
1232
1233 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1234
1235 /* This pass should eliminate all the load and store instructions */
1236 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1237
1238 /* Add some optimization passes */
1239 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1240 LLVMAddLICMPass(gallivm->passmgr);
1241 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1242 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1243 /* This is recommended by the instruction combining pass. */
1244 LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
1245 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1246
1247 /* Run the pass */
1248 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1249
1250 LLVMDisposeBuilder(ctx->ac.builder);
1251 LLVMDisposePassManager(gallivm->passmgr);
1252 gallivm_dispose_target_library_info(target_library_info);
1253 }
1254
1255 void si_llvm_dispose(struct si_shader_context *ctx)
1256 {
1257 LLVMDisposeModule(ctx->gallivm.module);
1258 LLVMContextDispose(ctx->gallivm.context);
1259 FREE(ctx->temp_arrays);
1260 ctx->temp_arrays = NULL;
1261 FREE(ctx->temp_array_allocas);
1262 ctx->temp_array_allocas = NULL;
1263 FREE(ctx->temps);
1264 ctx->temps = NULL;
1265 ctx->temps_count = 0;
1266 FREE(ctx->imms);
1267 ctx->imms = NULL;
1268 ctx->imms_num = 0;
1269 ac_llvm_context_dispose(&ctx->ac);
1270 }