util: Move util_is_power_of_two to bitscan.h and rename to util_is_power_of_two_or_zero
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
/* Calling-convention IDs passed to LLVM to select the hardware shader stage
 * entry ABI.  NOTE(review): the numeric values presumably match LLVM's
 * llvm::CallingConv::AMDGPU_* enumerators — confirm against CallingConv.h
 * for the LLVM version in use.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
51
/* Context handed to the LLVM diagnostic handler: forwards diagnostics to the
 * pipe debug callback and records whether an error was reported. */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;	/* sink for diagnostic messages */
	unsigned retval;			/* set to 1 when an LLVMDSError is seen */
};
56
57 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
58 {
59 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
60 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
61 char *description = LLVMGetDiagInfoDescription(di);
62 const char *severity_str = NULL;
63
64 switch (severity) {
65 case LLVMDSError:
66 severity_str = "error";
67 break;
68 case LLVMDSWarning:
69 severity_str = "warning";
70 break;
71 case LLVMDSRemark:
72 severity_str = "remark";
73 break;
74 case LLVMDSNote:
75 severity_str = "note";
76 break;
77 default:
78 severity_str = "unknown";
79 }
80
81 pipe_debug_message(diag->debug, SHADER_INFO,
82 "LLVM diagnostic (%s): %s", severity_str, description);
83
84 if (severity == LLVMDSError) {
85 diag->retval = 1;
86 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
87 }
88
89 LLVMDisposeMessage(description);
90 }
91
/**
 * Compile an LLVM module to machine code.
 *
 * \param M       LLVM module to compile
 * \param binary  receives the shader binary parsed from the ELF object that
 *                LLVM emits
 * \param tm      target machine used for code generation
 * \param debug   pipe debug callback receiving diagnostics and errors
 *
 * @returns 0 for success, 1 for failure
 */
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
			 LLVMTargetMachineRef tm,
			 struct pipe_debug_callback *debug)
{
	struct si_llvm_diagnostics diag;
	char *err;
	LLVMContextRef llvm_ctx;
	LLVMMemoryBufferRef out_buffer;
	unsigned buffer_size;
	const char *buffer_data;
	LLVMBool mem_err;

	diag.debug = debug;
	diag.retval = 0;

	/* Setup Diagnostic Handler: errors reported during codegen set
	 * diag.retval via si_diagnostic_handler. */
	llvm_ctx = LLVMGetModuleContext(M);

	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);

	/* Compile IR to an in-memory ELF object. */
	mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
						      &out_buffer);

	/* Process Errors/Warnings */
	if (mem_err) {
		fprintf(stderr, "%s: %s", __FUNCTION__, err);
		pipe_debug_message(debug, SHADER_INFO,
				   "LLVM emit error: %s", err);
		FREE(err);
		diag.retval = 1;
		goto out;
	}

	/* Extract Shader Code: parse the emitted ELF into "binary". */
	buffer_size = LLVMGetBufferSize(out_buffer);
	buffer_data = LLVMGetBufferStart(out_buffer);

	if (!ac_elf_read(buffer_data, buffer_size, binary)) {
		fprintf(stderr, "radeonsi: cannot read an ELF shader binary\n");
		diag.retval = 1;
	}

	/* Clean up */
	LLVMDisposeMemoryBuffer(out_buffer);

out:
	if (diag.retval != 0)
		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
	return diag.retval;
}
148
149 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
150 enum tgsi_opcode_type type)
151 {
152 struct si_shader_context *ctx = si_shader_context(bld_base);
153
154 switch (type) {
155 case TGSI_TYPE_UNSIGNED:
156 case TGSI_TYPE_SIGNED:
157 return ctx->ac.i32;
158 case TGSI_TYPE_UNSIGNED64:
159 case TGSI_TYPE_SIGNED64:
160 return ctx->ac.i64;
161 case TGSI_TYPE_DOUBLE:
162 return ctx->ac.f64;
163 case TGSI_TYPE_UNTYPED:
164 case TGSI_TYPE_FLOAT:
165 return ctx->ac.f32;
166 default: break;
167 }
168 return 0;
169 }
170
171 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
172 enum tgsi_opcode_type type, LLVMValueRef value)
173 {
174 struct si_shader_context *ctx = si_shader_context(bld_base);
175 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
176
177 if (dst_type)
178 return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
179 else
180 return value;
181 }
182
/**
 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 * or an undefined value in the same interval otherwise.
 *
 * \param ctx    shader context
 * \param index  i32 index to bound
 * \param num    exclusive upper limit of the valid range
 */
LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
				 LLVMValueRef index,
				 unsigned num)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
	LLVMValueRef cc;

	if (util_is_power_of_two_or_zero(num)) {
		/* Power-of-two num: masking with num-1 leaves in-range values
		 * unchanged and wraps out-of-range values into [0, num). */
		index = LLVMBuildAnd(builder, index, c_max, "");
	} else {
		/* In theory, this MAX pattern should result in code that is
		 * as good as the bit-wise AND above.
		 *
		 * In practice, LLVM generates worse code (at the time of
		 * writing), because its value tracking is not strong enough.
		 */
		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
		index = LLVMBuildSelect(builder, cc, index, c_max, "");
	}

	return index;
}
210
211 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
212 LLVMValueRef value,
213 unsigned swizzle_x,
214 unsigned swizzle_y,
215 unsigned swizzle_z,
216 unsigned swizzle_w)
217 {
218 struct si_shader_context *ctx = si_shader_context(bld_base);
219 LLVMValueRef swizzles[4];
220
221 swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
222 swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
223 swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
224 swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
225
226 return LLVMBuildShuffleVector(ctx->ac.builder,
227 value,
228 LLVMGetUndef(LLVMTypeOf(value)),
229 LLVMConstVector(swizzles, 4), "");
230 }
231
232 /**
233 * Return the description of the array covering the given temporary register
234 * index.
235 */
236 static unsigned
237 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
238 unsigned reg_index,
239 const struct tgsi_ind_register *reg)
240 {
241 struct si_shader_context *ctx = si_shader_context(bld_base);
242 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
243 unsigned i;
244
245 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
246 return reg->ArrayID;
247
248 for (i = 0; i < num_arrays; i++) {
249 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
250
251 if (reg_index >= array->range.First && reg_index <= array->range.Last)
252 return i + 1;
253 }
254
255 return 0;
256 }
257
258 static struct tgsi_declaration_range
259 get_array_range(struct lp_build_tgsi_context *bld_base,
260 unsigned File, unsigned reg_index,
261 const struct tgsi_ind_register *reg)
262 {
263 struct si_shader_context *ctx = si_shader_context(bld_base);
264 struct tgsi_declaration_range range;
265
266 if (File == TGSI_FILE_TEMPORARY) {
267 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
268 if (array_id)
269 return ctx->temp_arrays[array_id - 1].range;
270 }
271
272 range.First = 0;
273 range.Last = bld_base->info->file_max[File];
274 return range;
275 }
276
/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct si_shader_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only temporaries can be backed by an alloca here. */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* No alloca means the array uses the vector-based fallback path
	 * (see emit_declaration). */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Channels that are never written read from the shared dummy alloca. */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = si_get_indirect_index(ctx, reg_indirect, 1,
				      reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantuees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca stores only the written channels, so scale by the number
	 * of written channels per element and add this channel's position
	 * among the written ones. */
	index = LLVMBuildMul(
		builder, index,
		LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
		"");
	index = LLVMBuildAdd(
		builder, index,
		LLVMConstInt(ctx->i32,
			     util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
		"");
	idxs[0] = ctx->i32_0;
	idxs[1] = index;
	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}
344
345 LLVMValueRef
346 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
347 LLVMTypeRef type,
348 LLVMValueRef ptr,
349 LLVMValueRef ptr2)
350 {
351 struct si_shader_context *ctx = si_shader_context(bld_base);
352 LLVMValueRef result;
353
354 result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
355
356 result = LLVMBuildInsertElement(ctx->ac.builder,
357 result,
358 ac_to_integer(&ctx->ac, ptr),
359 ctx->i32_0, "");
360 result = LLVMBuildInsertElement(ctx->ac.builder,
361 result,
362 ac_to_integer(&ctx->ac, ptr2),
363 ctx->i32_1, "");
364 return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
365 }
366
367 static LLVMValueRef
368 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
369 unsigned File, enum tgsi_opcode_type type,
370 struct tgsi_declaration_range range,
371 unsigned swizzle)
372 {
373 struct si_shader_context *ctx = si_shader_context(bld_base);
374 unsigned i, size = range.Last - range.First + 1;
375 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
376 LLVMValueRef result = LLVMGetUndef(vec);
377
378 struct tgsi_full_src_register tmp_reg = {};
379 tmp_reg.Register.File = File;
380
381 for (i = 0; i < size; ++i) {
382 tmp_reg.Register.Index = i + range.First;
383 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
384 result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
385 LLVMConstInt(ctx->i32, i, 0), "array_vector");
386 }
387 return result;
388 }
389
/**
 * Load one channel of an indirectly addressed register.
 *
 * Uses a direct load through a GEP'd pointer when the register is backed by
 * an alloca; otherwise fetches the whole range as a vector and extracts the
 * dynamically indexed element.
 */
static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			/* 64-bit values span two consecutive 32-bit slots;
			 * load the high half from the next element. */
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						       val, val_hi);
		}

		return val;
	} else {
		/* Fallback: gather the whole range into a vector and pick
		 * the element with a dynamic extractelement. */
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}
424
/**
 * Store one channel value into an indirectly addressed register.
 *
 * Uses a direct store through a GEP'd pointer when the register is backed
 * by an alloca; otherwise performs a read-modify-write of the whole range
 * via insertelement and writes every element back.
 */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		/* Insert the new value into the gathered vector, then write
		 * every element back to its register. */
		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				/* Out-of-bounds temporaries are silently dropped. */
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							LLVMConstInt(ctx->i32, i, 0), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
472
473 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
474 * reload them at each use. This must be true if the shader is using
475 * derivatives and KILL, because KILL can leave the WQM and then a lazy
476 * input load isn't in the WQM anymore.
477 */
478 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
479 {
480 struct si_shader_selector *sel = ctx->shader->selector;
481
482 return sel->info.uses_derivatives &&
483 sel->info.uses_kill;
484 }
485
486 static LLVMValueRef
487 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
488 unsigned chan)
489 {
490 struct si_shader_context *ctx = si_shader_context(bld_base);
491
492 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
493 return ctx->outputs[index][chan];
494 }
495
/**
 * Fetch one channel of a TGSI source register as a value of the given type,
 * or all four channels as a vector when swizzle == ~0.
 */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	if (swizzle == ~0) {
		/* Fetch each channel separately and gather into a vector. */
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return lp_build_gather_values(&ctx->gallivm, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Combine two consecutive 32-bit immediates into one
			 * 64-bit constant. */
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Out-of-bounds reads return undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			/* Load both 32-bit halves and combine. */
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
594
/**
 * Fetch a system value from ctx->system_values, which was populated by
 * si_load_system_value during declaration handling.
 */
static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
				       const struct tgsi_full_src_register *reg,
				       enum tgsi_opcode_type type,
				       unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef cval = ctx->system_values[reg->Register.Index];

	if (tgsi_type_is_64bit(type)) {
		LLVMValueRef lo, hi;

		assert(swizzle == 0 || swizzle == 2);

		/* Combine two adjacent 32-bit elements into one 64-bit value. */
		lo = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
		hi = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");

		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						lo, hi);
	}

	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
		cval = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
	} else {
		/* Scalar system values only have channel X. */
		assert(swizzle == 0);
	}

	return bitcast(bld_base, type, cval);
}
627
/**
 * Handle a TGSI declaration: allocate storage (allocas) for the declared
 * registers and, for inputs and system values, load values up front where
 * required.
 */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		/* One i32 alloca per channel of each address register. */
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->addrs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[18] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = lp_build_alloca_undef(&ctx->gallivm,
					LLVMArrayType(ctx->f32,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the pointer table covering all declared
		 * temporaries. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* Scalar path: one f32 alloca per channel. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					lp_build_alloca_undef(&ctx->gallivm,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, "undef");
			}

			/* Point each written channel into the array alloca;
			 * unwritten channels share the dummy alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				/* Keep a per-register copy of the declaration
				 * so inputs can also be loaded lazily later. */
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			/* Skip outputs already allocated by an earlier
			 * (overlapping) declaration. */
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = lp_build_alloca_undef(
					&ctx->gallivm,
					ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_tgsi_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}
806
/**
 * Store the per-channel results in dst[4] to the destination register of
 * \p inst, honoring the writemask, saturation, indirect addressing, and
 * 64-bit destination types.
 */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		/* Split the vector result into scalar channels and recurse. */
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values occupy two channels; the odd channel is
		 * handled together with the preceding even one. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				/* Out-of-bounds temporaries are silently dropped. */
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two 32-bit
				 * halves and store them separately. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}
901
/* Convert the TGSI program counter to the line annotation used in the IR.
 * Subtract 1 so that the number shown is that of the corresponding opcode
 * in the TGSI dump, e.g. an if block gets the same suffix as the
 * instruction number of the corresponding TGSI IF.
 */
static int get_line(int pc)
{
	const int tgsi_line = pc - 1;
	return tgsi_line;
}
910
911 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
912 struct lp_build_tgsi_context *bld_base,
913 struct lp_build_emit_data *emit_data)
914 {
915 struct si_shader_context *ctx = si_shader_context(bld_base);
916 ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
917 }
918
919 static void brk_emit(const struct lp_build_tgsi_action *action,
920 struct lp_build_tgsi_context *bld_base,
921 struct lp_build_emit_data *emit_data)
922 {
923 struct si_shader_context *ctx = si_shader_context(bld_base);
924 ac_build_break(&ctx->ac);
925 }
926
927 static void cont_emit(const struct lp_build_tgsi_action *action,
928 struct lp_build_tgsi_context *bld_base,
929 struct lp_build_emit_data *emit_data)
930 {
931 struct si_shader_context *ctx = si_shader_context(bld_base);
932 ac_build_continue(&ctx->ac);
933 }
934
935 static void else_emit(const struct lp_build_tgsi_action *action,
936 struct lp_build_tgsi_context *bld_base,
937 struct lp_build_emit_data *emit_data)
938 {
939 struct si_shader_context *ctx = si_shader_context(bld_base);
940 ac_build_else(&ctx->ac, get_line(bld_base->pc));
941 }
942
943 static void endif_emit(const struct lp_build_tgsi_action *action,
944 struct lp_build_tgsi_context *bld_base,
945 struct lp_build_emit_data *emit_data)
946 {
947 struct si_shader_context *ctx = si_shader_context(bld_base);
948 ac_build_endif(&ctx->ac, get_line(bld_base->pc));
949 }
950
951 static void endloop_emit(const struct lp_build_tgsi_action *action,
952 struct lp_build_tgsi_context *bld_base,
953 struct lp_build_emit_data *emit_data)
954 {
955 struct si_shader_context *ctx = si_shader_context(bld_base);
956 ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
957 }
958
959 static void if_emit(const struct lp_build_tgsi_action *action,
960 struct lp_build_tgsi_context *bld_base,
961 struct lp_build_emit_data *emit_data)
962 {
963 struct si_shader_context *ctx = si_shader_context(bld_base);
964 ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
965 }
966
967 static void uif_emit(const struct lp_build_tgsi_action *action,
968 struct lp_build_tgsi_context *bld_base,
969 struct lp_build_emit_data *emit_data)
970 {
971 struct si_shader_context *ctx = si_shader_context(bld_base);
972 ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
973 }
974
975 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
976 const struct tgsi_full_immediate *imm)
977 {
978 unsigned i;
979 struct si_shader_context *ctx = si_shader_context(bld_base);
980
981 for (i = 0; i < 4; ++i) {
982 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
983 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
984 }
985
986 ctx->imms_num++;
987 }
988
/* Create the per-compilation LLVM state: LLVM context, module, IR builder,
 * the gallivm and ac helper contexts, the TGSI->LLVM translation callbacks,
 * and cached LLVM types/constants.
 *
 * \param ctx      context to initialize (zeroed here first)
 * \param sscreen  screen, used for debug flags and GPU info
 * \param tm       target machine, used to derive the module data layout
 */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  LLVMTargetMachineRef tm)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->tm = tm;

	ctx->gallivm.context = LLVMContextCreate();
	ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
						ctx->gallivm.context);
	LLVMSetTarget(ctx->gallivm.module, "amdgcn--");

	/* Copy the data layout from the target machine into the module; the
	 * temporary target-data object and string are freed right after. */
	LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
	char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
	LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
	LLVMDisposeTargetData(data_layout);
	LLVMDisposeMessage(data_layout_str);

	/* Pick the FP math mode; unsafe math is opt-in via a debug flag. */
	bool unsafe_fpmath = (sscreen->debug_flags & DBG(UNSAFE_MATH)) != 0;
	enum ac_float_mode float_mode =
		unsafe_fpmath ? AC_FLOAT_MODE_UNSAFE_FP_MATH :
				AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;

	ctx->gallivm.builder = ac_create_builder(ctx->gallivm.context,
						 float_mode);

	/* The ac context shares the module and builder with gallivm. */
	ac_llvm_context_init(&ctx->ac, ctx->gallivm.context,
			     sscreen->info.chip_class, sscreen->info.family);
	ctx->ac.module = ctx->gallivm.module;
	ctx->ac.builder = ctx->gallivm.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	/* 32-bit scalar float type, the base for the lp_build contexts. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	/* 64-bit variants of the same contexts. */
	type.width *= 2;
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	bld_base->soa = 1;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	/* Control-flow opcodes are handled in this file; ALU and memory
	 * opcodes are registered by the init functions called below. */
	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);
	si_shader_context_init_mem(ctx);

	/* Cache commonly used LLVM types and constants. */
	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
}
1077
1078 /* Set the context to a certain TGSI shader. Can be called repeatedly
1079 * to change the shader. */
1080 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1081 struct si_shader *shader)
1082 {
1083 const struct tgsi_shader_info *info = NULL;
1084 const struct tgsi_token *tokens = NULL;
1085
1086 if (shader && shader->selector) {
1087 info = &shader->selector->info;
1088 tokens = shader->selector->tokens;
1089 }
1090
1091 ctx->shader = shader;
1092 ctx->type = info ? info->processor : -1;
1093 ctx->bld_base.info = info;
1094
1095 /* Clean up the old contents. */
1096 FREE(ctx->temp_arrays);
1097 ctx->temp_arrays = NULL;
1098 FREE(ctx->temp_array_allocas);
1099 ctx->temp_array_allocas = NULL;
1100
1101 FREE(ctx->imms);
1102 ctx->imms = NULL;
1103 ctx->imms_num = 0;
1104
1105 FREE(ctx->temps);
1106 ctx->temps = NULL;
1107 ctx->temps_count = 0;
1108
1109 if (!info)
1110 return;
1111
1112 ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1113 ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1114
1115 ctx->num_samplers = util_last_bit(info->samplers_declared);
1116 ctx->num_images = util_last_bit(info->images_declared);
1117
1118 if (!tokens)
1119 return;
1120
1121 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1122 int size = info->array_max[TGSI_FILE_TEMPORARY];
1123
1124 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1125 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1126
1127 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1128 ctx->temp_arrays);
1129 }
1130 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1131 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1132 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1133 }
1134
1135 /* Re-set these to start with a clean slate. */
1136 ctx->bld_base.num_instructions = 0;
1137 ctx->bld_base.pc = 0;
1138 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1139
1140 ctx->bld_base.emit_store = si_llvm_emit_store;
1141 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1142 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1143 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1144 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1145 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1146 }
1147
1148 void si_llvm_create_func(struct si_shader_context *ctx,
1149 const char *name,
1150 LLVMTypeRef *return_types, unsigned num_return_elems,
1151 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1152 {
1153 LLVMTypeRef main_fn_type, ret_type;
1154 LLVMBasicBlockRef main_fn_body;
1155 enum si_llvm_calling_convention call_conv;
1156 unsigned real_shader_type;
1157
1158 if (num_return_elems)
1159 ret_type = LLVMStructTypeInContext(ctx->ac.context,
1160 return_types,
1161 num_return_elems, true);
1162 else
1163 ret_type = ctx->voidt;
1164
1165 /* Setup the function */
1166 ctx->return_type = ret_type;
1167 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1168 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1169 main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
1170 ctx->main_fn, "main_body");
1171 LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);
1172
1173 real_shader_type = ctx->type;
1174
1175 /* LS is merged into HS (TCS), and ES is merged into GS. */
1176 if (ctx->screen->info.chip_class >= GFX9) {
1177 if (ctx->shader->key.as_ls)
1178 real_shader_type = PIPE_SHADER_TESS_CTRL;
1179 else if (ctx->shader->key.as_es)
1180 real_shader_type = PIPE_SHADER_GEOMETRY;
1181 }
1182
1183 switch (real_shader_type) {
1184 case PIPE_SHADER_VERTEX:
1185 case PIPE_SHADER_TESS_EVAL:
1186 call_conv = RADEON_LLVM_AMDGPU_VS;
1187 break;
1188 case PIPE_SHADER_TESS_CTRL:
1189 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1190 RADEON_LLVM_AMDGPU_VS;
1191 break;
1192 case PIPE_SHADER_GEOMETRY:
1193 call_conv = RADEON_LLVM_AMDGPU_GS;
1194 break;
1195 case PIPE_SHADER_FRAGMENT:
1196 call_conv = RADEON_LLVM_AMDGPU_PS;
1197 break;
1198 case PIPE_SHADER_COMPUTE:
1199 call_conv = RADEON_LLVM_AMDGPU_CS;
1200 break;
1201 default:
1202 unreachable("Unhandle shader type");
1203 }
1204
1205 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1206 }
1207
1208 void si_llvm_optimize_module(struct si_shader_context *ctx)
1209 {
1210 struct gallivm_state *gallivm = &ctx->gallivm;
1211 const char *triple = LLVMGetTarget(gallivm->module);
1212 LLVMTargetLibraryInfoRef target_library_info;
1213
1214 /* Dump LLVM IR before any optimization passes */
1215 if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
1216 si_can_dump_shader(ctx->screen, ctx->type))
1217 LLVMDumpModule(ctx->gallivm.module);
1218
1219 /* Create the pass manager */
1220 gallivm->passmgr = LLVMCreatePassManager();
1221
1222 target_library_info = gallivm_create_target_library_info(triple);
1223 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1224
1225 if (si_extra_shader_checks(ctx->screen, ctx->type))
1226 LLVMAddVerifierPass(gallivm->passmgr);
1227
1228 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1229
1230 /* This pass should eliminate all the load and store instructions */
1231 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1232
1233 /* Add some optimization passes */
1234 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1235 LLVMAddLICMPass(gallivm->passmgr);
1236 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1237 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1238 /* This is recommended by the instruction combining pass. */
1239 LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
1240 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1241
1242 /* Run the pass */
1243 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1244
1245 LLVMDisposeBuilder(ctx->ac.builder);
1246 LLVMDisposePassManager(gallivm->passmgr);
1247 gallivm_dispose_target_library_info(target_library_info);
1248 }
1249
1250 void si_llvm_dispose(struct si_shader_context *ctx)
1251 {
1252 LLVMDisposeModule(ctx->gallivm.module);
1253 LLVMContextDispose(ctx->gallivm.context);
1254 FREE(ctx->temp_arrays);
1255 ctx->temp_arrays = NULL;
1256 FREE(ctx->temp_array_allocas);
1257 ctx->temp_array_allocas = NULL;
1258 FREE(ctx->temps);
1259 ctx->temps = NULL;
1260 ctx->temps_count = 0;
1261 FREE(ctx->imms);
1262 ctx->imms = NULL;
1263 ctx->imms_num = 0;
1264 ac_llvm_context_dispose(&ctx->ac);
1265 }