radeonsi: add always-inline pass to si_llvm_finalize_module
[mesa.git] src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "radeon/radeon_elf_util.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
45 */
46 struct si_llvm_flow {
47 /* Loop exit or next part of if/else/endif. */
48 LLVMBasicBlockRef next_block;
49 LLVMBasicBlockRef loop_entry_block;
50 };
51
52 #define CPU_STRING_LEN 30
53 #define FS_STRING_LEN 30
54 #define TRIPLE_STRING_LEN 7
55
56 /**
57 * Shader types for the LLVM backend.
58 */
59 enum si_llvm_shader_type {
60 RADEON_LLVM_SHADER_PS = 0,
61 RADEON_LLVM_SHADER_VS = 1,
62 RADEON_LLVM_SHADER_GS = 2,
63 RADEON_LLVM_SHADER_CS = 3,
64 };
65
66 enum si_llvm_calling_convention {
67 RADEON_LLVM_AMDGPU_VS = 87,
68 RADEON_LLVM_AMDGPU_GS = 88,
69 RADEON_LLVM_AMDGPU_PS = 89,
70 RADEON_LLVM_AMDGPU_CS = 90,
71 };
72
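/* Attach a target-dependent function attribute whose value is the given
 * integer formatted as a string, e.g. the "ShaderType" attribute below.
 */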
73 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
74 {
75 char str[16];
76
77 snprintf(str, sizeof(str), "%i", value);
78 LLVMAddTargetDependentFunctionAttr(F, name, str);
79 }
80
81 /**
82 * Set the shader type we want to compile
83 *
84 * @param type the PIPE_SHADER_* shader type to set
85 */
86 void si_llvm_shader_type(LLVMValueRef F, unsigned type)
87 {
88 enum si_llvm_shader_type llvm_type;
89 enum si_llvm_calling_convention calling_conv;
90
91 switch (type) {
92 case PIPE_SHADER_VERTEX:
93 case PIPE_SHADER_TESS_CTRL:
94 case PIPE_SHADER_TESS_EVAL:
95 llvm_type = RADEON_LLVM_SHADER_VS;
96 calling_conv = RADEON_LLVM_AMDGPU_VS;
97 break;
98 case PIPE_SHADER_GEOMETRY:
99 llvm_type = RADEON_LLVM_SHADER_GS;
100 calling_conv = RADEON_LLVM_AMDGPU_GS;
101 break;
102 case PIPE_SHADER_FRAGMENT:
103 llvm_type = RADEON_LLVM_SHADER_PS;
104 calling_conv = RADEON_LLVM_AMDGPU_PS;
105 break;
106 case PIPE_SHADER_COMPUTE:
107 llvm_type = RADEON_LLVM_SHADER_CS;
108 calling_conv = RADEON_LLVM_AMDGPU_CS;
109 break;
110 default:
111 unreachable("Unhandled shader type");
112 }
113
114 if (HAVE_LLVM >= 0x309)
115 LLVMSetFunctionCallConv(F, calling_conv);
116 else
117 si_llvm_add_attribute(F, "ShaderType", llvm_type);
118 }
119
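/* One-time initialization of the LLVM target used for GPU code generation
 * (the R600 backend on LLVM older than 3.7, AMDGPU otherwise).
 */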
120 static void init_amdgpu_target()
121 {
122 gallivm_init_llvm_targets();
123 #if HAVE_LLVM < 0x0307
124 LLVMInitializeR600TargetInfo();
125 LLVMInitializeR600Target();
126 LLVMInitializeR600TargetMC();
127 LLVMInitializeR600AsmPrinter();
128 #else
129 LLVMInitializeAMDGPUTargetInfo();
130 LLVMInitializeAMDGPUTarget();
131 LLVMInitializeAMDGPUTargetMC();
132 LLVMInitializeAMDGPUAsmPrinter();
133
134 #endif
135 }
136
137 static once_flag init_amdgpu_target_once_flag = ONCE_FLAG_INIT;
138
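/* Look up the LLVM target for the given triple, initializing the GPU target
 * on first use. Returns NULL (and prints to stderr) if the lookup fails.
 */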
139 LLVMTargetRef si_llvm_get_amdgpu_target(const char *triple)
140 {
141 LLVMTargetRef target = NULL;
142 char *err_message = NULL;
143
144 call_once(&init_amdgpu_target_once_flag, init_amdgpu_target);
145
146 if (LLVMGetTargetFromTriple(triple, &target, &err_message)) {
147 fprintf(stderr, "Cannot find target for triple %s ", triple);
148 if (err_message) {
149 fprintf(stderr, "%s\n", err_message);
150 }
151 LLVMDisposeMessage(err_message);
152 return NULL;
153 }
154 return target;
155 }
156
157 struct si_llvm_diagnostics {
158 struct pipe_debug_callback *debug;
159 unsigned retval;
160 };
161
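/* LLVM diagnostic callback: forward the message to the pipe debug callback
 * and mark the compilation as failed if the severity is an error.
 */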
162 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
163 {
164 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
165 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
166 char *description = LLVMGetDiagInfoDescription(di);
167 const char *severity_str = NULL;
168
169 switch (severity) {
170 case LLVMDSError:
171 severity_str = "error";
172 break;
173 case LLVMDSWarning:
174 severity_str = "warning";
175 break;
176 case LLVMDSRemark:
177 severity_str = "remark";
178 break;
179 case LLVMDSNote:
180 severity_str = "note";
181 break;
182 default:
183 severity_str = "unknown";
184 }
185
186 pipe_debug_message(diag->debug, SHADER_INFO,
187 "LLVM diagnostic (%s): %s", severity_str, description);
188
189 if (severity == LLVMDSError) {
190 diag->retval = 1;
191 fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
192 }
193
194 LLVMDisposeMessage(description);
195 }
196
197 /**
198 * Compile an LLVM module to machine code.
199 *
200 * @returns 0 for success, 1 for failure
201 */
202 unsigned si_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binary,
203 LLVMTargetMachineRef tm,
204 struct pipe_debug_callback *debug)
205 {
206 struct si_llvm_diagnostics diag;
207 char *err;
208 LLVMContextRef llvm_ctx;
209 LLVMMemoryBufferRef out_buffer;
210 unsigned buffer_size;
211 const char *buffer_data;
212 LLVMBool mem_err;
213
214 diag.debug = debug;
215 diag.retval = 0;
216
217 /* Set up the diagnostic handler */
218 llvm_ctx = LLVMGetModuleContext(M);
219
220 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
221
222 /* Compile the IR */
223 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
224 &out_buffer);
225
226 /* Process Errors/Warnings */
227 if (mem_err) {
228 fprintf(stderr, "%s: %s", __FUNCTION__, err);
229 pipe_debug_message(debug, SHADER_INFO,
230 "LLVM emit error: %s", err);
231 FREE(err);
232 diag.retval = 1;
233 goto out;
234 }
235
236 /* Extract the shader code */
237 buffer_size = LLVMGetBufferSize(out_buffer);
238 buffer_data = LLVMGetBufferStart(out_buffer);
239
240 radeon_elf_read(buffer_data, buffer_size, binary);
241
242 /* Clean up */
243 LLVMDisposeMemoryBuffer(out_buffer);
244
245 out:
246 if (diag.retval != 0)
247 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
248 return diag.retval;
249 }
250
251 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
252 enum tgsi_opcode_type type)
253 {
254 LLVMContextRef ctx = bld_base->base.gallivm->context;
255
256 switch (type) {
257 case TGSI_TYPE_UNSIGNED:
258 case TGSI_TYPE_SIGNED:
259 return LLVMInt32TypeInContext(ctx);
260 case TGSI_TYPE_UNSIGNED64:
261 case TGSI_TYPE_SIGNED64:
262 return LLVMInt64TypeInContext(ctx);
263 case TGSI_TYPE_DOUBLE:
264 return LLVMDoubleTypeInContext(ctx);
265 case TGSI_TYPE_UNTYPED:
266 case TGSI_TYPE_FLOAT:
267 return LLVMFloatTypeInContext(ctx);
268 default: break;
269 }
270 return 0;
271 }
272
273 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
274 enum tgsi_opcode_type type, LLVMValueRef value)
275 {
276 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
277 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
278
279 if (dst_type)
280 return LLVMBuildBitCast(builder, value, dst_type, "");
281 else
282 return value;
283 }
284
285 /**
286 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
287 * or an undefined value in the same interval otherwise.
288 */
289 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
290 LLVMValueRef index,
291 unsigned num)
292 {
293 struct gallivm_state *gallivm = &ctx->gallivm;
294 LLVMBuilderRef builder = gallivm->builder;
295 LLVMValueRef c_max = lp_build_const_int32(gallivm, num - 1);
296 LLVMValueRef cc;
297
298 if (util_is_power_of_two(num)) {
299 index = LLVMBuildAnd(builder, index, c_max, "");
300 } else {
301 /* In theory, this MAX pattern should result in code that is
302 * as good as the bit-wise AND above.
303 *
304 * In practice, LLVM generates worse code (at the time of
305 * writing), because its value tracking is not strong enough.
306 */
307 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
308 index = LLVMBuildSelect(builder, cc, index, c_max, "");
309 }
310
311 return index;
312 }
313
314 static struct si_llvm_flow *
315 get_current_flow(struct si_shader_context *ctx)
316 {
317 if (ctx->flow_depth > 0)
318 return &ctx->flow[ctx->flow_depth - 1];
319 return NULL;
320 }
321
322 static struct si_llvm_flow *
323 get_innermost_loop(struct si_shader_context *ctx)
324 {
325 for (unsigned i = ctx->flow_depth; i > 0; --i) {
326 if (ctx->flow[i - 1].loop_entry_block)
327 return &ctx->flow[i - 1];
328 }
329 return NULL;
330 }
331
332 static struct si_llvm_flow *
333 push_flow(struct si_shader_context *ctx)
334 {
335 struct si_llvm_flow *flow;
336
337 if (ctx->flow_depth >= ctx->flow_depth_max) {
338 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
339 ctx->flow = REALLOC(ctx->flow,
340 ctx->flow_depth_max * sizeof(*ctx->flow),
341 new_max * sizeof(*ctx->flow));
342 ctx->flow_depth_max = new_max;
343 }
344
345 flow = &ctx->flow[ctx->flow_depth];
346 ctx->flow_depth++;
347
348 flow->next_block = NULL;
349 flow->loop_entry_block = NULL;
350 return flow;
351 }
352
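/* Apply a 4-component swizzle to a vector value using a shufflevector. */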
353 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
354 LLVMValueRef value,
355 unsigned swizzle_x,
356 unsigned swizzle_y,
357 unsigned swizzle_z,
358 unsigned swizzle_w)
359 {
360 LLVMValueRef swizzles[4];
361 LLVMTypeRef i32t =
362 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
363
364 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
365 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
366 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
367 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
368
369 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
370 value,
371 LLVMGetUndef(LLVMTypeOf(value)),
372 LLVMConstVector(swizzles, 4), "");
373 }
374
375 /**
376 * Return the 1-based id of the array covering the given temporary register
377 * index, or 0 if the register is not part of any array.
378 */
379 static unsigned
380 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
381 unsigned reg_index,
382 const struct tgsi_ind_register *reg)
383 {
384 struct si_shader_context *ctx = si_shader_context(bld_base);
385 unsigned num_arrays = ctx->soa.bld_base.info->array_max[TGSI_FILE_TEMPORARY];
386 unsigned i;
387
388 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
389 return reg->ArrayID;
390
391 for (i = 0; i < num_arrays; i++) {
392 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
393
394 if (reg_index >= array->range.First && reg_index <= array->range.Last)
395 return i + 1;
396 }
397
398 return 0;
399 }
400
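/* Return the register range of the array covering the given register, or
 * the full range of the register file if no declared array covers it.
 */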
401 static struct tgsi_declaration_range
402 get_array_range(struct lp_build_tgsi_context *bld_base,
403 unsigned File, unsigned reg_index,
404 const struct tgsi_ind_register *reg)
405 {
406 struct si_shader_context *ctx = si_shader_context(bld_base);
407 struct tgsi_declaration_range range;
408
409 if (File == TGSI_FILE_TEMPORARY) {
410 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
411 if (array_id)
412 return ctx->temp_arrays[array_id - 1].range;
413 }
414
415 range.First = 0;
416 range.Last = bld_base->info->file_max[File];
417 return range;
418 }
419
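/* Build the i32 index used for indirect addressing: the value of the
 * address register (if any) plus a constant offset.
 */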
420 static LLVMValueRef
421 emit_array_index(struct lp_build_tgsi_soa_context *bld,
422 const struct tgsi_ind_register *reg,
423 unsigned offset)
424 {
425 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
426
427 if (!reg) {
428 return lp_build_const_int32(gallivm, offset);
429 }
430 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, bld->addr[reg->Index][reg->Swizzle], "");
431 return LLVMBuildAdd(gallivm->builder, addr, lp_build_const_int32(gallivm, offset), "");
432 }
433
434 /**
435 * For indirect registers, construct a pointer directly to the requested
436 * element using getelementptr if possible.
437 *
438 * Returns NULL if the insertelement/extractelement fallback for array access
439 * must be used.
440 */
441 static LLVMValueRef
442 get_pointer_into_array(struct si_shader_context *ctx,
443 unsigned file,
444 unsigned swizzle,
445 unsigned reg_index,
446 const struct tgsi_ind_register *reg_indirect)
447 {
448 unsigned array_id;
449 struct tgsi_array_info *array;
450 struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
451 LLVMBuilderRef builder = gallivm->builder;
452 LLVMValueRef idxs[2];
453 LLVMValueRef index;
454 LLVMValueRef alloca;
455
456 if (file != TGSI_FILE_TEMPORARY)
457 return NULL;
458
459 array_id = get_temp_array_id(&ctx->soa.bld_base, reg_index, reg_indirect);
460 if (!array_id)
461 return NULL;
462
463 alloca = ctx->temp_array_allocas[array_id - 1];
464 if (!alloca)
465 return NULL;
466
467 array = &ctx->temp_arrays[array_id - 1];
468
469 if (!(array->writemask & (1 << swizzle)))
470 return ctx->undef_alloca;
471
472 index = emit_array_index(&ctx->soa, reg_indirect,
473 reg_index - ctx->temp_arrays[array_id - 1].range.First);
474
475 /* Ensure that the index is within a valid range, to guard against
476 * VM faults and overwriting critical data (e.g. spilled resource
477 * descriptors).
478 *
479 * TODO It should be possible to avoid the additional instructions
480 * if LLVM is changed so that it guarantees:
481 * 1. the scratch space descriptor isolates the current wave (this
482 * could even save the scratch offset SGPR at the cost of an
483 * additional SALU instruction)
484 * 2. the memory for allocas must be allocated at the _end_ of the
485 * scratch space (after spilled registers)
486 */
487 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
488
489 index = LLVMBuildMul(
490 builder, index,
491 lp_build_const_int32(gallivm, util_bitcount(array->writemask)),
492 "");
493 index = LLVMBuildAdd(
494 builder, index,
495 lp_build_const_int32(
496 gallivm,
497 util_bitcount(array->writemask & ((1 << swizzle) - 1))),
498 "");
499 idxs[0] = ctx->soa.bld_base.uint_bld.zero;
500 idxs[1] = index;
501 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
502 }
503
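/* Build a 64-bit value of the given type from the two 32-bit halves
 * ptr and ptr2 by packing them into an i32 vector and bitcasting.
 */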
504 LLVMValueRef
505 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
506 enum tgsi_opcode_type type,
507 LLVMValueRef ptr,
508 LLVMValueRef ptr2)
509 {
510 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
511 LLVMValueRef result;
512
513 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
514
515 result = LLVMBuildInsertElement(builder,
516 result,
517 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
518 bld_base->int_bld.zero, "");
519 result = LLVMBuildInsertElement(builder,
520 result,
521 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
522 bld_base->int_bld.one, "");
523 return bitcast(bld_base, type, result);
524 }
525
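/* Gather all registers in the given range into one vector, so that an
 * indirect read can be done with a single extractelement.
 */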
526 static LLVMValueRef
527 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
528 unsigned File, enum tgsi_opcode_type type,
529 struct tgsi_declaration_range range,
530 unsigned swizzle)
531 {
532 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
533 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
534 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
535
536 unsigned i, size = range.Last - range.First + 1;
537 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
538 LLVMValueRef result = LLVMGetUndef(vec);
539
540 struct tgsi_full_src_register tmp_reg = {};
541 tmp_reg.Register.File = File;
542
543 for (i = 0; i < size; ++i) {
544 tmp_reg.Register.Index = i + range.First;
545 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
546 result = LLVMBuildInsertElement(builder, result, temp,
547 lp_build_const_int32(gallivm, i), "array_vector");
548 }
549 return result;
550 }
551
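/* Read one channel of an indirectly addressed register: either load
 * directly through a pointer into the backing alloca, or fall back to
 * fetching the whole range as a vector and extracting the element.
 */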
552 static LLVMValueRef
553 load_value_from_array(struct lp_build_tgsi_context *bld_base,
554 unsigned file,
555 enum tgsi_opcode_type type,
556 unsigned swizzle,
557 unsigned reg_index,
558 const struct tgsi_ind_register *reg_indirect)
559 {
560 struct si_shader_context *ctx = si_shader_context(bld_base);
561 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
562 struct gallivm_state *gallivm = bld_base->base.gallivm;
563 LLVMBuilderRef builder = gallivm->builder;
564 LLVMValueRef ptr;
565
566 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
567 if (ptr) {
568 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
569 if (tgsi_type_is_64bit(type)) {
570 LLVMValueRef ptr_hi, val_hi;
571 ptr_hi = LLVMBuildGEP(builder, ptr, &bld_base->uint_bld.one, 1, "");
572 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
573 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
574 }
575
576 return val;
577 } else {
578 struct tgsi_declaration_range range =
579 get_array_range(bld_base, file, reg_index, reg_indirect);
580 LLVMValueRef index =
581 emit_array_index(bld, reg_indirect, reg_index - range.First);
582 LLVMValueRef array =
583 emit_array_fetch(bld_base, file, type, range, swizzle);
584 return LLVMBuildExtractElement(builder, array, index, "");
585 }
586 }
587
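/* Write one channel of an indirectly addressed register: either store
 * directly through a pointer into the backing alloca, or fall back to
 * rebuilding the whole range as a vector and storing every element back.
 */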
588 static void
589 store_value_to_array(struct lp_build_tgsi_context *bld_base,
590 LLVMValueRef value,
591 unsigned file,
592 unsigned chan_index,
593 unsigned reg_index,
594 const struct tgsi_ind_register *reg_indirect)
595 {
596 struct si_shader_context *ctx = si_shader_context(bld_base);
597 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
598 struct gallivm_state *gallivm = bld_base->base.gallivm;
599 LLVMBuilderRef builder = gallivm->builder;
600 LLVMValueRef ptr;
601
602 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
603 if (ptr) {
604 LLVMBuildStore(builder, value, ptr);
605 } else {
606 unsigned i, size;
607 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
608 LLVMValueRef index = emit_array_index(bld, reg_indirect, reg_index - range.First);
609 LLVMValueRef array =
610 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
611 LLVMValueRef temp_ptr;
612
613 array = LLVMBuildInsertElement(builder, array, value, index, "");
614
615 size = range.Last - range.First + 1;
616 for (i = 0; i < size; ++i) {
617 switch(file) {
618 case TGSI_FILE_OUTPUT:
619 temp_ptr = bld->outputs[i + range.First][chan_index];
620 break;
621
622 case TGSI_FILE_TEMPORARY:
623 if (range.First + i >= ctx->temps_count)
624 continue;
625 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
626 break;
627
628 default:
629 continue;
630 }
631 value = LLVMBuildExtractElement(builder, array,
632 lp_build_const_int32(gallivm, i), "");
633 LLVMBuildStore(builder, value, temp_ptr);
634 }
635 }
636 }
637
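/* Generic TGSI source operand fetch. Handles full-vector fetches
 * (swizzle == ~0), indirect addressing, and the immediate, input,
 * temporary and output register files, including 64-bit types.
 */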
638 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
639 const struct tgsi_full_src_register *reg,
640 enum tgsi_opcode_type type,
641 unsigned swizzle)
642 {
643 struct si_shader_context *ctx = si_shader_context(bld_base);
644 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
645 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
646 LLVMValueRef result = NULL, ptr, ptr2;
647
648 if (swizzle == ~0) {
649 LLVMValueRef values[TGSI_NUM_CHANNELS];
650 unsigned chan;
651 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
652 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
653 }
654 return lp_build_gather_values(bld_base->base.gallivm, values,
655 TGSI_NUM_CHANNELS);
656 }
657
658 if (reg->Register.Indirect) {
659 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
660 swizzle, reg->Register.Index, &reg->Indirect);
661 return bitcast(bld_base, type, load);
662 }
663
664 switch(reg->Register.File) {
665 case TGSI_FILE_IMMEDIATE: {
666 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
667 if (tgsi_type_is_64bit(type)) {
668 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
669 result = LLVMConstInsertElement(result,
670 bld->immediates[reg->Register.Index][swizzle],
671 bld_base->int_bld.zero);
672 result = LLVMConstInsertElement(result,
673 bld->immediates[reg->Register.Index][swizzle + 1],
674 bld_base->int_bld.one);
675 return LLVMConstBitCast(result, ctype);
676 } else {
677 return LLVMConstBitCast(bld->immediates[reg->Register.Index][swizzle], ctype);
678 }
679 }
680
681 case TGSI_FILE_INPUT: {
682 unsigned index = reg->Register.Index;
683 LLVMValueRef input[4];
684
685 /* I don't think doing this for vertex shaders is beneficial.
686 * For those, we want to make sure the VMEM loads are executed
687 * only once. Fragment shaders don't care much, because
688 * v_interp instructions are much cheaper than VMEM loads.
689 */
690 if (ctx->soa.bld_base.info->processor == PIPE_SHADER_FRAGMENT)
691 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
692 else
693 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
694
695 result = input[swizzle];
696
697 if (tgsi_type_is_64bit(type)) {
698 ptr = result;
699 ptr2 = input[swizzle + 1];
700 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
701 }
702 break;
703 }
704
705 case TGSI_FILE_TEMPORARY:
706 if (reg->Register.Index >= ctx->temps_count)
707 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
708 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
709 if (tgsi_type_is_64bit(type)) {
710 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
711 return si_llvm_emit_fetch_64bit(bld_base, type,
712 LLVMBuildLoad(builder, ptr, ""),
713 LLVMBuildLoad(builder, ptr2, ""));
714 }
715 result = LLVMBuildLoad(builder, ptr, "");
716 break;
717
718 case TGSI_FILE_OUTPUT:
719 ptr = lp_get_output_ptr(bld, reg->Register.Index, swizzle);
720 if (tgsi_type_is_64bit(type)) {
721 ptr2 = lp_get_output_ptr(bld, reg->Register.Index, swizzle + 1);
722 return si_llvm_emit_fetch_64bit(bld_base, type,
723 LLVMBuildLoad(builder, ptr, ""),
724 LLVMBuildLoad(builder, ptr2, ""));
725 }
726 result = LLVMBuildLoad(builder, ptr, "");
727 break;
728
729 default:
730 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
731 }
732
733 return bitcast(bld_base, type, result);
734 }
735
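/* Fetch a system value that was preloaded when its declaration was
 * processed, extracting the requested component if it is a vector.
 */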
736 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
737 const struct tgsi_full_src_register *reg,
738 enum tgsi_opcode_type type,
739 unsigned swizzle)
740 {
741 struct si_shader_context *ctx = si_shader_context(bld_base);
742 struct gallivm_state *gallivm = bld_base->base.gallivm;
743
744 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
745 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
746 cval = LLVMBuildExtractElement(gallivm->builder, cval,
747 lp_build_const_int32(gallivm, swizzle), "");
748 }
749 return bitcast(bld_base, type, cval);
750 }
751
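/* Allocate storage for TGSI declarations: address registers, temporaries
 * (optionally backed by a single array alloca), inputs, system values,
 * outputs and memory regions.
 */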
752 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
753 const struct tgsi_full_declaration *decl)
754 {
755 struct si_shader_context *ctx = si_shader_context(bld_base);
756 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
757 unsigned first, last, i;
758 switch(decl->Declaration.File) {
759 case TGSI_FILE_ADDRESS:
760 {
761 unsigned idx;
762 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
763 unsigned chan;
764 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
765 ctx->soa.addr[idx][chan] = lp_build_alloca_undef(
766 &ctx->gallivm,
767 ctx->soa.bld_base.uint_bld.elem_type, "");
768 }
769 }
770 break;
771 }
772
773 case TGSI_FILE_TEMPORARY:
774 {
775 char name[16] = "";
776 LLVMValueRef array_alloca = NULL;
777 unsigned decl_size;
778 unsigned writemask = decl->Declaration.UsageMask;
779 first = decl->Range.First;
780 last = decl->Range.Last;
781 decl_size = 4 * ((last - first) + 1);
782
783 if (decl->Declaration.Array) {
784 unsigned id = decl->Array.ArrayID - 1;
785 unsigned array_size;
786
787 writemask &= ctx->temp_arrays[id].writemask;
788 ctx->temp_arrays[id].writemask = writemask;
789 array_size = ((last - first) + 1) * util_bitcount(writemask);
790
791 /* If the array has more than 16 elements, store it
792 * in memory using an alloca that spans the entire
793 * array.
794 *
795 * Otherwise, store each array element individually.
796 * We will then generate vectors (per-channel, up to
797 * <16 x float> if the usagemask is a single bit) for
798 * indirect addressing.
799 *
800 * Note that 16 is the number of vector elements that
801 * LLVM will store in a register, so theoretically an
802 * array with up to 4 * 16 = 64 elements could be
803 * handled this way, but whether that's a good idea
804 * depends on VGPR register pressure elsewhere.
805 *
806 * FIXME: We shouldn't need to have the non-alloca
807 * code path for arrays. LLVM should be smart enough to
808 * promote allocas into registers when profitable.
809 *
810 * LLVM 3.8 crashes with this.
811 */
812 if (HAVE_LLVM >= 0x0309 && array_size > 16) {
813 array_alloca = LLVMBuildAlloca(builder,
814 LLVMArrayType(bld_base->base.vec_type,
815 array_size), "array");
816 ctx->temp_array_allocas[id] = array_alloca;
817 }
818 }
819
820 if (!ctx->temps_count) {
821 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
822 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
823 }
824 if (!array_alloca) {
825 for (i = 0; i < decl_size; ++i) {
826 #ifdef DEBUG
827 snprintf(name, sizeof(name), "TEMP%d.%c",
828 first + i / 4, "xyzw"[i % 4]);
829 #endif
830 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
831 lp_build_alloca_undef(bld_base->base.gallivm,
832 bld_base->base.vec_type,
833 name);
834 }
835 } else {
836 LLVMValueRef idxs[2] = {
837 bld_base->uint_bld.zero,
838 NULL
839 };
840 unsigned j = 0;
841
842 if (writemask != TGSI_WRITEMASK_XYZW &&
843 !ctx->undef_alloca) {
844 /* Create a dummy alloca. We use it so that we
845 * have a pointer that is safe to load from if
846 * a shader ever reads from a channel that
847 * it never writes to.
848 */
849 ctx->undef_alloca = lp_build_alloca_undef(
850 bld_base->base.gallivm,
851 bld_base->base.vec_type, "undef");
852 }
853
854 for (i = 0; i < decl_size; ++i) {
855 LLVMValueRef ptr;
856 if (writemask & (1 << (i % 4))) {
857 #ifdef DEBUG
858 snprintf(name, sizeof(name), "TEMP%d.%c",
859 first + i / 4, "xyzw"[i % 4]);
860 #endif
861 idxs[1] = lp_build_const_int32(bld_base->base.gallivm, j);
862 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
863 j++;
864 } else {
865 ptr = ctx->undef_alloca;
866 }
867 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
868 }
869 }
870 break;
871 }
872 case TGSI_FILE_INPUT:
873 {
874 unsigned idx;
875 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
876 if (ctx->load_input &&
877 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
878 ctx->input_decls[idx] = *decl;
879
880 if (bld_base->info->processor != PIPE_SHADER_FRAGMENT)
881 ctx->load_input(ctx, idx, decl,
882 &ctx->inputs[idx * 4]);
883 }
884 }
885 }
886 break;
887
888 case TGSI_FILE_SYSTEM_VALUE:
889 {
890 unsigned idx;
891 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
892 ctx->load_system_value(ctx, idx, decl);
893 }
894 }
895 break;
896
897 case TGSI_FILE_OUTPUT:
898 {
899 char name[16] = "";
900 unsigned idx;
901 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
902 unsigned chan;
903 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
904 if (ctx->soa.outputs[idx][0])
905 continue;
906 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
907 #ifdef DEBUG
908 snprintf(name, sizeof(name), "OUT%d.%c",
909 idx, "xyzw"[chan % 4]);
910 #endif
911 ctx->soa.outputs[idx][chan] = lp_build_alloca_undef(
912 &ctx->gallivm,
913 ctx->soa.bld_base.base.elem_type, name);
914 }
915 }
916 break;
917 }
918
919 case TGSI_FILE_MEMORY:
920 ctx->declare_memory_region(ctx, decl);
921 break;
922
923 default:
924 break;
925 }
926 }
927
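/* Clamp a value to [0, 1] by emitting the CLAMP opcode. */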
928 LLVMValueRef si_llvm_saturate(struct lp_build_tgsi_context *bld_base,
929 LLVMValueRef value)
930 {
931 struct lp_build_emit_data clamp_emit_data;
932
933 memset(&clamp_emit_data, 0, sizeof(clamp_emit_data));
934 clamp_emit_data.arg_count = 3;
935 clamp_emit_data.args[0] = value;
936 clamp_emit_data.args[2] = bld_base->base.one;
937 clamp_emit_data.args[1] = bld_base->base.zero;
938
939 return lp_build_emit_llvm(bld_base, TGSI_OPCODE_CLAMP,
940 &clamp_emit_data);
941 }
942
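/* Generic TGSI destination store. Handles saturation, address registers,
 * indirect addressing, and splitting 64-bit values into two 32-bit stores.
 */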
943 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
944 const struct tgsi_full_instruction *inst,
945 const struct tgsi_opcode_info *info,
946 LLVMValueRef dst[4])
947 {
948 struct si_shader_context *ctx = si_shader_context(bld_base);
949 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
950 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
951 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
952 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
953 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
954 unsigned chan, chan_index;
955 bool is_vec_store = false;
956 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
957
958 if (dst[0]) {
959 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
960 is_vec_store = (k == LLVMVectorTypeKind);
961 }
962
963 if (is_vec_store) {
964 LLVMValueRef values[4] = {};
965 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
966 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
967 values[chan] = LLVMBuildExtractElement(gallivm->builder,
968 dst[0], index, "");
969 }
970 bld_base->emit_store(bld_base, inst, info, values);
971 return;
972 }
973
974 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan_index) {
975 LLVMValueRef value = dst[chan_index];
976
977 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
978 continue;
979 if (inst->Instruction.Saturate)
980 value = si_llvm_saturate(bld_base, value);
981
982 if (reg->Register.File == TGSI_FILE_ADDRESS) {
983 temp_ptr = bld->addr[reg->Register.Index][chan_index];
984 LLVMBuildStore(builder, value, temp_ptr);
985 continue;
986 }
987
988 if (!tgsi_type_is_64bit(dtype))
989 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
990
991 if (reg->Register.Indirect) {
992 unsigned file = reg->Register.File;
993 unsigned reg_index = reg->Register.Index;
994 store_value_to_array(bld_base, value, file, chan_index,
995 reg_index, &reg->Indirect);
996 } else {
997 switch(reg->Register.File) {
998 case TGSI_FILE_OUTPUT:
999 temp_ptr = bld->outputs[reg->Register.Index][chan_index];
1000 if (tgsi_type_is_64bit(dtype))
1001 temp_ptr2 = bld->outputs[reg->Register.Index][chan_index + 1];
1002 break;
1003
1004 case TGSI_FILE_TEMPORARY:
1005 {
1006 if (reg->Register.Index >= ctx->temps_count)
1007 continue;
1008
1009 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
1010 if (tgsi_type_is_64bit(dtype))
1011 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
1012
1013 break;
1014 }
1015 default:
1016 return;
1017 }
1018 if (!tgsi_type_is_64bit(dtype))
1019 LLVMBuildStore(builder, value, temp_ptr);
1020 else {
1021 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
1022 LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), 2), "");
1023 LLVMValueRef val2;
1024 value = LLVMBuildExtractElement(builder, ptr,
1025 bld_base->uint_bld.zero, "");
1026 val2 = LLVMBuildExtractElement(builder, ptr,
1027 bld_base->uint_bld.one, "");
1028
1029 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
1030 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
1031 }
1032 }
1033 }
1034 }
1035
1036 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
1037 {
1038 char buf[32];
1039 /* Subtract 1 so that the number shown is that of the corresponding
1040 * opcode in the TGSI dump, e.g. an if block has the same suffix as
1041 * the instruction number of the corresponding TGSI IF.
1042 */
1043 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
1044 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
1045 }
1046
1047 /* Append a basic block at the level of the parent flow.
1048 */
1049 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
1050 const char *name)
1051 {
1052 struct gallivm_state *gallivm = &ctx->gallivm;
1053
1054 assert(ctx->flow_depth >= 1);
1055
1056 if (ctx->flow_depth >= 2) {
1057 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
1058
1059 return LLVMInsertBasicBlockInContext(gallivm->context,
1060 flow->next_block, name);
1061 }
1062
1063 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
1064 }
1065
1066 /* Emit a branch to the given default target for the current block if
1067 * applicable -- that is, if the current block does not already contain a
1068 * branch from a break or continue.
1069 */
1070 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1071 {
1072 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1073 LLVMBuildBr(builder, target);
1074 }
1075
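/* TGSI control-flow opcode handlers. Each one opens or closes basic blocks
 * using the si_llvm_flow stack maintained by push_flow/get_current_flow.
 */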
1076 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1077 struct lp_build_tgsi_context *bld_base,
1078 struct lp_build_emit_data *emit_data)
1079 {
1080 struct si_shader_context *ctx = si_shader_context(bld_base);
1081 struct gallivm_state *gallivm = bld_base->base.gallivm;
1082 struct si_llvm_flow *flow = push_flow(ctx);
1083 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1084 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1085 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1086 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1087 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1088 }
1089
1090 static void brk_emit(const struct lp_build_tgsi_action *action,
1091 struct lp_build_tgsi_context *bld_base,
1092 struct lp_build_emit_data *emit_data)
1093 {
1094 struct si_shader_context *ctx = si_shader_context(bld_base);
1095 struct gallivm_state *gallivm = bld_base->base.gallivm;
1096 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1097
1098 LLVMBuildBr(gallivm->builder, flow->next_block);
1099 }
1100
1101 static void cont_emit(const struct lp_build_tgsi_action *action,
1102 struct lp_build_tgsi_context *bld_base,
1103 struct lp_build_emit_data *emit_data)
1104 {
1105 struct si_shader_context *ctx = si_shader_context(bld_base);
1106 struct gallivm_state *gallivm = bld_base->base.gallivm;
1107 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1108
1109 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1110 }
1111
1112 static void else_emit(const struct lp_build_tgsi_action *action,
1113 struct lp_build_tgsi_context *bld_base,
1114 struct lp_build_emit_data *emit_data)
1115 {
1116 struct si_shader_context *ctx = si_shader_context(bld_base);
1117 struct gallivm_state *gallivm = bld_base->base.gallivm;
1118 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1119 LLVMBasicBlockRef endif_block;
1120
1121 assert(!current_branch->loop_entry_block);
1122
1123 endif_block = append_basic_block(ctx, "ENDIF");
1124 emit_default_branch(gallivm->builder, endif_block);
1125
1126 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1127 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1128
1129 current_branch->next_block = endif_block;
1130 }
1131
1132 static void endif_emit(const struct lp_build_tgsi_action *action,
1133 struct lp_build_tgsi_context *bld_base,
1134 struct lp_build_emit_data *emit_data)
1135 {
1136 struct si_shader_context *ctx = si_shader_context(bld_base);
1137 struct gallivm_state *gallivm = bld_base->base.gallivm;
1138 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1139
1140 assert(!current_branch->loop_entry_block);
1141
1142 emit_default_branch(gallivm->builder, current_branch->next_block);
1143 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1144 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1145
1146 ctx->flow_depth--;
1147 }
1148
1149 static void endloop_emit(const struct lp_build_tgsi_action *action,
1150 struct lp_build_tgsi_context *bld_base,
1151 struct lp_build_emit_data *emit_data)
1152 {
1153 struct si_shader_context *ctx = si_shader_context(bld_base);
1154 struct gallivm_state *gallivm = bld_base->base.gallivm;
1155 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1156
1157 assert(current_loop->loop_entry_block);
1158
1159 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1160
1161 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1162 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1163 ctx->flow_depth--;
1164 }
1165
1166 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1167 struct lp_build_tgsi_context *bld_base,
1168 struct lp_build_emit_data *emit_data,
1169 LLVMValueRef cond)
1170 {
1171 struct si_shader_context *ctx = si_shader_context(bld_base);
1172 struct gallivm_state *gallivm = bld_base->base.gallivm;
1173 struct si_llvm_flow *flow = push_flow(ctx);
1174 LLVMBasicBlockRef if_block;
1175
1176 if_block = append_basic_block(ctx, "IF");
1177 flow->next_block = append_basic_block(ctx, "ELSE");
1178 set_basicblock_name(if_block, "if", bld_base->pc);
1179 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1180 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1181 }
1182
1183 static void if_emit(const struct lp_build_tgsi_action *action,
1184 struct lp_build_tgsi_context *bld_base,
1185 struct lp_build_emit_data *emit_data)
1186 {
1187 struct gallivm_state *gallivm = bld_base->base.gallivm;
1188 LLVMValueRef cond;
1189
1190 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1191 emit_data->args[0],
1192 bld_base->base.zero, "");
1193
1194 if_cond_emit(action, bld_base, emit_data, cond);
1195 }
1196
1197 static void uif_emit(const struct lp_build_tgsi_action *action,
1198 struct lp_build_tgsi_context *bld_base,
1199 struct lp_build_emit_data *emit_data)
1200 {
1201 struct gallivm_state *gallivm = bld_base->base.gallivm;
1202 LLVMValueRef cond;
1203
1204 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1205 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1206 bld_base->int_bld.zero, "");
1207
1208 if_cond_emit(action, bld_base, emit_data, cond);
1209 }
1210
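/* Record the four 32-bit components of a TGSI immediate as LLVM constants. */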
1211 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1212 const struct tgsi_full_immediate *imm)
1213 {
1214 unsigned i;
1215 struct si_shader_context *ctx = si_shader_context(bld_base);
1216
1217 for (i = 0; i < 4; ++i) {
1218 ctx->soa.immediates[ctx->soa.num_immediates][i] =
1219 LLVMConstInt(bld_base->uint_bld.elem_type, imm->u[i].Uint, false );
1220 }
1221
1222 ctx->soa.num_immediates++;
1223 }
1224
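/* Create the per-shader LLVM context, module and builder, and set up the
 * common TGSI->LLVM state: build contexts for each type, fetch/store and
 * declaration callbacks, fpmath metadata and control-flow opcode actions.
 */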
1225 void si_llvm_context_init(struct si_shader_context *ctx, const char *triple,
1226 const struct tgsi_shader_info *info,
1227 const struct tgsi_token *tokens)
1228 {
1229 struct lp_type type;
1230
1231 /* Initialize the gallivm object:
1232 * We are only using the module, context, and builder fields of this struct.
1233 * This should be enough for us to be able to pass our gallivm struct to the
1234 * helper functions in the gallivm module.
1235 */
1236 memset(&ctx->gallivm, 0, sizeof (ctx->gallivm));
1237 memset(&ctx->soa, 0, sizeof(ctx->soa));
1238 ctx->gallivm.context = LLVMContextCreate();
1239 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1240 ctx->gallivm.context);
1241 LLVMSetTarget(ctx->gallivm.module, triple);
1242 ctx->gallivm.builder = LLVMCreateBuilderInContext(ctx->gallivm.context);
1243
1244 struct lp_build_tgsi_context *bld_base = &ctx->soa.bld_base;
1245
1246 bld_base->info = info;
1247
1248 if (info && info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1249 int size = info->array_max[TGSI_FILE_TEMPORARY];
1250
1251 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1252 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1253
1254 if (tokens)
1255 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1256 ctx->temp_arrays);
1257 }
1258
1259 type.floating = true;
1260 type.fixed = false;
1261 type.sign = true;
1262 type.norm = false;
1263 type.width = 32;
1264 type.length = 1;
1265
1266 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1267 lp_build_context_init(&ctx->soa.bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1268 lp_build_context_init(&ctx->soa.bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1269 type.width *= 2;
1270 lp_build_context_init(&ctx->soa.bld_base.dbl_bld, &ctx->gallivm, type);
1271 lp_build_context_init(&ctx->soa.bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1272 lp_build_context_init(&ctx->soa.bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1273
1274 bld_base->soa = 1;
1275 bld_base->emit_store = si_llvm_emit_store;
1276 bld_base->emit_swizzle = emit_swizzle;
1277 bld_base->emit_declaration = emit_declaration;
1278 bld_base->emit_immediate = emit_immediate;
1279
1280 bld_base->emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1281 bld_base->emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1282 bld_base->emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1283 bld_base->emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1284 bld_base->emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1285
1286 /* Create !fpmath metadata allowing 2.5 ULP of error on FP operations. */
1287 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1288 "fpmath", 6);
1289 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1290 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1291 &arg, 1);
1292
1293 /* Allocate outputs */
1294 ctx->soa.outputs = ctx->outputs;
1295
1296 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1297 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1298 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1299 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1300 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1301 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1302 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1303 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1304 }
1305
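/* Create the main shader function with the given parameter and return types
 * and position the builder at the start of its body.
 */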
1306 void si_llvm_create_func(struct si_shader_context *ctx,
1307 LLVMTypeRef *return_types, unsigned num_return_elems,
1308 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1309 {
1310 LLVMTypeRef main_fn_type, ret_type;
1311 LLVMBasicBlockRef main_fn_body;
1312
1313 if (num_return_elems)
1314 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1315 return_types,
1316 num_return_elems, true);
1317 else
1318 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1319
1320 /* Setup the function */
1321 ctx->return_type = ret_type;
1322 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1323 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, "main", main_fn_type);
1324 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1325 ctx->main_fn, "main_body");
1326 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1327 }
1328
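/* Run the IR-level passes (verifier if requested, always-inline, mem2reg
 * and a few cleanup/optimization passes) on the finished module, then
 * dispose of the builder and the pass manager.
 */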
1329 void si_llvm_finalize_module(struct si_shader_context *ctx,
1330 bool run_verifier)
1331 {
1332 struct gallivm_state *gallivm = ctx->soa.bld_base.base.gallivm;
1333 const char *triple = LLVMGetTarget(gallivm->module);
1334 LLVMTargetLibraryInfoRef target_library_info;
1335
1336 /* Create the pass manager */
1337 gallivm->passmgr = LLVMCreatePassManager();
1338
1339 target_library_info = gallivm_create_target_library_info(triple);
1340 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1341
1342 if (run_verifier)
1343 LLVMAddVerifierPass(gallivm->passmgr);
1344
1345 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1346
1347 /* This pass should eliminate all the load and store instructions */
1348 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1349
1350 /* Add some optimization passes */
1351 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1352 LLVMAddLICMPass(gallivm->passmgr);
1353 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1354 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1355 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1356
1357 /* Run the passes */
1358 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1359
1360 LLVMDisposeBuilder(gallivm->builder);
1361 LLVMDisposePassManager(gallivm->passmgr);
1362 gallivm_dispose_target_library_info(target_library_info);
1363 }
1364
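/* Free the LLVM module and context and the per-shader arrays owned by the
 * shader context.
 */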
1365 void si_llvm_dispose(struct si_shader_context *ctx)
1366 {
1367 LLVMDisposeModule(ctx->soa.bld_base.base.gallivm->module);
1368 LLVMContextDispose(ctx->soa.bld_base.base.gallivm->context);
1369 FREE(ctx->temp_arrays);
1370 ctx->temp_arrays = NULL;
1371 FREE(ctx->temp_array_allocas);
1372 ctx->temp_array_allocas = NULL;
1373 FREE(ctx->temps);
1374 ctx->temps = NULL;
1375 ctx->temps_count = 0;
1376 FREE(ctx->flow);
1377 ctx->flow = NULL;
1378 ctx->flow_depth_max = 0;
1379 }