radeonsi: ensure that temp array allocas are in the entry block
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 */
23
24 #include "si_shader_internal.h"
25 #include "si_pipe.h"
26
27 #include "gallivm/lp_bld_const.h"
28 #include "gallivm/lp_bld_gather.h"
29 #include "gallivm/lp_bld_flow.h"
30 #include "gallivm/lp_bld_init.h"
31 #include "gallivm/lp_bld_intr.h"
32 #include "gallivm/lp_bld_misc.h"
33 #include "gallivm/lp_bld_swizzle.h"
34 #include "tgsi/tgsi_info.h"
35 #include "tgsi/tgsi_parse.h"
36 #include "util/u_math.h"
37 #include "util/u_memory.h"
38 #include "util/u_debug.h"
39
40 #include <stdio.h>
41 #include <llvm-c/Transforms/IPO.h>
42 #include <llvm-c/Transforms/Scalar.h>
43
44 /* Data for if/else/endif and bgnloop/endloop control flow structures.
45 */
46 struct si_llvm_flow {
47 /* Loop exit or next part of if/else/endif. */
48 LLVMBasicBlockRef next_block;
49 LLVMBasicBlockRef loop_entry_block;
50 };
51
/* Numeric calling-convention IDs, one per shader stage.
 * NOTE(review): these presumably mirror LLVM's AMDGPU calling-convention
 * numbers -- confirm against the LLVM version in use before changing them.
 */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
59
60 void si_llvm_add_attribute(LLVMValueRef F, const char *name, int value)
61 {
62 char str[16];
63
64 snprintf(str, sizeof(str), "%i", value);
65 LLVMAddTargetDependentFunctionAttr(F, name, str);
66 }
67
/* State handed to si_diagnostic_handler through its context pointer. */
struct si_llvm_diagnostics {
	/* Receiver for the formatted diagnostic messages. */
	struct pipe_debug_callback *debug;
	/* Set to 1 by the handler once an error-severity diagnostic is seen. */
	unsigned retval;
};
72
73 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
74 {
75 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
76 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
77 char *description = LLVMGetDiagInfoDescription(di);
78 const char *severity_str = NULL;
79
80 switch (severity) {
81 case LLVMDSError:
82 severity_str = "error";
83 break;
84 case LLVMDSWarning:
85 severity_str = "warning";
86 break;
87 case LLVMDSRemark:
88 severity_str = "remark";
89 break;
90 case LLVMDSNote:
91 severity_str = "note";
92 break;
93 default:
94 severity_str = "unknown";
95 }
96
97 pipe_debug_message(diag->debug, SHADER_INFO,
98 "LLVM diagnostic (%s): %s", severity_str, description);
99
100 if (severity == LLVMDSError) {
101 diag->retval = 1;
102 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
103 }
104
105 LLVMDisposeMessage(description);
106 }
107
108 /**
109 * Compile an LLVM module to machine code.
110 *
111 * @returns 0 for success, 1 for failure
112 */
113 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
114 LLVMTargetMachineRef tm,
115 struct pipe_debug_callback *debug)
116 {
117 struct si_llvm_diagnostics diag;
118 char *err;
119 LLVMContextRef llvm_ctx;
120 LLVMMemoryBufferRef out_buffer;
121 unsigned buffer_size;
122 const char *buffer_data;
123 LLVMBool mem_err;
124
125 diag.debug = debug;
126 diag.retval = 0;
127
128 /* Setup Diagnostic Handler*/
129 llvm_ctx = LLVMGetModuleContext(M);
130
131 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
132
133 /* Compile IR*/
134 mem_err = LLVMTargetMachineEmitToMemoryBuffer(tm, M, LLVMObjectFile, &err,
135 &out_buffer);
136
137 /* Process Errors/Warnings */
138 if (mem_err) {
139 fprintf(stderr, "%s: %s", __FUNCTION__, err);
140 pipe_debug_message(debug, SHADER_INFO,
141 "LLVM emit error: %s", err);
142 FREE(err);
143 diag.retval = 1;
144 goto out;
145 }
146
147 /* Extract Shader Code*/
148 buffer_size = LLVMGetBufferSize(out_buffer);
149 buffer_data = LLVMGetBufferStart(out_buffer);
150
151 ac_elf_read(buffer_data, buffer_size, binary);
152
153 /* Clean up */
154 LLVMDisposeMemoryBuffer(out_buffer);
155
156 out:
157 if (diag.retval != 0)
158 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
159 return diag.retval;
160 }
161
162 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
163 enum tgsi_opcode_type type)
164 {
165 LLVMContextRef ctx = bld_base->base.gallivm->context;
166
167 switch (type) {
168 case TGSI_TYPE_UNSIGNED:
169 case TGSI_TYPE_SIGNED:
170 return LLVMInt32TypeInContext(ctx);
171 case TGSI_TYPE_UNSIGNED64:
172 case TGSI_TYPE_SIGNED64:
173 return LLVMInt64TypeInContext(ctx);
174 case TGSI_TYPE_DOUBLE:
175 return LLVMDoubleTypeInContext(ctx);
176 case TGSI_TYPE_UNTYPED:
177 case TGSI_TYPE_FLOAT:
178 return LLVMFloatTypeInContext(ctx);
179 default: break;
180 }
181 return 0;
182 }
183
184 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
185 enum tgsi_opcode_type type, LLVMValueRef value)
186 {
187 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
188 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
189
190 if (dst_type)
191 return LLVMBuildBitCast(builder, value, dst_type, "");
192 else
193 return value;
194 }
195
196 /**
197 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
198 * or an undefined value in the same interval otherwise.
199 */
200 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
201 LLVMValueRef index,
202 unsigned num)
203 {
204 struct gallivm_state *gallivm = &ctx->gallivm;
205 LLVMBuilderRef builder = gallivm->builder;
206 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
207 LLVMValueRef cc;
208
209 if (util_is_power_of_two(num)) {
210 index = LLVMBuildAnd(builder, index, c_max, "");
211 } else {
212 /* In theory, this MAX pattern should result in code that is
213 * as good as the bit-wise AND above.
214 *
215 * In practice, LLVM generates worse code (at the time of
216 * writing), because its value tracking is not strong enough.
217 */
218 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
219 index = LLVMBuildSelect(builder, cc, index, c_max, "");
220 }
221
222 return index;
223 }
224
225 static struct si_llvm_flow *
226 get_current_flow(struct si_shader_context *ctx)
227 {
228 if (ctx->flow_depth > 0)
229 return &ctx->flow[ctx->flow_depth - 1];
230 return NULL;
231 }
232
233 static struct si_llvm_flow *
234 get_innermost_loop(struct si_shader_context *ctx)
235 {
236 for (unsigned i = ctx->flow_depth; i > 0; --i) {
237 if (ctx->flow[i - 1].loop_entry_block)
238 return &ctx->flow[i - 1];
239 }
240 return NULL;
241 }
242
243 static struct si_llvm_flow *
244 push_flow(struct si_shader_context *ctx)
245 {
246 struct si_llvm_flow *flow;
247
248 if (ctx->flow_depth >= ctx->flow_depth_max) {
249 unsigned new_max = MAX2(ctx->flow_depth << 1, RADEON_LLVM_INITIAL_CF_DEPTH);
250 ctx->flow = REALLOC(ctx->flow,
251 ctx->flow_depth_max * sizeof(*ctx->flow),
252 new_max * sizeof(*ctx->flow));
253 ctx->flow_depth_max = new_max;
254 }
255
256 flow = &ctx->flow[ctx->flow_depth];
257 ctx->flow_depth++;
258
259 flow->next_block = NULL;
260 flow->loop_entry_block = NULL;
261 return flow;
262 }
263
264 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
265 LLVMValueRef value,
266 unsigned swizzle_x,
267 unsigned swizzle_y,
268 unsigned swizzle_z,
269 unsigned swizzle_w)
270 {
271 LLVMValueRef swizzles[4];
272 LLVMTypeRef i32t =
273 LLVMInt32TypeInContext(bld_base->base.gallivm->context);
274
275 swizzles[0] = LLVMConstInt(i32t, swizzle_x, 0);
276 swizzles[1] = LLVMConstInt(i32t, swizzle_y, 0);
277 swizzles[2] = LLVMConstInt(i32t, swizzle_z, 0);
278 swizzles[3] = LLVMConstInt(i32t, swizzle_w, 0);
279
280 return LLVMBuildShuffleVector(bld_base->base.gallivm->builder,
281 value,
282 LLVMGetUndef(LLVMTypeOf(value)),
283 LLVMConstVector(swizzles, 4), "");
284 }
285
286 /**
287 * Return the description of the array covering the given temporary register
288 * index.
289 */
290 static unsigned
291 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
292 unsigned reg_index,
293 const struct tgsi_ind_register *reg)
294 {
295 struct si_shader_context *ctx = si_shader_context(bld_base);
296 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
297 unsigned i;
298
299 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
300 return reg->ArrayID;
301
302 for (i = 0; i < num_arrays; i++) {
303 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
304
305 if (reg_index >= array->range.First && reg_index <= array->range.Last)
306 return i + 1;
307 }
308
309 return 0;
310 }
311
312 static struct tgsi_declaration_range
313 get_array_range(struct lp_build_tgsi_context *bld_base,
314 unsigned File, unsigned reg_index,
315 const struct tgsi_ind_register *reg)
316 {
317 struct si_shader_context *ctx = si_shader_context(bld_base);
318 struct tgsi_declaration_range range;
319
320 if (File == TGSI_FILE_TEMPORARY) {
321 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
322 if (array_id)
323 return ctx->temp_arrays[array_id - 1].range;
324 }
325
326 range.First = 0;
327 range.Last = bld_base->info->file_max[File];
328 return range;
329 }
330
331 static LLVMValueRef
332 emit_array_index(struct si_shader_context *ctx,
333 const struct tgsi_ind_register *reg,
334 unsigned offset)
335 {
336 struct gallivm_state *gallivm = &ctx->gallivm;
337
338 if (!reg) {
339 return LLVMConstInt(ctx->i32, offset, 0);
340 }
341 LLVMValueRef addr = LLVMBuildLoad(gallivm->builder, ctx->addrs[reg->Index][reg->Swizzle], "");
342 return LLVMBuildAdd(gallivm->builder, addr, LLVMConstInt(ctx->i32, offset, 0), "");
343 }
344
345 /**
346 * For indirect registers, construct a pointer directly to the requested
347 * element using getelementptr if possible.
348 *
349 * Returns NULL if the insertelement/extractelement fallback for array access
350 * must be used.
351 */
352 static LLVMValueRef
353 get_pointer_into_array(struct si_shader_context *ctx,
354 unsigned file,
355 unsigned swizzle,
356 unsigned reg_index,
357 const struct tgsi_ind_register *reg_indirect)
358 {
359 unsigned array_id;
360 struct tgsi_array_info *array;
361 struct gallivm_state *gallivm = &ctx->gallivm;
362 LLVMBuilderRef builder = gallivm->builder;
363 LLVMValueRef idxs[2];
364 LLVMValueRef index;
365 LLVMValueRef alloca;
366
367 if (file != TGSI_FILE_TEMPORARY)
368 return NULL;
369
370 array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
371 if (!array_id)
372 return NULL;
373
374 alloca = ctx->temp_array_allocas[array_id - 1];
375 if (!alloca)
376 return NULL;
377
378 array = &ctx->temp_arrays[array_id - 1];
379
380 if (!(array->writemask & (1 << swizzle)))
381 return ctx->undef_alloca;
382
383 index = emit_array_index(ctx, reg_indirect,
384 reg_index - ctx->temp_arrays[array_id - 1].range.First);
385
386 /* Ensure that the index is within a valid range, to guard against
387 * VM faults and overwriting critical data (e.g. spilled resource
388 * descriptors).
389 *
390 * TODO It should be possible to avoid the additional instructions
391 * if LLVM is changed so that it guarantuees:
392 * 1. the scratch space descriptor isolates the current wave (this
393 * could even save the scratch offset SGPR at the cost of an
394 * additional SALU instruction)
395 * 2. the memory for allocas must be allocated at the _end_ of the
396 * scratch space (after spilled registers)
397 */
398 index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);
399
400 index = LLVMBuildMul(
401 builder, index,
402 LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
403 "");
404 index = LLVMBuildAdd(
405 builder, index,
406 LLVMConstInt(ctx->i32,
407 util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
408 "");
409 idxs[0] = ctx->i32_0;
410 idxs[1] = index;
411 return LLVMBuildGEP(builder, alloca, idxs, 2, "");
412 }
413
414 LLVMValueRef
415 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
416 enum tgsi_opcode_type type,
417 LLVMValueRef ptr,
418 LLVMValueRef ptr2)
419 {
420 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
421 LLVMValueRef result;
422
423 result = LLVMGetUndef(LLVMVectorType(LLVMIntTypeInContext(bld_base->base.gallivm->context, 32), bld_base->base.type.length * 2));
424
425 result = LLVMBuildInsertElement(builder,
426 result,
427 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr),
428 bld_base->int_bld.zero, "");
429 result = LLVMBuildInsertElement(builder,
430 result,
431 bitcast(bld_base, TGSI_TYPE_UNSIGNED, ptr2),
432 bld_base->int_bld.one, "");
433 return bitcast(bld_base, type, result);
434 }
435
436 static LLVMValueRef
437 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
438 unsigned File, enum tgsi_opcode_type type,
439 struct tgsi_declaration_range range,
440 unsigned swizzle)
441 {
442 struct si_shader_context *ctx = si_shader_context(bld_base);
443
444 LLVMBuilderRef builder = ctx->gallivm.builder;
445
446 unsigned i, size = range.Last - range.First + 1;
447 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
448 LLVMValueRef result = LLVMGetUndef(vec);
449
450 struct tgsi_full_src_register tmp_reg = {};
451 tmp_reg.Register.File = File;
452
453 for (i = 0; i < size; ++i) {
454 tmp_reg.Register.Index = i + range.First;
455 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
456 result = LLVMBuildInsertElement(builder, result, temp,
457 LLVMConstInt(ctx->i32, i, 0), "array_vector");
458 }
459 return result;
460 }
461
462 static LLVMValueRef
463 load_value_from_array(struct lp_build_tgsi_context *bld_base,
464 unsigned file,
465 enum tgsi_opcode_type type,
466 unsigned swizzle,
467 unsigned reg_index,
468 const struct tgsi_ind_register *reg_indirect)
469 {
470 struct si_shader_context *ctx = si_shader_context(bld_base);
471 struct gallivm_state *gallivm = &ctx->gallivm;
472 LLVMBuilderRef builder = gallivm->builder;
473 LLVMValueRef ptr;
474
475 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
476 if (ptr) {
477 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
478 if (tgsi_type_is_64bit(type)) {
479 LLVMValueRef ptr_hi, val_hi;
480 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
481 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
482 val = si_llvm_emit_fetch_64bit(bld_base, type, val, val_hi);
483 }
484
485 return val;
486 } else {
487 struct tgsi_declaration_range range =
488 get_array_range(bld_base, file, reg_index, reg_indirect);
489 LLVMValueRef index =
490 emit_array_index(ctx, reg_indirect, reg_index - range.First);
491 LLVMValueRef array =
492 emit_array_fetch(bld_base, file, type, range, swizzle);
493 return LLVMBuildExtractElement(builder, array, index, "");
494 }
495 }
496
497 static void
498 store_value_to_array(struct lp_build_tgsi_context *bld_base,
499 LLVMValueRef value,
500 unsigned file,
501 unsigned chan_index,
502 unsigned reg_index,
503 const struct tgsi_ind_register *reg_indirect)
504 {
505 struct si_shader_context *ctx = si_shader_context(bld_base);
506 struct gallivm_state *gallivm = &ctx->gallivm;
507 LLVMBuilderRef builder = gallivm->builder;
508 LLVMValueRef ptr;
509
510 ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
511 if (ptr) {
512 LLVMBuildStore(builder, value, ptr);
513 } else {
514 unsigned i, size;
515 struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
516 LLVMValueRef index = emit_array_index(ctx, reg_indirect, reg_index - range.First);
517 LLVMValueRef array =
518 emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
519 LLVMValueRef temp_ptr;
520
521 array = LLVMBuildInsertElement(builder, array, value, index, "");
522
523 size = range.Last - range.First + 1;
524 for (i = 0; i < size; ++i) {
525 switch(file) {
526 case TGSI_FILE_OUTPUT:
527 temp_ptr = ctx->outputs[i + range.First][chan_index];
528 break;
529
530 case TGSI_FILE_TEMPORARY:
531 if (range.First + i >= ctx->temps_count)
532 continue;
533 temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
534 break;
535
536 default:
537 continue;
538 }
539 value = LLVMBuildExtractElement(builder, array,
540 LLVMConstInt(ctx->i32, i, 0), "");
541 LLVMBuildStore(builder, value, temp_ptr);
542 }
543 }
544 }
545
546 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
547 * reload them at each use. This must be true if the shader is using
548 * derivatives and KILL, because KILL can leave the WQM and then a lazy
549 * input load isn't in the WQM anymore.
550 */
551 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
552 {
553 struct si_shader_selector *sel = ctx->shader->selector;
554
555 return sel->info.uses_derivatives &&
556 sel->info.uses_kill;
557 }
558
559 static LLVMValueRef
560 get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
561 unsigned chan)
562 {
563 struct si_shader_context *ctx = si_shader_context(bld_base);
564
565 assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
566 return ctx->outputs[index][chan];
567 }
568
569 LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
570 const struct tgsi_full_src_register *reg,
571 enum tgsi_opcode_type type,
572 unsigned swizzle)
573 {
574 struct si_shader_context *ctx = si_shader_context(bld_base);
575 LLVMBuilderRef builder = ctx->gallivm.builder;
576 LLVMValueRef result = NULL, ptr, ptr2;
577
578 if (swizzle == ~0) {
579 LLVMValueRef values[TGSI_NUM_CHANNELS];
580 unsigned chan;
581 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
582 values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
583 }
584 return lp_build_gather_values(&ctx->gallivm, values,
585 TGSI_NUM_CHANNELS);
586 }
587
588 if (reg->Register.Indirect) {
589 LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
590 swizzle, reg->Register.Index, &reg->Indirect);
591 return bitcast(bld_base, type, load);
592 }
593
594 switch(reg->Register.File) {
595 case TGSI_FILE_IMMEDIATE: {
596 LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
597 if (tgsi_type_is_64bit(type)) {
598 result = LLVMGetUndef(LLVMVectorType(ctx->i32, bld_base->base.type.length * 2));
599 result = LLVMConstInsertElement(result,
600 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
601 ctx->i32_0);
602 result = LLVMConstInsertElement(result,
603 ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
604 ctx->i32_1);
605 return LLVMConstBitCast(result, ctype);
606 } else {
607 return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
608 }
609 }
610
611 case TGSI_FILE_INPUT: {
612 unsigned index = reg->Register.Index;
613 LLVMValueRef input[4];
614
615 /* I don't think doing this for vertex shaders is beneficial.
616 * For those, we want to make sure the VMEM loads are executed
617 * only once. Fragment shaders don't care much, because
618 * v_interp instructions are much cheaper than VMEM loads.
619 */
620 if (!si_preload_fs_inputs(ctx) &&
621 ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
622 ctx->load_input(ctx, index, &ctx->input_decls[index], input);
623 else
624 memcpy(input, &ctx->inputs[index * 4], sizeof(input));
625
626 result = input[swizzle];
627
628 if (tgsi_type_is_64bit(type)) {
629 ptr = result;
630 ptr2 = input[swizzle + 1];
631 return si_llvm_emit_fetch_64bit(bld_base, type, ptr, ptr2);
632 }
633 break;
634 }
635
636 case TGSI_FILE_TEMPORARY:
637 if (reg->Register.Index >= ctx->temps_count)
638 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
639 ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
640 if (tgsi_type_is_64bit(type)) {
641 ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
642 return si_llvm_emit_fetch_64bit(bld_base, type,
643 LLVMBuildLoad(builder, ptr, ""),
644 LLVMBuildLoad(builder, ptr2, ""));
645 }
646 result = LLVMBuildLoad(builder, ptr, "");
647 break;
648
649 case TGSI_FILE_OUTPUT:
650 ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
651 if (tgsi_type_is_64bit(type)) {
652 ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
653 return si_llvm_emit_fetch_64bit(bld_base, type,
654 LLVMBuildLoad(builder, ptr, ""),
655 LLVMBuildLoad(builder, ptr2, ""));
656 }
657 result = LLVMBuildLoad(builder, ptr, "");
658 break;
659
660 default:
661 return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
662 }
663
664 return bitcast(bld_base, type, result);
665 }
666
667 static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
668 const struct tgsi_full_src_register *reg,
669 enum tgsi_opcode_type type,
670 unsigned swizzle)
671 {
672 struct si_shader_context *ctx = si_shader_context(bld_base);
673 LLVMBuilderRef builder = ctx->gallivm.builder;
674 LLVMValueRef cval = ctx->system_values[reg->Register.Index];
675
676 if (tgsi_type_is_64bit(type)) {
677 LLVMValueRef lo, hi;
678
679 assert(swizzle == 0 || swizzle == 2);
680
681 lo = LLVMBuildExtractElement(
682 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
683 hi = LLVMBuildExtractElement(
684 builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");
685
686 return si_llvm_emit_fetch_64bit(bld_base, type, lo, hi);
687 }
688
689 if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
690 cval = LLVMBuildExtractElement(
691 builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
692 } else {
693 assert(swizzle == 0);
694 }
695
696 return bitcast(bld_base, type, cval);
697 }
698
699 static void emit_declaration(struct lp_build_tgsi_context *bld_base,
700 const struct tgsi_full_declaration *decl)
701 {
702 struct si_shader_context *ctx = si_shader_context(bld_base);
703 LLVMBuilderRef builder = ctx->gallivm.builder;
704 unsigned first, last, i;
705 switch(decl->Declaration.File) {
706 case TGSI_FILE_ADDRESS:
707 {
708 unsigned idx;
709 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
710 unsigned chan;
711 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
712 ctx->addrs[idx][chan] = lp_build_alloca_undef(
713 &ctx->gallivm,
714 ctx->i32, "");
715 }
716 }
717 break;
718 }
719
720 case TGSI_FILE_TEMPORARY:
721 {
722 char name[16] = "";
723 LLVMValueRef array_alloca = NULL;
724 unsigned decl_size;
725 unsigned writemask = decl->Declaration.UsageMask;
726 first = decl->Range.First;
727 last = decl->Range.Last;
728 decl_size = 4 * ((last - first) + 1);
729
730 if (decl->Declaration.Array) {
731 unsigned id = decl->Array.ArrayID - 1;
732 unsigned array_size;
733
734 writemask &= ctx->temp_arrays[id].writemask;
735 ctx->temp_arrays[id].writemask = writemask;
736 array_size = ((last - first) + 1) * util_bitcount(writemask);
737
738 /* If the array has more than 16 elements, store it
739 * in memory using an alloca that spans the entire
740 * array.
741 *
742 * Otherwise, store each array element individually.
743 * We will then generate vectors (per-channel, up to
744 * <16 x float> if the usagemask is a single bit) for
745 * indirect addressing.
746 *
747 * Note that 16 is the number of vector elements that
748 * LLVM will store in a register, so theoretically an
749 * array with up to 4 * 16 = 64 elements could be
750 * handled this way, but whether that's a good idea
751 * depends on VGPR register pressure elsewhere.
752 *
753 * FIXME: We shouldn't need to have the non-alloca
754 * code path for arrays. LLVM should be smart enough to
755 * promote allocas into registers when profitable.
756 */
757 if (array_size > 16 ||
758 !ctx->screen->llvm_has_working_vgpr_indexing) {
759 array_alloca = lp_build_alloca_undef(&ctx->gallivm,
760 LLVMArrayType(ctx->f32,
761 array_size), "array");
762 ctx->temp_array_allocas[id] = array_alloca;
763 }
764 }
765
766 if (!ctx->temps_count) {
767 ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
768 ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
769 }
770 if (!array_alloca) {
771 for (i = 0; i < decl_size; ++i) {
772 #ifdef DEBUG
773 snprintf(name, sizeof(name), "TEMP%d.%c",
774 first + i / 4, "xyzw"[i % 4]);
775 #endif
776 ctx->temps[first * TGSI_NUM_CHANNELS + i] =
777 lp_build_alloca_undef(&ctx->gallivm,
778 ctx->f32,
779 name);
780 }
781 } else {
782 LLVMValueRef idxs[2] = {
783 ctx->i32_0,
784 NULL
785 };
786 unsigned j = 0;
787
788 if (writemask != TGSI_WRITEMASK_XYZW &&
789 !ctx->undef_alloca) {
790 /* Create a dummy alloca. We use it so that we
791 * have a pointer that is safe to load from if
792 * a shader ever reads from a channel that
793 * it never writes to.
794 */
795 ctx->undef_alloca = lp_build_alloca_undef(
796 &ctx->gallivm,
797 ctx->f32, "undef");
798 }
799
800 for (i = 0; i < decl_size; ++i) {
801 LLVMValueRef ptr;
802 if (writemask & (1 << (i % 4))) {
803 #ifdef DEBUG
804 snprintf(name, sizeof(name), "TEMP%d.%c",
805 first + i / 4, "xyzw"[i % 4]);
806 #endif
807 idxs[1] = LLVMConstInt(ctx->i32, j, 0);
808 ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
809 j++;
810 } else {
811 ptr = ctx->undef_alloca;
812 }
813 ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
814 }
815 }
816 break;
817 }
818 case TGSI_FILE_INPUT:
819 {
820 unsigned idx;
821 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
822 if (ctx->load_input &&
823 ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
824 ctx->input_decls[idx] = *decl;
825 ctx->input_decls[idx].Range.First = idx;
826 ctx->input_decls[idx].Range.Last = idx;
827 ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;
828
829 if (si_preload_fs_inputs(ctx) ||
830 bld_base->info->processor != PIPE_SHADER_FRAGMENT)
831 ctx->load_input(ctx, idx, &ctx->input_decls[idx],
832 &ctx->inputs[idx * 4]);
833 }
834 }
835 }
836 break;
837
838 case TGSI_FILE_SYSTEM_VALUE:
839 {
840 unsigned idx;
841 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
842 ctx->load_system_value(ctx, idx, decl);
843 }
844 }
845 break;
846
847 case TGSI_FILE_OUTPUT:
848 {
849 char name[16] = "";
850 unsigned idx;
851 for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
852 unsigned chan;
853 assert(idx < RADEON_LLVM_MAX_OUTPUTS);
854 if (ctx->outputs[idx][0])
855 continue;
856 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
857 #ifdef DEBUG
858 snprintf(name, sizeof(name), "OUT%d.%c",
859 idx, "xyzw"[chan % 4]);
860 #endif
861 ctx->outputs[idx][chan] = lp_build_alloca_undef(
862 &ctx->gallivm,
863 ctx->f32, name);
864 }
865 }
866 break;
867 }
868
869 case TGSI_FILE_MEMORY:
870 ctx->declare_memory_region(ctx, decl);
871 break;
872
873 default:
874 break;
875 }
876 }
877
878 void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
879 const struct tgsi_full_instruction *inst,
880 const struct tgsi_opcode_info *info,
881 LLVMValueRef dst[4])
882 {
883 struct si_shader_context *ctx = si_shader_context(bld_base);
884 struct gallivm_state *gallivm = &ctx->gallivm;
885 const struct tgsi_full_dst_register *reg = &inst->Dst[0];
886 LLVMBuilderRef builder = ctx->gallivm.builder;
887 LLVMValueRef temp_ptr, temp_ptr2 = NULL;
888 unsigned chan, chan_index;
889 bool is_vec_store = false;
890 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode);
891
892 if (dst[0]) {
893 LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
894 is_vec_store = (k == LLVMVectorTypeKind);
895 }
896
897 if (is_vec_store) {
898 LLVMValueRef values[4] = {};
899 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(inst, chan) {
900 LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
901 values[chan] = LLVMBuildExtractElement(gallivm->builder,
902 dst[0], index, "");
903 }
904 bld_base->emit_store(bld_base, inst, info, values);
905 return;
906 }
907
908 TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) {
909 LLVMValueRef value = dst[chan_index];
910
911 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
912 continue;
913 if (inst->Instruction.Saturate)
914 value = ac_build_clamp(&ctx->ac, value);
915
916 if (reg->Register.File == TGSI_FILE_ADDRESS) {
917 temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
918 LLVMBuildStore(builder, value, temp_ptr);
919 continue;
920 }
921
922 if (!tgsi_type_is_64bit(dtype))
923 value = bitcast(bld_base, TGSI_TYPE_FLOAT, value);
924
925 if (reg->Register.Indirect) {
926 unsigned file = reg->Register.File;
927 unsigned reg_index = reg->Register.Index;
928 store_value_to_array(bld_base, value, file, chan_index,
929 reg_index, &reg->Indirect);
930 } else {
931 switch(reg->Register.File) {
932 case TGSI_FILE_OUTPUT:
933 temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
934 if (tgsi_type_is_64bit(dtype))
935 temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
936 break;
937
938 case TGSI_FILE_TEMPORARY:
939 {
940 if (reg->Register.Index >= ctx->temps_count)
941 continue;
942
943 temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
944 if (tgsi_type_is_64bit(dtype))
945 temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];
946
947 break;
948 }
949 default:
950 return;
951 }
952 if (!tgsi_type_is_64bit(dtype))
953 LLVMBuildStore(builder, value, temp_ptr);
954 else {
955 LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
956 LLVMVectorType(ctx->i32, 2), "");
957 LLVMValueRef val2;
958 value = LLVMBuildExtractElement(builder, ptr,
959 ctx->i32_0, "");
960 val2 = LLVMBuildExtractElement(builder, ptr,
961 ctx->i32_1, "");
962
963 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, value), temp_ptr);
964 LLVMBuildStore(builder, bitcast(bld_base, TGSI_TYPE_FLOAT, val2), temp_ptr2);
965 }
966 }
967 }
968 }
969
970 static void set_basicblock_name(LLVMBasicBlockRef bb, const char *base, int pc)
971 {
972 char buf[32];
973 /* Subtract 1 so that the number shown is that of the corresponding
974 * opcode in the TGSI dump, e.g. an if block has the same suffix as
975 * the instruction number of the corresponding TGSI IF.
976 */
977 snprintf(buf, sizeof(buf), "%s%d", base, pc - 1);
978 LLVMSetValueName(LLVMBasicBlockAsValue(bb), buf);
979 }
980
981 /* Append a basic block at the level of the parent flow.
982 */
983 static LLVMBasicBlockRef append_basic_block(struct si_shader_context *ctx,
984 const char *name)
985 {
986 struct gallivm_state *gallivm = &ctx->gallivm;
987
988 assert(ctx->flow_depth >= 1);
989
990 if (ctx->flow_depth >= 2) {
991 struct si_llvm_flow *flow = &ctx->flow[ctx->flow_depth - 2];
992
993 return LLVMInsertBasicBlockInContext(gallivm->context,
994 flow->next_block, name);
995 }
996
997 return LLVMAppendBasicBlockInContext(gallivm->context, ctx->main_fn, name);
998 }
999
1000 /* Emit a branch to the given default target for the current block if
1001 * applicable -- that is, if the current block does not already contain a
1002 * branch from a break or continue.
1003 */
1004 static void emit_default_branch(LLVMBuilderRef builder, LLVMBasicBlockRef target)
1005 {
1006 if (!LLVMGetBasicBlockTerminator(LLVMGetInsertBlock(builder)))
1007 LLVMBuildBr(builder, target);
1008 }
1009
1010 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
1011 struct lp_build_tgsi_context *bld_base,
1012 struct lp_build_emit_data *emit_data)
1013 {
1014 struct si_shader_context *ctx = si_shader_context(bld_base);
1015 struct gallivm_state *gallivm = &ctx->gallivm;
1016 struct si_llvm_flow *flow = push_flow(ctx);
1017 flow->loop_entry_block = append_basic_block(ctx, "LOOP");
1018 flow->next_block = append_basic_block(ctx, "ENDLOOP");
1019 set_basicblock_name(flow->loop_entry_block, "loop", bld_base->pc);
1020 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1021 LLVMPositionBuilderAtEnd(gallivm->builder, flow->loop_entry_block);
1022 }
1023
1024 static void brk_emit(const struct lp_build_tgsi_action *action,
1025 struct lp_build_tgsi_context *bld_base,
1026 struct lp_build_emit_data *emit_data)
1027 {
1028 struct si_shader_context *ctx = si_shader_context(bld_base);
1029 struct gallivm_state *gallivm = &ctx->gallivm;
1030 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1031
1032 LLVMBuildBr(gallivm->builder, flow->next_block);
1033 }
1034
1035 static void cont_emit(const struct lp_build_tgsi_action *action,
1036 struct lp_build_tgsi_context *bld_base,
1037 struct lp_build_emit_data *emit_data)
1038 {
1039 struct si_shader_context *ctx = si_shader_context(bld_base);
1040 struct gallivm_state *gallivm = &ctx->gallivm;
1041 struct si_llvm_flow *flow = get_innermost_loop(ctx);
1042
1043 LLVMBuildBr(gallivm->builder, flow->loop_entry_block);
1044 }
1045
1046 static void else_emit(const struct lp_build_tgsi_action *action,
1047 struct lp_build_tgsi_context *bld_base,
1048 struct lp_build_emit_data *emit_data)
1049 {
1050 struct si_shader_context *ctx = si_shader_context(bld_base);
1051 struct gallivm_state *gallivm = &ctx->gallivm;
1052 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1053 LLVMBasicBlockRef endif_block;
1054
1055 assert(!current_branch->loop_entry_block);
1056
1057 endif_block = append_basic_block(ctx, "ENDIF");
1058 emit_default_branch(gallivm->builder, endif_block);
1059
1060 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1061 set_basicblock_name(current_branch->next_block, "else", bld_base->pc);
1062
1063 current_branch->next_block = endif_block;
1064 }
1065
1066 static void endif_emit(const struct lp_build_tgsi_action *action,
1067 struct lp_build_tgsi_context *bld_base,
1068 struct lp_build_emit_data *emit_data)
1069 {
1070 struct si_shader_context *ctx = si_shader_context(bld_base);
1071 struct gallivm_state *gallivm = &ctx->gallivm;
1072 struct si_llvm_flow *current_branch = get_current_flow(ctx);
1073
1074 assert(!current_branch->loop_entry_block);
1075
1076 emit_default_branch(gallivm->builder, current_branch->next_block);
1077 LLVMPositionBuilderAtEnd(gallivm->builder, current_branch->next_block);
1078 set_basicblock_name(current_branch->next_block, "endif", bld_base->pc);
1079
1080 ctx->flow_depth--;
1081 }
1082
1083 static void endloop_emit(const struct lp_build_tgsi_action *action,
1084 struct lp_build_tgsi_context *bld_base,
1085 struct lp_build_emit_data *emit_data)
1086 {
1087 struct si_shader_context *ctx = si_shader_context(bld_base);
1088 struct gallivm_state *gallivm = &ctx->gallivm;
1089 struct si_llvm_flow *current_loop = get_current_flow(ctx);
1090
1091 assert(current_loop->loop_entry_block);
1092
1093 emit_default_branch(gallivm->builder, current_loop->loop_entry_block);
1094
1095 LLVMPositionBuilderAtEnd(gallivm->builder, current_loop->next_block);
1096 set_basicblock_name(current_loop->next_block, "endloop", bld_base->pc);
1097 ctx->flow_depth--;
1098 }
1099
1100 static void if_cond_emit(const struct lp_build_tgsi_action *action,
1101 struct lp_build_tgsi_context *bld_base,
1102 struct lp_build_emit_data *emit_data,
1103 LLVMValueRef cond)
1104 {
1105 struct si_shader_context *ctx = si_shader_context(bld_base);
1106 struct gallivm_state *gallivm = &ctx->gallivm;
1107 struct si_llvm_flow *flow = push_flow(ctx);
1108 LLVMBasicBlockRef if_block;
1109
1110 if_block = append_basic_block(ctx, "IF");
1111 flow->next_block = append_basic_block(ctx, "ELSE");
1112 set_basicblock_name(if_block, "if", bld_base->pc);
1113 LLVMBuildCondBr(gallivm->builder, cond, if_block, flow->next_block);
1114 LLVMPositionBuilderAtEnd(gallivm->builder, if_block);
1115 }
1116
1117 static void if_emit(const struct lp_build_tgsi_action *action,
1118 struct lp_build_tgsi_context *bld_base,
1119 struct lp_build_emit_data *emit_data)
1120 {
1121 struct gallivm_state *gallivm = bld_base->base.gallivm;
1122 LLVMValueRef cond;
1123
1124 cond = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
1125 emit_data->args[0],
1126 bld_base->base.zero, "");
1127
1128 if_cond_emit(action, bld_base, emit_data, cond);
1129 }
1130
1131 static void uif_emit(const struct lp_build_tgsi_action *action,
1132 struct lp_build_tgsi_context *bld_base,
1133 struct lp_build_emit_data *emit_data)
1134 {
1135 struct gallivm_state *gallivm = bld_base->base.gallivm;
1136 LLVMValueRef cond;
1137
1138 cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE,
1139 bitcast(bld_base, TGSI_TYPE_UNSIGNED, emit_data->args[0]),
1140 bld_base->int_bld.zero, "");
1141
1142 if_cond_emit(action, bld_base, emit_data, cond);
1143 }
1144
1145 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
1146 const struct tgsi_full_immediate *imm)
1147 {
1148 unsigned i;
1149 struct si_shader_context *ctx = si_shader_context(bld_base);
1150
1151 for (i = 0; i < 4; ++i) {
1152 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
1153 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
1154 }
1155
1156 ctx->imms_num++;
1157 }
1158
1159 void si_llvm_context_init(struct si_shader_context *ctx,
1160 struct si_screen *sscreen,
1161 LLVMTargetMachineRef tm)
1162 {
1163 struct lp_type type;
1164
1165 /* Initialize the gallivm object:
1166 * We are only using the module, context, and builder fields of this struct.
1167 * This should be enough for us to be able to pass our gallivm struct to the
1168 * helper functions in the gallivm module.
1169 */
1170 memset(ctx, 0, sizeof(*ctx));
1171 ctx->screen = sscreen;
1172 ctx->tm = tm;
1173
1174 ctx->gallivm.context = LLVMContextCreate();
1175 ctx->gallivm.module = LLVMModuleCreateWithNameInContext("tgsi",
1176 ctx->gallivm.context);
1177 LLVMSetTarget(ctx->gallivm.module, "amdgcn--");
1178
1179 LLVMTargetDataRef data_layout = LLVMCreateTargetDataLayout(tm);
1180 char *data_layout_str = LLVMCopyStringRepOfTargetData(data_layout);
1181 LLVMSetDataLayout(ctx->gallivm.module, data_layout_str);
1182 LLVMDisposeTargetData(data_layout);
1183 LLVMDisposeMessage(data_layout_str);
1184
1185 bool unsafe_fpmath = (sscreen->b.debug_flags & DBG_UNSAFE_MATH) != 0;
1186 enum lp_float_mode float_mode =
1187 unsafe_fpmath ? LP_FLOAT_MODE_UNSAFE_FP_MATH :
1188 LP_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
1189
1190 ctx->gallivm.builder = lp_create_builder(ctx->gallivm.context,
1191 float_mode);
1192
1193 ac_llvm_context_init(&ctx->ac, ctx->gallivm.context);
1194 ctx->ac.module = ctx->gallivm.module;
1195 ctx->ac.builder = ctx->gallivm.builder;
1196
1197 struct lp_build_tgsi_context *bld_base = &ctx->bld_base;
1198
1199 type.floating = true;
1200 type.fixed = false;
1201 type.sign = true;
1202 type.norm = false;
1203 type.width = 32;
1204 type.length = 1;
1205
1206 lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
1207 lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
1208 lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
1209 type.width *= 2;
1210 lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
1211 lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
1212 lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));
1213
1214 bld_base->soa = 1;
1215 bld_base->emit_swizzle = emit_swizzle;
1216 bld_base->emit_declaration = emit_declaration;
1217 bld_base->emit_immediate = emit_immediate;
1218
1219 /* metadata allowing 2.5 ULP */
1220 ctx->fpmath_md_kind = LLVMGetMDKindIDInContext(ctx->gallivm.context,
1221 "fpmath", 6);
1222 LLVMValueRef arg = lp_build_const_float(&ctx->gallivm, 2.5);
1223 ctx->fpmath_md_2p5_ulp = LLVMMDNodeInContext(ctx->gallivm.context,
1224 &arg, 1);
1225
1226 bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
1227 bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
1228 bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
1229 bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
1230 bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
1231 bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
1232 bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
1233 bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
1234
1235 si_shader_context_init_alu(&ctx->bld_base);
1236 si_shader_context_init_mem(ctx);
1237
1238 ctx->voidt = LLVMVoidTypeInContext(ctx->gallivm.context);
1239 ctx->i1 = LLVMInt1TypeInContext(ctx->gallivm.context);
1240 ctx->i8 = LLVMInt8TypeInContext(ctx->gallivm.context);
1241 ctx->i32 = LLVMInt32TypeInContext(ctx->gallivm.context);
1242 ctx->i64 = LLVMInt64TypeInContext(ctx->gallivm.context);
1243 ctx->i128 = LLVMIntTypeInContext(ctx->gallivm.context, 128);
1244 ctx->f32 = LLVMFloatTypeInContext(ctx->gallivm.context);
1245 ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
1246 ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
1247 ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
1248 ctx->v8i32 = LLVMVectorType(ctx->i32, 8);
1249
1250 ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
1251 ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
1252 }
1253
1254 /* Set the context to a certain TGSI shader. Can be called repeatedly
1255 * to change the shader. */
1256 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1257 struct si_shader *shader)
1258 {
1259 const struct tgsi_shader_info *info = NULL;
1260 const struct tgsi_token *tokens = NULL;
1261
1262 if (shader && shader->selector) {
1263 info = &shader->selector->info;
1264 tokens = shader->selector->tokens;
1265 }
1266
1267 ctx->shader = shader;
1268 ctx->type = info ? info->processor : -1;
1269 ctx->bld_base.info = info;
1270
1271 /* Clean up the old contents. */
1272 FREE(ctx->temp_arrays);
1273 ctx->temp_arrays = NULL;
1274 FREE(ctx->temp_array_allocas);
1275 ctx->temp_array_allocas = NULL;
1276
1277 FREE(ctx->imms);
1278 ctx->imms = NULL;
1279 ctx->imms_num = 0;
1280
1281 FREE(ctx->temps);
1282 ctx->temps = NULL;
1283 ctx->temps_count = 0;
1284
1285 if (!info || !tokens)
1286 return;
1287
1288 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1289 int size = info->array_max[TGSI_FILE_TEMPORARY];
1290
1291 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1292 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1293
1294 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1295 ctx->temp_arrays);
1296 }
1297 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1298 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1299 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1300 }
1301
1302 /* Re-set these to start with a clean slate. */
1303 ctx->bld_base.num_instructions = 0;
1304 ctx->bld_base.pc = 0;
1305 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1306
1307 ctx->bld_base.emit_store = si_llvm_emit_store;
1308 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1309 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1310 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1311 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1312 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1313
1314 ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1315 ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1316 ctx->num_samplers = util_last_bit(info->samplers_declared);
1317 ctx->num_images = util_last_bit(info->images_declared);
1318 }
1319
1320 void si_llvm_create_func(struct si_shader_context *ctx,
1321 const char *name,
1322 LLVMTypeRef *return_types, unsigned num_return_elems,
1323 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1324 {
1325 LLVMTypeRef main_fn_type, ret_type;
1326 LLVMBasicBlockRef main_fn_body;
1327 enum si_llvm_calling_convention call_conv;
1328 unsigned real_shader_type;
1329
1330 if (num_return_elems)
1331 ret_type = LLVMStructTypeInContext(ctx->gallivm.context,
1332 return_types,
1333 num_return_elems, true);
1334 else
1335 ret_type = LLVMVoidTypeInContext(ctx->gallivm.context);
1336
1337 /* Setup the function */
1338 ctx->return_type = ret_type;
1339 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1340 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1341 main_fn_body = LLVMAppendBasicBlockInContext(ctx->gallivm.context,
1342 ctx->main_fn, "main_body");
1343 LLVMPositionBuilderAtEnd(ctx->gallivm.builder, main_fn_body);
1344
1345 real_shader_type = ctx->type;
1346
1347 /* LS is merged into HS (TCS), and ES is merged into GS. */
1348 if (ctx->screen->b.chip_class >= GFX9) {
1349 if (ctx->shader->key.as_ls)
1350 real_shader_type = PIPE_SHADER_TESS_CTRL;
1351 else if (ctx->shader->key.as_es)
1352 real_shader_type = PIPE_SHADER_GEOMETRY;
1353 }
1354
1355 switch (real_shader_type) {
1356 case PIPE_SHADER_VERTEX:
1357 case PIPE_SHADER_TESS_EVAL:
1358 call_conv = RADEON_LLVM_AMDGPU_VS;
1359 break;
1360 case PIPE_SHADER_TESS_CTRL:
1361 call_conv = HAVE_LLVM >= 0x0500 ? RADEON_LLVM_AMDGPU_HS :
1362 RADEON_LLVM_AMDGPU_VS;
1363 break;
1364 case PIPE_SHADER_GEOMETRY:
1365 call_conv = RADEON_LLVM_AMDGPU_GS;
1366 break;
1367 case PIPE_SHADER_FRAGMENT:
1368 call_conv = RADEON_LLVM_AMDGPU_PS;
1369 break;
1370 case PIPE_SHADER_COMPUTE:
1371 call_conv = RADEON_LLVM_AMDGPU_CS;
1372 break;
1373 default:
1374 unreachable("Unhandle shader type");
1375 }
1376
1377 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1378 }
1379
1380 void si_llvm_optimize_module(struct si_shader_context *ctx)
1381 {
1382 struct gallivm_state *gallivm = &ctx->gallivm;
1383 const char *triple = LLVMGetTarget(gallivm->module);
1384 LLVMTargetLibraryInfoRef target_library_info;
1385
1386 /* Dump LLVM IR before any optimization passes */
1387 if (ctx->screen->b.debug_flags & DBG_PREOPT_IR &&
1388 r600_can_dump_shader(&ctx->screen->b, ctx->type))
1389 LLVMDumpModule(ctx->gallivm.module);
1390
1391 /* Create the pass manager */
1392 gallivm->passmgr = LLVMCreatePassManager();
1393
1394 target_library_info = gallivm_create_target_library_info(triple);
1395 LLVMAddTargetLibraryInfo(target_library_info, gallivm->passmgr);
1396
1397 if (r600_extra_shader_checks(&ctx->screen->b, ctx->type))
1398 LLVMAddVerifierPass(gallivm->passmgr);
1399
1400 LLVMAddAlwaysInlinerPass(gallivm->passmgr);
1401
1402 /* This pass should eliminate all the load and store instructions */
1403 LLVMAddPromoteMemoryToRegisterPass(gallivm->passmgr);
1404
1405 /* Add some optimization passes */
1406 LLVMAddScalarReplAggregatesPass(gallivm->passmgr);
1407 LLVMAddLICMPass(gallivm->passmgr);
1408 LLVMAddAggressiveDCEPass(gallivm->passmgr);
1409 LLVMAddCFGSimplificationPass(gallivm->passmgr);
1410 #if HAVE_LLVM >= 0x0400
1411 /* This is recommended by the instruction combining pass. */
1412 LLVMAddEarlyCSEMemSSAPass(gallivm->passmgr);
1413 #endif
1414 LLVMAddInstructionCombiningPass(gallivm->passmgr);
1415
1416 /* Run the pass */
1417 LLVMRunPassManager(gallivm->passmgr, ctx->gallivm.module);
1418
1419 LLVMDisposeBuilder(gallivm->builder);
1420 LLVMDisposePassManager(gallivm->passmgr);
1421 gallivm_dispose_target_library_info(target_library_info);
1422 }
1423
1424 void si_llvm_dispose(struct si_shader_context *ctx)
1425 {
1426 LLVMDisposeModule(ctx->gallivm.module);
1427 LLVMContextDispose(ctx->gallivm.context);
1428 FREE(ctx->temp_arrays);
1429 ctx->temp_arrays = NULL;
1430 FREE(ctx->temp_array_allocas);
1431 ctx->temp_array_allocas = NULL;
1432 FREE(ctx->temps);
1433 ctx->temps = NULL;
1434 ctx->temps_count = 0;
1435 FREE(ctx->imms);
1436 ctx->imms = NULL;
1437 ctx->imms_num = 0;
1438 FREE(ctx->flow);
1439 ctx->flow = NULL;
1440 ctx->flow_depth_max = 0;
1441 }