radeonsi: reorder code in si_llvm_context_init
[mesa.git] src/gallium/drivers/radeonsi/si_shader_tgsi_setup.c
/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "si_shader_internal.h"
#include "si_pipe.h"
#include "ac_llvm_util.h"
#include "util/u_memory.h"

enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};

struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug;
	unsigned retval;
};

static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
{
	struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
	LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
	char *description = LLVMGetDiagInfoDescription(di);
	const char *severity_str = NULL;

	switch (severity) {
	case LLVMDSError:
		severity_str = "error";
		break;
	case LLVMDSWarning:
		severity_str = "warning";
		break;
	case LLVMDSRemark:
		severity_str = "remark";
		break;
	case LLVMDSNote:
		severity_str = "note";
		break;
	default:
		severity_str = "unknown";
	}

	pipe_debug_message(diag->debug, SHADER_INFO,
			   "LLVM diagnostic (%s): %s", severity_str, description);

	if (severity == LLVMDSError) {
		diag->retval = 1;
		fprintf(stderr, "LLVM triggered Diagnostic Handler: %s\n", description);
	}

	LLVMDisposeMessage(description);
}

/**
 * Compile an LLVM module to machine code.
 *
 * @returns 0 for success, 1 for failure
 */
unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
			 struct ac_llvm_compiler *compiler,
			 struct pipe_debug_callback *debug)
{
	struct si_llvm_diagnostics diag;
	LLVMContextRef llvm_ctx;

	diag.debug = debug;
	diag.retval = 0;

	/* Set up the diagnostic handler. */
	llvm_ctx = LLVMGetModuleContext(M);

	LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);

	/* Compile IR. */
	if (!ac_compile_module_to_binary(compiler->passes, M, binary))
		diag.retval = 1;

	if (diag.retval != 0)
		pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
	return diag.retval;
}

LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
			  enum tgsi_opcode_type type)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	switch (type) {
	case TGSI_TYPE_UNSIGNED:
	case TGSI_TYPE_SIGNED:
		return ctx->ac.i32;
	case TGSI_TYPE_UNSIGNED64:
	case TGSI_TYPE_SIGNED64:
		return ctx->ac.i64;
	case TGSI_TYPE_DOUBLE:
		return ctx->ac.f64;
	case TGSI_TYPE_UNTYPED:
	case TGSI_TYPE_FLOAT:
		return ctx->ac.f32;
	default: break;
	}
	return 0;
}

LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
		     enum tgsi_opcode_type type, LLVMValueRef value)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);

	if (dst_type)
		return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
	else
		return value;
}

/**
 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
 * or an undefined value in the same interval otherwise.
 */
LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
				 LLVMValueRef index,
				 unsigned num)
{
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
	LLVMValueRef cc;

	if (util_is_power_of_two_or_zero(num)) {
		index = LLVMBuildAnd(builder, index, c_max, "");
	} else {
		/* In theory, this MAX pattern should result in code that is
		 * as good as the bit-wise AND above.
		 *
		 * In practice, LLVM generates worse code (at the time of
		 * writing), because its value tracking is not strong enough.
		 */
		cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
		index = LLVMBuildSelect(builder, cc, index, c_max, "");
	}

	return index;
}
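
/* Illustrative note (added; not in the original source): for a power-of-two
 * bound the clamp above is a single AND, e.g. num = 16 gives
 *     index = index & 15;
 * while a non-power-of-two bound such as num = 6 becomes the compare/select
 *     index = (index <= 5) ? index : 5;
 * Either way the result stays inside [0, num), which is what the array
 * accessors below rely on.
 */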

static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
				 LLVMValueRef value,
				 unsigned swizzle_x,
				 unsigned swizzle_y,
				 unsigned swizzle_z,
				 unsigned swizzle_w)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef swizzles[4];

	swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
	swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
	swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
	swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);

	return LLVMBuildShuffleVector(ctx->ac.builder,
				      value,
				      LLVMGetUndef(LLVMTypeOf(value)),
				      LLVMConstVector(swizzles, 4), "");
}

/**
 * Return the 1-based ID of the array covering the given temporary register
 * index, or 0 if the register is not part of any declared array.
 */
static unsigned
get_temp_array_id(struct lp_build_tgsi_context *bld_base,
		  unsigned reg_index,
		  const struct tgsi_ind_register *reg)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
	unsigned i;

	if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
		return reg->ArrayID;

	for (i = 0; i < num_arrays; i++) {
		const struct tgsi_array_info *array = &ctx->temp_arrays[i];

		if (reg_index >= array->range.First && reg_index <= array->range.Last)
			return i + 1;
	}

	return 0;
}

static struct tgsi_declaration_range
get_array_range(struct lp_build_tgsi_context *bld_base,
		unsigned File, unsigned reg_index,
		const struct tgsi_ind_register *reg)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	struct tgsi_declaration_range range;

	if (File == TGSI_FILE_TEMPORARY) {
		unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
		if (array_id)
			return ctx->temp_arrays[array_id - 1].range;
	}

	range.First = 0;
	range.Last = bld_base->info->file_max[File];
	return range;
}

/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct si_shader_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = si_get_indirect_index(ctx, reg_indirect, 1,
				      reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	index = LLVMBuildMul(
		builder, index,
		LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
		"");
	index = LLVMBuildAdd(
		builder, index,
		LLVMConstInt(ctx->i32,
			     util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0),
		"");
	idxs[0] = ctx->i32_0;
	idxs[1] = index;
	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}
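
/* Worked example (illustrative; not part of the original code): assume a
 * temporary array spanning TEMP[2]..TEMP[5] with writemask .xz, i.e. two
 * live channels per register, and an address register holding 0. An access
 * to TEMP[4].z then computes
 *     index  = bound(4 - 2, 4)                    = 2
 *     index *= bitcount(xz)                       -> 4
 *     index += bitcount(xz & (channels below z))  -> 5
 * which is element 5 of the flat [8 x float] alloca backing the array.
 */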

LLVMValueRef
si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
			 LLVMTypeRef type,
			 LLVMValueRef ptr,
			 LLVMValueRef ptr2)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMValueRef result;

	result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));

	result = LLVMBuildInsertElement(ctx->ac.builder,
					result,
					ac_to_integer(&ctx->ac, ptr),
					ctx->i32_0, "");
	result = LLVMBuildInsertElement(ctx->ac.builder,
					result,
					ac_to_integer(&ctx->ac, ptr2),
					ctx->i32_1, "");
	return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
}
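
/* Sketch (added for clarity) of the IR shape the helper above emits when the
 * two dword halves of a double are combined:
 *     %v0  = insertelement <2 x i32> undef, i32 %lo, i32 0
 *     %v1  = insertelement <2 x i32> %v0,   i32 %hi, i32 1
 *     %res = bitcast <2 x i32> %v1 to double
 */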

static LLVMValueRef
emit_array_fetch(struct lp_build_tgsi_context *bld_base,
		 unsigned File, enum tgsi_opcode_type type,
		 struct tgsi_declaration_range range,
		 unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	unsigned i, size = range.Last - range.First + 1;
	LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
	LLVMValueRef result = LLVMGetUndef(vec);

	struct tgsi_full_src_register tmp_reg = {};
	tmp_reg.Register.File = File;

	for (i = 0; i < size; ++i) {
		tmp_reg.Register.Index = i + range.First;
		LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
		result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
						LLVMConstInt(ctx->i32, i, 0), "array_vector");
	}
	return result;
}

static LLVMValueRef
load_value_from_array(struct lp_build_tgsi_context *bld_base,
		      unsigned file,
		      enum tgsi_opcode_type type,
		      unsigned swizzle,
		      unsigned reg_index,
		      const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
	if (ptr) {
		LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
		if (tgsi_type_is_64bit(type)) {
			LLVMValueRef ptr_hi, val_hi;
			ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
			val_hi = LLVMBuildLoad(builder, ptr_hi, "");
			val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						       val, val_hi);
		}

		return val;
	} else {
		struct tgsi_declaration_range range =
			get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index =
			si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, type, range, swizzle);
		return LLVMBuildExtractElement(builder, array, index, "");
	}
}
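
/* Note (added for orientation): the GEP path above loads straight from the
 * array's backing alloca, while the fallback path gathers the whole declared
 * range into a vector and extracts one lane dynamically, e.g. an indirect
 * read of TEMP[0..3].x builds a <4 x float> from the per-channel allocas and
 * then emits a single extractelement with the computed index.
 */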

static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		array = LLVMBuildInsertElement(builder, array, value, index, "");

		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							LLVMConstInt(ctx->i32, i, 0), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}

/* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
 * reload them at each use. This must be true if the shader is using
 * derivatives and KILL, because KILL can leave the WQM and then a lazy
 * input load isn't in the WQM anymore.
 */
static bool si_preload_fs_inputs(struct si_shader_context *ctx)
{
	struct si_shader_selector *sel = ctx->shader->selector;

	return sel->info.uses_derivatives &&
	       sel->info.uses_kill;
}
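
/* Example (illustrative): a fragment shader that discards in one branch and
 * later takes a derivative of an interpolated input has to preload that
 * input; a lazily emitted interpolation after the KILL would no longer
 * execute in the whole-quad mode the derivative needs.
 */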

static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
	       unsigned chan)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
	return ctx->outputs[index][chan];
}

LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;

	if (swizzle == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return ac_build_gather_values(&ctx->ac, values,
					      TGSI_NUM_CHANNELS);
	}

	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle + 1];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, swizzle + 1);
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}

static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
				       const struct tgsi_full_src_register *reg,
				       enum tgsi_opcode_type type,
				       unsigned swizzle)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef cval = ctx->system_values[reg->Register.Index];

	if (tgsi_type_is_64bit(type)) {
		LLVMValueRef lo, hi;

		assert(swizzle == 0 || swizzle == 2);

		lo = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
		hi = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle + 1, 0), "");

		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						lo, hi);
	}

	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
		cval = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
	} else {
		assert(swizzle == 0);
	}

	return bitcast(bld_base, type, cval);
}

static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->addrs[idx][chan] = ac_build_alloca_undef(
					&ctx->ac, ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[18] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = ac_build_alloca_undef(&ctx->ac,
					LLVMArrayType(ctx->f32,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					ac_build_alloca_undef(&ctx->ac,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = ac_build_alloca_undef(
					&ctx->ac, ctx->f32, "undef");
			}

			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
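
	/* Worked example (added; not in the original source): a declaration
	 * such as
	 *     DCL TEMP[0..7], ARRAY(1)
	 * with a .xy usage mask gives array_size = 8 * 2 = 16, so with working
	 * VGPR indexing it still takes the per-channel alloca path above; one
	 * more register (or a wider writemask) pushes it over 16 elements and
	 * into the single [N x float] "array" alloca instead.
	 */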
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = ac_build_alloca_undef(
					&ctx->ac, ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_tgsi_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}

void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}
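
/* Note (added for clarity): for 64-bit destination types the loop above only
 * visits channels 0 and 2; e.g. a DMOV writing TEMP[0].xy bitcasts the double
 * to <2 x i32>, stores the low dword to the .x alloca and the high dword to
 * the .y alloca, which is why chan_index 1 and 3 are skipped early on.
 */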

static int get_line(int pc)
{
	/* Subtract 1 so that the number shown is that of the corresponding
	 * opcode in the TGSI dump, e.g. an if block has the same suffix as
	 * the instruction number of the corresponding TGSI IF.
	 */
	return pc - 1;
}
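
/* Example (illustrative): for "5: IF ..." in the TGSI dump, the emitted LLVM
 * blocks get labels ending in 5, because bld_base->pc has already advanced
 * past the IF by the time its emit callback runs.
 */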

static void bgnloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
}

static void brk_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_break(&ctx->ac);
}

static void cont_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_continue(&ctx->ac);
}

static void else_emit(const struct lp_build_tgsi_action *action,
		      struct lp_build_tgsi_context *bld_base,
		      struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_else(&ctx->ac, get_line(bld_base->pc));
}

static void endif_emit(const struct lp_build_tgsi_action *action,
		       struct lp_build_tgsi_context *bld_base,
		       struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_endif(&ctx->ac, get_line(bld_base->pc));
}

static void endloop_emit(const struct lp_build_tgsi_action *action,
			 struct lp_build_tgsi_context *bld_base,
			 struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
}

static void if_emit(const struct lp_build_tgsi_action *action,
		    struct lp_build_tgsi_context *bld_base,
		    struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
}

static void uif_emit(const struct lp_build_tgsi_action *action,
		     struct lp_build_tgsi_context *bld_base,
		     struct lp_build_emit_data *emit_data)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
}

static void emit_immediate(struct lp_build_tgsi_context *bld_base,
			   const struct tgsi_full_immediate *imm)
{
	unsigned i;
	struct si_shader_context *ctx = si_shader_context(bld_base);

	for (i = 0; i < 4; ++i) {
		ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
			LLVMConstInt(ctx->i32, imm->u[i].Uint, false);
	}

	ctx->imms_num++;
}
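
/* Note (added for clarity): every TGSI immediate is kept as four i32
 * constants holding the raw bit patterns, e.g.
 *     IMM[0] FLT32 {1.0000, 0.5000, 0.0000, 0.0000}
 * is stored as 0x3f800000, 0x3f000000, 0, 0, and si_llvm_emit_fetch later
 * bitcasts the selected channel to whatever type the opcode expects.
 */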

void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  struct ac_llvm_compiler *compiler)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->compiler = compiler;

	ctx->ac.context = LLVMContextCreate();
	ac_llvm_context_init(&ctx->ac, ctx->ac.context,
			     sscreen->info.chip_class, sscreen->info.family);

	ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);

	enum ac_float_mode float_mode =
		sscreen->debug_flags & DBG(UNSAFE_MATH) ?
			AC_FLOAT_MODE_UNSAFE_FP_MATH :
			AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
	ctx->ac.builder = ac_create_builder(ctx->ac.context, float_mode);

	ctx->gallivm.context = ctx->ac.context;
	ctx->gallivm.module = ctx->ac.module;
	ctx->gallivm.builder = ctx->ac.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	type.width *= 2;
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	bld_base->soa = 1;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);
	si_shader_context_init_mem(ctx);

	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
}
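
/* Rough per-shader lifetime of this context (summary added for orientation;
 * the callers elsewhere in radeonsi are authoritative):
 *     si_llvm_context_init()      - create LLVM context, module, builder
 *     si_llvm_context_set_tgsi()  - bind a particular shader/selector
 *     si_llvm_create_func()       - add the main function and entry block
 *     ... TGSI -> LLVM translation via the callbacks registered above ...
 *     si_llvm_optimize_module()   - run the pass manager, free the builder
 *     si_llvm_compile()           - produce the machine-code binary
 *     si_llvm_dispose()           - free the module, context and arrays
 */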

/* Set the context to a certain TGSI shader. Can be called repeatedly
 * to change the shader. */
void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
			      struct si_shader *shader)
{
	const struct tgsi_shader_info *info = NULL;
	const struct tgsi_token *tokens = NULL;

	if (shader && shader->selector) {
		info = &shader->selector->info;
		tokens = shader->selector->tokens;
	}

	ctx->shader = shader;
	ctx->type = info ? info->processor : -1;
	ctx->bld_base.info = info;

	/* Clean up the old contents. */
	FREE(ctx->temp_arrays);
	ctx->temp_arrays = NULL;
	FREE(ctx->temp_array_allocas);
	ctx->temp_array_allocas = NULL;

	FREE(ctx->imms);
	ctx->imms = NULL;
	ctx->imms_num = 0;

	FREE(ctx->temps);
	ctx->temps = NULL;
	ctx->temps_count = 0;

	if (!info)
		return;

	ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
	ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);

	ctx->num_samplers = util_last_bit(info->samplers_declared);
	ctx->num_images = util_last_bit(info->images_declared);

	if (!tokens)
		return;

	if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
		int size = info->array_max[TGSI_FILE_TEMPORARY];

		ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
		ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));

		tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
				 ctx->temp_arrays);
	}
	if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
		int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
		ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
	}

	/* Re-set these to start with a clean slate. */
	ctx->bld_base.num_instructions = 0;
	ctx->bld_base.pc = 0;
	memset(ctx->outputs, 0, sizeof(ctx->outputs));

	ctx->bld_base.emit_store = si_llvm_emit_store;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
	ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
}

void si_llvm_create_func(struct si_shader_context *ctx,
			 const char *name,
			 LLVMTypeRef *return_types, unsigned num_return_elems,
			 LLVMTypeRef *ParamTypes, unsigned ParamCount)
{
	LLVMTypeRef main_fn_type, ret_type;
	LLVMBasicBlockRef main_fn_body;
	enum si_llvm_calling_convention call_conv;
	unsigned real_shader_type;

	if (num_return_elems)
		ret_type = LLVMStructTypeInContext(ctx->ac.context,
						   return_types,
						   num_return_elems, true);
	else
		ret_type = ctx->voidt;

	/* Set up the function. */
	ctx->return_type = ret_type;
	main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
	ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
	main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
			ctx->main_fn, "main_body");
	LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);

	real_shader_type = ctx->type;

	/* LS is merged into HS (TCS), and ES is merged into GS. */
	if (ctx->screen->info.chip_class >= GFX9) {
		if (ctx->shader->key.as_ls)
			real_shader_type = PIPE_SHADER_TESS_CTRL;
		else if (ctx->shader->key.as_es)
			real_shader_type = PIPE_SHADER_GEOMETRY;
	}

	switch (real_shader_type) {
	case PIPE_SHADER_VERTEX:
	case PIPE_SHADER_TESS_EVAL:
		call_conv = RADEON_LLVM_AMDGPU_VS;
		break;
	case PIPE_SHADER_TESS_CTRL:
		call_conv = RADEON_LLVM_AMDGPU_HS;
		break;
	case PIPE_SHADER_GEOMETRY:
		call_conv = RADEON_LLVM_AMDGPU_GS;
		break;
	case PIPE_SHADER_FRAGMENT:
		call_conv = RADEON_LLVM_AMDGPU_PS;
		break;
	case PIPE_SHADER_COMPUTE:
		call_conv = RADEON_LLVM_AMDGPU_CS;
		break;
	default:
		unreachable("Unhandled shader type");
	}

	LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
}
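
/* Example (illustrative): on GFX9 a vertex shader compiled with
 * shader->key.as_ls set is the first half of a merged LS+HS wave, so it is
 * given the HS calling convention (93) above instead of AMDGPU_VS; likewise
 * key.as_es maps it to the GS convention.
 */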

void si_llvm_optimize_module(struct si_shader_context *ctx)
{
	/* Dump LLVM IR before any optimization passes */
	if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
	    si_can_dump_shader(ctx->screen, ctx->type))
		LLVMDumpModule(ctx->gallivm.module);

	/* Run the pass */
	LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
	LLVMDisposeBuilder(ctx->ac.builder);
}

void si_llvm_dispose(struct si_shader_context *ctx)
{
	LLVMDisposeModule(ctx->gallivm.module);
	LLVMContextDispose(ctx->gallivm.context);
	FREE(ctx->temp_arrays);
	ctx->temp_arrays = NULL;
	FREE(ctx->temp_array_allocas);
	ctx->temp_array_allocas = NULL;
	FREE(ctx->temps);
	ctx->temps = NULL;
	ctx->temps_count = 0;
	FREE(ctx->imms);
	ctx->imms = NULL;
	ctx->imms_num = 0;
	ac_llvm_context_dispose(&ctx->ac);
}