gallivm: allow to pass two swizzles into fetches.
[mesa.git] / src / gallium / drivers / radeonsi / si_shader_tgsi_setup.c
1 /*
2 * Copyright 2016 Advanced Micro Devices, Inc.
3 * All Rights Reserved.
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * on the rights to use, copy, modify, merge, publish, distribute, sub
9 * license, and/or sell copies of the Software, and to permit persons to whom
10 * the Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 */
24
25 #include "si_shader_internal.h"
26 #include "si_pipe.h"
27 #include "ac_llvm_util.h"
28 #include "util/u_memory.h"
29
/* Calling-convention IDs used when emitting AMDGPU shader entry points.
 * NOTE(review): the numeric values presumably mirror llvm::CallingConv IDs
 * in LLVM's AMDGPU backend — confirm against the LLVM version in use. */
enum si_llvm_calling_convention {
	RADEON_LLVM_AMDGPU_VS = 87,
	RADEON_LLVM_AMDGPU_GS = 88,
	RADEON_LLVM_AMDGPU_PS = 89,
	RADEON_LLVM_AMDGPU_CS = 90,
	RADEON_LLVM_AMDGPU_HS = 93,
};
37
/* Context handed to si_diagnostic_handler via
 * LLVMContextSetDiagnosticHandler (see si_llvm_compile). */
struct si_llvm_diagnostics {
	struct pipe_debug_callback *debug; /* where diagnostics are forwarded */
	unsigned retval; /* set to 1 when an error-level diagnostic is seen */
};
42
43 static void si_diagnostic_handler(LLVMDiagnosticInfoRef di, void *context)
44 {
45 struct si_llvm_diagnostics *diag = (struct si_llvm_diagnostics *)context;
46 LLVMDiagnosticSeverity severity = LLVMGetDiagInfoSeverity(di);
47 char *description = LLVMGetDiagInfoDescription(di);
48 const char *severity_str = NULL;
49
50 switch (severity) {
51 case LLVMDSError:
52 severity_str = "error";
53 break;
54 case LLVMDSWarning:
55 severity_str = "warning";
56 break;
57 case LLVMDSRemark:
58 severity_str = "remark";
59 break;
60 case LLVMDSNote:
61 severity_str = "note";
62 break;
63 default:
64 severity_str = "unknown";
65 }
66
67 pipe_debug_message(diag->debug, SHADER_INFO,
68 "LLVM diagnostic (%s): %s", severity_str, description);
69
70 if (severity == LLVMDSError) {
71 diag->retval = 1;
72 fprintf(stderr,"LLVM triggered Diagnostic Handler: %s\n", description);
73 }
74
75 LLVMDisposeMessage(description);
76 }
77
78 /**
79 * Compile an LLVM module to machine code.
80 *
81 * @returns 0 for success, 1 for failure
82 */
83 unsigned si_llvm_compile(LLVMModuleRef M, struct ac_shader_binary *binary,
84 struct ac_llvm_compiler *compiler,
85 struct pipe_debug_callback *debug,
86 bool less_optimized)
87 {
88 struct ac_compiler_passes *passes =
89 less_optimized && compiler->low_opt_passes ?
90 compiler->low_opt_passes : compiler->passes;
91 struct si_llvm_diagnostics diag;
92 LLVMContextRef llvm_ctx;
93
94 diag.debug = debug;
95 diag.retval = 0;
96
97 /* Setup Diagnostic Handler*/
98 llvm_ctx = LLVMGetModuleContext(M);
99
100 LLVMContextSetDiagnosticHandler(llvm_ctx, si_diagnostic_handler, &diag);
101
102 /* Compile IR. */
103 if (!ac_compile_module_to_binary(passes, M, binary))
104 diag.retval = 1;
105
106 if (diag.retval != 0)
107 pipe_debug_message(debug, SHADER_INFO, "LLVM compile failed");
108 return diag.retval;
109 }
110
111 LLVMTypeRef tgsi2llvmtype(struct lp_build_tgsi_context *bld_base,
112 enum tgsi_opcode_type type)
113 {
114 struct si_shader_context *ctx = si_shader_context(bld_base);
115
116 switch (type) {
117 case TGSI_TYPE_UNSIGNED:
118 case TGSI_TYPE_SIGNED:
119 return ctx->ac.i32;
120 case TGSI_TYPE_UNSIGNED64:
121 case TGSI_TYPE_SIGNED64:
122 return ctx->ac.i64;
123 case TGSI_TYPE_DOUBLE:
124 return ctx->ac.f64;
125 case TGSI_TYPE_UNTYPED:
126 case TGSI_TYPE_FLOAT:
127 return ctx->ac.f32;
128 default: break;
129 }
130 return 0;
131 }
132
133 LLVMValueRef bitcast(struct lp_build_tgsi_context *bld_base,
134 enum tgsi_opcode_type type, LLVMValueRef value)
135 {
136 struct si_shader_context *ctx = si_shader_context(bld_base);
137 LLVMTypeRef dst_type = tgsi2llvmtype(bld_base, type);
138
139 if (dst_type)
140 return LLVMBuildBitCast(ctx->ac.builder, value, dst_type, "");
141 else
142 return value;
143 }
144
145 /**
146 * Return a value that is equal to the given i32 \p index if it lies in [0,num)
147 * or an undefined value in the same interval otherwise.
148 */
149 LLVMValueRef si_llvm_bound_index(struct si_shader_context *ctx,
150 LLVMValueRef index,
151 unsigned num)
152 {
153 LLVMBuilderRef builder = ctx->ac.builder;
154 LLVMValueRef c_max = LLVMConstInt(ctx->i32, num - 1, 0);
155 LLVMValueRef cc;
156
157 if (util_is_power_of_two_or_zero(num)) {
158 index = LLVMBuildAnd(builder, index, c_max, "");
159 } else {
160 /* In theory, this MAX pattern should result in code that is
161 * as good as the bit-wise AND above.
162 *
163 * In practice, LLVM generates worse code (at the time of
164 * writing), because its value tracking is not strong enough.
165 */
166 cc = LLVMBuildICmp(builder, LLVMIntULE, index, c_max, "");
167 index = LLVMBuildSelect(builder, cc, index, c_max, "");
168 }
169
170 return index;
171 }
172
173 static LLVMValueRef emit_swizzle(struct lp_build_tgsi_context *bld_base,
174 LLVMValueRef value,
175 unsigned swizzle_x,
176 unsigned swizzle_y,
177 unsigned swizzle_z,
178 unsigned swizzle_w)
179 {
180 struct si_shader_context *ctx = si_shader_context(bld_base);
181 LLVMValueRef swizzles[4];
182
183 swizzles[0] = LLVMConstInt(ctx->i32, swizzle_x, 0);
184 swizzles[1] = LLVMConstInt(ctx->i32, swizzle_y, 0);
185 swizzles[2] = LLVMConstInt(ctx->i32, swizzle_z, 0);
186 swizzles[3] = LLVMConstInt(ctx->i32, swizzle_w, 0);
187
188 return LLVMBuildShuffleVector(ctx->ac.builder,
189 value,
190 LLVMGetUndef(LLVMTypeOf(value)),
191 LLVMConstVector(swizzles, 4), "");
192 }
193
194 /**
195 * Return the description of the array covering the given temporary register
196 * index.
197 */
198 static unsigned
199 get_temp_array_id(struct lp_build_tgsi_context *bld_base,
200 unsigned reg_index,
201 const struct tgsi_ind_register *reg)
202 {
203 struct si_shader_context *ctx = si_shader_context(bld_base);
204 unsigned num_arrays = ctx->bld_base.info->array_max[TGSI_FILE_TEMPORARY];
205 unsigned i;
206
207 if (reg && reg->ArrayID > 0 && reg->ArrayID <= num_arrays)
208 return reg->ArrayID;
209
210 for (i = 0; i < num_arrays; i++) {
211 const struct tgsi_array_info *array = &ctx->temp_arrays[i];
212
213 if (reg_index >= array->range.First && reg_index <= array->range.Last)
214 return i + 1;
215 }
216
217 return 0;
218 }
219
220 static struct tgsi_declaration_range
221 get_array_range(struct lp_build_tgsi_context *bld_base,
222 unsigned File, unsigned reg_index,
223 const struct tgsi_ind_register *reg)
224 {
225 struct si_shader_context *ctx = si_shader_context(bld_base);
226 struct tgsi_declaration_range range;
227
228 if (File == TGSI_FILE_TEMPORARY) {
229 unsigned array_id = get_temp_array_id(bld_base, reg_index, reg);
230 if (array_id)
231 return ctx->temp_arrays[array_id - 1].range;
232 }
233
234 range.First = 0;
235 range.Last = bld_base->info->file_max[File];
236 return range;
237 }
238
/**
 * For indirect registers, construct a pointer directly to the requested
 * element using getelementptr if possible.
 *
 * Returns NULL if the insertelement/extractelement fallback for array access
 * must be used.
 */
static LLVMValueRef
get_pointer_into_array(struct si_shader_context *ctx,
		       unsigned file,
		       unsigned swizzle,
		       unsigned reg_index,
		       const struct tgsi_ind_register *reg_indirect)
{
	unsigned array_id;
	struct tgsi_array_info *array;
	LLVMValueRef idxs[2];
	LLVMValueRef index;
	LLVMValueRef alloca;

	/* Only temporaries can be backed by an array alloca. */
	if (file != TGSI_FILE_TEMPORARY)
		return NULL;

	array_id = get_temp_array_id(&ctx->bld_base, reg_index, reg_indirect);
	if (!array_id)
		return NULL;

	/* Small arrays may not have been given an alloca by
	 * emit_declaration; those use the vector fallback. */
	alloca = ctx->temp_array_allocas[array_id - 1];
	if (!alloca)
		return NULL;

	array = &ctx->temp_arrays[array_id - 1];

	/* Reads from a channel the shader never writes go to the shared
	 * dummy alloca. */
	if (!(array->writemask & (1 << swizzle)))
		return ctx->undef_alloca;

	index = si_get_indirect_index(ctx, reg_indirect, 1,
				      reg_index - ctx->temp_arrays[array_id - 1].range.First);

	/* Ensure that the index is within a valid range, to guard against
	 * VM faults and overwriting critical data (e.g. spilled resource
	 * descriptors).
	 *
	 * TODO It should be possible to avoid the additional instructions
	 * if LLVM is changed so that it guarantuees:
	 * 1. the scratch space descriptor isolates the current wave (this
	 *    could even save the scratch offset SGPR at the cost of an
	 *    additional SALU instruction)
	 * 2. the memory for allocas must be allocated at the _end_ of the
	 *    scratch space (after spilled registers)
	 */
	index = si_llvm_bound_index(ctx, index, array->range.Last - array->range.First + 1);

	/* The alloca only holds the written channels: scale the element
	 * index by the number of written channels and add the channel's
	 * position among them. */
	index = ac_build_imad(&ctx->ac, index,
			      LLVMConstInt(ctx->i32, util_bitcount(array->writemask), 0),
			      LLVMConstInt(ctx->i32,
					   util_bitcount(array->writemask & ((1 << swizzle) - 1)), 0));
	idxs[0] = ctx->i32_0;
	idxs[1] = index;
	return LLVMBuildGEP(ctx->ac.builder, alloca, idxs, 2, "");
}
300
301 LLVMValueRef
302 si_llvm_emit_fetch_64bit(struct lp_build_tgsi_context *bld_base,
303 LLVMTypeRef type,
304 LLVMValueRef ptr,
305 LLVMValueRef ptr2)
306 {
307 struct si_shader_context *ctx = si_shader_context(bld_base);
308 LLVMValueRef values[2] = {
309 ac_to_integer(&ctx->ac, ptr),
310 ac_to_integer(&ctx->ac, ptr2),
311 };
312 LLVMValueRef result = ac_build_gather_values(&ctx->ac, values, 2);
313 return LLVMBuildBitCast(ctx->ac.builder, result, type, "");
314 }
315
316 static LLVMValueRef
317 emit_array_fetch(struct lp_build_tgsi_context *bld_base,
318 unsigned File, enum tgsi_opcode_type type,
319 struct tgsi_declaration_range range,
320 unsigned swizzle)
321 {
322 struct si_shader_context *ctx = si_shader_context(bld_base);
323 unsigned i, size = range.Last - range.First + 1;
324 LLVMTypeRef vec = LLVMVectorType(tgsi2llvmtype(bld_base, type), size);
325 LLVMValueRef result = LLVMGetUndef(vec);
326
327 struct tgsi_full_src_register tmp_reg = {};
328 tmp_reg.Register.File = File;
329
330 for (i = 0; i < size; ++i) {
331 tmp_reg.Register.Index = i + range.First;
332 LLVMValueRef temp = si_llvm_emit_fetch(bld_base, &tmp_reg, type, swizzle);
333 result = LLVMBuildInsertElement(ctx->ac.builder, result, temp,
334 LLVMConstInt(ctx->i32, i, 0), "array_vector");
335 }
336 return result;
337 }
338
339 static LLVMValueRef
340 load_value_from_array(struct lp_build_tgsi_context *bld_base,
341 unsigned file,
342 enum tgsi_opcode_type type,
343 unsigned swizzle,
344 unsigned reg_index,
345 const struct tgsi_ind_register *reg_indirect)
346 {
347 struct si_shader_context *ctx = si_shader_context(bld_base);
348 LLVMBuilderRef builder = ctx->ac.builder;
349 LLVMValueRef ptr;
350
351 ptr = get_pointer_into_array(ctx, file, swizzle, reg_index, reg_indirect);
352 if (ptr) {
353 LLVMValueRef val = LLVMBuildLoad(builder, ptr, "");
354 if (tgsi_type_is_64bit(type)) {
355 LLVMValueRef ptr_hi, val_hi;
356 ptr_hi = LLVMBuildGEP(builder, ptr, &ctx->i32_1, 1, "");
357 val_hi = LLVMBuildLoad(builder, ptr_hi, "");
358 val = si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
359 val, val_hi);
360 }
361
362 return val;
363 } else {
364 struct tgsi_declaration_range range =
365 get_array_range(bld_base, file, reg_index, reg_indirect);
366 LLVMValueRef index =
367 si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
368 LLVMValueRef array =
369 emit_array_fetch(bld_base, file, type, range, swizzle);
370 return LLVMBuildExtractElement(builder, array, index, "");
371 }
372 }
373
/**
 * Store \p value to one channel of an indirectly-addressed register.
 *
 * Uses a direct pointer store when the destination is backed by an alloca;
 * otherwise fetches the whole live range as a vector, inserts the element
 * at the dynamic index and writes every channel of the range back.
 */
static void
store_value_to_array(struct lp_build_tgsi_context *bld_base,
		     LLVMValueRef value,
		     unsigned file,
		     unsigned chan_index,
		     unsigned reg_index,
		     const struct tgsi_ind_register *reg_indirect)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef ptr;

	ptr = get_pointer_into_array(ctx, file, chan_index, reg_index, reg_indirect);
	if (ptr) {
		LLVMBuildStore(builder, value, ptr);
	} else {
		unsigned i, size;
		struct tgsi_declaration_range range = get_array_range(bld_base, file, reg_index, reg_indirect);
		LLVMValueRef index = si_get_indirect_index(ctx, reg_indirect, 1, reg_index - range.First);
		LLVMValueRef array =
			emit_array_fetch(bld_base, file, TGSI_TYPE_FLOAT, range, chan_index);
		LLVMValueRef temp_ptr;

		/* Insert the new value at the dynamic index... */
		array = LLVMBuildInsertElement(builder, array, value, index, "");

		/* ...then write every element of the range back to its
		 * per-channel pointer. */
		size = range.Last - range.First + 1;
		for (i = 0; i < size; ++i) {
			switch(file) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[i + range.First][chan_index];
				break;

			case TGSI_FILE_TEMPORARY:
				/* Skip registers outside the allocated temps. */
				if (range.First + i >= ctx->temps_count)
					continue;
				temp_ptr = ctx->temps[(i + range.First) * TGSI_NUM_CHANNELS + chan_index];
				break;

			default:
				continue;
			}
			value = LLVMBuildExtractElement(builder, array,
							LLVMConstInt(ctx->i32, i, 0), "");
			LLVMBuildStore(builder, value, temp_ptr);
		}
	}
}
421
422 /* If this is true, preload FS inputs at the beginning of shaders. Otherwise,
423 * reload them at each use. This must be true if the shader is using
424 * derivatives and KILL, because KILL can leave the WQM and then a lazy
425 * input load isn't in the WQM anymore.
426 */
427 static bool si_preload_fs_inputs(struct si_shader_context *ctx)
428 {
429 struct si_shader_selector *sel = ctx->shader->selector;
430
431 return sel->info.uses_derivatives &&
432 sel->info.uses_kill;
433 }
434
/* Return the alloca pointer for one channel of an OUTPUT register
 * (allocated in emit_declaration). */
static LLVMValueRef
get_output_ptr(struct lp_build_tgsi_context *bld_base, unsigned index,
	       unsigned chan)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);

	assert(index <= ctx->bld_base.info->file_max[TGSI_FILE_OUTPUT]);
	return ctx->outputs[index][chan];
}
444
/**
 * Fetch one channel of a TGSI source operand (or all four channels,
 * gathered into a vector, when swizzle_in == ~0).
 *
 * \param swizzle_in  low 16 bits: channel of the low dword; high 16 bits:
 *                    channel of the high dword for 64-bit types (this file
 *                    passes two swizzles packed into one argument).
 */
LLVMValueRef si_llvm_emit_fetch(struct lp_build_tgsi_context *bld_base,
				const struct tgsi_full_src_register *reg,
				enum tgsi_opcode_type type,
				unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef result = NULL, ptr, ptr2;
	unsigned swizzle = swizzle_in & 0xffff;

	/* ~0 requests all channels: fetch each one and build a vector. */
	if (swizzle_in == ~0) {
		LLVMValueRef values[TGSI_NUM_CHANNELS];
		unsigned chan;
		for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
			values[chan] = si_llvm_emit_fetch(bld_base, reg, type, chan);
		}
		return ac_build_gather_values(&ctx->ac, values,
					      TGSI_NUM_CHANNELS);
	}

	/* Indirect addressing goes through the array machinery. */
	if (reg->Register.Indirect) {
		LLVMValueRef load = load_value_from_array(bld_base, reg->Register.File, type,
							  swizzle, reg->Register.Index, &reg->Indirect);
		return bitcast(bld_base, type, load);
	}

	switch(reg->Register.File) {
	case TGSI_FILE_IMMEDIATE: {
		LLVMTypeRef ctype = tgsi2llvmtype(bld_base, type);
		if (tgsi_type_is_64bit(type)) {
			/* Build a <2 x i32> constant from the low/high
			 * immediate channels, then bitcast to the 64-bit type. */
			result = LLVMGetUndef(LLVMVectorType(ctx->i32, 2));
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle],
							ctx->i32_0);
			result = LLVMConstInsertElement(result,
							ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)],
							ctx->i32_1);
			return LLVMConstBitCast(result, ctype);
		} else {
			return LLVMConstBitCast(ctx->imms[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle], ctype);
		}
	}

	case TGSI_FILE_INPUT: {
		unsigned index = reg->Register.Index;
		LLVMValueRef input[4];

		/* I don't think doing this for vertex shaders is beneficial.
		 * For those, we want to make sure the VMEM loads are executed
		 * only once. Fragment shaders don't care much, because
		 * v_interp instructions are much cheaper than VMEM loads.
		 */
		if (!si_preload_fs_inputs(ctx) &&
		    ctx->bld_base.info->processor == PIPE_SHADER_FRAGMENT)
			ctx->load_input(ctx, index, &ctx->input_decls[index], input);
		else
			memcpy(input, &ctx->inputs[index * 4], sizeof(input));

		result = input[swizzle];

		if (tgsi_type_is_64bit(type)) {
			ptr = result;
			ptr2 = input[swizzle_in >> 16];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							ptr, ptr2);
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
		/* Out-of-range temporaries read as undef. */
		if (reg->Register.Index >= ctx->temps_count)
			return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
		ptr = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + swizzle];
		if (tgsi_type_is_64bit(type)) {
			ptr2 = ctx->temps[reg->Register.Index * TGSI_NUM_CHANNELS + (swizzle_in >> 16)];
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	case TGSI_FILE_OUTPUT:
		ptr = get_output_ptr(bld_base, reg->Register.Index, swizzle);
		if (tgsi_type_is_64bit(type)) {
			ptr2 = get_output_ptr(bld_base, reg->Register.Index, (swizzle_in >> 16));
			return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
							LLVMBuildLoad(builder, ptr, ""),
							LLVMBuildLoad(builder, ptr2, ""));
		}
		result = LLVMBuildLoad(builder, ptr, "");
		break;

	default:
		return LLVMGetUndef(tgsi2llvmtype(bld_base, type));
	}

	return bitcast(bld_base, type, result);
}
544
/* Fetch from TGSI_FILE_SYSTEM_VALUE. System values are loaded into
 * ctx->system_values by si_load_system_value during declaration handling
 * (see emit_declaration). */
static LLVMValueRef fetch_system_value(struct lp_build_tgsi_context *bld_base,
				       const struct tgsi_full_src_register *reg,
				       enum tgsi_opcode_type type,
				       unsigned swizzle_in)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef cval = ctx->system_values[reg->Register.Index];
	unsigned swizzle = swizzle_in & 0xffff;

	if (tgsi_type_is_64bit(type)) {
		LLVMValueRef lo, hi;

		assert(swizzle == 0 || swizzle == 2);

		/* Low dword from the low swizzle, high dword from the
		 * swizzle packed into the upper 16 bits. */
		lo = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
		hi = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, (swizzle_in >> 16), 0), "");

		return si_llvm_emit_fetch_64bit(bld_base, tgsi2llvmtype(bld_base, type),
						lo, hi);
	}

	if (LLVMGetTypeKind(LLVMTypeOf(cval)) == LLVMVectorTypeKind) {
		cval = LLVMBuildExtractElement(
			builder, cval, LLVMConstInt(ctx->i32, swizzle, 0), "");
	} else {
		/* Scalar system value: only channel 0 is valid. */
		assert(swizzle == 0);
	}

	return bitcast(bld_base, type, cval);
}
578
/**
 * Handle a TGSI declaration: allocate storage (allocas) for address,
 * temporary and output registers, preload inputs and system values, and
 * declare compute memory.
 */
static void emit_declaration(struct lp_build_tgsi_context *bld_base,
			     const struct tgsi_full_declaration *decl)
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	LLVMBuilderRef builder = ctx->ac.builder;
	unsigned first, last, i;
	switch(decl->Declaration.File) {
	case TGSI_FILE_ADDRESS:
	{
		/* One i32 alloca per channel of each address register. */
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
				ctx->addrs[idx][chan] = ac_build_alloca_undef(
					&ctx->ac, ctx->i32, "");
			}
		}
		break;
	}

	case TGSI_FILE_TEMPORARY:
	{
		char name[18] = "";
		LLVMValueRef array_alloca = NULL;
		unsigned decl_size;
		unsigned writemask = decl->Declaration.UsageMask;
		first = decl->Range.First;
		last = decl->Range.Last;
		decl_size = 4 * ((last - first) + 1);

		if (decl->Declaration.Array) {
			unsigned id = decl->Array.ArrayID - 1;
			unsigned array_size;

			/* Only written channels need backing storage. */
			writemask &= ctx->temp_arrays[id].writemask;
			ctx->temp_arrays[id].writemask = writemask;
			array_size = ((last - first) + 1) * util_bitcount(writemask);

			/* If the array has more than 16 elements, store it
			 * in memory using an alloca that spans the entire
			 * array.
			 *
			 * Otherwise, store each array element individually.
			 * We will then generate vectors (per-channel, up to
			 * <16 x float> if the usagemask is a single bit) for
			 * indirect addressing.
			 *
			 * Note that 16 is the number of vector elements that
			 * LLVM will store in a register, so theoretically an
			 * array with up to 4 * 16 = 64 elements could be
			 * handled this way, but whether that's a good idea
			 * depends on VGPR register pressure elsewhere.
			 *
			 * FIXME: We shouldn't need to have the non-alloca
			 * code path for arrays. LLVM should be smart enough to
			 * promote allocas into registers when profitable.
			 */
			if (array_size > 16 ||
			    !ctx->screen->llvm_has_working_vgpr_indexing) {
				array_alloca = ac_build_alloca_undef(&ctx->ac,
					LLVMArrayType(ctx->f32,
						      array_size), "array");
				ctx->temp_array_allocas[id] = array_alloca;
			}
		}

		/* Lazily allocate the per-channel pointer table for all
		 * temporaries on the first TEMP declaration. */
		if (!ctx->temps_count) {
			ctx->temps_count = bld_base->info->file_max[TGSI_FILE_TEMPORARY] + 1;
			ctx->temps = MALLOC(TGSI_NUM_CHANNELS * ctx->temps_count * sizeof(LLVMValueRef));
		}
		if (!array_alloca) {
			/* One scalar f32 alloca per channel. */
			for (i = 0; i < decl_size; ++i) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "TEMP%d.%c",
					 first + i / 4, "xyzw"[i % 4]);
#endif
				ctx->temps[first * TGSI_NUM_CHANNELS + i] =
					ac_build_alloca_undef(&ctx->ac,
							      ctx->f32,
							      name);
			}
		} else {
			LLVMValueRef idxs[2] = {
				ctx->i32_0,
				NULL
			};
			unsigned j = 0;

			if (writemask != TGSI_WRITEMASK_XYZW &&
			    !ctx->undef_alloca) {
				/* Create a dummy alloca. We use it so that we
				 * have a pointer that is safe to load from if
				 * a shader ever reads from a channel that
				 * it never writes to.
				 */
				ctx->undef_alloca = ac_build_alloca_undef(
					&ctx->ac, ctx->f32, "undef");
			}

			/* Point written channels into the packed array
			 * alloca (j counts written slots); unwritten
			 * channels point at the dummy alloca. */
			for (i = 0; i < decl_size; ++i) {
				LLVMValueRef ptr;
				if (writemask & (1 << (i % 4))) {
#ifdef DEBUG
					snprintf(name, sizeof(name), "TEMP%d.%c",
						 first + i / 4, "xyzw"[i % 4]);
#endif
					idxs[1] = LLVMConstInt(ctx->i32, j, 0);
					ptr = LLVMBuildGEP(builder, array_alloca, idxs, 2, name);
					j++;
				} else {
					ptr = ctx->undef_alloca;
				}
				ctx->temps[first * TGSI_NUM_CHANNELS + i] = ptr;
			}
		}
		break;
	}
	case TGSI_FILE_INPUT:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			/* Record one single-register declaration per input;
			 * skip inputs already declared. */
			if (ctx->load_input &&
			    ctx->input_decls[idx].Declaration.File != TGSI_FILE_INPUT) {
				ctx->input_decls[idx] = *decl;
				ctx->input_decls[idx].Range.First = idx;
				ctx->input_decls[idx].Range.Last = idx;
				ctx->input_decls[idx].Semantic.Index += idx - decl->Range.First;

				/* Preload now unless FS inputs are reloaded
				 * lazily at each use (see si_llvm_emit_fetch). */
				if (si_preload_fs_inputs(ctx) ||
				    bld_base->info->processor != PIPE_SHADER_FRAGMENT)
					ctx->load_input(ctx, idx, &ctx->input_decls[idx],
							&ctx->inputs[idx * 4]);
			}
		}
	}
	break;

	case TGSI_FILE_SYSTEM_VALUE:
	{
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			si_load_system_value(ctx, idx, decl);
		}
	}
	break;

	case TGSI_FILE_OUTPUT:
	{
		char name[16] = "";
		unsigned idx;
		for (idx = decl->Range.First; idx <= decl->Range.Last; idx++) {
			unsigned chan;
			assert(idx < RADEON_LLVM_MAX_OUTPUTS);
			/* Skip outputs already allocated. */
			if (ctx->outputs[idx][0])
				continue;
			for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
#ifdef DEBUG
				snprintf(name, sizeof(name), "OUT%d.%c",
					 idx, "xyzw"[chan % 4]);
#endif
				ctx->outputs[idx][chan] = ac_build_alloca_undef(
					&ctx->ac, ctx->f32, name);
			}
		}
		break;
	}

	case TGSI_FILE_MEMORY:
		si_tgsi_declare_compute_memory(ctx, decl);
		break;

	default:
		break;
	}
}
754
/**
 * Store the per-channel results of an instruction to its destination
 * register, honoring the writemask, saturation and 64-bit destination
 * types.
 *
 * \param dst  one value per channel, or a single vector in dst[0]
 *             (the vector case is scalarized and re-dispatched through
 *             bld_base->emit_store).
 */
void si_llvm_emit_store(struct lp_build_tgsi_context *bld_base,
			const struct tgsi_full_instruction *inst,
			const struct tgsi_opcode_info *info,
			unsigned index,
			LLVMValueRef dst[4])
{
	struct si_shader_context *ctx = si_shader_context(bld_base);
	const struct tgsi_full_dst_register *reg = &inst->Dst[index];
	LLVMBuilderRef builder = ctx->ac.builder;
	LLVMValueRef temp_ptr, temp_ptr2 = NULL;
	bool is_vec_store = false;
	enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);

	if (dst[0]) {
		LLVMTypeKind k = LLVMGetTypeKind(LLVMTypeOf(dst[0]));
		is_vec_store = (k == LLVMVectorTypeKind);
	}

	if (is_vec_store) {
		/* Split the vector into scalar channels and recurse. */
		LLVMValueRef values[4] = {};
		uint32_t writemask = reg->Register.WriteMask;
		while (writemask) {
			unsigned chan = u_bit_scan(&writemask);
			LLVMValueRef index = LLVMConstInt(ctx->i32, chan, 0);
			values[chan] = LLVMBuildExtractElement(ctx->ac.builder,
							       dst[0], index, "");
		}
		bld_base->emit_store(bld_base, inst, info, index, values);
		return;
	}

	uint32_t writemask = reg->Register.WriteMask;
	while (writemask) {
		unsigned chan_index = u_bit_scan(&writemask);
		LLVMValueRef value = dst[chan_index];

		/* 64-bit values occupy channel pairs; the odd channel is
		 * written together with the preceding even channel. */
		if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
			continue;
		if (inst->Instruction.Saturate)
			value = ac_build_clamp(&ctx->ac, value);

		if (reg->Register.File == TGSI_FILE_ADDRESS) {
			temp_ptr = ctx->addrs[reg->Register.Index][chan_index];
			LLVMBuildStore(builder, value, temp_ptr);
			continue;
		}

		if (!tgsi_type_is_64bit(dtype))
			value = ac_to_float(&ctx->ac, value);

		if (reg->Register.Indirect) {
			unsigned file = reg->Register.File;
			unsigned reg_index = reg->Register.Index;
			store_value_to_array(bld_base, value, file, chan_index,
					     reg_index, &reg->Indirect);
		} else {
			switch(reg->Register.File) {
			case TGSI_FILE_OUTPUT:
				temp_ptr = ctx->outputs[reg->Register.Index][chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->outputs[reg->Register.Index][chan_index + 1];
				break;

			case TGSI_FILE_TEMPORARY:
			{
				/* Silently drop writes outside the allocated temps. */
				if (reg->Register.Index >= ctx->temps_count)
					continue;

				temp_ptr = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index];
				if (tgsi_type_is_64bit(dtype))
					temp_ptr2 = ctx->temps[ TGSI_NUM_CHANNELS * reg->Register.Index + chan_index + 1];

				break;
			}
			default:
				return;
			}
			if (!tgsi_type_is_64bit(dtype))
				LLVMBuildStore(builder, value, temp_ptr);
			else {
				/* Split the 64-bit value into two dwords and
				 * store them to the two consecutive channels. */
				LLVMValueRef ptr = LLVMBuildBitCast(builder, value,
								    LLVMVectorType(ctx->i32, 2), "");
				LLVMValueRef val2;
				value = LLVMBuildExtractElement(builder, ptr,
								ctx->i32_0, "");
				val2 = LLVMBuildExtractElement(builder, ptr,
							       ctx->i32_1, "");

				LLVMBuildStore(builder, ac_to_float(&ctx->ac, value), temp_ptr);
				LLVMBuildStore(builder, ac_to_float(&ctx->ac, val2), temp_ptr2);
			}
		}
	}
}
849
/* Map the TGSI program counter to a display line number.  Subtract 1 so
 * that the number shown is that of the corresponding opcode in the TGSI
 * dump, e.g. an if block has the same suffix as the instruction number
 * of the corresponding TGSI IF. */
static int get_line(int pc)
{
	return pc - 1;
}
858
859 static void bgnloop_emit(const struct lp_build_tgsi_action *action,
860 struct lp_build_tgsi_context *bld_base,
861 struct lp_build_emit_data *emit_data)
862 {
863 struct si_shader_context *ctx = si_shader_context(bld_base);
864 ac_build_bgnloop(&ctx->ac, get_line(bld_base->pc));
865 }
866
867 static void brk_emit(const struct lp_build_tgsi_action *action,
868 struct lp_build_tgsi_context *bld_base,
869 struct lp_build_emit_data *emit_data)
870 {
871 struct si_shader_context *ctx = si_shader_context(bld_base);
872 ac_build_break(&ctx->ac);
873 }
874
875 static void cont_emit(const struct lp_build_tgsi_action *action,
876 struct lp_build_tgsi_context *bld_base,
877 struct lp_build_emit_data *emit_data)
878 {
879 struct si_shader_context *ctx = si_shader_context(bld_base);
880 ac_build_continue(&ctx->ac);
881 }
882
883 static void else_emit(const struct lp_build_tgsi_action *action,
884 struct lp_build_tgsi_context *bld_base,
885 struct lp_build_emit_data *emit_data)
886 {
887 struct si_shader_context *ctx = si_shader_context(bld_base);
888 ac_build_else(&ctx->ac, get_line(bld_base->pc));
889 }
890
891 static void endif_emit(const struct lp_build_tgsi_action *action,
892 struct lp_build_tgsi_context *bld_base,
893 struct lp_build_emit_data *emit_data)
894 {
895 struct si_shader_context *ctx = si_shader_context(bld_base);
896 ac_build_endif(&ctx->ac, get_line(bld_base->pc));
897 }
898
899 static void endloop_emit(const struct lp_build_tgsi_action *action,
900 struct lp_build_tgsi_context *bld_base,
901 struct lp_build_emit_data *emit_data)
902 {
903 struct si_shader_context *ctx = si_shader_context(bld_base);
904 ac_build_endloop(&ctx->ac, get_line(bld_base->pc));
905 }
906
907 static void if_emit(const struct lp_build_tgsi_action *action,
908 struct lp_build_tgsi_context *bld_base,
909 struct lp_build_emit_data *emit_data)
910 {
911 struct si_shader_context *ctx = si_shader_context(bld_base);
912 ac_build_if(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
913 }
914
915 static void uif_emit(const struct lp_build_tgsi_action *action,
916 struct lp_build_tgsi_context *bld_base,
917 struct lp_build_emit_data *emit_data)
918 {
919 struct si_shader_context *ctx = si_shader_context(bld_base);
920 ac_build_uif(&ctx->ac, emit_data->args[0], get_line(bld_base->pc));
921 }
922
923 static void emit_immediate(struct lp_build_tgsi_context *bld_base,
924 const struct tgsi_full_immediate *imm)
925 {
926 unsigned i;
927 struct si_shader_context *ctx = si_shader_context(bld_base);
928
929 for (i = 0; i < 4; ++i) {
930 ctx->imms[ctx->imms_num * TGSI_NUM_CHANNELS + i] =
931 LLVMConstInt(ctx->i32, imm->u[i].Uint, false );
932 }
933
934 ctx->imms_num++;
935 }
936
/**
 * Initialize the shader context: the ac/LLVM context, module and builder,
 * the gallivm wrapper, the TGSI emit hooks and commonly used LLVM types
 * and constants.
 */
void si_llvm_context_init(struct si_shader_context *ctx,
			  struct si_screen *sscreen,
			  struct ac_llvm_compiler *compiler)
{
	struct lp_type type;

	/* Initialize the gallivm object:
	 * We are only using the module, context, and builder fields of this struct.
	 * This should be enough for us to be able to pass our gallivm struct to the
	 * helper functions in the gallivm module.
	 */
	memset(ctx, 0, sizeof(*ctx));
	ctx->screen = sscreen;
	ctx->compiler = compiler;

	ac_llvm_context_init(&ctx->ac, sscreen->info.chip_class, sscreen->info.family);
	ctx->ac.module = ac_create_module(compiler->tm, ctx->ac.context);

	/* Pick the float mode from the debug flags. */
	enum ac_float_mode float_mode =
		sscreen->debug_flags & DBG(UNSAFE_MATH) ?
			AC_FLOAT_MODE_UNSAFE_FP_MATH :
			AC_FLOAT_MODE_NO_SIGNED_ZEROS_FP_MATH;
	ctx->ac.builder = ac_create_builder(ctx->ac.context, float_mode);

	ctx->gallivm.context = ctx->ac.context;
	ctx->gallivm.module = ctx->ac.module;
	ctx->gallivm.builder = ctx->ac.builder;

	struct lp_build_tgsi_context *bld_base = &ctx->bld_base;

	/* Scalar 32-bit float base type for the lp_build contexts. */
	type.floating = true;
	type.fixed = false;
	type.sign = true;
	type.norm = false;
	type.width = 32;
	type.length = 1;

	lp_build_context_init(&bld_base->base, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int_bld, &ctx->gallivm, lp_int_type(type));
	/* 64-bit variants of the same contexts. */
	type.width *= 2;
	lp_build_context_init(&ctx->bld_base.dbl_bld, &ctx->gallivm, type);
	lp_build_context_init(&ctx->bld_base.uint64_bld, &ctx->gallivm, lp_uint_type(type));
	lp_build_context_init(&ctx->bld_base.int64_bld, &ctx->gallivm, lp_int_type(type));

	/* Hook up the TGSI->LLVM translation callbacks defined in this file. */
	bld_base->soa = 1;
	bld_base->emit_swizzle = emit_swizzle;
	bld_base->emit_declaration = emit_declaration;
	bld_base->emit_immediate = emit_immediate;

	bld_base->op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
	bld_base->op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
	bld_base->op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
	bld_base->op_actions[TGSI_OPCODE_IF].emit = if_emit;
	bld_base->op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
	bld_base->op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
	bld_base->op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;

	si_shader_context_init_alu(&ctx->bld_base);
	si_shader_context_init_mem(ctx);

	/* Frequently used LLVM types and constants. */
	ctx->voidt = LLVMVoidTypeInContext(ctx->ac.context);
	ctx->i1 = LLVMInt1TypeInContext(ctx->ac.context);
	ctx->i8 = LLVMInt8TypeInContext(ctx->ac.context);
	ctx->i32 = LLVMInt32TypeInContext(ctx->ac.context);
	ctx->i64 = LLVMInt64TypeInContext(ctx->ac.context);
	ctx->i128 = LLVMIntTypeInContext(ctx->ac.context, 128);
	ctx->f32 = LLVMFloatTypeInContext(ctx->ac.context);
	ctx->v2i32 = LLVMVectorType(ctx->i32, 2);
	ctx->v4i32 = LLVMVectorType(ctx->i32, 4);
	ctx->v4f32 = LLVMVectorType(ctx->f32, 4);
	ctx->v8i32 = LLVMVectorType(ctx->i32, 8);

	ctx->i32_0 = LLVMConstInt(ctx->i32, 0, 0);
	ctx->i32_1 = LLVMConstInt(ctx->i32, 1, 0);
	ctx->i1false = LLVMConstInt(ctx->i1, 0, 0);
	ctx->i1true = LLVMConstInt(ctx->i1, 1, 0);
}
1016
1017 /* Set the context to a certain TGSI shader. Can be called repeatedly
1018 * to change the shader. */
1019 void si_llvm_context_set_tgsi(struct si_shader_context *ctx,
1020 struct si_shader *shader)
1021 {
1022 const struct tgsi_shader_info *info = NULL;
1023 const struct tgsi_token *tokens = NULL;
1024
1025 if (shader && shader->selector) {
1026 info = &shader->selector->info;
1027 tokens = shader->selector->tokens;
1028 }
1029
1030 ctx->shader = shader;
1031 ctx->type = info ? info->processor : -1;
1032 ctx->bld_base.info = info;
1033
1034 /* Clean up the old contents. */
1035 FREE(ctx->temp_arrays);
1036 ctx->temp_arrays = NULL;
1037 FREE(ctx->temp_array_allocas);
1038 ctx->temp_array_allocas = NULL;
1039
1040 FREE(ctx->imms);
1041 ctx->imms = NULL;
1042 ctx->imms_num = 0;
1043
1044 FREE(ctx->temps);
1045 ctx->temps = NULL;
1046 ctx->temps_count = 0;
1047
1048 if (!info)
1049 return;
1050
1051 ctx->num_const_buffers = util_last_bit(info->const_buffers_declared);
1052 ctx->num_shader_buffers = util_last_bit(info->shader_buffers_declared);
1053
1054 ctx->num_samplers = util_last_bit(info->samplers_declared);
1055 ctx->num_images = util_last_bit(info->images_declared);
1056
1057 if (!tokens)
1058 return;
1059
1060 if (info->array_max[TGSI_FILE_TEMPORARY] > 0) {
1061 int size = info->array_max[TGSI_FILE_TEMPORARY];
1062
1063 ctx->temp_arrays = CALLOC(size, sizeof(ctx->temp_arrays[0]));
1064 ctx->temp_array_allocas = CALLOC(size, sizeof(ctx->temp_array_allocas[0]));
1065
1066 tgsi_scan_arrays(tokens, TGSI_FILE_TEMPORARY, size,
1067 ctx->temp_arrays);
1068 }
1069 if (info->file_max[TGSI_FILE_IMMEDIATE] >= 0) {
1070 int size = info->file_max[TGSI_FILE_IMMEDIATE] + 1;
1071 ctx->imms = MALLOC(size * TGSI_NUM_CHANNELS * sizeof(LLVMValueRef));
1072 }
1073
1074 /* Re-set these to start with a clean slate. */
1075 ctx->bld_base.num_instructions = 0;
1076 ctx->bld_base.pc = 0;
1077 memset(ctx->outputs, 0, sizeof(ctx->outputs));
1078
1079 ctx->bld_base.emit_store = si_llvm_emit_store;
1080 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = si_llvm_emit_fetch;
1081 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = si_llvm_emit_fetch;
1082 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = si_llvm_emit_fetch;
1083 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = si_llvm_emit_fetch;
1084 ctx->bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = fetch_system_value;
1085 }
1086
1087 void si_llvm_create_func(struct si_shader_context *ctx,
1088 const char *name,
1089 LLVMTypeRef *return_types, unsigned num_return_elems,
1090 LLVMTypeRef *ParamTypes, unsigned ParamCount)
1091 {
1092 LLVMTypeRef main_fn_type, ret_type;
1093 LLVMBasicBlockRef main_fn_body;
1094 enum si_llvm_calling_convention call_conv;
1095 unsigned real_shader_type;
1096
1097 if (num_return_elems)
1098 ret_type = LLVMStructTypeInContext(ctx->ac.context,
1099 return_types,
1100 num_return_elems, true);
1101 else
1102 ret_type = ctx->voidt;
1103
1104 /* Setup the function */
1105 ctx->return_type = ret_type;
1106 main_fn_type = LLVMFunctionType(ret_type, ParamTypes, ParamCount, 0);
1107 ctx->main_fn = LLVMAddFunction(ctx->gallivm.module, name, main_fn_type);
1108 main_fn_body = LLVMAppendBasicBlockInContext(ctx->ac.context,
1109 ctx->main_fn, "main_body");
1110 LLVMPositionBuilderAtEnd(ctx->ac.builder, main_fn_body);
1111
1112 real_shader_type = ctx->type;
1113
1114 /* LS is merged into HS (TCS), and ES is merged into GS. */
1115 if (ctx->screen->info.chip_class >= GFX9) {
1116 if (ctx->shader->key.as_ls)
1117 real_shader_type = PIPE_SHADER_TESS_CTRL;
1118 else if (ctx->shader->key.as_es)
1119 real_shader_type = PIPE_SHADER_GEOMETRY;
1120 }
1121
1122 switch (real_shader_type) {
1123 case PIPE_SHADER_VERTEX:
1124 case PIPE_SHADER_TESS_EVAL:
1125 call_conv = RADEON_LLVM_AMDGPU_VS;
1126 break;
1127 case PIPE_SHADER_TESS_CTRL:
1128 call_conv = RADEON_LLVM_AMDGPU_HS;
1129 break;
1130 case PIPE_SHADER_GEOMETRY:
1131 call_conv = RADEON_LLVM_AMDGPU_GS;
1132 break;
1133 case PIPE_SHADER_FRAGMENT:
1134 call_conv = RADEON_LLVM_AMDGPU_PS;
1135 break;
1136 case PIPE_SHADER_COMPUTE:
1137 call_conv = RADEON_LLVM_AMDGPU_CS;
1138 break;
1139 default:
1140 unreachable("Unhandle shader type");
1141 }
1142
1143 LLVMSetFunctionCallConv(ctx->main_fn, call_conv);
1144 }
1145
1146 void si_llvm_optimize_module(struct si_shader_context *ctx)
1147 {
1148 /* Dump LLVM IR before any optimization passes */
1149 if (ctx->screen->debug_flags & DBG(PREOPT_IR) &&
1150 si_can_dump_shader(ctx->screen, ctx->type))
1151 LLVMDumpModule(ctx->gallivm.module);
1152
1153 /* Run the pass */
1154 LLVMRunPassManager(ctx->compiler->passmgr, ctx->gallivm.module);
1155 LLVMDisposeBuilder(ctx->ac.builder);
1156 }
1157
1158 void si_llvm_dispose(struct si_shader_context *ctx)
1159 {
1160 LLVMDisposeModule(ctx->gallivm.module);
1161 LLVMContextDispose(ctx->gallivm.context);
1162 FREE(ctx->temp_arrays);
1163 ctx->temp_arrays = NULL;
1164 FREE(ctx->temp_array_allocas);
1165 ctx->temp_array_allocas = NULL;
1166 FREE(ctx->temps);
1167 ctx->temps = NULL;
1168 ctx->temps_count = 0;
1169 FREE(ctx->imms);
1170 ctx->imms = NULL;
1171 ctx->imms_num = 0;
1172 ac_llvm_context_dispose(&ctx->ac);
1173 }